mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-19 03:27:07 +08:00
Merge ef89079712a5dcd53f1d50a89529aea1ac1d9a13 into 254f6b986720c92ddf97fbb1a6a6465da8e87e29
This commit is contained in:
commit
ff84ebd669
@ -38,3 +38,42 @@ def test_encode_and_decode(embed_dtype: str, endianness: str):
|
||||
name_1="new",
|
||||
tol=1e-2,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("embed_dtype", EMBED_DTYPE_TO_TORCH_DTYPE.keys())
@torch.inference_mode()
def test_binary2tensor_no_warning(embed_dtype: str):
    """Test that binary2tensor does not emit UserWarning about non-writable buffers.

    This addresses issue #26781 where torch.frombuffer on non-writable bytes
    would emit: "UserWarning: The given buffer is not writable..."
    """
    import warnings

    tensor = torch.rand(10, 20, device="cpu", dtype=torch.float32)
    binary = tensor2binary(tensor, embed_dtype, "native")

    # Capture every warning raised during the decode so we can inspect them.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        result = binary2tensor(binary, tensor.shape, embed_dtype, "native")

    # Keep only the specific UserWarning about non-writable buffers; other
    # unrelated warnings (e.g. dtype deprecations) must not fail this test.
    buffer_warnings = [
        warning
        for warning in caught
        if issubclass(warning.category, UserWarning)
        and "not writable" in str(warning.message)
    ]
    assert not buffer_warnings, (
        f"Expected no warnings about non-writable buffers, got: {buffer_warnings}"
    )

    # Verify the round-trip is still numerically correct; tolerance widens
    # with the precision loss of the target dtype.
    result_float = result.to(torch.float32)
    if embed_dtype in ("float32", "float16"):
        torch.testing.assert_close(tensor, result_float, atol=0.001, rtol=0.001)
    elif embed_dtype == "bfloat16":
        torch.testing.assert_close(tensor, result_float, atol=0.01, rtol=0.01)
    else:  # fp8 variants: coarsest representation, loosest tolerance
        torch.testing.assert_close(tensor, result_float, atol=0.1, rtol=0.1)
|
||||
|
||||
@ -66,7 +66,7 @@ logger = init_logger(__name__)
|
||||
temp_dir = tempfile.gettempdir()
|
||||
|
||||
|
||||
def enable_hf_transfer():
|
||||
def enable_hf_transfer() -> None:
|
||||
"""automatically activates hf_transfer"""
|
||||
if "HF_HUB_ENABLE_HF_TRANSFER" not in os.environ:
|
||||
try:
|
||||
@ -87,7 +87,9 @@ class DisabledTqdm(tqdm):
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
def get_lock(model_name_or_path: str | Path, cache_dir: str | None = None):
|
||||
def get_lock(
|
||||
model_name_or_path: str | Path, cache_dir: str | None = None
|
||||
) -> filelock.FileLock:
|
||||
lock_dir = cache_dir or temp_dir
|
||||
model_name_or_path = str(model_name_or_path)
|
||||
os.makedirs(os.path.dirname(lock_dir), exist_ok=True)
|
||||
@ -178,11 +180,11 @@ def maybe_download_from_modelscope(
|
||||
return None
|
||||
|
||||
|
||||
def _shared_pointers(tensors):
|
||||
ptrs = defaultdict(list)
|
||||
def _shared_pointers(tensors: dict[str, torch.Tensor]) -> list[list[str]]:
|
||||
ptrs: dict[int, list[str]] = defaultdict(list)
|
||||
for k, v in tensors.items():
|
||||
ptrs[v.data_ptr()].append(k)
|
||||
failing = []
|
||||
failing: list[list[str]] = []
|
||||
for _, names in ptrs.items():
|
||||
if len(names) > 1:
|
||||
failing.append(names)
|
||||
@ -602,7 +604,7 @@ def filter_files_not_needed_for_inference(hf_weights_files: list[str]) -> list[s
|
||||
_BAR_FORMAT = "{desc}: {percentage:3.0f}% Completed | {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]\n" # noqa: E501
|
||||
|
||||
|
||||
def enable_tqdm(use_tqdm_on_load: bool) -> bool:
    """Decide whether tqdm progress output should be shown.

    Returns True only when progress bars were requested and this process is
    either not part of an initialized distributed group or is rank 0, so at
    most one process prints progress.
    """
    if not use_tqdm_on_load:
        return False
    if not torch.distributed.is_initialized():
        return True
    return torch.distributed.get_rank() == 0
|
||||
|
||||
@ -19,7 +19,7 @@ def set_random_seed(seed: int | None) -> None:
|
||||
def set_weight_attrs(
|
||||
weight: torch.Tensor,
|
||||
weight_attrs: dict[str, Any] | None,
|
||||
):
|
||||
) -> None:
|
||||
"""Set attributes on a weight tensor.
|
||||
|
||||
This method is used to set attributes on a weight tensor. This method
|
||||
@ -50,7 +50,9 @@ def set_weight_attrs(
|
||||
setattr(weight, key, value)
|
||||
|
||||
|
||||
def replace_parameter(layer: torch.nn.Module, param_name: str, new_data: torch.Tensor):
|
||||
def replace_parameter(
|
||||
layer: torch.nn.Module, param_name: str, new_data: torch.Tensor
|
||||
) -> None:
|
||||
"""
|
||||
Replace a parameter of a layer while maintaining the ability to reload the weight.
|
||||
Called within implementations of the `process_weights_after_loading` method.
|
||||
|
||||
@ -107,7 +107,10 @@ def binary2tensor(
|
||||
torch_dtype = EMBED_DTYPE_TO_TORCH_DTYPE[embed_dtype]
|
||||
np_dtype = EMBED_DTYPE_TO_NUMPY_DTYPE_VIEW[embed_dtype]
|
||||
|
||||
np_array = np.frombuffer(binary, dtype=np_dtype).reshape(shape)
|
||||
# Use bytearray to create a mutable copy of the binary data.
|
||||
# This ensures np.frombuffer returns a writable array, avoiding
|
||||
# UserWarning from torch.from_numpy on read-only arrays.
|
||||
np_array = np.frombuffer(bytearray(binary), dtype=np_dtype).reshape(shape)
|
||||
|
||||
if endianness != "native" and endianness != sys_byteorder:
|
||||
np_array = np_array.byteswap()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user