mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-19 07:57:08 +08:00
Use bytearray to create a mutable copy of the binary data before passing it to np.frombuffer. This ensures the numpy array is writable, avoiding the UserWarning from torch.from_numpy on read-only arrays. The warning occurred because base64.b64decode returns immutable bytes, and np.frombuffer on immutable bytes returns a read-only array. When converted to a torch tensor via torch.from_numpy, PyTorch would emit: "UserWarning: The given buffer is not writable..." This fix maintains the efficient numpy-based conversion while remaining compatible with all embed_dtype formats (float32, float16, bfloat16, fp8_e4m3, fp8_e5m2), including those that numpy doesn't natively support. Fixes #26781 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> Signed-off-by: yurekami <yurekami@users.noreply.github.com>
80 lines
2.9 KiB
Python
80 lines
2.9 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
import pytest
|
|
import torch
|
|
|
|
from tests.models.utils import check_embeddings_close
|
|
from vllm.utils.serial_utils import (
|
|
EMBED_DTYPE_TO_TORCH_DTYPE,
|
|
ENDIANNESS,
|
|
binary2tensor,
|
|
tensor2binary,
|
|
)
|
|
|
|
|
|
@pytest.mark.parametrize("endianness", ENDIANNESS)
@pytest.mark.parametrize("embed_dtype", EMBED_DTYPE_TO_TORCH_DTYPE.keys())
@torch.inference_mode()
def test_encode_and_decode(embed_dtype: str, endianness: str):
    """Round-trip random tensors through tensor2binary and binary2tensor.

    For every (embed_dtype, endianness) combination, ten random float32
    tensors are encoded, decoded with the same settings, cast back to
    float32 and compared against the input.
    """
    # Tolerance is used for both atol and rtol; any dtype that is not
    # listed here falls through to the fp8 tolerance.
    tolerance_by_dtype = {"float32": 0.001, "float16": 0.001, "bfloat16": 0.01}
    tolerance = tolerance_by_dtype.get(embed_dtype, 0.1)

    for _ in range(10):
        original = torch.rand(2, 3, 5, 7, 11, 13, device="cpu", dtype=torch.float32)
        shape = original.shape

        encoded = tensor2binary(original, embed_dtype, endianness)
        decoded = binary2tensor(encoded, shape, embed_dtype, endianness)
        restored = decoded.to(torch.float32)

        # Element-wise comparison with the dtype-specific tolerance.
        torch.testing.assert_close(original, restored, atol=tolerance, rtol=tolerance)

        # Additional check on the flattened tensors, treated as one embedding.
        check_embeddings_close(
            embeddings_0_lst=original.view(1, -1),
            embeddings_1_lst=restored.view(1, -1),
            name_0="gt",
            name_1="new",
            tol=1e-2,
        )
|
|
|
|
|
|
@pytest.mark.parametrize("embed_dtype", EMBED_DTYPE_TO_TORCH_DTYPE.keys())
@torch.inference_mode()
def test_binary2tensor_no_warning(embed_dtype: str):
    """Check that binary2tensor emits no "not writable" UserWarning.

    Regression test for issue #26781, where decoding a binary payload
    produced: "UserWarning: The given buffer is not writable..."
    The decoded values are also compared against the input tensor.
    """
    import warnings

    # Tolerance is used for both atol and rtol; any dtype that is not
    # listed here falls through to the fp8 tolerance.
    tolerance_by_dtype = {"float32": 0.001, "float16": 0.001, "bfloat16": 0.01}
    tolerance = tolerance_by_dtype.get(embed_dtype, 0.1)

    tensor = torch.rand(10, 20, device="cpu", dtype=torch.float32)
    binary = tensor2binary(tensor, embed_dtype, "native")

    # Record every warning raised while decoding, bypassing default filters.
    # NOTE(review): if PyTorch emits this warning only once per process, an
    # earlier decode in the same session could mask a regression here - confirm.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        result = binary2tensor(binary, tensor.shape, embed_dtype, "native")

    # Keep only UserWarnings that mention a non-writable buffer.
    buffer_warnings = [
        entry
        for entry in caught
        if issubclass(entry.category, UserWarning)
        and "not writable" in str(entry.message)
    ]
    assert not buffer_warnings, (
        f"Expected no warnings about non-writable buffers, got: {buffer_warnings}"
    )

    # The decoded tensor must still match the input within tolerance.
    result_float = result.to(torch.float32)
    torch.testing.assert_close(tensor, result_float, atol=tolerance, rtol=tolerance)
|