[Bugfix] fix bf16 multimodal model hash (#23623)

Signed-off-by: Yuekai Zhang <zhangyuekai@foxmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Roger Wang <hey@rogerw.io>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
Yuekai Zhang 2025-08-26 23:47:50 +08:00 committed by GitHub
parent 379f828fba
commit 513298f1b4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -43,7 +43,19 @@ class MultiModalHasher:
return cls.item_to_bytes(
"image", np.asarray(convert_image_mode(obj, "RGBA")))
if isinstance(obj, torch.Tensor):
return cls.item_to_bytes("tensor", obj.cpu().numpy())
tensor_obj: torch.Tensor = obj.cpu()
tensor_dtype = tensor_obj.dtype
if tensor_dtype == torch.bfloat16:
tensor_obj = tensor_obj.contiguous()
tensor_obj = tensor_obj.view(
(tensor_obj.numel(), )).view(torch.uint8)
return cls.item_to_bytes(
"tensor", {
"original_dtype": str(tensor_dtype),
"original_shape": tuple(tensor_obj.shape),
"data": tensor_obj.numpy()
})
return cls.item_to_bytes("tensor", tensor_obj.numpy())
if isinstance(obj, np.ndarray):
# If the array is non-contiguous, we need to copy it first
arr_data = obj.data if obj.flags.c_contiguous else obj.tobytes()