mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 12:05:57 +08:00
[Misc] Add assertion and helpful message for marlin24 compressed models (#11388)
This commit is contained in:
parent
2e726680b3
commit
b866cdbd05
@@ -61,6 +61,10 @@ class CompressedTensorsW4A16Sparse24(CompressedTensorsScheme):
|
||||
params_dtype: torch.dtype, weight_loader: Callable,
|
||||
**kwargs):
|
||||
|
||||
assert params_dtype == torch.float16, (
|
||||
"float16 is required for marlin24 compressd models. Set dtype=torch.float16" # noqa: E501
|
||||
)
|
||||
|
||||
pack_factor = 32 // self.quant_type.size_bits
|
||||
output_size_per_partition = sum(output_partition_sizes)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user