mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-20 05:55:01 +08:00
[Misc] Add assertion and helpful message for marlin24 compressed models (#11388)
This commit is contained in:
parent
2e726680b3
commit
b866cdbd05
@ -61,6 +61,10 @@ class CompressedTensorsW4A16Sparse24(CompressedTensorsScheme):
|
|||||||
params_dtype: torch.dtype, weight_loader: Callable,
|
params_dtype: torch.dtype, weight_loader: Callable,
|
||||||
**kwargs):
|
**kwargs):
|
||||||
|
|
||||||
|
assert params_dtype == torch.float16, (
|
||||||
|
"float16 is required for marlin24 compressd models. Set dtype=torch.float16" # noqa: E501
|
||||||
|
)
|
||||||
|
|
||||||
pack_factor = 32 // self.quant_type.size_bits
|
pack_factor = 32 // self.quant_type.size_bits
|
||||||
output_size_per_partition = sum(output_partition_sizes)
|
output_size_per_partition = sum(output_partition_sizes)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user