mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 06:35:00 +08:00
[BugFix] Fix failing test quantization/test_compressed_tensors.py::test_compressed_tensors_fp8_block_enabled (#26436)
Signed-off-by: morrison-turnansky <mturnans@redhat.com>
This commit is contained in:
parent
b82f4307c9
commit
e1ba235668
@ -597,7 +597,7 @@ class VllmConfig:
|
|||||||
# https://github.com/vllm-project/vllm/issues/25094
|
# https://github.com/vllm-project/vllm/issues/25094
|
||||||
if has_blocked_weights():
|
if has_blocked_weights():
|
||||||
custom_ops = self.compilation_config.custom_ops
|
custom_ops = self.compilation_config.custom_ops
|
||||||
if "none" not in custom_ops and "-quant_fp8" not in custom_ops:
|
if "-quant_fp8" not in custom_ops:
|
||||||
custom_ops.append("+quant_fp8")
|
custom_ops.append("+quant_fp8")
|
||||||
|
|
||||||
def update_sizes_for_sequence_parallelism(self, possible_sizes: list) -> list:
|
def update_sizes_for_sequence_parallelism(self, possible_sizes: list) -> list:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user