[BugFix] Fix failing test quantization/test_compressed_tensors.py::test_compressed_tensors_fp8_block_enabled (#26436)

Signed-off-by: morrison-turnansky <mturnans@redhat.com>
This commit is contained in:
Morrison Turnansky 2025-10-08 16:04:12 -04:00 committed by GitHub
parent b82f4307c9
commit e1ba235668
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -597,7 +597,7 @@ class VllmConfig:
             # https://github.com/vllm-project/vllm/issues/25094
             if has_blocked_weights():
                 custom_ops = self.compilation_config.custom_ops
-                if "none" not in custom_ops and "-quant_fp8" not in custom_ops:
+                if "-quant_fp8" not in custom_ops:
                     custom_ops.append("+quant_fp8")

     def update_sizes_for_sequence_parallelism(self, possible_sizes: list) -> list: