From 21802c4b6d96ba1b6a85913c01a3784b55a12248 Mon Sep 17 00:00:00 2001
From: Michael Goin
Date: Mon, 7 Apr 2025 19:28:14 -0600
Subject: [PATCH] [ROCm][Bugfix][FP8] Make fp8 quant respect fused modules
 mapping (#16031)

Signed-off-by: mgoin
---
 vllm/model_executor/layers/quantization/fp8.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py
index 4435644c4f84e..512d64496bd49 100644
--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -116,7 +116,9 @@ class Fp8Config(QuantizationConfig):
         from vllm.attention.layer import Attention  # Avoid circular import
         if isinstance(layer, LinearBase):
-            if is_layer_skipped(prefix, self.ignored_layers):
+            if is_layer_skipped(prefix=prefix,
+                                ignored_layers=self.ignored_layers,
+                                fused_mapping=self.packed_modules_mapping):
                 return UnquantizedLinearMethod()
             return Fp8LinearMethod(self)
         elif isinstance(layer, FusedMoE):
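
To illustrate the bug this patch fixes: checkpoints typically list ignored (unquantized) layers by their unfused names (e.g. "...self_attn.q_proj"), while vLLM fuses q/k/v into a single "qkv_proj" module, so a plain membership check on the fused prefix never matches. Below is a minimal self-contained sketch of the idea, not vLLM's actual is_layer_skipped implementation; FUSED_MAPPING and is_layer_skipped_sketch are illustrative names standing in for the real packed_modules_mapping and utility function.

    # Illustrative stand-in for a model's packed_modules_mapping:
    # fused module name -> the unfused shard names it combines.
    FUSED_MAPPING = {
        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
        "gate_up_proj": ["gate_proj", "up_proj"],
    }

    def is_layer_skipped_sketch(prefix: str,
                                ignored_layers: list[str],
                                fused_mapping: dict[str, list[str]]) -> bool:
        proj_name = prefix.split(".")[-1]
        if proj_name in fused_mapping:
            # Expand the fused prefix back into its unfused shard prefixes,
            # which is how the checkpoint's ignore list names them.
            shard_prefixes = [
                prefix.replace(proj_name, shard)
                for shard in fused_mapping[proj_name]
            ]
            skipped = [p in ignored_layers for p in shard_prefixes]
            # A fused module cannot be partially quantized: either every
            # shard is ignored or none of them are.
            assert all(skipped) == any(skipped), (
                "All shards of a fused layer must share quantization status")
            return all(skipped)
        # Unfused layers fall back to a direct membership check.
        return prefix in ignored_layers

With this sketch, a fused prefix matches when all of its shards are in the ignore list, whereas the pre-patch two-argument call would have returned False:

    ignored = ["model.layers.0.self_attn.q_proj",
               "model.layers.0.self_attn.k_proj",
               "model.layers.0.self_attn.v_proj"]
    is_layer_skipped_sketch("model.layers.0.self_attn.qkv_proj",
                            ignored, FUSED_MAPPING)  # -> True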