From a258ad8bcc0014c04d11a9bc8c6591b379c31b68 Mon Sep 17 00:00:00 2001
From: Jinzhen Lin
Date: Sun, 17 Aug 2025 08:41:23 +0800
Subject: [PATCH] [Bugfix] fix qwen3 moe fp8 accuracy issue (#23031)

Signed-off-by: Jinzhen Lin
---
 vllm/model_executor/layers/quantization/fp8.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py
index a49744913251..f07be0855492 100644
--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -125,6 +125,10 @@ class Fp8Config(QuantizationConfig):
         ignored_layers = cls.get_from_keys_or(config, ["ignored_layers"], None)
         weight_block_size = cls.get_from_keys_or(config, ["weight_block_size"],
                                                  None)
+        if not ignored_layers:
+            ignored_layers = cls.get_from_keys_or(config,
+                                                  ["modules_to_not_convert"],
+                                                  None)
         return cls(is_checkpoint_fp8_serialized=is_checkpoint_fp8_serialized,
                    activation_scheme=activation_scheme,
                    ignored_layers=ignored_layers,
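
Note (not part of the patch): the hunk above makes Fp8Config.from_config fall back to the checkpoint's
"modules_to_not_convert" list when no "ignored_layers" key is set, so layers the checkpoint declares as
unquantized are excluded from the FP8 path. Below is a minimal sketch of that fallback in isolation,
assuming a Hugging Face-style quantization config dict; get_from_keys_or is a simplified stand-in for
vLLM's QuantizationConfig.get_from_keys_or, and the config keys/values shown are hypothetical examples.

    from typing import Any, Optional


    def get_from_keys_or(config: dict[str, Any], keys: list[str],
                         default: Any) -> Any:
        # Simplified stand-in: return the value of the first key present
        # in the config dict, otherwise the default.
        for key in keys:
            if key in config:
                return config[key]
        return default


    # Hypothetical quantization config from a checkpoint that records its
    # skip list under "modules_to_not_convert" instead of "ignored_layers".
    quant_config: dict[str, Any] = {
        "quant_method": "fp8",
        "activation_scheme": "dynamic",
        "modules_to_not_convert": ["model.layers.0.mlp.gate"],
    }

    ignored_layers: Optional[list[str]] = get_from_keys_or(
        quant_config, ["ignored_layers"], None)

    # The fix: if "ignored_layers" is missing or empty, fall back to
    # "modules_to_not_convert" so those layers are not treated as FP8.
    if not ignored_layers:
        ignored_layers = get_from_keys_or(quant_config,
                                          ["modules_to_not_convert"], None)

    print(ignored_layers)  # ['model.layers.0.mlp.gate']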