[Bugfix] Fix dynamic FP8 quantization for Mixtral (#4793)
commit 33d3914b1e
parent 1356df53bd
@@ -95,7 +95,7 @@ class MixtralMoE(nn.Module):
                                      params_dtype=self.params_dtype,
                                      quant_config=None)
 
-        if self.use_fp8:
+        if self.use_fp8 and self.quant_config.is_checkpoint_fp8_serialized:
             params_dtype = torch.float8_e4m3fn
 
         self.w13_weight = nn.Parameter(
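The one-line change above makes the FP8 parameter dtype conditional on the checkpoint actually being FP8-serialized. With dynamic FP8 quantization, the checkpoint holds higher-precision (e.g. bf16) weights that are quantized at runtime, so pre-allocating the MoE expert weights as torch.float8_e4m3fn mismatches the dtype of the tensors being loaded. Below is a minimal, hypothetical sketch of the gating logic; make_moe_weight and its parameters are illustrative, not vLLM's actual API:

import torch
import torch.nn as nn

def make_moe_weight(num_experts: int, hidden_size: int,
                    intermediate_size: int, use_fp8: bool,
                    checkpoint_is_fp8: bool,
                    model_dtype: torch.dtype = torch.bfloat16) -> nn.Parameter:
    # Default: allocate in the model's compute dtype. Under dynamic FP8
    # quantization, the checkpoint weights arrive in this dtype and are
    # quantized to FP8 only after they have been loaded.
    params_dtype = model_dtype
    # Pre-allocate FP8 storage only when the checkpoint itself stores
    # FP8 weights (the is_checkpoint_fp8_serialized case in the diff).
    if use_fp8 and checkpoint_is_fp8:
        params_dtype = torch.float8_e4m3fn
    return nn.Parameter(
        torch.empty(num_experts, 2 * intermediate_size, hidden_size,
                    dtype=params_dtype),
        requires_grad=False)

# Dynamic FP8: weights stay bf16 at load time and are quantized later.
w13 = make_moe_weight(2, 64, 32, use_fp8=True, checkpoint_is_fp8=False)
assert w13.dtype == torch.bfloat16
# FP8-serialized checkpoint: parameters are created directly in FP8.
w13_fp8 = make_moe_weight(2, 64, 32, use_fp8=True, checkpoint_is_fp8=True)
assert w13_fp8.dtype == torch.float8_e4m3fn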