[Bugfix] Fix dynamic FP8 quantization for Mixtral (#4793)
commit 33d3914b1e
parent 1356df53bd
@@ -95,7 +95,7 @@ class MixtralMoE(nn.Module):
                                      params_dtype=self.params_dtype,
                                      quant_config=None)
 
-        if self.use_fp8:
+        if self.use_fp8 and self.quant_config.is_checkpoint_fp8_serialized:
             params_dtype = torch.float8_e4m3fn
 
         self.w13_weight = nn.Parameter(
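The one-line change above makes the FP8 parameter dtype conditional on the checkpoint actually being FP8-serialized. With dynamic FP8 quantization, the checkpoint holds higher-precision (e.g. bf16) weights that are quantized at runtime, so pre-allocating the MoE expert weights as torch.float8_e4m3fn mismatches the dtype of the tensors being loaded. Below is a minimal, hypothetical sketch of the gating logic; make_moe_weight and its parameters are illustrative, not vLLM's actual API:

import torch
import torch.nn as nn

def make_moe_weight(num_experts: int, hidden_size: int,
                    intermediate_size: int, use_fp8: bool,
                    checkpoint_is_fp8: bool,
                    model_dtype: torch.dtype = torch.bfloat16) -> nn.Parameter:
    # Default: allocate in the model's compute dtype. Under dynamic FP8
    # quantization, the checkpoint weights arrive in this dtype and are
    # quantized to FP8 only after they have been loaded.
    params_dtype = model_dtype
    # Pre-allocate FP8 storage only when the checkpoint itself stores
    # FP8 weights (the is_checkpoint_fp8_serialized case in the diff).
    if use_fp8 and checkpoint_is_fp8:
        params_dtype = torch.float8_e4m3fn
    return nn.Parameter(
        torch.empty(num_experts, 2 * intermediate_size, hidden_size,
                    dtype=params_dtype),
        requires_grad=False)

# Dynamic FP8: weights stay bf16 at load time and are quantized later.
w13 = make_moe_weight(2, 64, 32, use_fp8=True, checkpoint_is_fp8=False)
assert w13.dtype == torch.bfloat16
# FP8-serialized checkpoint: parameters are created directly in FP8.
w13_fp8 = make_moe_weight(2, 64, 32, use_fp8=True, checkpoint_is_fp8=True)
assert w13_fp8.dtype == torch.float8_e4m3fn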