mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-10 14:04:31 +08:00
Fix unknown attribute of topk_indices_dtype in CompressedTensorsW8A8Fp8MoECutlassMethod (#20507)
Co-authored-by: Lucia (Lu) Fang <fanglu@meta.com>
This commit is contained in:
parent
906e05d840
commit
8aeaa910a2
@@ -368,6 +368,7 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
|
||||
"weights")
|
||||
self.input_quant = self.quant_config.target_scheme_map["Linear"].get(
|
||||
"input_activations")
|
||||
self.topk_indices_dtype = None
|
||||
|
||||
per_tensor = (self.weight_quant.strategy == QuantizationStrategy.TENSOR
|
||||
and self.input_quant.strategy
|
||||
@@ -738,6 +739,7 @@ class CompressedTensorsW8A8Fp8MoECutlassMethod(CompressedTensorsMoEMethod):
|
||||
|
||||
from vllm.model_executor.layers.fused_moe.cutlass_moe import (
|
||||
cutlass_moe_fp8)
|
||||
self.topk_indices_dtype = None
|
||||
self.fused_experts = cutlass_moe_fp8 # type: ignore
|
||||
self.disable_expert_map = False
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user