From 8aeaa910a2c514022ab7521ce74eec0734886a1b Mon Sep 17 00:00:00 2001
From: Lucia Fang <116399278+luccafong@users.noreply.github.com>
Date: Sat, 5 Jul 2025 14:03:20 +0800
Subject: [PATCH] Fix unknown attribute of topk_indices_dtype in
 CompressedTensorsW8A8Fp8MoECutlassMethod (#20507)

Co-authored-by: Lucia (Lu) Fang
---
 .../quantization/compressed_tensors/compressed_tensors_moe.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
index 48eeda5450b0b..ef67cc0eda466 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -368,6 +368,7 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
             "weights")
         self.input_quant = self.quant_config.target_scheme_map["Linear"].get(
             "input_activations")
+        self.topk_indices_dtype = None
         per_tensor = (self.weight_quant.strategy
                       == QuantizationStrategy.TENSOR
                       and self.input_quant.strategy
@@ -738,6 +739,7 @@ class CompressedTensorsW8A8Fp8MoECutlassMethod(CompressedTensorsMoEMethod):
         from vllm.model_executor.layers.fused_moe.cutlass_moe import (
             cutlass_moe_fp8)
 
+        self.topk_indices_dtype = None
         self.fused_experts = cutlass_moe_fp8  # type: ignore
         self.disable_expert_map = False
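
The failure this patch addresses is a plain `AttributeError`: code elsewhere reads `self.topk_indices_dtype` from these MoE method objects, but the affected classes never assigned it in `__init__`. The sketch below is a minimal, self-contained illustration of that pattern and of the fix's shape (defaulting the attribute to `None` in the constructor). The class and reader function names are hypothetical stand-ins, not vLLM APIs.

```python
from typing import Optional


class MoEMethodWithoutDefault:
    """Hypothetical stand-in: never assigns topk_indices_dtype."""

    def __init__(self) -> None:
        self.disable_expert_map = False


class MoEMethodWithDefault:
    """Hypothetical stand-in mirroring the patch: default the attribute."""

    def __init__(self) -> None:
        # Defaulting to None means callers can always read the attribute;
        # a concrete backend could later overwrite it with a real dtype.
        self.topk_indices_dtype: Optional[object] = None
        self.disable_expert_map = False


def read_topk_indices_dtype(method) -> Optional[object]:
    """Hypothetical caller that unconditionally reads the attribute."""
    return method.topk_indices_dtype


if __name__ == "__main__":
    try:
        read_topk_indices_dtype(MoEMethodWithoutDefault())
    except AttributeError as exc:
        print(f"before the patch: {exc}")  # the 'unknown attribute' error

    print("after the patch:", read_topk_indices_dtype(MoEMethodWithDefault()))
```

Initializing the attribute in `__init__` rather than guarding every read with `getattr` keeps the callers unchanged and makes the default visible in one place.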