diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index c6c908f73a253..98733f101acb3 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -381,7 +381,7 @@ class FusedMoEMethodBase(QuantizeMethodBase): # Note : We may want to use FP8 dispatch even otherwise just to # reduce datamovement use_fp8_dispatch = (quant_dtype == current_platform.fp8_dtype() - and act_quant_block_size + and act_quant_block_size[1] == DEEPEP_QUANT_BLOCK_SIZE) # Note (varun): Whether to use FP8 dispatch or not needs some