diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index 98733f101acb3..4ed10e60b13ac 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -380,6 +380,7 @@ class FusedMoEMethodBase(QuantizeMethodBase):
         # Note : We may want to use FP8 dispatch even otherwise just to
         # reduce datamovement
+        assert act_quant_block_size is not None
         use_fp8_dispatch = (quant_dtype == current_platform.fp8_dtype()
                             and act_quant_block_size[1] == DEEPEP_QUANT_BLOCK_SIZE)