From 2b5ad9f233627252860b1639711612a4db8c4554 Mon Sep 17 00:00:00 2001
From: Varun Sundar Rabindranath
Date: Wed, 18 Jun 2025 11:15:48 -0700
Subject: [PATCH] fixes - use-fp8-dispatch

Signed-off-by: Varun Sundar Rabindranath
---
 vllm/model_executor/layers/fused_moe/layer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index c6c908f73a253..98733f101acb3 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -381,7 +381,7 @@ class FusedMoEMethodBase(QuantizeMethodBase):
         # Note : We may want to use FP8 dispatch even otherwise just to
         # reduce datamovement
         use_fp8_dispatch = (quant_dtype == current_platform.fp8_dtype()
-                            and act_quant_block_size
+                            and act_quant_block_size[1]
                             == DEEPEP_QUANT_BLOCK_SIZE)
 
         # Note (varun): Whether to use FP8 dispatch or not needs some