fixes - use-fp8-dispatch: compare act_quant_block_size[1] against DEEPEP_QUANT_BLOCK_SIZE

Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
2026-06-10 23:02:22 +08:00 · 2025-06-18 11:15:48 -07:00 · 2025-06-18 11:15:48 -07:00 · 2b5ad9f233
commit 2b5ad9f233
parent 299f829180
1 changed files with 1 additions and 1 deletions
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -381,7 +381,7 @@ class FusedMoEMethodBase(QuantizeMethodBase):
            # Note : We may want to use FP8 dispatch even otherwise just to
            # reduce datamovement
            use_fp8_dispatch = (quant_dtype == current_platform.fp8_dtype()
-                                and act_quant_block_size
+                                and act_quant_block_size[1]
                                == DEEPEP_QUANT_BLOCK_SIZE)

            # Note (varun): Whether to use FP8 dispatch or not needs some