fixes - use-fp8-dispatch

Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
This commit is contained in:
Varun Sundar Rabindranath 2025-06-18 11:15:48 -07:00 committed by Tyler Michael Smith
parent 299f829180
commit 2b5ad9f233

View File

@@ -381,7 +381,7 @@ class FusedMoEMethodBase(QuantizeMethodBase):
# Note: We may want to use FP8 dispatch even otherwise, just to
# reduce data movement.
use_fp8_dispatch = (quant_dtype == current_platform.fp8_dtype()
and act_quant_block_size
and act_quant_block_size[1]
== DEEPEP_QUANT_BLOCK_SIZE)
# Note (varun): Whether to use FP8 dispatch or not needs some