mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-25 11:07:11 +08:00
fixes - use-fp8-dispatch
Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
This commit is contained in:
parent
299f829180
commit
2b5ad9f233
@ -381,7 +381,7 @@ class FusedMoEMethodBase(QuantizeMethodBase):
|
||||
# Note: We may want to use FP8 dispatch even otherwise just to
|
||||
# reduce data movement
|
||||
use_fp8_dispatch = (quant_dtype == current_platform.fp8_dtype()
|
||||
and act_quant_block_size
|
||||
and act_quant_block_size[1]
|
||||
== DEEPEP_QUANT_BLOCK_SIZE)
|
||||
|
||||
# Note (varun): Whether to use FP8 dispatch or not needs some
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user