mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-23 13:14:34 +08:00
[Fix] Add SM check to flashinfer MOE backend (#29144)
Signed-off-by: jiahanc <173873397+jiahanc@users.noreply.github.com>
Signed-off-by: mgoin <mgoin64@gmail.com>
Co-authored-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent
4587063267
commit
5f96c00c55
@@ -282,6 +282,16 @@ def get_flashinfer_moe_backend() -> FlashinferMoeBackend:
|
||||
|
||||
flashinfer_moe_backend = envs.VLLM_FLASHINFER_MOE_BACKEND
|
||||
if flashinfer_moe_backend in backend_map:
|
||||
if (
|
||||
flashinfer_moe_backend == "latency"
|
||||
and not current_platform.is_device_capability(100)
|
||||
):
|
||||
logger.info_once(
|
||||
"Flashinfer TRTLLM MOE backend is only supported on "
|
||||
"SM100 and later, using CUTLASS backend instead",
|
||||
scope="local",
|
||||
)
|
||||
return FlashinferMoeBackend.CUTLASS
|
||||
return backend_map[flashinfer_moe_backend]
|
||||
elif current_platform.is_device_capability(90):
|
||||
return FlashinferMoeBackend.CUTLASS
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user