[Fix] Add SM check to flashinfer MOE backend (#29144)

Signed-off-by: jiahanc <173873397+jiahanc@users.noreply.github.com>
Signed-off-by: mgoin <mgoin64@gmail.com>
Co-authored-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
jiahanc 2025-11-22 16:39:30 -08:00 committed by GitHub
parent 4587063267
commit 5f96c00c55
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -282,6 +282,16 @@ def get_flashinfer_moe_backend() -> FlashinferMoeBackend:
flashinfer_moe_backend = envs.VLLM_FLASHINFER_MOE_BACKEND
if flashinfer_moe_backend in backend_map:
if (
flashinfer_moe_backend == "latency"
and not current_platform.is_device_capability(100)
):
logger.info_once(
"Flashinfer TRTLLM MOE backend is only supported on "
"SM100 and later, using CUTLASS backend instead",
scope="local",
)
return FlashinferMoeBackend.CUTLASS
return backend_map[flashinfer_moe_backend]
elif current_platform.is_device_capability(90):
return FlashinferMoeBackend.CUTLASS