mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 05:15:42 +08:00
[Bugfix] Only use triton_kernels for MXFP4 on SM90 and SM100 (#29339)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent
71df2a57ef
commit
c17610e2ba
@@ -132,12 +132,15 @@ def get_mxfp4_backend(with_lora_support: bool) -> Mxfp4Backend:
|
||||
)
|
||||
|
||||
# If FlashInfer is not available, try either Marlin or Triton
|
||||
if (
|
||||
envs.VLLM_MXFP4_USE_MARLIN
|
||||
or current_platform.get_device_capability()[0] < 9
|
||||
or not has_triton_kernels()
|
||||
or not is_torch_equal_or_newer("2.8.0")
|
||||
):
|
||||
triton_kernels_supported = (
|
||||
has_triton_kernels()
|
||||
and is_torch_equal_or_newer("2.8.0")
|
||||
# NOTE: triton_kernels are only confirmed to work on SM90 and SM100
|
||||
# SM110 fails with this error: https://github.com/vllm-project/vllm/issues/29317
|
||||
# SM120 needs this fix: https://github.com/triton-lang/triton/pull/8498
|
||||
and (9, 0) <= current_platform.get_device_capability() < (11, 0)
|
||||
)
|
||||
if envs.VLLM_MXFP4_USE_MARLIN or not triton_kernels_supported:
|
||||
logger.info_once("Using Marlin backend")
|
||||
return Mxfp4Backend.MARLIN
|
||||
else:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user