From 9a5e96523be33f7ddd5aa56421b1e41000c0f2e2 Mon Sep 17 00:00:00 2001
From: Xin Yang <105740670+xyang16@users.noreply.github.com>
Date: Thu, 18 Dec 2025 08:42:22 -0800
Subject: [PATCH] [LoRA] Set default MXFP4 LoRA backend to Marlin (#30598)

Signed-off-by: Xin Yang
Co-authored-by: Cyrus Leung
---
 vllm/model_executor/layers/quantization/mxfp4.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/mxfp4.py b/vllm/model_executor/layers/quantization/mxfp4.py
index e96e87d15787d..832925825c453 100644
--- a/vllm/model_executor/layers/quantization/mxfp4.py
+++ b/vllm/model_executor/layers/quantization/mxfp4.py
@@ -95,12 +95,12 @@ def get_mxfp4_backend_with_lora() -> Mxfp4Backend:
         # SM120 needs this fix: https://github.com/triton-lang/triton/pull/8498
         and (9, 0) <= current_platform.get_device_capability() < (11, 0)
     )
-    if envs.VLLM_MXFP4_USE_MARLIN or not triton_kernels_supported:
-        logger.info_once("[get_mxfp4_backend_with_lora] Using Marlin backend")
-        return Mxfp4Backend.MARLIN
+    if envs.VLLM_MXFP4_USE_MARLIN is False and triton_kernels_supported:
+        logger.info_once("[get_mxfp4_backend_with_lora] Using Triton backend")
+        return Mxfp4Backend.TRITON
 
-    logger.info_once("[get_mxfp4_backend_with_lora] Using Triton backend")
-    return Mxfp4Backend.TRITON
+    logger.info_once("[get_mxfp4_backend_with_lora] Using Marlin backend")
+    return Mxfp4Backend.MARLIN
 
 
 def get_mxfp4_backend(with_lora_support: bool) -> Mxfp4Backend: