mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-04 00:49:10 +08:00
[LoRA] Set default MXFP4 LoRA backend to Marlin (#30598)
Signed-off-by: Xin Yang <xyangx@amazon.com> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
parent
326e7c3105
commit
9a5e96523b
@@ -95,12 +95,12 @@ def get_mxfp4_backend_with_lora() -> Mxfp4Backend:
|
|||||||
# SM120 needs this fix: https://github.com/triton-lang/triton/pull/8498
|
# SM120 needs this fix: https://github.com/triton-lang/triton/pull/8498
|
||||||
and (9, 0) <= current_platform.get_device_capability() < (11, 0)
|
and (9, 0) <= current_platform.get_device_capability() < (11, 0)
|
||||||
)
|
)
|
||||||
if envs.VLLM_MXFP4_USE_MARLIN or not triton_kernels_supported:
|
if envs.VLLM_MXFP4_USE_MARLIN is False and triton_kernels_supported:
|
||||||
logger.info_once("[get_mxfp4_backend_with_lora] Using Marlin backend")
|
logger.info_once("[get_mxfp4_backend_with_lora] Using Triton backend")
|
||||||
return Mxfp4Backend.MARLIN
|
return Mxfp4Backend.TRITON
|
||||||
|
|
||||||
logger.info_once("[get_mxfp4_backend_with_lora] Using Triton backend")
|
logger.info_once("[get_mxfp4_backend_with_lora] Using Marlin backend")
|
||||||
return Mxfp4Backend.TRITON
|
return Mxfp4Backend.MARLIN
|
||||||
|
|
||||||
|
|
||||||
def get_mxfp4_backend(with_lora_support: bool) -> Mxfp4Backend:
|
def get_mxfp4_backend(with_lora_support: bool) -> Mxfp4Backend:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user