From 3b5567a2099ee8c6a153ad5cf61397b99d200eb6 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Fri, 28 Feb 2025 23:09:14 -0800
Subject: [PATCH] [V1][Minor] Do not print attn backend twice (#13985)

Signed-off-by: Woosuk Kwon
---
 vllm/platforms/cuda.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index 2a4cac46c0667..bffa113cab899 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -178,7 +178,8 @@ class CudaPlatformBase(Platform):
                         block_size)
                 else:
                     if use_v1:
-                        logger.info("Using FlashMLA backend on V1 engine.")
+                        logger.info_once(
+                            "Using FlashMLA backend on V1 engine.")
                         return ("vllm.v1.attention.backends.mla."
                                 "flashmla.FlashMLABackend")
                     else:
@@ -187,14 +188,14 @@ class CudaPlatformBase(Platform):
                                 "flashmla.FlashMLABackend")
 
         if use_v1:
-            logger.info("Using Triton MLA backend on V1 engine.")
+            logger.info_once("Using Triton MLA backend on V1 engine.")
             return ("vllm.v1.attention.backends.mla."
                     "triton_mla.TritonMLABackend")
         else:
             logger.info("Using Triton MLA backend.")
             return "vllm.attention.backends.triton_mla.TritonMLABackend"
     if use_v1:
-        logger.info("Using Flash Attention backend on V1 engine.")
+        logger.info_once("Using Flash Attention backend on V1 engine.")
         return ("vllm.v1.attention.backends.flash_attn."
                 "FlashAttentionBackend")
     if selected_backend == _Backend.FLASHINFER:
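
Note: the sketch below is a minimal, hypothetical illustration of how an
"info_once"-style helper can deduplicate log messages, so the chosen attention
backend is reported only on the first call even if the selection path runs more
than once per process. It is NOT vLLM's actual logger implementation; the
helper name `info_once` shown standalone here and the module-level `_logged_once`
set are assumptions for illustration only.

    import logging

    logger = logging.getLogger("demo")
    logging.basicConfig(level=logging.INFO)

    # Messages (with their args) already logged in this process.
    _logged_once: set = set()


    def info_once(msg: str, *args) -> None:
        """Log an INFO message only the first time this (msg, args) pair is seen."""
        key = (msg, args)
        if key in _logged_once:
            return
        _logged_once.add(key)
        logger.info(msg, *args)


    if __name__ == "__main__":
        # The second call is suppressed, mirroring the intent of this patch.
        info_once("Using Flash Attention backend on V1 engine.")
        info_once("Using Flash Attention backend on V1 engine.")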