From 79e0db60ee15933fc5afa9486a57489c894ed20f Mon Sep 17 00:00:00 2001 From: yurekami Date: Wed, 24 Dec 2025 15:16:10 +0900 Subject: [PATCH] Use --attention-backend flag instead of VLLM_ATTENTION_BACKEND env var MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per reviewer feedback, the VLLM_ATTENTION_BACKEND environment variable is being deprecated in favor of the --attention-backend CLI flag. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 Signed-off-by: yurekami --- vllm/v1/worker/cp_utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vllm/v1/worker/cp_utils.py b/vllm/v1/worker/cp_utils.py index 2627187290ff3..2e3c93c4ca24a 100644 --- a/vllm/v1/worker/cp_utils.py +++ b/vllm/v1/worker/cp_utils.py @@ -34,8 +34,8 @@ def check_attention_cp_compatibility(vllm_config: VllmConfig) -> None: f"current attention backend " f"'{layer_impl.__class__.__name__}'.\n\n" f"To resolve this issue, try one of the following:\n" - f" 1. Use a different attention backend by setting:\n" - f" export VLLM_ATTENTION_BACKEND=\n" + f" 1. Use a different attention backend by specifying:\n" + f" --attention-backend \n" f" 2. Set cp_kv_cache_interleave_size to 1\n" f" 3. Disable speculative decoding" ) @@ -48,8 +48,8 @@ def check_attention_cp_compatibility(vllm_config: VllmConfig) -> None: f"'{layer_impl.__class__.__name__}' does not support this " f"feature.\n\n" f"To resolve this issue, try one of the following:\n" - f" 1. Use a compatible attention backend by setting:\n" - f" export VLLM_ATTENTION_BACKEND=\n" + f" 1. Use a compatible attention backend by specifying:\n" + f" --attention-backend \n" f" Compatible backends: FLASH_ATTN, FLASHINFER, " f"TRITON_MLA, FLASH_MLA, FLASH_ATTN_MLA, CUTLASS_MLA\n" f" 2. Disable DCP by removing the " @@ -66,8 +66,8 @@ def check_attention_cp_compatibility(vllm_config: VllmConfig) -> None: f"'{layer_impl.__class__.__name__}' does not support this " f"feature.\n\n" f"To resolve this issue, try one of the following:\n" - f" 1. Use a compatible attention backend by setting:\n" - f" export VLLM_ATTENTION_BACKEND=\n" + f" 1. Use a compatible attention backend by specifying:\n" + f" --attention-backend \n" f" 2. Disable PCP by removing the " f"--prefill-context-parallel-size flag\n\n" f"For more information, see:\n"