From 79e0db60ee15933fc5afa9486a57489c894ed20f Mon Sep 17 00:00:00 2001
From: yurekami <yurekami@users.noreply.github.com>
Date: Wed, 24 Dec 2025 15:16:10 +0900
Subject: [PATCH] Use --attention-backend flag instead of
 VLLM_ATTENTION_BACKEND env var
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

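Per reviewer feedback, the VLLM_ATTENTION_BACKEND environment variable
is being deprecated in favor of the --attention-backend CLI flag.

Update the three "To resolve this issue" hints emitted by
check_attention_cp_compatibility() in vllm/v1/worker/cp_utils.py to
recommend the flag instead. Only the message text changes; there is no
functional change.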

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Signed-off-by: yurekami <yurekami@users.noreply.github.com>
---
 vllm/v1/worker/cp_utils.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/vllm/v1/worker/cp_utils.py b/vllm/v1/worker/cp_utils.py
index 2627187290ff3..2e3c93c4ca24a 100644
--- a/vllm/v1/worker/cp_utils.py
+++ b/vllm/v1/worker/cp_utils.py
@@ -34,8 +34,8 @@ def check_attention_cp_compatibility(vllm_config: VllmConfig) -> None:
                     f"current attention backend "
                     f"'{layer_impl.__class__.__name__}'.\n\n"
                     f"To resolve this issue, try one of the following:\n"
-                    f"  1. Use a different attention backend by setting:\n"
-                    f"     export VLLM_ATTENTION_BACKEND=<backend>\n"
+                    f"  1. Use a different attention backend by specifying:\n"
+                    f"     --attention-backend <backend>\n"
                     f"  2. Set cp_kv_cache_interleave_size to 1\n"
                     f"  3. Disable speculative decoding"
                 )
@@ -48,8 +48,8 @@ def check_attention_cp_compatibility(vllm_config: VllmConfig) -> None:
                     f"'{layer_impl.__class__.__name__}' does not support this "
                     f"feature.\n\n"
                     f"To resolve this issue, try one of the following:\n"
-                    f"  1. Use a compatible attention backend by setting:\n"
-                    f"     export VLLM_ATTENTION_BACKEND=<backend>\n"
+                    f"  1. Use a compatible attention backend by specifying:\n"
+                    f"     --attention-backend <backend>\n"
                     f"     Compatible backends: FLASH_ATTN, FLASHINFER, "
                     f"TRITON_MLA, FLASH_MLA, FLASH_ATTN_MLA, CUTLASS_MLA\n"
                     f"  2. Disable DCP by removing the "
@@ -66,8 +66,8 @@ def check_attention_cp_compatibility(vllm_config: VllmConfig) -> None:
                     f"'{layer_impl.__class__.__name__}' does not support this "
                     f"feature.\n\n"
                     f"To resolve this issue, try one of the following:\n"
-                    f"  1. Use a compatible attention backend by setting:\n"
-                    f"     export VLLM_ATTENTION_BACKEND=<backend>\n"
+                    f"  1. Use a compatible attention backend by specifying:\n"
+                    f"     --attention-backend <backend>\n"
                     f"  2. Disable PCP by removing the "
                     f"--prefill-context-parallel-size flag\n\n"
                     f"For more information, see:\n"