mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-17 03:37:03 +08:00
Use --attention-backend flag instead of VLLM_ATTENTION_BACKEND env var
Per reviewer feedback, the VLLM_ATTENTION_BACKEND environment variable is being deprecated in favor of the --attention-backend CLI flag.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Signed-off-by: yurekami <yurekami@users.noreply.github.com>
This commit is contained in:
parent
3c8358c328
commit
79e0db60ee
@@ -34,8 +34,8 @@ def check_attention_cp_compatibility(vllm_config: VllmConfig) -> None:
|
||||
f"current attention backend "
|
||||
f"'{layer_impl.__class__.__name__}'.\n\n"
|
||||
f"To resolve this issue, try one of the following:\n"
|
||||
f" 1. Use a different attention backend by setting:\n"
|
||||
f" export VLLM_ATTENTION_BACKEND=<backend>\n"
|
||||
f" 1. Use a different attention backend by specifying:\n"
|
||||
f" --attention-backend <backend>\n"
|
||||
f" 2. Set cp_kv_cache_interleave_size to 1\n"
|
||||
f" 3. Disable speculative decoding"
|
||||
)
|
||||
@@ -48,8 +48,8 @@ def check_attention_cp_compatibility(vllm_config: VllmConfig) -> None:
|
||||
f"'{layer_impl.__class__.__name__}' does not support this "
|
||||
f"feature.\n\n"
|
||||
f"To resolve this issue, try one of the following:\n"
|
||||
f" 1. Use a compatible attention backend by setting:\n"
|
||||
f" export VLLM_ATTENTION_BACKEND=<backend>\n"
|
||||
f" 1. Use a compatible attention backend by specifying:\n"
|
||||
f" --attention-backend <backend>\n"
|
||||
f" Compatible backends: FLASH_ATTN, FLASHINFER, "
|
||||
f"TRITON_MLA, FLASH_MLA, FLASH_ATTN_MLA, CUTLASS_MLA\n"
|
||||
f" 2. Disable DCP by removing the "
|
||||
@@ -66,8 +66,8 @@ def check_attention_cp_compatibility(vllm_config: VllmConfig) -> None:
|
||||
f"'{layer_impl.__class__.__name__}' does not support this "
|
||||
f"feature.\n\n"
|
||||
f"To resolve this issue, try one of the following:\n"
|
||||
f" 1. Use a compatible attention backend by setting:\n"
|
||||
f" export VLLM_ATTENTION_BACKEND=<backend>\n"
|
||||
f" 1. Use a compatible attention backend by specifying:\n"
|
||||
f" --attention-backend <backend>\n"
|
||||
f" 2. Disable PCP by removing the "
|
||||
f"--prefill-context-parallel-size flag\n\n"
|
||||
f"For more information, see:\n"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user