diff --git a/vllm/v1/worker/cp_utils.py b/vllm/v1/worker/cp_utils.py index 2627187290ff3..2e3c93c4ca24a 100644 --- a/vllm/v1/worker/cp_utils.py +++ b/vllm/v1/worker/cp_utils.py @@ -34,8 +34,8 @@ def check_attention_cp_compatibility(vllm_config: VllmConfig) -> None: f"current attention backend " f"'{layer_impl.__class__.__name__}'.\n\n" f"To resolve this issue, try one of the following:\n" - f" 1. Use a different attention backend by setting:\n" - f" export VLLM_ATTENTION_BACKEND=\n" + f" 1. Use a different attention backend by specifying:\n" + f" --attention-backend \n" f" 2. Set cp_kv_cache_interleave_size to 1\n" f" 3. Disable speculative decoding" ) @@ -48,8 +48,8 @@ def check_attention_cp_compatibility(vllm_config: VllmConfig) -> None: f"'{layer_impl.__class__.__name__}' does not support this " f"feature.\n\n" f"To resolve this issue, try one of the following:\n" - f" 1. Use a compatible attention backend by setting:\n" - f" export VLLM_ATTENTION_BACKEND=\n" + f" 1. Use a compatible attention backend by specifying:\n" + f" --attention-backend \n" f" Compatible backends: FLASH_ATTN, FLASHINFER, " f"TRITON_MLA, FLASH_MLA, FLASH_ATTN_MLA, CUTLASS_MLA\n" f" 2. Disable DCP by removing the " @@ -66,8 +66,8 @@ def check_attention_cp_compatibility(vllm_config: VllmConfig) -> None: f"'{layer_impl.__class__.__name__}' does not support this " f"feature.\n\n" f"To resolve this issue, try one of the following:\n" - f" 1. Use a compatible attention backend by setting:\n" - f" export VLLM_ATTENTION_BACKEND=\n" + f" 1. Use a compatible attention backend by specifying:\n" + f" --attention-backend \n" f" 2. Disable PCP by removing the " f"--prefill-context-parallel-size flag\n\n" f"For more information, see:\n"