[Misc] Do not print async output warning for v1 (#21151)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
2026-03-18 15:57:09 +08:00 · 2025-07-17 21:57:02 -07:00 · 2025-07-17 21:57:02 -07:00 · 54cf1cae62
commit 54cf1cae62
parent 5780121c95
2 changed files with 2 additions and 2 deletions
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -99,7 +99,7 @@ class CudaPlatformBase(Platform):

    @classmethod
    def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool:
-        if enforce_eager:
+        if enforce_eager and not envs.VLLM_USE_V1:
            logger.warning(
                "To see benefits of async output processing, enable CUDA "
                "graph. Since, enforce-eager is enabled, async output "
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -299,7 +299,7 @@ class RocmPlatform(Platform):

    @classmethod
    def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool:
-        if enforce_eager:
+        if enforce_eager and not envs.VLLM_USE_V1:
            logger.warning(
                "To see benefits of async output processing, enable CUDA "
                "graph. Since, enforce-eager is enabled, async output "