From 13dd93c66766efff810feed3edb919f69d0d3836 Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Thu, 25 Sep 2025 21:21:56 -0400 Subject: [PATCH] [Core] Force PIECEWISE CUDAGraph mode for encoder-decoder (#25701) Signed-off-by: Russell Bryant --- vllm/config/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 958df4c66955..2da9d8f4f3ea 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -364,9 +364,11 @@ class VllmConfig: self.compilation_config.cudagraph_mode = \ CUDAGraphMode.FULL_AND_PIECEWISE - # pooling model does not support full cudagraphs + # pooling models and encoder-decoder models + # do not support full cudagraphs if self.model_config is not None and \ - self.model_config.pooler_config is not None: + (self.model_config.pooler_config is not None + or self.model_config.is_encoder_decoder): self.compilation_config.cudagraph_mode = \ CUDAGraphMode.PIECEWISE else: