From ef160aa08e47b34386e883d179e869947dc5eaba Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Thu, 25 Sep 2025 21:21:56 -0400 Subject: [PATCH] [Core] Force PIECEWISE CUDAGraph mode for encoder-decoder (#25701) Signed-off-by: Russell Bryant Signed-off-by: yewentao256 --- vllm/config/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 958df4c669550..2da9d8f4f3eab 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -364,9 +364,11 @@ class VllmConfig: self.compilation_config.cudagraph_mode = \ CUDAGraphMode.FULL_AND_PIECEWISE - # pooling model does not support full cudagraphs + # pooling models and encoder-decoder models + # do not support full cudagraphs if self.model_config is not None and \ - self.model_config.pooler_config is not None: + (self.model_config.pooler_config is not None + or self.model_config.is_encoder_decoder): self.compilation_config.cudagraph_mode = \ CUDAGraphMode.PIECEWISE else: