From 83f74c698f1f7c781ae02e3c533a52432799e717 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luka=20Govedi=C4=8D?=
Date: Thu, 15 May 2025 01:04:43 -0400
Subject: [PATCH] [Fix][ROCm] Enforce eager for all encoder-decoder models on
 ROCm (#18154)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Luka Govedič
---
 vllm/config.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index 09e89c1116f15..81cac4d041166 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -906,12 +906,17 @@ class ModelConfig:
     def _verify_cuda_graph(self) -> None:
         self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
                                           self.max_model_len)
+        # CUDAGraph capture not supported for enc-dec models and mllama on ROCm
         ROCM_UNSUPPORTED_MODELS = ['mllama']
-        if (self.hf_config.model_type in ROCM_UNSUPPORTED_MODELS
-                and not self.enforce_eager and current_platform.is_rocm()):
+        unsupported_rocm = (self.hf_config.model_type
+                            in ROCM_UNSUPPORTED_MODELS
+                            or self.is_encoder_decoder)
+
+        if (unsupported_rocm and not self.enforce_eager
+                and current_platform.is_rocm()):
             logger.warning(
                 "CUDA graph is not supported for %s on ROCm yet, fallback "
-                "to the eager mode.", self.hf_config.model_type)
+                "to eager mode.", self.hf_config.model_type)
             self.enforce_eager = True
 
     def _verify_bnb_config(self) -> None: