[Fix][ROCm] Enforce eager for all encoder-decoder models on ROCm (#18154)

Signed-off-by: Luka Govedič <lgovedic@redhat.com>
2026-01-29 12:07:12 +08:00 · 2025-05-15 01:04:43 -04:00 · 2025-05-15 01:04:43 -04:00 · 83f74c698f
commit 83f74c698f
parent 2dff093574
1 changed files with 8 additions and 3 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -906,12 +906,17 @@ class ModelConfig:
    def _verify_cuda_graph(self) -> None:
        self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
                                          self.max_model_len)
+        # CUDAGraph capture not supported for enc-dec models and mllama on ROCm
        ROCM_UNSUPPORTED_MODELS = ['mllama']
-        if (self.hf_config.model_type in ROCM_UNSUPPORTED_MODELS
-                and not self.enforce_eager and current_platform.is_rocm()):
+        unsupported_rocm = (self.hf_config.model_type
+                            in ROCM_UNSUPPORTED_MODELS
+                            or self.is_encoder_decoder)
+
+        if (unsupported_rocm and not self.enforce_eager
+                and current_platform.is_rocm()):
            logger.warning(
                "CUDA graph is not supported for %s on ROCm yet, fallback "
-                "to the eager mode.", self.hf_config.model_type)
+                "to eager mode.", self.hf_config.model_type)
            self.enforce_eager = True

    def _verify_bnb_config(self) -> None: