[Fix][ROCm] Enforce eager for all encoder-decoder models on ROCm (#18154)

Signed-off-by: Luka Govedič <lgovedic@redhat.com>
This commit is contained in:
Luka Govedič 2025-05-15 01:04:43 -04:00 committed by GitHub
parent 2dff093574
commit 83f74c698f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -906,12 +906,17 @@ class ModelConfig:
def _verify_cuda_graph(self) -> None:
    """Clamp the capture length and force eager mode where required on ROCm.

    ``max_seq_len_to_capture`` is limited to ``max_model_len``. When
    running on ROCm with a model for which CUDA graph capture is not
    supported (``mllama`` or any encoder-decoder model) and eager mode was
    not already requested, a warning is logged and ``enforce_eager`` is
    set to ``True``.
    """
    self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
                                      self.max_model_len)
    # CUDAGraph capture not supported for enc-dec models and mllama on ROCm
    ROCM_UNSUPPORTED_MODELS = ['mllama']
    unsupported_rocm = (self.hf_config.model_type
                        in ROCM_UNSUPPORTED_MODELS
                        or self.is_encoder_decoder)

    if (unsupported_rocm and not self.enforce_eager
            and current_platform.is_rocm()):
        # Fall back rather than fail: the user did not ask for eager mode,
        # so tell them why it is being switched on.
        logger.warning(
            "CUDA graph is not supported for %s on ROCm yet, fallback "
            "to eager mode.", self.hf_config.model_type)
        self.enforce_eager = True
def _verify_bnb_config(self) -> None: