mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-28 23:37:16 +08:00
[Fix][ROCm] Enforce eager for all encoder-decoder models on ROCm (#18154)
Signed-off-by: Luka Govedič <lgovedic@redhat.com>
This commit is contained in:
parent
2dff093574
commit
83f74c698f
@@ -906,12 +906,17 @@ class ModelConfig:
|
||||
def _verify_cuda_graph(self) -> None:
|
||||
self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
|
||||
self.max_model_len)
|
||||
# CUDAGraph capture not supported for enc-dec models and mllama on ROCm
|
||||
ROCM_UNSUPPORTED_MODELS = ['mllama']
|
||||
if (self.hf_config.model_type in ROCM_UNSUPPORTED_MODELS
|
||||
and not self.enforce_eager and current_platform.is_rocm()):
|
||||
unsupported_rocm = (self.hf_config.model_type
|
||||
in ROCM_UNSUPPORTED_MODELS
|
||||
or self.is_encoder_decoder)
|
||||
|
||||
if (unsupported_rocm and not self.enforce_eager
|
||||
and current_platform.is_rocm()):
|
||||
logger.warning(
|
||||
"CUDA graph is not supported for %s on ROCm yet, fallback "
|
||||
"to the eager mode.", self.hf_config.model_type)
|
||||
"to eager mode.", self.hf_config.model_type)
|
||||
self.enforce_eager = True
|
||||
|
||||
def _verify_bnb_config(self) -> None:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user