From 00c1bde5d8cd30b14f661b11d9ad1c1d4470ddbf Mon Sep 17 00:00:00 2001
From: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com>
Date: Fri, 13 Dec 2024 00:31:26 -0500
Subject: [PATCH] [ROCm][AMD] Disable auto enabling chunked prefill on ROCm (#11146)

Signed-off-by: Gregory Shtrasberg
---
 vllm/engine/arg_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 0c28fe703272..0098648b1cd6 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1074,7 +1074,8 @@ class EngineArgs:
                 if (is_gpu and not use_sliding_window and not use_spec_decode
                         and not self.enable_lora
                         and not self.enable_prompt_adapter
-                        and model_config.runner_type != "pooling"):
+                        and model_config.runner_type != "pooling"
+                        and not current_platform.is_rocm()):
                     self.enable_chunked_prefill = True
                     logger.warning(
                         "Chunked prefill is enabled by default for models with "