[ROCm][AMD] Disable auto enabling chunked prefill on ROCm (#11146)

Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
2025-12-16 08:56:02 +08:00 · 2024-12-13 00:31:26 -05:00 · 2024-12-13 00:31:26 -05:00 · 00c1bde5d8
commit 00c1bde5d8
parent 3989a79824
1 changed file with 2 additions and 1 deletion
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1074,7 +1074,8 @@ class EngineArgs:
                if (is_gpu and not use_sliding_window and not use_spec_decode
                        and not self.enable_lora
                        and not self.enable_prompt_adapter
-                        and model_config.runner_type != "pooling"):
+                        and model_config.runner_type != "pooling"
+                        and not current_platform.is_rocm()):
                    self.enable_chunked_prefill = True
                    logger.warning(
                        "Chunked prefill is enabled by default for models with "