[ROCm][AMD] Disable auto enabling chunked prefill on ROCm (#11146)

Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
This commit is contained in:
Gregory Shtrasberg 2024-12-13 00:31:26 -05:00 committed by GitHub
parent 3989a79824
commit 00c1bde5d8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1074,7 +1074,8 @@ class EngineArgs:
if (is_gpu and not use_sliding_window and not use_spec_decode
and not self.enable_lora
and not self.enable_prompt_adapter
- and model_config.runner_type != "pooling"):
+ and model_config.runner_type != "pooling"
+ and not current_platform.is_rocm()):
self.enable_chunked_prefill = True
logger.warning(
"Chunked prefill is enabled by default for models with "