[ROCm] Fix broken import in platform attention backend dispatching (#30432)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
Andreas Karatzas 2025-12-10 19:12:58 -06:00 committed by GitHub
parent b4054c8ab4
commit b51255f369


@@ -403,7 +403,21 @@ class RocmPlatform(Platform):
             compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE
         if cache_config and cache_config.block_size is None:
-            cache_config.block_size = 16
+            if (
+                envs.VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION and envs.VLLM_ROCM_USE_AITER
+                # NOTE: This block has been deprecated
+                # or get_env_variable_attn_backend()
+                # == AttentionBackendEnum.ROCM_AITER_UNIFIED_ATTN
+                # TODO: monitor https://github.com/vllm-project/vllm/pull/30396
+                # to see how we can transition to the new way of selecting
+                # attention backends
+            ):
+                cache_config.block_size = 64
+                logger.warning(
+                    "[ROCM_AITER_UNIFIED_ATTN]: Setting kv cache block size to 64."
+                )
+            else:
+                cache_config.block_size = 16
         if parallel_config.worker_cls == "auto":
             parallel_config.worker_cls = "vllm.v1.worker.gpu_worker.Worker"
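For context, below is a minimal, self-contained sketch of the KV-cache block-size selection this hunk introduces. The environment-variable names (VLLM_ROCM_USE_AITER, VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION) are taken from the diff above; the helper function, the direct os.environ reads, and the truthiness rules are illustrative assumptions, not vLLM's actual implementation (which reads these flags through vllm.envs).

# Standalone sketch (assumed names and parsing, for illustration only) of the
# block-size choice added in this hunk.
import os


def _flag(name: str) -> bool:
    # Assumption: treat "1"/"true"/"yes" as enabled; vLLM's own env parsing
    # in vllm.envs is more involved.
    return os.environ.get(name, "").lower() in ("1", "true", "yes")


def select_rocm_kv_block_size() -> int:
    # Mirrors the patched logic: when AITER and its unified attention path
    # are both enabled on ROCm, prefer 64-token KV-cache blocks; otherwise
    # fall back to the previous default of 16.
    if _flag("VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION") and _flag("VLLM_ROCM_USE_AITER"):
        return 64
    return 16


if __name__ == "__main__":
    print(select_rocm_kv_block_size())

Running this sketch with both VLLM_ROCM_USE_AITER=1 and VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION=1 set prints 64; with either unset it prints 16, matching the else branch in the patch.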