mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-02 16:44:27 +08:00
[ROCm] Fix broken import in platform attention backend dispatching (#30432)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
parent
b4054c8ab4
commit
b51255f369
@ -403,7 +403,21 @@ class RocmPlatform(Platform):
|
|||||||
compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE
|
compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE
|
||||||
|
|
||||||
if cache_config and cache_config.block_size is None:
|
if cache_config and cache_config.block_size is None:
|
||||||
cache_config.block_size = 16
|
if (
|
||||||
|
envs.VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION and envs.VLLM_ROCM_USE_AITER
|
||||||
|
# NOTE: This block has been deprecated
|
||||||
|
# or get_env_variable_attn_backend()
|
||||||
|
# == AttentionBackendEnum.ROCM_AITER_UNIFIED_ATTN
|
||||||
|
# TODO: monitor https://github.com/vllm-project/vllm/pull/30396
|
||||||
|
# to see how we can transition to the new way of selecting
|
||||||
|
# attention backends
|
||||||
|
):
|
||||||
|
cache_config.block_size = 64
|
||||||
|
logger.warning(
|
||||||
|
"[ROCM_AITER_UNIFIED_ATTN]: Setting kv cache block size to 64."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
cache_config.block_size = 16
|
||||||
|
|
||||||
if parallel_config.worker_cls == "auto":
|
if parallel_config.worker_cls == "auto":
|
||||||
parallel_config.worker_cls = "vllm.v1.worker.gpu_worker.Worker"
|
parallel_config.worker_cls = "vllm.v1.worker.gpu_worker.Worker"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user