From 255e34ca50db3fc1f0ef8b66193b7c2fe47ca672 Mon Sep 17 00:00:00 2001 From: Kuntai Du Date: Mon, 27 Oct 2025 18:32:23 -0700 Subject: [PATCH] [Stability fix] turn off HMA allocator when connector is set (#27592) Signed-off-by: KuntaiDu Signed-off-by: Kuntai Du --- vllm/config/vllm.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index 597cf5793963..a7f7f3b45abe 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -597,6 +597,20 @@ class VllmConfig: if not current_platform.support_hybrid_kv_cache(): # Hybrid KV cache manager is not supported on non-GPU platforms. self.scheduler_config.disable_hybrid_kv_cache_manager = True + if self.kv_transfer_config is not None: + # NOTE(Kuntai): turn HMA off for connector for now. + # TODO(Kuntai): have a more elegant solution to check and + # turn off HMA for connector that does not support HMA. + logger.warning( + "Turning off hybrid kv cache manager because " + "`--kv-transfer-config` is set. This will reduce the " + "performance of vLLM on LLMs with sliding window attention " + "or Mamba attention. If you are a developer of kv connector" + ", please consider supporting hybrid kv cache manager for " + "your connector by making sure your connector is a subclass" + " of `SupportsHMA` defined in kv_connector/v1/base.py." + ) + self.scheduler_config.disable_hybrid_kv_cache_manager = True if self.kv_events_config is not None: # Hybrid KV cache manager is not compatible with KV events. self.scheduler_config.disable_hybrid_kv_cache_manager = True