From 255e34ca50db3fc1f0ef8b66193b7c2fe47ca672 Mon Sep 17 00:00:00 2001
From: Kuntai Du <kuntai@uchicago.edu>
Date: Mon, 27 Oct 2025 18:32:23 -0700
Subject: [PATCH] [Stability fix] turn off HMA allocator when connector is set
 (#27592)

Signed-off-by: KuntaiDu <kuntai@uchicago.edu>
Signed-off-by: Kuntai Du <kuntai@uchicago.edu>
---
 vllm/config/vllm.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py
index 597cf5793963..a7f7f3b45abe 100644
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -597,6 +597,20 @@ class VllmConfig:
             if not current_platform.support_hybrid_kv_cache():
                 # Hybrid KV cache manager is not supported on non-GPU platforms.
                 self.scheduler_config.disable_hybrid_kv_cache_manager = True
+            if self.kv_transfer_config is not None:
+                # NOTE(Kuntai): turn HMA off for connector for now.
+                # TODO(Kuntai): have a more elegant solution to check and
+                # turn off HMA for connectors that do not support HMA.
+                logger.warning(
+                    "Turning off hybrid kv cache manager because "
+                    "`--kv-transfer-config` is set. This will reduce the "
+                    "performance of vLLM on LLMs with sliding window attention "
+                    "or Mamba attention. If you are a developer of kv connector"
+                    ", please consider supporting hybrid kv cache manager for "
+                    "your connector by making sure your connector is a subclass"
+                    " of `SupportsHMA` defined in kv_connector/v1/base.py."
+                )
+                self.scheduler_config.disable_hybrid_kv_cache_manager = True
             if self.kv_events_config is not None:
                 # Hybrid KV cache manager is not compatible with KV events.
                 self.scheduler_config.disable_hybrid_kv_cache_manager = True