mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-14 20:35:40 +08:00
[Stability fix] turn off HMA allocator when connector is set (#27592)
Signed-off-by: KuntaiDu <kuntai@uchicago.edu> Signed-off-by: Kuntai Du <kuntai@uchicago.edu>
This commit is contained in:
parent
a8d2e326ec
commit
255e34ca50
@ -597,6 +597,20 @@ class VllmConfig:
|
|||||||
if not current_platform.support_hybrid_kv_cache():
|
if not current_platform.support_hybrid_kv_cache():
|
||||||
# Hybrid KV cache manager is not supported on non-GPU platforms.
|
# Hybrid KV cache manager is not supported on non-GPU platforms.
|
||||||
self.scheduler_config.disable_hybrid_kv_cache_manager = True
|
self.scheduler_config.disable_hybrid_kv_cache_manager = True
|
||||||
|
if self.kv_transfer_config is not None:
|
||||||
|
# NOTE(Kuntai): turn HMA off for connector for now.
|
||||||
|
# TODO(Kuntai): have a more elegant solution to check and
|
||||||
|
# turn off HMA for connector that does not support HMA.
|
||||||
|
logger.warning(
|
||||||
|
"Turning off hybrid kv cache manager because "
|
||||||
|
"`--kv-transfer-config` is set. This will reduce the "
|
||||||
|
"performance of vLLM on LLMs with sliding window attention "
|
||||||
|
"or Mamba attention. If you are a developer of kv connector"
|
||||||
|
", please consider supporting hybrid kv cache manager for "
|
||||||
|
"your connector by making sure your connector is a subclass"
|
||||||
|
" of `SupportsHMA` defined in kv_connector/v1/base.py."
|
||||||
|
)
|
||||||
|
self.scheduler_config.disable_hybrid_kv_cache_manager = True
|
||||||
if self.kv_events_config is not None:
|
if self.kv_events_config is not None:
|
||||||
# Hybrid KV cache manager is not compatible with KV events.
|
# Hybrid KV cache manager is not compatible with KV events.
|
||||||
self.scheduler_config.disable_hybrid_kv_cache_manager = True
|
self.scheduler_config.disable_hybrid_kv_cache_manager = True
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user