diff --git a/vllm/config.py b/vllm/config.py index d8318ab229c8..3fbb6015fe40 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -4497,13 +4497,13 @@ class VllmConfig: # warning message here and will log it later. if not (current_platform.is_cuda() or current_platform.is_rocm()): # Hybrid KV cache manager is not supported on non-GPU platforms. - self.disable_hybrid_kv_cache_manager = True + self.scheduler_config.disable_hybrid_kv_cache_manager = True if self.kv_transfer_config is not None: # Hybrid KV cache manager is not compatible with KV transfer. - self.disable_hybrid_kv_cache_manager = True + self.scheduler_config.disable_hybrid_kv_cache_manager = True if self.kv_events_config is not None: # Hybrid KV cache manager is not compatible with KV events. - self.disable_hybrid_kv_cache_manager = True + self.scheduler_config.disable_hybrid_kv_cache_manager = True def update_sizes_for_sequence_parallelism(self, possible_sizes: list) -> list: