[Bugfix] Fix 2 precommit issues - (mamba_block_size, kv_cache_config) (#27811)

Signed-off-by: Tyler Michael Smith <tlrmchlsmth@gmail.com>
Signed-off-by: Tyler Michael Smith <tysmith@redhat.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
commit ab98f6556f (parent 2918c1b49c)
Tyler Michael Smith authored on 2025-10-30 14:52:18 -04:00; committed by GitHub
2 changed files with 10 additions and 6 deletions

Changed file 1 of 2:

@@ -406,7 +406,7 @@ class HybridAttentionMambaModelConfig(VerifyAndUpdateConfig):
         # easily by changing the way we layout chunks in the
         # mamba2 kernels.
-        base_chunk_size = model_config.get_mamba_chunk_size()
+        base_chunk_size = mamba_block_size or model_config.get_mamba_chunk_size()
         attn_tokens_per_mamba_state = cdiv(mamba_page_size, attn_page_size_1_token)
         chunk_size = lcm(base_chunk_size, kernel_block_alignment_size)
         attn_block_size = chunk_size * cdiv(attn_tokens_per_mamba_state, chunk_size)

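For context on the arithmetic this hunk feeds into: the changed line only affects which chunk size gets rounded against the mamba state size. Below is a minimal sketch of that computation; the page sizes and chunk sizes are made-up illustrative values, not taken from the diff, and `cdiv` here is a plain ceiling-division stand-in for the helper used in the code.

```python
from math import lcm


def cdiv(a: int, b: int) -> int:
    # Ceiling division; stands in for the cdiv helper used in the diff.
    return -(-a // b)


# Illustrative sizes only -- not values from vLLM.
mamba_page_size = 512 * 1024          # bytes for one mamba state page
attn_page_size_1_token = 4 * 1024     # bytes of attention KV cache per token
kernel_block_alignment_size = 16
mamba_block_size = None               # unset -> fall back to the model chunk size
model_chunk_size = 256                # stand-in for model_config.get_mamba_chunk_size()

# Mirrors the hunk above; the only behavioral change is the `or` fallback.
base_chunk_size = mamba_block_size or model_chunk_size                        # 256
attn_tokens_per_mamba_state = cdiv(mamba_page_size, attn_page_size_1_token)   # 128
chunk_size = lcm(base_chunk_size, kernel_block_alignment_size)                # 256
attn_block_size = chunk_size * cdiv(attn_tokens_per_mamba_state, chunk_size)  # 256
```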
Changed file 2 of 2:

@@ -13,7 +13,7 @@ from vllm.distributed.kv_transfer.kv_connector.factory import KVConnectorFactory
 from vllm.distributed.kv_transfer.kv_connector.v1 import (
     KVConnectorBase_V1,
     KVConnectorRole,
-    supports_hma,
+    SupportsHMA,
 )
 from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorStats
 from vllm.logger import init_logger
@@ -93,7 +93,11 @@ class Scheduler(SchedulerInterface):
             )
             connector_vllm_config = copy.copy(self.vllm_config)
-            connector_vllm_config.kv_cache_config = copy.copy(kv_cache_config)
+            # We're dynamically inserting a kv_cache_config variable into the
+            # connector_vllm_config. This is distinct from the cache_config
+            # that is already in there.
+            connector_vllm_config.kv_cache_config = copy.copy(kv_cache_config)  # type: ignore[attr-defined]
             self.connector = KVConnectorFactory.create_connector(
                 config=connector_vllm_config, role=KVConnectorRole.SCHEDULER
             )
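The `# type: ignore[attr-defined]` and the new comment exist because `kv_cache_config` is not a declared field of the config object; it is attached dynamically for the connector. Here is a minimal reproduction of the mypy complaint, using a hypothetical `FakeVllmConfig` rather than vLLM's real `VllmConfig`:

```python
import copy
from dataclasses import dataclass, field


@dataclass
class FakeVllmConfig:
    # Only this field is declared, analogous to cache_config on the real config.
    cache_config: dict = field(default_factory=dict)


connector_config = copy.copy(FakeVllmConfig())

# Without the ignore, mypy reports:
#   "FakeVllmConfig" has no attribute "kv_cache_config"  [attr-defined]
# The assignment still works at runtime; the narrow ignore code keeps every
# other check on this line active.
connector_config.kv_cache_config = {"num_blocks": 1}  # type: ignore[attr-defined]
```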
@@ -1327,15 +1331,15 @@ class Scheduler(SchedulerInterface):
         block_ids = self.kv_cache_manager.get_block_ids(request.request_id)

-        if not supports_hma(self.connector):
+        if not isinstance(self.connector, SupportsHMA):
             # NOTE(Kuntai): We should deprecate this code path after we enforce
             # all connectors to support HMA.
             # Hybrid memory allocator should be already turned off for this
             # code path, but let's double-check here.
             assert len(self.kv_cache_config.kv_cache_groups) == 1
             return self.connector.request_finished(request, block_ids[0])
-        return self.connector.request_finished(request, block_ids)
+        else:
+            return self.connector.request_finished_all_groups(request, block_ids)

     def _update_waiting_for_remote_kv(self, request: Request) -> bool:
         """