From 1742f0cdfba1107eb8ccbcc9b08ae447072d7b70 Mon Sep 17 00:00:00 2001 From: inkcherry Date: Mon, 1 Dec 2025 07:35:10 +0000 Subject: [PATCH] clean up code Signed-off-by: inkcherry --- .../kv_connector/v1/moriio_connector.py | 20 +++++-------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/moriio_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/moriio_connector.py index 1299faff78336..823ed56b23806 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/moriio_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/moriio_connector.py @@ -1351,8 +1351,6 @@ class MoRIIOConnectorWorker: logger.info("Initializing MoRIIO worker %s", engine_id) - logging.getLogger("aiter").disabled = True - # Config. self.vllm_config = vllm_config assert vllm_config.kv_transfer_config is not None, ( @@ -1507,12 +1505,9 @@ class MoRIIOConnectorWorker: self.block_size, use_mla=self.use_mla, ) + + #TODO: consider the integration of flashinfer or other backends. self.backend_name = backend.get_name() - attn_backend = AttentionBackendEnum[self.backend_name] - self._use_flashinfer = attn_backend == AttentionBackendEnum.FLASHINFER - self._use_pallas = attn_backend == AttentionBackendEnum.PALLAS - # attn_backend = backend_name_to_enum(self.backend_name) - # self._use_flashinfer = attn_backend == _Backend.FLASHINFER logger.debug("Detected attention backend %s", self.backend_name) def schedule_write_blocks( @@ -1854,13 +1849,8 @@ class MoRIIOConnectorWorker: self.slot_size_bytes = kv_elem_size * kv_latent_dim else: # [2 (k and v), num_blocks, ...] - if self._use_flashinfer: - # FlashInfer swaps 2<->num_blocks dimensions. - self.num_blocks = first_kv_cache.shape[0] - block_rank = 4 # [2, block_size, kv_heads, head_dim] - else: - self.num_blocks = first_kv_cache.shape[1] - block_rank = 3 # [block_size, kv_heads, head_dim] + self.num_blocks = first_kv_cache.shape[1] + block_rank = 3 # [block_size, kv_heads, head_dim] block_shape = first_kv_cache.shape[-block_rank:] block_size, n_kv_heads, head_dim = block_shape[-3:] # head size in bytes. @@ -1884,7 +1874,7 @@ class MoRIIOConnectorWorker: for cache_or_caches in kv_caches.values(): cache_list = ( [cache_or_caches] - if use_mla or self._use_flashinfer + if use_mla else cache_or_caches ) for cache in cache_list: