From 6bb544be6dd4e15a0353e3384ea9676ac886319b Mon Sep 17 00:00:00 2001 From: Sage Ahrac Date: Mon, 27 Oct 2025 14:55:46 +0200 Subject: [PATCH 1/5] fix BlockStored kvevent with lora name instead of id Signed-off-by: Sage Ahrac --- examples/online_serving/kv_events_subscriber.py | 2 +- tests/v1/engine/test_engine_core_client.py | 2 +- tests/v1/kv_connector/unit/test_offloading_connector.py | 2 +- vllm/distributed/kv_events.py | 2 +- .../kv_transfer/kv_connector/v1/offloading_connector.py | 2 +- vllm/v1/core/block_pool.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/online_serving/kv_events_subscriber.py b/examples/online_serving/kv_events_subscriber.py index 19f6bd5726102..e98ce6c6c9008 100644 --- a/examples/online_serving/kv_events_subscriber.py +++ b/examples/online_serving/kv_events_subscriber.py @@ -28,7 +28,7 @@ class BlockStored(KVCacheEvent): parent_block_hash: ExternalBlockHash | None token_ids: list[int] block_size: int - lora_id: int | None + lora_name: str | None medium: str | None diff --git a/tests/v1/engine/test_engine_core_client.py b/tests/v1/engine/test_engine_core_client.py index 770560a5e549e..09cdc64aba3c1 100644 --- a/tests/v1/engine/test_engine_core_client.py +++ b/tests/v1/engine/test_engine_core_client.py @@ -635,7 +635,7 @@ def test_kv_cache_events( "Block size should be the same as the block size" ) assert event.parent_block_hash is None, "Parent block hash should be None" - assert event.lora_id is None, "Lora id should be None" + assert event.lora_name is None, "Lora name should be None" assert len(event.token_ids) == num_blocks * block_size, ( "Token ids should be the same as the custom tokens" ) diff --git a/tests/v1/kv_connector/unit/test_offloading_connector.py b/tests/v1/kv_connector/unit/test_offloading_connector.py index 69565f584ab89..86d05172e159b 100644 --- a/tests/v1/kv_connector/unit/test_offloading_connector.py +++ b/tests/v1/kv_connector/unit/test_offloading_connector.py @@ -527,7 +527,7 @@ def test_offloading_connector(request_runner): assert event.medium == "A" assert event.token_ids == [] assert event.parent_block_hash is None - assert event.lora_id is None + assert event.lora_name is None event = events[1] assert isinstance(event, BlockRemoved) assert event.block_hashes == to_hashes([4, 5, 6]) diff --git a/vllm/distributed/kv_events.py b/vllm/distributed/kv_events.py index 7b5cb94cf13ea..0795989c11d0e 100644 --- a/vllm/distributed/kv_events.py +++ b/vllm/distributed/kv_events.py @@ -51,7 +51,7 @@ class BlockStored(KVCacheEvent): parent_block_hash: ExternalBlockHash | None token_ids: list[int] block_size: int - lora_id: int | None + lora_name: str | None medium: str | None diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py index 0ad9d4ae1b39f..2243073d61579 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py @@ -402,7 +402,7 @@ class OffloadingConnectorScheduler: block_hashes=event.block_hashes, parent_block_hash=None, token_ids=[], - lora_id=None, + lora_name=None, block_size=event.block_size, medium=event.medium, ) diff --git a/vllm/v1/core/block_pool.py b/vllm/v1/core/block_pool.py index 8b0e8fd3a2410..ece9e8dfb2744 100644 --- a/vllm/v1/core/block_pool.py +++ b/vllm/v1/core/block_pool.py @@ -279,7 +279,7 @@ class BlockPool: num_cached_blocks * block_size : num_full_blocks * block_size ], block_size=block_size, - lora_id=request.lora_request.adapter_id + lora_name=request.lora_request.name if request.lora_request else None, medium=MEDIUM_GPU, From a86ef76c155330060c79f7333688c848bd989427 Mon Sep 17 00:00:00 2001 From: Sage Ahrac Date: Tue, 16 Dec 2025 20:33:55 +0200 Subject: [PATCH 2/5] add lora name as non breaking change Signed-off-by: Sage Ahrac --- examples/online_serving/kv_events_subscriber.py | 3 ++- tests/v1/engine/test_engine_core_client.py | 1 + tests/v1/kv_connector/unit/test_offloading_connector.py | 1 + vllm/distributed/kv_events.py | 3 ++- .../kv_transfer/kv_connector/v1/offloading_connector.py | 3 ++- vllm/v1/core/block_pool.py | 5 ++++- 6 files changed, 12 insertions(+), 4 deletions(-) diff --git a/examples/online_serving/kv_events_subscriber.py b/examples/online_serving/kv_events_subscriber.py index e98ce6c6c9008..d30510634f47d 100644 --- a/examples/online_serving/kv_events_subscriber.py +++ b/examples/online_serving/kv_events_subscriber.py @@ -28,8 +28,9 @@ class BlockStored(KVCacheEvent): parent_block_hash: ExternalBlockHash | None token_ids: list[int] block_size: int - lora_name: str | None + lora_id: int | None medium: str | None + lora_name: str | None class BlockRemoved(KVCacheEvent): diff --git a/tests/v1/engine/test_engine_core_client.py b/tests/v1/engine/test_engine_core_client.py index 09cdc64aba3c1..adf71f60f473c 100644 --- a/tests/v1/engine/test_engine_core_client.py +++ b/tests/v1/engine/test_engine_core_client.py @@ -635,6 +635,7 @@ def test_kv_cache_events( "Block size should be the same as the block size" ) assert event.parent_block_hash is None, "Parent block hash should be None" + assert event.lora_id is None, "Lora id should be None" assert event.lora_name is None, "Lora name should be None" assert len(event.token_ids) == num_blocks * block_size, ( "Token ids should be the same as the custom tokens" diff --git a/tests/v1/kv_connector/unit/test_offloading_connector.py b/tests/v1/kv_connector/unit/test_offloading_connector.py index 86d05172e159b..e1ea0b2980565 100644 --- a/tests/v1/kv_connector/unit/test_offloading_connector.py +++ b/tests/v1/kv_connector/unit/test_offloading_connector.py @@ -527,6 +527,7 @@ def test_offloading_connector(request_runner): assert event.medium == "A" assert event.token_ids == [] assert event.parent_block_hash is None + assert event.lora_id is None assert event.lora_name is None event = events[1] assert isinstance(event, BlockRemoved) diff --git a/vllm/distributed/kv_events.py b/vllm/distributed/kv_events.py index 144f3ea2b028e..8137bdb758fe6 100644 --- a/vllm/distributed/kv_events.py +++ b/vllm/distributed/kv_events.py @@ -51,8 +51,9 @@ class BlockStored(KVCacheEvent): parent_block_hash: ExternalBlockHash | None token_ids: list[int] block_size: int - lora_name: str | None + lora_id: int | None medium: str | None + lora_name: str | None def __hash__(self) -> int: return hash( diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py index 2243073d61579..10aaa972f3280 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py @@ -402,9 +402,10 @@ class OffloadingConnectorScheduler: block_hashes=event.block_hashes, parent_block_hash=None, token_ids=[], - lora_name=None, + lora_id=None, block_size=event.block_size, medium=event.medium, + lora_name=None, ) diff --git a/vllm/v1/core/block_pool.py b/vllm/v1/core/block_pool.py index bcd872c2f29a2..33d0b795b3115 100644 --- a/vllm/v1/core/block_pool.py +++ b/vllm/v1/core/block_pool.py @@ -284,10 +284,13 @@ class BlockPool: num_cached_blocks * block_size : num_full_blocks * block_size ], block_size=block_size, - lora_name=request.lora_request.name + lora_id=request.lora_request.adapter_id if request.lora_request else None, medium=MEDIUM_GPU, + lora_name=request.lora_request.name + if request.lora_request + else None, ) ) From c90817f538e2dd00c89865746c3c9140a6cf70d4 Mon Sep 17 00:00:00 2001 From: Sage Ahrac Date: Wed, 17 Dec 2025 12:27:59 +0200 Subject: [PATCH 3/5] add a deprecated comment to the lora_id field Signed-off-by: Sage Ahrac --- examples/online_serving/kv_events_subscriber.py | 5 +++++ vllm/distributed/kv_events.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/examples/online_serving/kv_events_subscriber.py b/examples/online_serving/kv_events_subscriber.py index d30510634f47d..30c3986f2fa40 100644 --- a/examples/online_serving/kv_events_subscriber.py +++ b/examples/online_serving/kv_events_subscriber.py @@ -28,7 +28,12 @@ class BlockStored(KVCacheEvent): parent_block_hash: ExternalBlockHash | None token_ids: list[int] block_size: int + lora_id: int | None + """Deprecated: use `lora_name` for KV block key hash. + Retained for backward compatibility. + """ + medium: str | None lora_name: str | None diff --git a/vllm/distributed/kv_events.py b/vllm/distributed/kv_events.py index 8137bdb758fe6..123af17ef0912 100644 --- a/vllm/distributed/kv_events.py +++ b/vllm/distributed/kv_events.py @@ -51,7 +51,12 @@ class BlockStored(KVCacheEvent): parent_block_hash: ExternalBlockHash | None token_ids: list[int] block_size: int + lora_id: int | None + """Deprecated: use `lora_name` for KV block key hash. + Retained for backward compatibility. + """ + medium: str | None lora_name: str | None From f3ce19943276620fc8abf0d88318c669f86e89c5 Mon Sep 17 00:00:00 2001 From: Sage Ahrac Date: Tue, 23 Dec 2025 13:14:03 +0200 Subject: [PATCH 4/5] fix-lmcache-tests Signed-off-by: Sage Ahrac --- .../unit/test_lmcache_connector.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/v1/kv_connector/unit/test_lmcache_connector.py b/tests/v1/kv_connector/unit/test_lmcache_connector.py index 6a8cfc71a67a6..59cec2c81e921 100644 --- a/tests/v1/kv_connector/unit/test_lmcache_connector.py +++ b/tests/v1/kv_connector/unit/test_lmcache_connector.py @@ -223,6 +223,7 @@ class TestUpdateConnectorOutput: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) kv_events.add_events([event]) @@ -243,6 +244,7 @@ class TestUpdateConnectorOutput: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) existing_events.add_events([event1]) existing_events.add_events([event1]) # Simulate 2 workers reporting @@ -258,6 +260,7 @@ class TestUpdateConnectorOutput: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) new_events.add_events([event2]) @@ -288,6 +291,7 @@ class TestUpdateConnectorOutput: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) new_events.add_events([event]) @@ -309,6 +313,7 @@ class TestUpdateConnectorOutput: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) events1.add_events([event1]) output1 = KVConnectorOutput(kv_cache_events=events1) @@ -323,6 +328,7 @@ class TestUpdateConnectorOutput: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) events2.add_events([event2]) output2 = KVConnectorOutput(kv_cache_events=events2) @@ -337,6 +343,7 @@ class TestUpdateConnectorOutput: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) events3.add_events([event3]) output3 = KVConnectorOutput(kv_cache_events=events3) @@ -358,6 +365,7 @@ class TestUpdateConnectorOutput: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) events1.add_events([event1]) output1 = KVConnectorOutput(kv_cache_events=events1) @@ -397,6 +405,7 @@ class TestTakeEvents: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) event2 = BlockStored( block_hashes=["hash2"], @@ -405,6 +414,7 @@ class TestTakeEvents: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) kv_events.add_events([event1, event2]) mock_connector._kv_cache_events = kv_events @@ -431,6 +441,7 @@ class TestTakeEvents: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) uncommon_event = BlockStored( block_hashes=["hash_uncommon"], @@ -439,6 +450,7 @@ class TestTakeEvents: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) # All 3 workers report common_event @@ -469,6 +481,7 @@ class TestTakeEvents: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) kv_events1.add_events([event1]) mock_connector._kv_cache_events = kv_events1 @@ -491,6 +504,7 @@ class TestTakeEvents: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) kv_events2.add_events([event2]) mock_connector._kv_cache_events = kv_events2 @@ -510,6 +524,7 @@ class TestTakeEvents: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) event2 = BlockStored( block_hashes=["hash2"], @@ -518,6 +533,7 @@ class TestTakeEvents: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) # Worker 1 reports event1 @@ -667,6 +683,7 @@ class TestIntegrationScenarios: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) worker1_unique_event = BlockStored( @@ -676,6 +693,7 @@ class TestIntegrationScenarios: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) worker2_unique_event = BlockStored( @@ -685,6 +703,7 @@ class TestIntegrationScenarios: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) worker3_unique_event = BlockStored( @@ -694,6 +713,7 @@ class TestIntegrationScenarios: block_size=16, lora_id=None, medium="GPU", + lora_name=None, ) # Create events for each worker From e7d66083ce6f43379160478ed3e492ad0c94b41c Mon Sep 17 00:00:00 2001 From: Sage Ahrac Date: Tue, 23 Dec 2025 13:31:37 +0200 Subject: [PATCH 5/5] fix-lmcache-tests Signed-off-by: Sage Ahrac --- tests/v1/kv_connector/unit/test_lmcache_connector.py | 10 ++++++++++ .../kv_transfer/kv_connector/v1/lmcache_connector.py | 1 + 2 files changed, 11 insertions(+) diff --git a/tests/v1/kv_connector/unit/test_lmcache_connector.py b/tests/v1/kv_connector/unit/test_lmcache_connector.py index 59cec2c81e921..c3df2b68b1ff1 100644 --- a/tests/v1/kv_connector/unit/test_lmcache_connector.py +++ b/tests/v1/kv_connector/unit/test_lmcache_connector.py @@ -25,6 +25,7 @@ def mock_lmcache_engine_event(): lora_id, block_size, medium, + lora_name, ): self.block_hashes = block_hashes self.parent_block_hash = parent_block_hash @@ -32,6 +33,7 @@ def mock_lmcache_engine_event(): self.lora_id = lora_id self.block_size = block_size self.medium = medium + self.lora_name = lora_name return MockEvent( block_hashes=["hash1", "hash2"], @@ -40,6 +42,7 @@ def mock_lmcache_engine_event(): lora_id=None, block_size=16, medium="GPU", + lora_name=None, ) @@ -109,6 +112,7 @@ class TestGetKVConnectorKVCacheEvents: assert events[0].lora_id is None assert events[0].block_size == 16 assert events[0].medium == "GPU" + assert events[0].lora_name is None def test_converts_multiple_events(self, mock_connector): """Test conversion of multiple events from lmcache engine format.""" @@ -121,6 +125,7 @@ class TestGetKVConnectorKVCacheEvents: self.lora_id = None self.block_size = 16 self.medium = "GPU" + self.lora_name = None events = [MockEvent(i) for i in range(5)] mock_connector._lmcache_engine.get_kv_events.return_value = events @@ -150,6 +155,7 @@ class TestGetKVConnectorKVCacheEvents: self.lora_id = 42 self.block_size = 32 self.medium = "DISK" + self.lora_name = "lora_example" mock_connector._lmcache_engine.get_kv_events.return_value = [ MockEventWithLora() @@ -166,6 +172,7 @@ class TestGetKVConnectorKVCacheEvents: assert event.lora_id == 42 assert event.block_size == 32 assert event.medium == "DISK" + assert event.lora_name == "lora_example" def test_handles_none_parent_block_hash(self, mock_connector): """Test handling of events with None parent_block_hash.""" @@ -178,6 +185,7 @@ class TestGetKVConnectorKVCacheEvents: self.lora_id = None self.block_size = 16 self.medium = "GPU" + self.lora_name = None mock_connector._lmcache_engine.get_kv_events.return_value = [ MockEventNoParent() @@ -588,6 +596,7 @@ class TestIntegrationScenarios: self.lora_id = None self.block_size = 16 self.medium = "GPU" + self.lora_name = None # Worker 1 mock_connector._lmcache_engine.get_kv_events.return_value = [ @@ -644,6 +653,7 @@ class TestIntegrationScenarios: self.lora_id = None self.block_size = 16 self.medium = "GPU" + self.lora_name = None for cycle in range(3): # Get events diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py index 17d468fe6c305..6b1bdd4113626 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py @@ -218,6 +218,7 @@ class LMCacheConnectorV1(KVConnectorBase_V1): lora_id=e.lora_id, block_size=e.block_size, medium=e.medium, + lora_name=e.lora_name, ) for e in events ]