mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-16 09:57:09 +08:00
kv_transfer: Rename the shared storage connectors (#30201)
Signed-off-by: Or Ozeri <oro@il.ibm.com>
This commit is contained in:
parent
03b91f7262
commit
4c6fd25880
@ -47,6 +47,6 @@ docker run \
|
||||
pytest -v -s v1/worker --ignore=v1/worker/test_gpu_model_runner.py
|
||||
pytest -v -s v1/structured_output
|
||||
pytest -v -s v1/spec_decode --ignore=v1/spec_decode/test_max_len.py --ignore=v1/spec_decode/test_tree_attention.py --ignore=v1/spec_decode/test_speculators_eagle3.py
|
||||
pytest -v -s v1/kv_connector/unit --ignore=v1/kv_connector/unit/test_multi_connector.py --ignore=v1/kv_connector/unit/test_nixl_connector.py --ignore=v1/kv_connector/unit/test_shared_storage_connector.py --ignore=v1/kv_connector/unit/test_lmcache_integration.py
|
||||
pytest -v -s v1/kv_connector/unit --ignore=v1/kv_connector/unit/test_multi_connector.py --ignore=v1/kv_connector/unit/test_nixl_connector.py --ignore=v1/kv_connector/unit/test_example_connector.py --ignore=v1/kv_connector/unit/test_lmcache_integration.py
|
||||
pytest -v -s v1/test_serial_utils.py
|
||||
'
|
||||
|
||||
@ -32,14 +32,14 @@ Design doc: <https://docs.google.com/document/d/1aed8KtC6XkXtdoV87pWT0a8OJlZ-Cpn
|
||||
|
||||
## 2 Usage Example
|
||||
|
||||
The current reference pathway is **SharedStorageConnector**.
|
||||
The current reference pathway is **ExampleConnector**.
|
||||
Below ready-to-run scripts shows the workflow:
|
||||
|
||||
1 Encoder instance + 1 PD instance:
|
||||
`examples/online_serving/disaggregated_encoder/shared_storage_connector/disagg_encoder_example.sh`
|
||||
`examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh`
|
||||
|
||||
1 Encoder instance + 1 Prefill instance + 1 Decode instance:
|
||||
`examples/online_serving/disaggregated_encoder/shared_storage_connector/disagg_epd_example.sh`
|
||||
`examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh`
|
||||
|
||||
---
|
||||
|
||||
|
||||
@ -21,14 +21,14 @@ Please refer to [examples/online_serving/disaggregated_prefill.sh](../../example
|
||||
|
||||
Now supports 5 types of connectors:
|
||||
|
||||
- **SharedStorageConnector**: refer to [examples/offline_inference/disaggregated-prefill-v1/run.sh](../../examples/offline_inference/disaggregated-prefill-v1/run.sh) for the example usage of SharedStorageConnector disaggregated prefilling.
|
||||
- **ExampleConnector**: refer to [examples/offline_inference/disaggregated-prefill-v1/run.sh](../../examples/offline_inference/disaggregated-prefill-v1/run.sh) for the example usage of ExampleConnector disaggregated prefilling.
|
||||
- **LMCacheConnectorV1**: refer to [examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh](../../examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh) for the example usage of LMCacheConnectorV1 disaggregated prefilling which uses NIXL as the underlying KV transmission.
|
||||
- **NixlConnector**: refer to [tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh](../../tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh) for the example usage of NixlConnector disaggregated prefilling which support fully async send/recv. For detailed usage guide, see [NixlConnector Usage Guide](nixl_connector_usage.md).
|
||||
- **P2pNcclConnector**: refer to [examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh](../../examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh) for the example usage of P2pNcclConnector disaggregated prefilling.
|
||||
- **MultiConnector**: take advantage of the kv_connector_extra_config: dict[str, Any] already present in KVTransferConfig to stash all the connectors we want in an ordered list of kwargs.such as:
|
||||
|
||||
```bash
|
||||
--kv-transfer-config '{"kv_connector":"MultiConnector","kv_role":"kv_both","kv_connector_extra_config":{"connectors":[{"kv_connector":"NixlConnector","kv_role":"kv_both"},{"kv_connector":"SharedStorageConnector","kv_role":"kv_both","kv_connector_extra_config":{"shared_storage_path":"local_storage"}}]}}'
|
||||
--kv-transfer-config '{"kv_connector":"MultiConnector","kv_role":"kv_both","kv_connector_extra_config":{"connectors":[{"kv_connector":"NixlConnector","kv_role":"kv_both"},{"kv_connector":"ExampleConnector","kv_role":"kv_both","kv_connector_extra_config":{"shared_storage_path":"local_storage"}}]}}'
|
||||
```
|
||||
|
||||
For NixlConnector, you may also specify one or multiple NIXL_Backend. Such as:
|
||||
|
||||
@ -30,7 +30,7 @@ def main():
|
||||
max_num_batched_tokens=64,
|
||||
max_num_seqs=16,
|
||||
kv_transfer_config=KVTransferConfig(
|
||||
kv_connector="SharedStorageConnector",
|
||||
kv_connector="ExampleConnector",
|
||||
kv_role="kv_both",
|
||||
kv_connector_extra_config={"shared_storage_path": "local_storage"},
|
||||
),
|
||||
|
||||
@ -26,7 +26,7 @@ def main():
|
||||
enforce_eager=True,
|
||||
gpu_memory_utilization=0.8,
|
||||
kv_transfer_config=KVTransferConfig(
|
||||
kv_connector="SharedStorageConnector",
|
||||
kv_connector="ExampleConnector",
|
||||
kv_role="kv_both",
|
||||
kv_connector_extra_config={"shared_storage_path": "local_storage"},
|
||||
),
|
||||
|
||||
@ -10,7 +10,7 @@ It demonstrates vLLM's ability to recover from KV load failures in both synchron
|
||||
- `decode_example.py` – performs the decode stage. Accepts:
|
||||
- `--simulate-failure`: simulates KV load failure using a custom connector.
|
||||
- `--async-load`: enables asynchronous KV loading mode.
|
||||
- `rogue_shared_storage_connector.py` – defines `RogueSharedStorageConnector`, a subclass of `SharedStorageConnector`, that simulates missing or corrupted external KV blocks by failing to load blocks for the first decode request.
|
||||
- `load_recovery_example_connector.py` – defines `LoadRecoveryExampleConnector`, a subclass of `ExampleConnector`, that simulates missing or corrupted external KV blocks by failing to load blocks for the first decode request.
|
||||
- `run.sh` – orchestrates the test: runs the prefill stage, then three decode stages:
|
||||
1. Normal decode (baseline).
|
||||
2. Decode with simulated sync KV load failure.
|
||||
@ -20,7 +20,7 @@ It demonstrates vLLM's ability to recover from KV load failures in both synchron
|
||||
|
||||
## How It Works
|
||||
|
||||
- The test dynamically loads `RogueSharedStorageConnector` via `KVTransferConfig.kv_connector_module_path`, enabling controlled simulation of load failures without modifying the original connector.
|
||||
- The test dynamically loads `LoadRecoveryExampleConnector` via `KVTransferConfig.kv_connector_module_path`, enabling controlled simulation of load failures without modifying the original connector.
|
||||
- The decode stages that simulate failure are expected to trigger recovery logic in vLLM, resulting in the same output as the baseline decode.
|
||||
- If recovery fails, the script prints a unified diff of the output mismatch and exits with error.
|
||||
|
||||
|
||||
@ -35,13 +35,13 @@ def main():
|
||||
|
||||
if args.simulate_failure:
|
||||
ktc = KVTransferConfig(
|
||||
kv_connector="RogueSharedStorageConnector",
|
||||
kv_connector="LoadRecoveryExampleConnector",
|
||||
kv_role="kv_both",
|
||||
kv_connector_extra_config={
|
||||
"shared_storage_path": "local_storage",
|
||||
"async_load": args.async_load,
|
||||
},
|
||||
kv_connector_module_path="rogue_shared_storage_connector",
|
||||
kv_connector_module_path="load_recovery_example_connector",
|
||||
)
|
||||
out_file = (
|
||||
"async_decode_recovered_output.txt"
|
||||
@ -50,7 +50,7 @@ def main():
|
||||
)
|
||||
else:
|
||||
ktc = KVTransferConfig(
|
||||
kv_connector="SharedStorageConnector",
|
||||
kv_connector="ExampleConnector",
|
||||
kv_role="kv_both",
|
||||
kv_connector_extra_config={
|
||||
"shared_storage_path": "local_storage",
|
||||
|
||||
@ -10,9 +10,9 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import (
|
||||
KVConnectorMetadata,
|
||||
KVConnectorRole,
|
||||
)
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector import (
|
||||
SharedStorageConnector,
|
||||
SharedStorageConnectorMetadata,
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1.example_connector import (
|
||||
ExampleConnector,
|
||||
ExampleConnectorMetadata,
|
||||
)
|
||||
from vllm.forward_context import ForwardContext
|
||||
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
|
||||
@ -26,15 +26,15 @@ logging.basicConfig(level=logging.INFO)
|
||||
|
||||
|
||||
@dataclass
|
||||
class RogueSharedStorageConnectorMetadata(SharedStorageConnectorMetadata):
|
||||
class LoadRecoveryExampleConnectorMetadata(ExampleConnectorMetadata):
|
||||
req_to_block_ids: dict[str, set[int]] = field(default_factory=dict)
|
||||
|
||||
@classmethod
|
||||
def from_base(cls, base: SharedStorageConnectorMetadata):
|
||||
def from_base(cls, base: ExampleConnectorMetadata):
|
||||
return cls(requests=base.requests)
|
||||
|
||||
|
||||
class RogueSharedStorageConnector(SharedStorageConnector):
|
||||
class LoadRecoveryExampleConnector(ExampleConnector):
|
||||
def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
|
||||
super().__init__(vllm_config=vllm_config, role=role)
|
||||
self._async_load = vllm_config.kv_transfer_config.get_from_extra_config(
|
||||
@ -45,7 +45,7 @@ class RogueSharedStorageConnector(SharedStorageConnector):
|
||||
self._req_to_block_ids: dict[str, list[int]] = dict()
|
||||
|
||||
def bind_connector_metadata(self, connector_metadata: KVConnectorMetadata) -> None:
|
||||
assert isinstance(connector_metadata, RogueSharedStorageConnectorMetadata)
|
||||
assert isinstance(connector_metadata, LoadRecoveryExampleConnectorMetadata)
|
||||
index, failed_request = next(
|
||||
(
|
||||
(i, x)
|
||||
@ -84,7 +84,7 @@ class RogueSharedStorageConnector(SharedStorageConnector):
|
||||
) -> tuple[set[str] | None, set[str] | None]:
|
||||
if self._async_load:
|
||||
meta = self._get_connector_metadata()
|
||||
assert isinstance(meta, RogueSharedStorageConnectorMetadata)
|
||||
assert isinstance(meta, LoadRecoveryExampleConnectorMetadata)
|
||||
if meta.req_to_block_ids:
|
||||
return None, set(meta.req_to_block_ids)
|
||||
|
||||
@ -126,9 +126,9 @@ class RogueSharedStorageConnector(SharedStorageConnector):
|
||||
) -> KVConnectorMetadata:
|
||||
if not self._async_load:
|
||||
base = super().build_connector_meta(scheduler_output)
|
||||
meta = RogueSharedStorageConnectorMetadata.from_base(base)
|
||||
meta = LoadRecoveryExampleConnectorMetadata.from_base(base)
|
||||
else:
|
||||
meta = RogueSharedStorageConnectorMetadata()
|
||||
meta = LoadRecoveryExampleConnectorMetadata()
|
||||
if self._requests_need_load:
|
||||
for req_id, request in self._requests_need_load.items():
|
||||
meta.add_request(
|
||||
@ -26,7 +26,7 @@ def main():
|
||||
enforce_eager=True,
|
||||
gpu_memory_utilization=0.8,
|
||||
kv_transfer_config=KVTransferConfig(
|
||||
kv_connector="SharedStorageConnector",
|
||||
kv_connector="ExampleConnector",
|
||||
kv_role="kv_both",
|
||||
kv_connector_extra_config={"shared_storage_path": "local_storage"},
|
||||
),
|
||||
|
||||
@ -50,12 +50,12 @@ The vllm instances and `disagg_encoder_proxy` supports local URIs with ```{"url"
|
||||
|
||||
## EC connector and KV transfer
|
||||
|
||||
The `ECSharedStorageConnector` is used to store the encoder cache on local disk and facilitate transfer. To enable the encoder disaggregation feature, add the following configuration:
|
||||
The `ECExampleonnector` is used to store the encoder cache on local disk and facilitate transfer. To enable the encoder disaggregation feature, add the following configuration:
|
||||
|
||||
```bash
|
||||
# Add to encoder instance:
|
||||
--ec-transfer-config '{
|
||||
"ec_connector": "ECSharedStorageConnector",
|
||||
"ec_connector": "ECExampleConnector",
|
||||
"ec_role": "ec_producer",
|
||||
"ec_connector_extra_config": {
|
||||
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
|
||||
@ -64,7 +64,7 @@ The `ECSharedStorageConnector` is used to store the encoder cache on local disk
|
||||
|
||||
# Add to prefill/prefill+decode instance:
|
||||
--ec-transfer-config '{
|
||||
"ec_connector": "ECSharedStorageConnector",
|
||||
"ec_connector": "ECExampleConnector",
|
||||
"ec_role": "ec_consumer",
|
||||
"ec_connector_extra_config": {
|
||||
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
|
||||
|
||||
@ -102,7 +102,7 @@ CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \
|
||||
--max-num-seqs 128 \
|
||||
--allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \
|
||||
--ec-transfer-config '{
|
||||
"ec_connector": "ECSharedStorageConnector",
|
||||
"ec_connector": "ECExampleConnector",
|
||||
"ec_role": "ec_producer",
|
||||
"ec_connector_extra_config": {
|
||||
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
|
||||
@ -126,7 +126,7 @@ vllm serve "$MODEL" \
|
||||
--max-num-seqs 128 \
|
||||
--allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \
|
||||
--ec-transfer-config '{
|
||||
"ec_connector": "ECSharedStorageConnector",
|
||||
"ec_connector": "ECExampleConnector",
|
||||
"ec_role": "ec_consumer",
|
||||
"ec_connector_extra_config": {
|
||||
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
|
||||
|
||||
@ -96,7 +96,7 @@ CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \
|
||||
--max-num-seqs 128 \
|
||||
--allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \
|
||||
--ec-transfer-config '{
|
||||
"ec_connector": "ECSharedStorageConnector",
|
||||
"ec_connector": "ECExampleConnector",
|
||||
"ec_role": "ec_producer",
|
||||
"ec_connector_extra_config": {
|
||||
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
|
||||
@ -117,7 +117,7 @@ CUDA_VISIBLE_DEVICES="$GPU_PD" vllm serve "$MODEL" \
|
||||
--max-num-seqs 128 \
|
||||
--allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \
|
||||
--ec-transfer-config '{
|
||||
"ec_connector": "ECSharedStorageConnector",
|
||||
"ec_connector": "ECExampleConnector",
|
||||
"ec_role": "ec_consumer",
|
||||
"ec_connector_extra_config": {
|
||||
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
|
||||
|
||||
@ -61,7 +61,7 @@ def test_get_kv_connector_cache_layout_with_multi_connector():
|
||||
kv_role="kv_both",
|
||||
kv_connector_extra_config={
|
||||
"connectors": [
|
||||
{"kv_connector": "SharedStorageConnector", "kv_role": "kv_both"},
|
||||
{"kv_connector": "ExampleConnector", "kv_role": "kv_both"},
|
||||
{"kv_connector": "NixlConnector", "kv_role": "kv_both"},
|
||||
]
|
||||
},
|
||||
|
||||
@ -1536,7 +1536,7 @@ def create_scheduler_with_priority(
|
||||
)
|
||||
kv_transfer_config = (
|
||||
KVTransferConfig(
|
||||
kv_connector="SharedStorageConnector",
|
||||
kv_connector="ExampleConnector",
|
||||
kv_role="kv_both",
|
||||
kv_connector_extra_config={"shared_storage_path": "local_storage"},
|
||||
)
|
||||
@ -1552,7 +1552,7 @@ def create_scheduler_with_priority(
|
||||
|
||||
ec_transfer_config = (
|
||||
ECTransferConfig(
|
||||
ec_connector="ECSharedStorageConnector",
|
||||
ec_connector="ECExampleConnector",
|
||||
ec_role=ec_role,
|
||||
ec_connector_extra_config={"shared_storage_path": "/tmp/ec_test"},
|
||||
)
|
||||
@ -2413,7 +2413,7 @@ def _assert_right_ec_connector_metadata(
|
||||
metadata_dict = {mm_data.mm_hash: mm_data for mm_data in metadata.mm_datas}
|
||||
|
||||
# Check all required identifiers exist in metadata; and no extra
|
||||
# In ECSharedStorageConnector format
|
||||
# In ECExampleConnector format
|
||||
# NOTE: even having same identifier, the mm_features can be different
|
||||
# since their mm_position can be in different offsets, etc
|
||||
identifiers_dict = {f.identifier for f in mm_features_list}
|
||||
|
||||
@ -108,7 +108,7 @@ def create_scheduler(
|
||||
)
|
||||
elif use_kv_connector:
|
||||
kv_transfer_config = KVTransferConfig(
|
||||
kv_connector="SharedStorageConnector",
|
||||
kv_connector="ExampleConnector",
|
||||
kv_role="kv_both",
|
||||
kv_connector_extra_config={"shared_storage_path": "local_storage"},
|
||||
)
|
||||
@ -121,7 +121,7 @@ def create_scheduler(
|
||||
|
||||
ec_transfer_config = (
|
||||
ECTransferConfig(
|
||||
ec_connector="ECSharedStorageConnector",
|
||||
ec_connector="ECExampleConnector",
|
||||
ec_role=ec_role,
|
||||
ec_connector_extra_config={"shared_storage_path": "/tmp/ec_test"},
|
||||
)
|
||||
|
||||
@ -148,7 +148,7 @@ run_epd_1e_1pd() {
|
||||
--max-num-seqs 128 \
|
||||
--allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \
|
||||
--ec-transfer-config '{
|
||||
"ec_connector": "ECSharedStorageConnector",
|
||||
"ec_connector": "ECExampleConnector",
|
||||
"ec_role": "ec_producer",
|
||||
"ec_connector_extra_config": {
|
||||
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
|
||||
@ -167,7 +167,7 @@ run_epd_1e_1pd() {
|
||||
--max-num-seqs 128 \
|
||||
--allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \
|
||||
--ec-transfer-config '{
|
||||
"ec_connector": "ECSharedStorageConnector",
|
||||
"ec_connector": "ECExampleConnector",
|
||||
"ec_role": "ec_consumer",
|
||||
"ec_connector_extra_config": {
|
||||
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
|
||||
@ -348,7 +348,7 @@ run_epd_1e_1p_1d() {
|
||||
--max-num-seqs 128 \
|
||||
--allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \
|
||||
--ec-transfer-config '{
|
||||
"ec_connector": "ECSharedStorageConnector",
|
||||
"ec_connector": "ECExampleConnector",
|
||||
"ec_role": "ec_producer",
|
||||
"ec_connector_extra_config": {
|
||||
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
|
||||
@ -369,7 +369,7 @@ run_epd_1e_1p_1d() {
|
||||
--max-num-seqs 128 \
|
||||
--allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \
|
||||
--ec-transfer-config '{
|
||||
"ec_connector": "ECSharedStorageConnector",
|
||||
"ec_connector": "ECExampleConnector",
|
||||
"ec_role": "ec_consumer",
|
||||
"ec_connector_extra_config": {
|
||||
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
"""
|
||||
Unit tests for ECSharedStorageConnector.
|
||||
Unit tests for ECExampleConnector.
|
||||
"""
|
||||
|
||||
import os
|
||||
@ -13,9 +13,9 @@ import torch
|
||||
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.distributed.ec_transfer.ec_connector.base import ECConnectorRole
|
||||
from vllm.distributed.ec_transfer.ec_connector.shared_storage_connector import (
|
||||
ECSharedStorageConnector,
|
||||
ECSharedStorageConnectorMetadata,
|
||||
from vllm.distributed.ec_transfer.ec_connector.example_connector import (
|
||||
ECExampleConnector,
|
||||
ECExampleConnectorMetadata,
|
||||
MMMeta,
|
||||
)
|
||||
from vllm.multimodal.inputs import MultiModalFeatureSpec, PlaceholderRange
|
||||
@ -81,12 +81,12 @@ def mock_request_with_3_mm():
|
||||
|
||||
|
||||
# ------------------ Unit Tests ------------------ #
|
||||
class TestECSharedStorageConnectorBasics:
|
||||
class TestECExampleConnectorBasics:
|
||||
"""Test basic EC connector functionality."""
|
||||
|
||||
def test_initialization_producer(self, mock_vllm_config_producer, temp_storage):
|
||||
"""Test connector initializes correctly as producer."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_producer,
|
||||
role=ECConnectorRole.SCHEDULER,
|
||||
)
|
||||
@ -98,7 +98,7 @@ class TestECSharedStorageConnectorBasics:
|
||||
|
||||
def test_initialization_consumer(self, mock_vllm_config_consumer, temp_storage):
|
||||
"""Test connector initializes correctly as consumer."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_consumer,
|
||||
role=ECConnectorRole.WORKER,
|
||||
)
|
||||
@ -109,11 +109,11 @@ class TestECSharedStorageConnectorBasics:
|
||||
|
||||
def test_role_assignment(self, mock_vllm_config_producer):
|
||||
"""Test role is correctly assigned."""
|
||||
scheduler_connector = ECSharedStorageConnector(
|
||||
scheduler_connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_producer,
|
||||
role=ECConnectorRole.SCHEDULER,
|
||||
)
|
||||
worker_connector = ECSharedStorageConnector(
|
||||
worker_connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_producer,
|
||||
role=ECConnectorRole.WORKER,
|
||||
)
|
||||
@ -133,7 +133,7 @@ class TestCacheExistence:
|
||||
):
|
||||
"""Test has_caches returns True when all 3 caches exist."""
|
||||
# Test for producer first
|
||||
producer = ECSharedStorageConnector(
|
||||
producer = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_producer,
|
||||
role=ECConnectorRole.SCHEDULER,
|
||||
)
|
||||
@ -154,7 +154,7 @@ class TestCacheExistence:
|
||||
assert all(producer_result), f"Expected all True, got {producer_result}"
|
||||
|
||||
# Also test consumer can check if cache exists
|
||||
consumer = ECSharedStorageConnector(
|
||||
consumer = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_consumer,
|
||||
role=ECConnectorRole.SCHEDULER,
|
||||
)
|
||||
@ -170,7 +170,7 @@ class TestCacheExistence:
|
||||
self, mock_vllm_config_producer, mock_request_with_3_mm
|
||||
):
|
||||
"""Test has_caches returns False when no caches exist."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_producer,
|
||||
role=ECConnectorRole.SCHEDULER,
|
||||
)
|
||||
@ -186,7 +186,7 @@ class TestCacheExistence:
|
||||
self, mock_vllm_config_producer, mock_request_with_3_mm
|
||||
):
|
||||
"""Test has_caches with some caches existing (1 of 3)."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_producer,
|
||||
role=ECConnectorRole.SCHEDULER,
|
||||
)
|
||||
@ -213,7 +213,7 @@ class TestStateManagement:
|
||||
self, mock_vllm_config_producer, mock_request_with_3_mm
|
||||
):
|
||||
"""Test state update after allocation for 3 MM items."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_producer,
|
||||
role=ECConnectorRole.SCHEDULER,
|
||||
)
|
||||
@ -238,7 +238,7 @@ class TestStateManagement:
|
||||
self, mock_vllm_config_producer, mock_request_with_3_mm
|
||||
):
|
||||
"""Test metadata building for 3 MM items."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_producer,
|
||||
role=ECConnectorRole.SCHEDULER,
|
||||
)
|
||||
@ -252,7 +252,7 @@ class TestStateManagement:
|
||||
metadata = connector.build_connector_meta(scheduler_output)
|
||||
|
||||
# Assert
|
||||
assert isinstance(metadata, ECSharedStorageConnectorMetadata)
|
||||
assert isinstance(metadata, ECExampleConnectorMetadata)
|
||||
assert len(metadata.mm_datas) == 3
|
||||
assert metadata.mm_datas[0].mm_hash == "img_hash_1"
|
||||
assert metadata.mm_datas[0].num_token == 100
|
||||
@ -266,7 +266,7 @@ class TestStateManagement:
|
||||
|
||||
def test_build_connector_meta_empty(self, mock_vllm_config_producer):
|
||||
"""Test metadata building with empty state."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_producer,
|
||||
role=ECConnectorRole.SCHEDULER,
|
||||
)
|
||||
@ -274,14 +274,14 @@ class TestStateManagement:
|
||||
scheduler_output = Mock(spec=SchedulerOutput)
|
||||
metadata = connector.build_connector_meta(scheduler_output)
|
||||
|
||||
assert isinstance(metadata, ECSharedStorageConnectorMetadata)
|
||||
assert isinstance(metadata, ECExampleConnectorMetadata)
|
||||
assert len(metadata.mm_datas) == 0
|
||||
|
||||
def test_state_cleared_after_metadata_build(
|
||||
self, mock_vllm_config_producer, mock_request_with_3_mm
|
||||
):
|
||||
"""Test that state is properly cleared after building metadata."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_producer,
|
||||
role=ECConnectorRole.SCHEDULER,
|
||||
)
|
||||
@ -310,7 +310,7 @@ class TestCacheSaving:
|
||||
self, mock_vllm_config_producer, mock_request_with_3_mm, temp_storage
|
||||
):
|
||||
"""Test cache saving as producer for 3 different MM items."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_producer,
|
||||
role=ECConnectorRole.WORKER,
|
||||
)
|
||||
@ -336,7 +336,7 @@ class TestCacheSaving:
|
||||
|
||||
def test_save_caches_consumer_skips(self, mock_vllm_config_consumer):
|
||||
"""Test cache saving is skipped for consumer."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_consumer,
|
||||
role=ECConnectorRole.WORKER,
|
||||
)
|
||||
@ -366,7 +366,7 @@ class TestCacheLoading:
|
||||
):
|
||||
"""Test consumer loads 3 caches from storage."""
|
||||
# First, create producer to save caches
|
||||
producer = ECSharedStorageConnector(
|
||||
producer = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_producer,
|
||||
role=ECConnectorRole.WORKER,
|
||||
)
|
||||
@ -379,13 +379,13 @@ class TestCacheLoading:
|
||||
producer.save_caches(saved_caches, mm_hash)
|
||||
|
||||
# Now consumer loads
|
||||
consumer = ECSharedStorageConnector(
|
||||
consumer = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_consumer,
|
||||
role=ECConnectorRole.WORKER,
|
||||
)
|
||||
|
||||
# Setup metadata for all 3
|
||||
metadata = ECSharedStorageConnectorMetadata()
|
||||
metadata = ECExampleConnectorMetadata()
|
||||
for mm_hash in mm_hashes:
|
||||
metadata.add_mm_data(MMMeta.make_meta(mm_hash, 100))
|
||||
consumer.bind_connector_metadata(metadata)
|
||||
@ -410,7 +410,7 @@ class TestCacheLoading:
|
||||
):
|
||||
"""Test cache loading skips already cached items."""
|
||||
# Setup: producer saves cache
|
||||
producer = ECSharedStorageConnector(
|
||||
producer = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_producer,
|
||||
role=ECConnectorRole.WORKER,
|
||||
)
|
||||
@ -420,12 +420,12 @@ class TestCacheLoading:
|
||||
producer.save_caches({mm_hash: saved_cache}, mm_hash)
|
||||
|
||||
# Consumer setup
|
||||
consumer = ECSharedStorageConnector(
|
||||
consumer = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_consumer,
|
||||
role=ECConnectorRole.WORKER,
|
||||
)
|
||||
|
||||
metadata = ECSharedStorageConnectorMetadata()
|
||||
metadata = ECExampleConnectorMetadata()
|
||||
metadata.add_mm_data(MMMeta.make_meta(mm_hash, 100))
|
||||
consumer.bind_connector_metadata(metadata)
|
||||
|
||||
@ -444,13 +444,13 @@ class TestCacheLoading:
|
||||
|
||||
def test_start_load_caches_empty_metadata(self, mock_vllm_config_consumer):
|
||||
"""Test loading with empty metadata does nothing."""
|
||||
consumer = ECSharedStorageConnector(
|
||||
consumer = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_consumer,
|
||||
role=ECConnectorRole.WORKER,
|
||||
)
|
||||
|
||||
# Setup empty metadata
|
||||
metadata = ECSharedStorageConnectorMetadata()
|
||||
metadata = ECExampleConnectorMetadata()
|
||||
consumer.bind_connector_metadata(metadata)
|
||||
|
||||
# Load (should not raise)
|
||||
@ -466,7 +466,7 @@ class TestFilenameGeneration:
|
||||
|
||||
def test_generate_foldername(self, mock_vllm_config_producer, temp_storage):
|
||||
"""Test folder name generation."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_producer,
|
||||
role=ECConnectorRole.WORKER,
|
||||
)
|
||||
@ -479,7 +479,7 @@ class TestFilenameGeneration:
|
||||
|
||||
def test_generate_filename(self, mock_vllm_config_producer, temp_storage):
|
||||
"""Test filename generation."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_producer,
|
||||
role=ECConnectorRole.WORKER,
|
||||
)
|
||||
@ -493,7 +493,7 @@ class TestFilenameGeneration:
|
||||
|
||||
def test_generate_filename_consistency(self, mock_vllm_config_producer):
|
||||
"""Test filename generation is consistent."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_producer,
|
||||
role=ECConnectorRole.WORKER,
|
||||
)
|
||||
@ -510,12 +510,12 @@ class TestMetadataBindingLifecycle:
|
||||
|
||||
def test_bind_connector_metadata(self, mock_vllm_config_consumer):
|
||||
"""Test binding connector metadata."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_consumer,
|
||||
role=ECConnectorRole.WORKER,
|
||||
)
|
||||
|
||||
metadata = ECSharedStorageConnectorMetadata()
|
||||
metadata = ECExampleConnectorMetadata()
|
||||
metadata.add_mm_data(MMMeta.make_meta("hash_1", 100))
|
||||
|
||||
connector.bind_connector_metadata(metadata)
|
||||
@ -524,12 +524,12 @@ class TestMetadataBindingLifecycle:
|
||||
|
||||
def test_clear_connector_metadata(self, mock_vllm_config_consumer):
|
||||
"""Test clearing connector metadata."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_consumer,
|
||||
role=ECConnectorRole.WORKER,
|
||||
)
|
||||
|
||||
metadata = ECSharedStorageConnectorMetadata()
|
||||
metadata = ECExampleConnectorMetadata()
|
||||
connector.bind_connector_metadata(metadata)
|
||||
|
||||
connector.clear_connector_metadata()
|
||||
@ -538,12 +538,12 @@ class TestMetadataBindingLifecycle:
|
||||
|
||||
def test_get_connector_metadata(self, mock_vllm_config_consumer):
|
||||
"""Test getting connector metadata."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_consumer,
|
||||
role=ECConnectorRole.WORKER,
|
||||
)
|
||||
|
||||
metadata = ECSharedStorageConnectorMetadata()
|
||||
metadata = ECExampleConnectorMetadata()
|
||||
connector.bind_connector_metadata(metadata)
|
||||
|
||||
retrieved = connector._get_connector_metadata()
|
||||
@ -552,7 +552,7 @@ class TestMetadataBindingLifecycle:
|
||||
|
||||
def test_get_connector_metadata_not_set(self, mock_vllm_config_consumer):
|
||||
"""Test getting metadata when not set raises."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_consumer,
|
||||
role=ECConnectorRole.WORKER,
|
||||
)
|
||||
@ -566,7 +566,7 @@ class TestEdgeCases:
|
||||
|
||||
def test_save_empty_cache(self, mock_vllm_config_producer):
|
||||
"""Test saving empty tensor."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_producer,
|
||||
role=ECConnectorRole.WORKER,
|
||||
)
|
||||
@ -579,12 +579,12 @@ class TestEdgeCases:
|
||||
|
||||
def test_load_nonexistent_cache(self, mock_vllm_config_consumer):
|
||||
"""Test loading cache that doesn't exist raises error."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_consumer,
|
||||
role=ECConnectorRole.WORKER,
|
||||
)
|
||||
|
||||
metadata = ECSharedStorageConnectorMetadata()
|
||||
metadata = ECExampleConnectorMetadata()
|
||||
metadata.add_mm_data(MMMeta.make_meta("nonexistent_hash", 100))
|
||||
connector.bind_connector_metadata(metadata)
|
||||
|
||||
@ -596,7 +596,7 @@ class TestEdgeCases:
|
||||
|
||||
def test_has_caches_empty_request(self, mock_vllm_config_producer):
|
||||
"""Test has_caches with request that has no MM data."""
|
||||
connector = ECSharedStorageConnector(
|
||||
connector = ECExampleConnector(
|
||||
vllm_config=mock_vllm_config_producer,
|
||||
role=ECConnectorRole.SCHEDULER,
|
||||
)
|
||||
@ -507,7 +507,7 @@ def test_encoder_instance_zero_kv_cache(
|
||||
)
|
||||
kv_transfer_config = (
|
||||
KVTransferConfig(
|
||||
kv_connector="SharedStorageConnector",
|
||||
kv_connector="ExampleConnector",
|
||||
kv_role="kv_both",
|
||||
kv_connector_extra_config={"shared_storage_path": "local_storage"},
|
||||
)
|
||||
@ -515,7 +515,7 @@ def test_encoder_instance_zero_kv_cache(
|
||||
else None
|
||||
)
|
||||
ec_transfer_config = ECTransferConfig(
|
||||
ec_connector="ECSharedStorageConnector",
|
||||
ec_connector="ECExampleConnector",
|
||||
ec_role=ec_role,
|
||||
ec_connector_extra_config={"shared_storage_path": "/tmp/ec_test_encoder"},
|
||||
)
|
||||
|
||||
@ -218,12 +218,12 @@ def test_internal_connector_uses_new_signature():
|
||||
Test that internal connectors (registered in factory) always use the new
|
||||
signature and get kv_cache_config.
|
||||
"""
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector import (
|
||||
SharedStorageConnector,
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1.example_connector import (
|
||||
ExampleConnector,
|
||||
)
|
||||
|
||||
vllm_config = create_vllm_config()
|
||||
vllm_config.kv_transfer_config.kv_connector = "SharedStorageConnector"
|
||||
vllm_config.kv_transfer_config.kv_connector = "ExampleConnector"
|
||||
|
||||
scheduler = create_scheduler(vllm_config)
|
||||
kv_cache_config = scheduler.kv_cache_config
|
||||
@ -233,7 +233,7 @@ def test_internal_connector_uses_new_signature():
|
||||
)
|
||||
|
||||
assert connector is not None
|
||||
assert isinstance(connector, SharedStorageConnector)
|
||||
assert isinstance(connector, ExampleConnector)
|
||||
assert connector._kv_cache_config is not None
|
||||
assert connector._kv_cache_config == kv_cache_config
|
||||
|
||||
|
||||
@ -119,16 +119,16 @@ def process_prompt(processor, llm: LLM, question: str, image_urls: list[Image]):
|
||||
)
|
||||
def test_shared_storage_connector_hashes(tmp_path):
|
||||
"""
|
||||
Tests that SharedStorageConnector saves KV to the storage locations
|
||||
Tests that ExampleConnector saves KV to the storage locations
|
||||
with proper hashes; that are unique for inputs with identical text but
|
||||
different images (same size), or same multiple images but different orders.
|
||||
"""
|
||||
# Using tmp_path as the storage path to store KV
|
||||
print(f"KV storage path at: {str(tmp_path)}")
|
||||
|
||||
# Configure the SharedStorageConnector
|
||||
# Configure the ExampleConnector
|
||||
kv_transfer_config = KVTransferConfig(
|
||||
kv_connector="SharedStorageConnector",
|
||||
kv_connector="ExampleConnector",
|
||||
kv_role="kv_both",
|
||||
kv_connector_extra_config={"shared_storage_path": str(tmp_path)},
|
||||
)
|
||||
@ -1,8 +1,8 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector import ( # noqa: E501
|
||||
SharedStorageConnectorMetadata,
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1.example_connector import ( # noqa: E501
|
||||
ExampleConnectorMetadata,
|
||||
)
|
||||
from vllm.distributed.kv_transfer.kv_transfer_state import (
|
||||
ensure_kv_transfer_initialized,
|
||||
@ -11,7 +11,7 @@ from vllm.distributed.kv_transfer.kv_transfer_state import (
|
||||
from vllm.v1.core.sched.output import CachedRequestData, SchedulerOutput
|
||||
from vllm.v1.worker.kv_connector_model_runner_mixin import KVConnectorModelRunnerMixin
|
||||
|
||||
# Importing utils registers TestSharedStorageConnector with the factory
|
||||
# Importing utils registers TestExampleConnector with the factory
|
||||
from .utils import create_vllm_config
|
||||
|
||||
|
||||
@ -26,13 +26,13 @@ def _make_empty_scheduler_output():
|
||||
num_common_prefix_blocks=[],
|
||||
finished_req_ids=set(),
|
||||
free_encoder_mm_hashes=[],
|
||||
kv_connector_metadata=SharedStorageConnectorMetadata(),
|
||||
kv_connector_metadata=ExampleConnectorMetadata(),
|
||||
)
|
||||
|
||||
|
||||
def test_kv_connector_mixin_clears_metadata():
|
||||
vllm_config = create_vllm_config()
|
||||
vllm_config.kv_transfer_config.kv_connector = "TestSharedStorageConnector"
|
||||
vllm_config.kv_transfer_config.kv_connector = "TestExampleConnector"
|
||||
vllm_config.kv_transfer_config.kv_role = "kv_both"
|
||||
vllm_config.kv_transfer_config.kv_connector_extra_config["name"] = "unit"
|
||||
|
||||
|
||||
@ -77,9 +77,9 @@ def _compare_directories(dir1: Path, dir2: Path) -> bool:
|
||||
"https://github.com/ROCm/pytorch/issues/2822"
|
||||
),
|
||||
)
|
||||
def test_multi_shared_storage_connector_consistency():
|
||||
def test_multi_example_connector_consistency():
|
||||
"""
|
||||
Tests that MultiConnector with two SharedStorageConnectors saves
|
||||
Tests that MultiConnector with two ExampleConnectors saves
|
||||
identical KV cache data to separate storage locations.
|
||||
"""
|
||||
storage_1_path = Path("storage_1/")
|
||||
@ -89,14 +89,14 @@ def test_multi_shared_storage_connector_consistency():
|
||||
storage_1_path.mkdir()
|
||||
storage_2_path.mkdir()
|
||||
|
||||
# Configure MultiConnector with two SharedStorageConnectors
|
||||
# Configure MultiConnector with two ExampleConnectors
|
||||
kv_transfer_config = KVTransferConfig(
|
||||
kv_connector="MultiConnector",
|
||||
kv_role="kv_both",
|
||||
kv_connector_extra_config={
|
||||
"connectors": [
|
||||
{
|
||||
"kv_connector": "TestSharedStorageConnector",
|
||||
"kv_connector": "TestExampleConnector",
|
||||
"kv_role": "kv_both",
|
||||
"kv_connector_extra_config": {
|
||||
"shared_storage_path": str(storage_1_path),
|
||||
@ -105,7 +105,7 @@ def test_multi_shared_storage_connector_consistency():
|
||||
"kv_connector_module_path": "tests.v1.kv_connector.unit.utils",
|
||||
},
|
||||
{
|
||||
"kv_connector": "TestSharedStorageConnector",
|
||||
"kv_connector": "TestExampleConnector",
|
||||
"kv_role": "kv_both",
|
||||
"kv_connector_extra_config": {
|
||||
"shared_storage_path": str(storage_2_path),
|
||||
@ -427,7 +427,7 @@ class TestMultiConnectorStats:
|
||||
|
||||
def test_build_kv_connector_stats_skips_connectors_without_custom_stats(self):
|
||||
"""Test that connectors without custom stats (return None) are skipped."""
|
||||
# SharedStorageConnector doesn't override build_kv_connector_stats,
|
||||
# ExampleConnector doesn't override build_kv_connector_stats,
|
||||
# so it returns None and should be skipped
|
||||
serialized_data = {
|
||||
"NixlConnector": {
|
||||
@ -440,7 +440,7 @@ class TestMultiConnectorStats:
|
||||
"num_failed_notifications": [],
|
||||
}
|
||||
},
|
||||
"SharedStorageConnector": {"data": {"some_field": [1, 2, 3]}},
|
||||
"ExampleConnector": {"data": {"some_field": [1, 2, 3]}},
|
||||
}
|
||||
|
||||
stats = MultiConnector.build_kv_connector_stats(data=serialized_data)
|
||||
@ -451,8 +451,8 @@ class TestMultiConnectorStats:
|
||||
assert len(stats.data) == 1
|
||||
assert "NixlConnector" in stats.data
|
||||
assert isinstance(stats.data["NixlConnector"], NixlKVConnectorStats)
|
||||
# SharedStorageConnector should be skipped (returns None)
|
||||
assert "SharedStorageConnector" not in stats.data
|
||||
# ExampleConnector should be skipped (returns None)
|
||||
assert "ExampleConnector" not in stats.data
|
||||
|
||||
def test_build_kv_connector_stats_handles_malformed_data(self):
|
||||
"""Test that malformed data raises appropriate errors."""
|
||||
@ -527,13 +527,13 @@ class TestMultiConnectorStats:
|
||||
)
|
||||
|
||||
stats2 = MultiKVConnectorStats(
|
||||
data={"SharedStorageConnector": KVConnectorStats(data={"field": [1, 2]})}
|
||||
data={"ExampleConnector": KVConnectorStats(data={"field": [1, 2]})}
|
||||
)
|
||||
|
||||
result = stats1.aggregate(stats2)
|
||||
|
||||
assert "NixlConnector" in result.data
|
||||
assert "SharedStorageConnector" in result.data
|
||||
assert "ExampleConnector" in result.data
|
||||
|
||||
def test_reduce(self):
|
||||
"""Test that reduce() correctly reduces all nested connector stats."""
|
||||
|
||||
@ -24,8 +24,8 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import (
|
||||
KVConnectorMetadata,
|
||||
KVConnectorRole,
|
||||
)
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector import ( # noqa
|
||||
SharedStorageConnector,
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1.example_connector import ( # noqa
|
||||
ExampleConnector,
|
||||
)
|
||||
from vllm.utils.hashing import sha256
|
||||
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
|
||||
@ -264,10 +264,10 @@ def create_model_runner_output(
|
||||
)
|
||||
|
||||
|
||||
class TestSharedStorageConnector(SharedStorageConnector):
|
||||
class TestExampleConnector(ExampleConnector):
|
||||
def __init__(self, config: VllmConfig, role, kv_cache_config):
|
||||
self.name = config.kv_transfer_config.kv_connector_extra_config["name"]
|
||||
self._connector = SharedStorageConnector(config, role)
|
||||
self._connector = ExampleConnector(config, role)
|
||||
self.call_record: dict[str, int] = defaultdict(int)
|
||||
# Use a unique temp file per connector
|
||||
self._event_file = (
|
||||
@ -394,7 +394,7 @@ class MockKVConnector(KVConnectorBase_V1):
|
||||
|
||||
|
||||
KVConnectorFactory.register_connector(
|
||||
"TestSharedStorageConnector", __name__, TestSharedStorageConnector.__name__
|
||||
"TestExampleConnector", __name__, TestExampleConnector.__name__
|
||||
)
|
||||
|
||||
KVConnectorFactory.register_connector(
|
||||
|
||||
@ -32,7 +32,7 @@ class MMMeta:
|
||||
|
||||
|
||||
@dataclass
|
||||
class ECSharedStorageConnectorMetadata(ECConnectorMetadata):
|
||||
class ECExampleConnectorMetadata(ECConnectorMetadata):
|
||||
mm_datas: list[MMMeta]
|
||||
|
||||
def __init__(self):
|
||||
@ -42,7 +42,7 @@ class ECSharedStorageConnectorMetadata(ECConnectorMetadata):
|
||||
self.mm_datas.append(mm_data)
|
||||
|
||||
|
||||
class ECSharedStorageConnector(ECConnectorBase):
|
||||
class ECExampleConnector(ECConnectorBase):
|
||||
# NOTE: This is Simple debug implementation of the EC connector.
|
||||
# It save / load the EC cache to / from the disk.
|
||||
|
||||
@ -76,7 +76,7 @@ class ECSharedStorageConnector(ECConnectorBase):
|
||||
|
||||
# Get the metadata
|
||||
metadata: ECConnectorMetadata = self._get_connector_metadata()
|
||||
assert isinstance(metadata, ECSharedStorageConnectorMetadata)
|
||||
assert isinstance(metadata, ECExampleConnectorMetadata)
|
||||
assert encoder_cache is not None
|
||||
if metadata is None:
|
||||
logger.warning(
|
||||
@ -160,7 +160,7 @@ class ECSharedStorageConnector(ECConnectorBase):
|
||||
Args:
|
||||
scheduler_output (SchedulerOutput): the scheduler output object.
|
||||
"""
|
||||
meta = ECSharedStorageConnectorMetadata()
|
||||
meta = ECExampleConnectorMetadata()
|
||||
for mm_hash, num_encoder_token in self._mm_datas_need_loads.items():
|
||||
meta.add_mm_data(MMMeta.make_meta(mm_hash, num_encoder_token))
|
||||
self._mm_datas_need_loads.clear()
|
||||
@ -79,7 +79,7 @@ class ECConnectorFactory:
|
||||
# only load the files corresponding to the current connector.
|
||||
|
||||
ECConnectorFactory.register_connector(
|
||||
"ECSharedStorageConnector",
|
||||
"vllm.distributed.ec_transfer.ec_connector.shared_storage_connector",
|
||||
"ECSharedStorageConnector",
|
||||
"ECExampleConnector",
|
||||
"vllm.distributed.ec_transfer.ec_connector.example_connector",
|
||||
"ECExampleConnector",
|
||||
)
|
||||
|
||||
@ -144,9 +144,9 @@ class KVConnectorFactory:
|
||||
# only load the files corresponding to the current connector.
|
||||
|
||||
KVConnectorFactory.register_connector(
|
||||
"SharedStorageConnector",
|
||||
"vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector",
|
||||
"SharedStorageConnector",
|
||||
"ExampleConnector",
|
||||
"vllm.distributed.kv_transfer.kv_connector.v1.example_connector",
|
||||
"ExampleConnector",
|
||||
)
|
||||
|
||||
KVConnectorFactory.register_connector(
|
||||
|
||||
@ -65,7 +65,7 @@ class ReqMeta:
|
||||
|
||||
|
||||
@dataclass
|
||||
class SharedStorageConnectorMetadata(KVConnectorMetadata):
|
||||
class ExampleConnectorMetadata(KVConnectorMetadata):
|
||||
requests: list[ReqMeta] = field(default_factory=list)
|
||||
|
||||
def add_request(
|
||||
@ -81,7 +81,7 @@ class SharedStorageConnectorMetadata(KVConnectorMetadata):
|
||||
)
|
||||
|
||||
|
||||
class SharedStorageConnector(KVConnectorBase_V1):
|
||||
class ExampleConnector(KVConnectorBase_V1):
|
||||
# NOTE: This is Simple debug implementation of the KV connector.
|
||||
# It save / load the KV cache to / from the disk.
|
||||
# It does extra work which will overwrite the existing prefix-cache in GPU
|
||||
@ -157,7 +157,7 @@ class SharedStorageConnector(KVConnectorBase_V1):
|
||||
|
||||
# Get the metadata
|
||||
metadata: KVConnectorMetadata = self._get_connector_metadata()
|
||||
assert isinstance(metadata, SharedStorageConnectorMetadata)
|
||||
assert isinstance(metadata, ExampleConnectorMetadata)
|
||||
|
||||
if metadata is None:
|
||||
logger.warning(
|
||||
@ -241,7 +241,7 @@ class SharedStorageConnector(KVConnectorBase_V1):
|
||||
return layer.reshape(2, num_pages * page_size, -1)[:, slot_mapping, ...]
|
||||
|
||||
connector_metadata = self._get_connector_metadata()
|
||||
assert isinstance(connector_metadata, SharedStorageConnectorMetadata)
|
||||
assert isinstance(connector_metadata, ExampleConnectorMetadata)
|
||||
for request in connector_metadata.requests:
|
||||
if request.is_store:
|
||||
filename = self._generate_filename_debug(
|
||||
@ -315,7 +315,7 @@ class SharedStorageConnector(KVConnectorBase_V1):
|
||||
Args:
|
||||
scheduler_output (SchedulerOutput): the scheduler output object.
|
||||
"""
|
||||
meta = SharedStorageConnectorMetadata()
|
||||
meta = ExampleConnectorMetadata()
|
||||
|
||||
total_need_load = 0
|
||||
for new_req in scheduler_output.scheduled_new_reqs:
|
||||
Loading…
x
Reference in New Issue
Block a user