mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-18 21:15:38 +08:00
Signed-off-by: ApostaC <yihua98@uchicago.edu> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
66 lines
2.4 KiB
Python
66 lines
2.4 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
|
|
"""Tests for KV cache offloading configuration."""
|
|
|
|
import pytest
|
|
|
|
from vllm.config import CacheConfig, KVTransferConfig, ParallelConfig, VllmConfig
|
|
|
|
# Module-level pytest mark: applies the `cpu_test` marker to every test in this file.
pytestmark = pytest.mark.cpu_test
|
|
|
|
|
|
@pytest.mark.parametrize(
    "kv_offloading_backend,kv_offloading_size,tp,pp,expected_backend,expected_bytes",
    [
        # Native backend: the expected value is the per-rank budget in bytes.
        ("native", 4.0, 1, 1, "OffloadingConnector", 4.0 * 2**30),
        # 8.0 GiB shared by tp * pp = 4 ranks -> 2.0 GiB per rank
        ("native", 8.0, 2, 2, "OffloadingConnector", 8.0 * 2**30 / 4),
        # LMCache backend: the expected value stays expressed in GiB.
        ("lmcache", 4.0, 1, 1, "LMCacheConnectorV1", 4.0),
        # 8.0 GiB shared by tp * pp = 4 ranks -> 2.0 GiB per rank
        ("lmcache", 8.0, 2, 2, "LMCacheConnectorV1", 2.0),
        # No offloading backend requested
        (None, None, 1, 1, None, None),
    ],
)
def test_kv_connector(
    kv_offloading_backend, kv_offloading_size, tp, pp, expected_backend, expected_bytes
):
    """Check the KV transfer config that VllmConfig exposes for offloading options.

    A ``VllmConfig`` is built from a ``CacheConfig`` carrying the offloading
    backend/size and a ``ParallelConfig`` carrying the TP/PP sizes. When a
    backend is expected, a ``KVTransferConfig`` holding one pre-existing extra
    config entry is also passed in, so the test can check what happens to it.

    Args:
        kv_offloading_backend: Backend name given to ``CacheConfig`` (or None).
        kv_offloading_size: Offloading size given to ``CacheConfig`` (or None).
        tp: Tensor parallel size.
        pp: Pipeline parallel size.
        expected_backend: Expected ``kv_connector`` value, or None when no KV
            transfer config is expected at all.
        expected_bytes: Expected per-rank size stored in the connector's extra
            config (``kv_bytes_per_rank`` for "native",
            ``lmcache.max_local_cpu_size`` for "lmcache").
    """
    # Only seed a user-provided transfer config when one is expected afterwards.
    if expected_backend is None:
        user_transfer_config = None
    else:
        user_transfer_config = KVTransferConfig(
            kv_connector_extra_config={"existing_key": "existing_value"}
        )

    cache_cfg = CacheConfig(
        kv_offloading_backend=kv_offloading_backend,
        kv_offloading_size=kv_offloading_size,
    )
    parallel_cfg = ParallelConfig(tensor_parallel_size=tp, pipeline_parallel_size=pp)
    config = VllmConfig(
        cache_config=cache_cfg,
        kv_transfer_config=user_transfer_config,
        parallel_config=parallel_cfg,
    )

    # Nothing was requested, so no KV transfer config should have appeared.
    if expected_backend is None:
        assert config.kv_transfer_config is None
        return

    resolved = config.kv_transfer_config
    extra = resolved.kv_connector_extra_config

    assert resolved.kv_connector == expected_backend
    assert resolved.kv_role == "kv_both"

    if kv_offloading_backend == "native":
        assert extra["kv_bytes_per_rank"] == expected_bytes
        assert extra["num_cpu_blocks"] == 0
        # The entry supplied up front must still be there.
        assert extra["existing_key"] == "existing_value"
    elif kv_offloading_backend == "lmcache":
        assert extra["lmcache.local_cpu"] is True
        assert extra["lmcache.max_local_cpu_size"] == expected_bytes
        # The entry supplied up front must have been dropped.
        assert "existing_key" not in extra
|