# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Tests for KV cache offloading configuration."""

import pytest

from vllm.config import CacheConfig, KVTransferConfig, ParallelConfig, VllmConfig

pytestmark = pytest.mark.cpu_test

# Number of bytes in one GiB; the "native" cases express their expectation in bytes.
_GIB = 1 << 30


@pytest.mark.parametrize(
    "kv_offloading_backend,kv_offloading_size,tp,pp,expected_backend,expected_bytes",
    [
        # Single rank: the whole 4.0 GiB budget, expressed in bytes.
        ("native", 4.0, 1, 1, "OffloadingConnector", 4.0 * _GIB),
        # tp=2, pp=2: 8.0 GiB split over 4 ranks -> 2.0 GiB per rank, in bytes.
        ("native", 8.0, 2, 2, "OffloadingConnector", 8.0 * _GIB / 4),
        # Single rank: the expected value stays in GiB for this backend.
        ("lmcache", 4.0, 1, 1, "LMCacheConnectorV1", 4.0),
        # tp=2, pp=2: 8.0 GiB split over 4 ranks -> 2.0 GiB per rank.
        ("lmcache", 8.0, 2, 2, "LMCacheConnectorV1", 2.0),
        # Offloading disabled: no connector should be configured at all.
        (None, None, 1, 1, None, None),
    ],
)
def test_kv_connector(
    kv_offloading_backend, kv_offloading_size, tp, pp, expected_backend, expected_bytes
):
    """Check the KV transfer config that ``VllmConfig`` ends up with.

    A ``VllmConfig`` is built from a ``CacheConfig`` carrying the offloading
    backend/size and a ``ParallelConfig`` carrying the TP/PP sizes. When a
    backend is expected, a ``KVTransferConfig`` holding one pre-existing
    extra-config entry is passed in as well, so the test can observe whether
    that entry is still present afterwards.

    ``expected_bytes`` is compared against ``kv_bytes_per_rank`` for the
    "native" backend and against ``lmcache.max_local_cpu_size`` for the
    "lmcache" backend.
    """
    # Seed a transfer config only for the cases that expect a connector.
    seed_transfer_config = None
    if expected_backend is not None:
        seed_transfer_config = KVTransferConfig(
            kv_connector_extra_config={"existing_key": "existing_value"}
        )

    cache_config = CacheConfig(
        kv_offloading_backend=kv_offloading_backend,
        kv_offloading_size=kv_offloading_size,
    )
    parallel_config = ParallelConfig(
        tensor_parallel_size=tp,
        pipeline_parallel_size=pp,
    )
    vllm_config = VllmConfig(
        cache_config=cache_config,
        kv_transfer_config=seed_transfer_config,
        parallel_config=parallel_config,
    )

    resolved = vllm_config.kv_transfer_config

    # Without an offloading backend there must be no KV transfer config.
    if expected_backend is None:
        assert resolved is None
        return

    extra_config = resolved.kv_connector_extra_config

    assert resolved.kv_connector == expected_backend
    assert resolved.kv_role == "kv_both"

    if kv_offloading_backend == "native":
        assert extra_config["kv_bytes_per_rank"] == expected_bytes
        assert extra_config["num_cpu_blocks"] == 0
        # The pre-existing entry is expected to still be there.
        assert extra_config["existing_key"] == "existing_value"
    elif kv_offloading_backend == "lmcache":
        assert extra_config["lmcache.local_cpu"] is True
        assert extra_config["lmcache.max_local_cpu_size"] == expected_bytes
        # The pre-existing entry is expected to be gone.
        assert "existing_key" not in extra_config