# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from unittest.mock import MagicMock

import pytest
import torch

from vllm.v1.core.kv_cache_utils import check_enough_kv_cache_memory
from vllm.v1.kv_cache_interface import FullAttentionSpec


def _make_kv_cache_spec() -> dict[str, FullAttentionSpec]:
    """A single-layer full-attention KV cache spec shared by the tests."""
    return {
        "layer_0": FullAttentionSpec(
            block_size=16,
            num_kv_heads=8,
            head_size=128,
            dtype=torch.float16,
        )
    }


def test_kv_cache_oom_no_memory():
    """With zero bytes available, the check must fail immediately."""
    config = MagicMock()
    config.model_config.max_model_len = 2048

    with pytest.raises(ValueError):
        check_enough_kv_cache_memory(config, _make_kv_cache_spec(), 0)


def test_kv_cache_oom_insufficient_memory(monkeypatch):
    """With less memory available than the model needs, the check must fail."""
    config = MagicMock()
    config.model_config.max_model_len = 2048
    config.cache_config.block_size = 16
    config.parallel_config.tensor_parallel_size = 1
    config.parallel_config.pipeline_parallel_size = 1
    config.parallel_config.decode_context_parallel_size = 1

    # Pretend the model needs 100 GiB of KV cache at max_model_len.
    monkeypatch.setattr(
        "vllm.v1.core.kv_cache_utils.max_memory_usage_bytes",
        lambda c, s: 100 * 1024**3,  # 100 GiB
    )

    with pytest.raises(ValueError):
        check_enough_kv_cache_memory(config, _make_kv_cache_spec(),
                                     1024**3)  # 1 GiB available
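

# A complementary happy-path sketch (not in the original tests): using the
# same monkeypatch trick, check_enough_kv_cache_memory should return without
# raising when the available memory exceeds the patched requirement. This
# assumes the function signals success simply by not raising ValueError.
def test_kv_cache_enough_memory(monkeypatch):
    config = MagicMock()
    config.model_config.max_model_len = 2048

    # Pretend the model needs only 1 GiB of KV cache.
    monkeypatch.setattr(
        "vllm.v1.core.kv_cache_utils.max_memory_usage_bytes",
        lambda c, s: 1024**3,  # 1 GiB
    )

    # 2 GiB available > 1 GiB needed: no exception expected.
    check_enough_kv_cache_memory(config, _make_kv_cache_spec(), 2 * 1024**3)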