Move KVEventsConfig from config/__init__.py to config/kv_events.py (#24433)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor 2025-09-08 14:41:27 +01:00 committed by GitHub
parent 9cd76b71ab
commit 03dd652c16
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 53 additions and 44 deletions

View File

@ -8,7 +8,7 @@ import msgspec.msgpack
import pytest import pytest
import zmq import zmq
from vllm.config import KVEventsConfig from vllm.config.kv_events import KVEventsConfig
from vllm.distributed.kv_events import EventPublisherFactory from vllm.distributed.kv_events import EventPublisherFactory
from .test_events import SampleBatch from .test_events import SampleBatch

View File

@ -33,6 +33,7 @@ from vllm.config.cache import (BlockSize, CacheConfig, CacheDType, MambaDType,
PrefixCachingHashAlgo) PrefixCachingHashAlgo)
from vllm.config.compilation import (CompilationConfig, CompilationLevel, from vllm.config.compilation import (CompilationConfig, CompilationLevel,
CUDAGraphMode, PassConfig) CUDAGraphMode, PassConfig)
from vllm.config.kv_events import KVEventsConfig
from vllm.config.parallel import (DistributedExecutorBackend, EPLBConfig, from vllm.config.parallel import (DistributedExecutorBackend, EPLBConfig,
ParallelConfig) ParallelConfig)
from vllm.config.scheduler import SchedulerConfig, SchedulerPolicy from vllm.config.scheduler import SchedulerConfig, SchedulerPolicy
@ -3310,48 +3311,6 @@ class KVTransferConfig:
return self.kv_connector_extra_config.get(key, default) return self.kv_connector_extra_config.get(key, default)
# Settings for publishing KV cache events, declared as a dataclass wrapped by
# the `config` decorator. Every field has a default, and each one is followed
# by a bare string literal that describes it.
@config
@dataclass
class KVEventsConfig:
"""Configuration for KV event publishing."""
# Master switch for KV cache events; off by default.
enable_kv_cache_events: bool = False
"""If True, enable KV cache events for tracking block storage and removal.
Events can be published externally by zmq using the event publisher config.
"""
# Publisher name. Typed as a plain str, so the annotation itself does not
# restrict the value to the names listed in the description below.
publisher: str = "null"
"""The publisher to use for publishing kv events. Can be "null", "zmq".
"""
# NOTE(review): the "*" host presumably means "all interfaces" on port 5557
# -- confirm against the publisher implementation.
endpoint: str = "tcp://*:5557"
"""The zmq endpoint to use for publishing kv events.
"""
# None by default; presumably replay is unavailable unless an endpoint is
# given -- TODO confirm against the publisher implementation.
replay_endpoint: Optional[str] = None
"""The zmq endpoint to use for replaying kv events.
"""
# Size of the replay window, counted in steps (not in events).
buffer_steps: int = 10_000
"""The number of steps to cache for replay endpoint. Will only save
events from the last N steps for the replay endpoint.
"""
# High water mark, counted in events; shares its default with max_queue_size.
hwm: int = 100_000
"""The zmq high water mark for the event publisher. After queueing N events,
events will start dropping if the consumer is not keeping up.
"""
# Cap on events waiting to be published; same default as hwm.
max_queue_size: int = 100_000
"""The maximum number of events to queue while waiting for publishing.
"""
# Empty string by default.
topic: str = ""
"""The topic to use for the event publisher. Consumers can subscribe to
this topic to receive events.
"""
@config @config
@dataclass(config=ConfigDict(arbitrary_types_allowed=True)) @dataclass(config=ConfigDict(arbitrary_types_allowed=True))
class VllmConfig: class VllmConfig:

50
vllm/config/kv_events.py Normal file
View File

@ -0,0 +1,50 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Optional
from pydantic.dataclasses import dataclass
from vllm.config.utils import config
# Settings for publishing KV cache events, declared as a pydantic dataclass
# wrapped by the `config` decorator from vllm.config.utils. Every field has a
# default, and each one is followed by a bare string literal that describes it.
@config
@dataclass
class KVEventsConfig:
"""Configuration for KV event publishing."""
# Master switch for KV cache events; off by default.
enable_kv_cache_events: bool = False
"""If True, enable KV cache events for tracking block storage and removal.
Events can be published externally by zmq using the event publisher config.
"""
# Publisher name. Typed as a plain str, so the annotation itself does not
# restrict the value to the names listed in the description below.
publisher: str = "null"
"""The publisher to use for publishing kv events. Can be "null", "zmq".
"""
# NOTE(review): the "*" host presumably means "all interfaces" on port 5557
# -- confirm against the publisher implementation.
endpoint: str = "tcp://*:5557"
"""The zmq endpoint to use for publishing kv events.
"""
# None by default; presumably replay is unavailable unless an endpoint is
# given -- TODO confirm against the publisher implementation.
replay_endpoint: Optional[str] = None
"""The zmq endpoint to use for replaying kv events.
"""
# Size of the replay window, counted in steps (not in events).
buffer_steps: int = 10_000
"""The number of steps to cache for replay endpoint. Will only save
events from the last N steps for the replay endpoint.
"""
# High water mark, counted in events; shares its default with max_queue_size.
hwm: int = 100_000
"""The zmq high water mark for the event publisher. After queueing N events,
events will start dropping if the consumer is not keeping up.
"""
# Cap on events waiting to be published; same default as hwm.
max_queue_size: int = 100_000
"""The maximum number of events to queue while waiting for publishing.
"""
# Empty string by default.
topic: str = ""
"""The topic to use for the event publisher. Consumers can subscribe to
this topic to receive events.
"""

View File

@ -14,7 +14,7 @@ from typing import Any, Callable, Optional, Union
import msgspec import msgspec
import zmq import zmq
from vllm.config import KVEventsConfig from vllm.config.kv_events import KVEventsConfig
from vllm.logger import init_logger from vllm.logger import init_logger
logger = init_logger(__name__) logger = init_logger(__name__)