diff --git a/tests/distributed/conftest.py b/tests/distributed/conftest.py index 666a715cc0da..7dc4a0cc3d58 100644 --- a/tests/distributed/conftest.py +++ b/tests/distributed/conftest.py @@ -8,7 +8,7 @@ import msgspec.msgpack import pytest import zmq -from vllm.config import KVEventsConfig +from vllm.config.kv_events import KVEventsConfig from vllm.distributed.kv_events import EventPublisherFactory from .test_events import SampleBatch diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index f6f1838aedfc..e3ce1987fe9e 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -33,6 +33,7 @@ from vllm.config.cache import (BlockSize, CacheConfig, CacheDType, MambaDType, PrefixCachingHashAlgo) from vllm.config.compilation import (CompilationConfig, CompilationLevel, CUDAGraphMode, PassConfig) +from vllm.config.kv_events import KVEventsConfig from vllm.config.parallel import (DistributedExecutorBackend, EPLBConfig, ParallelConfig) from vllm.config.scheduler import SchedulerConfig, SchedulerPolicy @@ -3310,48 +3311,6 @@ class KVTransferConfig: return self.kv_connector_extra_config.get(key, default) -@config -@dataclass -class KVEventsConfig: - """Configuration for KV event publishing.""" - - enable_kv_cache_events: bool = False - """If True, enable KV cache events for tracking block storage and removal. - Events can be published externally by zmq using the event publisher config. - """ - - publisher: str = "null" - """The publisher to use for publishing kv events. Can be "null", "zmq". - """ - - endpoint: str = "tcp://*:5557" - """The zmq endpoint to use for publishing kv events. - """ - - replay_endpoint: Optional[str] = None - """The zmq endpoint to use for replaying kv events. - """ - - buffer_steps: int = 10_000 - """The number of steps to cache for replay endpoint. Will only save - events from the last N steps for the replay endpoint. - """ - - hwm: int = 100_000 - """The zmq high water mark for the event publisher. After queueing N events, - events will start dropping if the consumer is not keeping up. - """ - - max_queue_size: int = 100_000 - """The maximum number of events to queue while waiting for publishing. - """ - - topic: str = "" - """The topic to use for the event publisher. Consumers can subscribe to - this topic to receive events. - """ - - @config @dataclass(config=ConfigDict(arbitrary_types_allowed=True)) class VllmConfig: diff --git a/vllm/config/kv_events.py b/vllm/config/kv_events.py new file mode 100644 index 000000000000..1c6bdffa1281 --- /dev/null +++ b/vllm/config/kv_events.py @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from typing import Optional + +from pydantic.dataclasses import dataclass + +from vllm.config.utils import config + + +@config +@dataclass +class KVEventsConfig: + """Configuration for KV event publishing.""" + + enable_kv_cache_events: bool = False + """If True, enable KV cache events for tracking block storage and removal. + Events can be published externally by zmq using the event publisher config. + """ + + publisher: str = "null" + """The publisher to use for publishing kv events. Can be "null", "zmq". + """ + + endpoint: str = "tcp://*:5557" + """The zmq endpoint to use for publishing kv events. + """ + + replay_endpoint: Optional[str] = None + """The zmq endpoint to use for replaying kv events. + """ + + buffer_steps: int = 10_000 + """The number of steps to cache for replay endpoint. Will only save + events from the last N steps for the replay endpoint. + """ + + hwm: int = 100_000 + """The zmq high water mark for the event publisher. After queueing N events, + events will start dropping if the consumer is not keeping up. + """ + + max_queue_size: int = 100_000 + """The maximum number of events to queue while waiting for publishing. + """ + + topic: str = "" + """The topic to use for the event publisher. Consumers can subscribe to + this topic to receive events. + """ diff --git a/vllm/distributed/kv_events.py b/vllm/distributed/kv_events.py index 37f8f72fa905..09f42b550fe2 100644 --- a/vllm/distributed/kv_events.py +++ b/vllm/distributed/kv_events.py @@ -14,7 +14,7 @@ from typing import Any, Callable, Optional, Union import msgspec import zmq -from vllm.config import KVEventsConfig +from vllm.config.kv_events import KVEventsConfig from vllm.logger import init_logger logger = init_logger(__name__)