mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 05:04:58 +08:00
Move KVEventsConfig from config/__init__.py to config/kv_events.py (#24433)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
parent
9cd76b71ab
commit
03dd652c16
@ -8,7 +8,7 @@ import msgspec.msgpack
|
|||||||
import pytest
|
import pytest
|
||||||
import zmq
|
import zmq
|
||||||
|
|
||||||
from vllm.config import KVEventsConfig
|
from vllm.config.kv_events import KVEventsConfig
|
||||||
from vllm.distributed.kv_events import EventPublisherFactory
|
from vllm.distributed.kv_events import EventPublisherFactory
|
||||||
|
|
||||||
from .test_events import SampleBatch
|
from .test_events import SampleBatch
|
||||||
|
|||||||
@ -33,6 +33,7 @@ from vllm.config.cache import (BlockSize, CacheConfig, CacheDType, MambaDType,
|
|||||||
PrefixCachingHashAlgo)
|
PrefixCachingHashAlgo)
|
||||||
from vllm.config.compilation import (CompilationConfig, CompilationLevel,
|
from vllm.config.compilation import (CompilationConfig, CompilationLevel,
|
||||||
CUDAGraphMode, PassConfig)
|
CUDAGraphMode, PassConfig)
|
||||||
|
from vllm.config.kv_events import KVEventsConfig
|
||||||
from vllm.config.parallel import (DistributedExecutorBackend, EPLBConfig,
|
from vllm.config.parallel import (DistributedExecutorBackend, EPLBConfig,
|
||||||
ParallelConfig)
|
ParallelConfig)
|
||||||
from vllm.config.scheduler import SchedulerConfig, SchedulerPolicy
|
from vllm.config.scheduler import SchedulerConfig, SchedulerPolicy
|
||||||
@ -3310,48 +3311,6 @@ class KVTransferConfig:
|
|||||||
return self.kv_connector_extra_config.get(key, default)
|
return self.kv_connector_extra_config.get(key, default)
|
||||||
|
|
||||||
|
|
||||||
@config
|
|
||||||
@dataclass
|
|
||||||
class KVEventsConfig:
|
|
||||||
"""Configuration for KV event publishing."""
|
|
||||||
|
|
||||||
enable_kv_cache_events: bool = False
|
|
||||||
"""If True, enable KV cache events for tracking block storage and removal.
|
|
||||||
Events can be published externally by zmq using the event publisher config.
|
|
||||||
"""
|
|
||||||
|
|
||||||
publisher: str = "null"
|
|
||||||
"""The publisher to use for publishing kv events. Can be "null", "zmq".
|
|
||||||
"""
|
|
||||||
|
|
||||||
endpoint: str = "tcp://*:5557"
|
|
||||||
"""The zmq endpoint to use for publishing kv events.
|
|
||||||
"""
|
|
||||||
|
|
||||||
replay_endpoint: Optional[str] = None
|
|
||||||
"""The zmq endpoint to use for replaying kv events.
|
|
||||||
"""
|
|
||||||
|
|
||||||
buffer_steps: int = 10_000
|
|
||||||
"""The number of steps to cache for replay endpoint. Will only save
|
|
||||||
events from the last N steps for the replay endpoint.
|
|
||||||
"""
|
|
||||||
|
|
||||||
hwm: int = 100_000
|
|
||||||
"""The zmq high water mark for the event publisher. After queueing N events,
|
|
||||||
events will start dropping if the consumer is not keeping up.
|
|
||||||
"""
|
|
||||||
|
|
||||||
max_queue_size: int = 100_000
|
|
||||||
"""The maximum number of events to queue while waiting for publishing.
|
|
||||||
"""
|
|
||||||
|
|
||||||
topic: str = ""
|
|
||||||
"""The topic to use for the event publisher. Consumers can subscribe to
|
|
||||||
this topic to receive events.
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
@config
|
@config
|
||||||
@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
|
@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
|
||||||
class VllmConfig:
|
class VllmConfig:
|
||||||
|
|||||||
50
vllm/config/kv_events.py
Normal file
50
vllm/config/kv_events.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from pydantic.dataclasses import dataclass
|
||||||
|
|
||||||
|
from vllm.config.utils import config
|
||||||
|
|
||||||
|
|
||||||
|
@config
|
||||||
|
@dataclass
|
||||||
|
class KVEventsConfig:
|
||||||
|
"""Configuration for KV event publishing."""
|
||||||
|
|
||||||
|
enable_kv_cache_events: bool = False
|
||||||
|
"""If True, enable KV cache events for tracking block storage and removal.
|
||||||
|
Events can be published externally by zmq using the event publisher config.
|
||||||
|
"""
|
||||||
|
|
||||||
|
publisher: str = "null"
|
||||||
|
"""The publisher to use for publishing kv events. Can be "null", "zmq".
|
||||||
|
"""
|
||||||
|
|
||||||
|
endpoint: str = "tcp://*:5557"
|
||||||
|
"""The zmq endpoint to use for publishing kv events.
|
||||||
|
"""
|
||||||
|
|
||||||
|
replay_endpoint: Optional[str] = None
|
||||||
|
"""The zmq endpoint to use for replaying kv events.
|
||||||
|
"""
|
||||||
|
|
||||||
|
buffer_steps: int = 10_000
|
||||||
|
"""The number of steps to cache for replay endpoint. Will only save
|
||||||
|
events from the last N steps for the replay endpoint.
|
||||||
|
"""
|
||||||
|
|
||||||
|
hwm: int = 100_000
|
||||||
|
"""The zmq high water mark for the event publisher. After queueing N events,
|
||||||
|
events will start dropping if the consumer is not keeping up.
|
||||||
|
"""
|
||||||
|
|
||||||
|
max_queue_size: int = 100_000
|
||||||
|
"""The maximum number of events to queue while waiting for publishing.
|
||||||
|
"""
|
||||||
|
|
||||||
|
topic: str = ""
|
||||||
|
"""The topic to use for the event publisher. Consumers can subscribe to
|
||||||
|
this topic to receive events.
|
||||||
|
"""
|
||||||
@ -14,7 +14,7 @@ from typing import Any, Callable, Optional, Union
|
|||||||
import msgspec
|
import msgspec
|
||||||
import zmq
|
import zmq
|
||||||
|
|
||||||
from vllm.config import KVEventsConfig
|
from vllm.config.kv_events import KVEventsConfig
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user