[Misc] Separate out utils.counter and move utils.Device to engine (#27588)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung 2025-10-28 20:20:46 +08:00 committed by GitHub
parent 44b5ce956d
commit f58d9b6404
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 59 additions and 54 deletions

View File

@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import enum
from abc import ABC, abstractmethod
from collections.abc import AsyncGenerator, Iterable, Mapping
from typing import Any
@ -15,13 +16,17 @@ from vllm.pooling_params import PoolingParams
from vllm.sampling_params import SamplingParams
from vllm.tasks import SupportedTask
from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.utils import Device
from vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.processor import Processor
logger = init_logger(__name__)
class Device(enum.Enum):
GPU = enum.auto()
CPU = enum.auto()
class EngineClient(ABC):
"""Protocol class for Clients to Engine"""

View File

@ -31,6 +31,7 @@ from vllm.config.model import (
TokenizerMode,
)
from vllm.engine.arg_utils import EngineArgs
from vllm.engine.protocol import Device
from vllm.entrypoints.chat_utils import (
ChatCompletionMessageParam,
ChatTemplateContentFormatOption,
@ -75,8 +76,8 @@ from vllm.transformers_utils.tokenizer import (
get_cached_tokenizer,
)
from vllm.usage.usage_lib import UsageContext
from vllm.utils import Counter, Device
from vllm.utils.collection_utils import as_iter, is_list_of
from vllm.utils.counter import Counter
from vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.llm_engine import LLMEngine
from vllm.v1.sample.logits_processor import LogitsProcessor
@ -1490,8 +1491,8 @@ class LLM:
def stop_profile(self) -> None:
self.llm_engine.stop_profile()
def reset_prefix_cache(self, device: Device | None = None) -> bool:
return self.llm_engine.reset_prefix_cache(device)
def reset_prefix_cache(self, device: Device | None = None) -> None:
self.llm_engine.reset_prefix_cache(device)
def sleep(self, level: int = 1):
"""

View File

@ -40,7 +40,7 @@ from typing_extensions import assert_never
import vllm.envs as envs
from vllm.config import VllmConfig
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.protocol import EngineClient
from vllm.engine.protocol import Device, EngineClient
from vllm.entrypoints.launcher import serve_http
from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args
@ -108,7 +108,6 @@ from vllm.entrypoints.utils import (
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParserManager
from vllm.usage.usage_lib import UsageContext
from vllm.utils import Device
from vllm.utils.argparse_utils import FlexibleArgumentParser
from vllm.utils.network_utils import is_valid_ipv6_address
from vllm.utils.system_utils import decorate_logs, set_ulimit

View File

@ -19,7 +19,7 @@ from vllm.entrypoints.openai.protocol import (
from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry
from vllm.utils import AtomicCounter
from vllm.utils.counter import AtomicCounter
logger = init_logger(__name__)

View File

@ -3,7 +3,6 @@
import enum
import inspect
import threading
import uuid
import warnings
from functools import wraps
@ -68,54 +67,11 @@ STR_INVALID_VAL: str = "INVALID"
T = TypeVar("T")
class Device(enum.Enum):
GPU = enum.auto()
CPU = enum.auto()
class LayerBlockType(enum.Enum):
attention = "attention"
mamba = "mamba"
class Counter:
def __init__(self, start: int = 0) -> None:
self.counter = start
def __next__(self) -> int:
i = self.counter
self.counter += 1
return i
def reset(self) -> None:
self.counter = 0
class AtomicCounter:
"""An atomic, thread-safe counter"""
def __init__(self, initial=0):
"""Initialize a new atomic counter to given initial value"""
self._value = initial
self._lock = threading.Lock()
def inc(self, num=1):
"""Atomically increment the counter by num and return the new value"""
with self._lock:
self._value += num
return self._value
def dec(self, num=1):
"""Atomically decrement the counter by num and return the new value"""
with self._lock:
self._value -= num
return self._value
@property
def value(self):
return self._value
def random_uuid() -> str:
return str(uuid.uuid4().hex)

45
vllm/utils/counter.py Normal file
View File

@ -0,0 +1,45 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import threading
class Counter:
def __init__(self, start: int = 0) -> None:
super().__init__()
self.counter = start
def __next__(self) -> int:
i = self.counter
self.counter += 1
return i
def reset(self) -> None:
self.counter = 0
class AtomicCounter:
"""An atomic, thread-safe counter"""
def __init__(self, initial: int = 0) -> None:
"""Initialize a new atomic counter to given initial value"""
super().__init__()
self._value = initial
self._lock = threading.Lock()
@property
def value(self) -> int:
return self._value
def inc(self, num: int = 1) -> int:
"""Atomically increment the counter by num and return the new value"""
with self._lock:
self._value += num
return self._value
def dec(self, num: int = 1) -> int:
"""Atomically decrement the counter by num and return the new value"""
with self._lock:
self._value -= num
return self._value

View File

@ -14,7 +14,7 @@ import torch
import vllm.envs as envs
from vllm.config import VllmConfig
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.protocol import EngineClient
from vllm.engine.protocol import Device, EngineClient
from vllm.entrypoints.utils import _validate_truncation_size
from vllm.inputs import PromptType
from vllm.logger import init_logger
@ -29,7 +29,6 @@ from vllm.tracing import init_tracer
from vllm.transformers_utils.config import maybe_register_config_serialize_by_value
from vllm.transformers_utils.tokenizer import AnyTokenizer, init_tokenizer_from_configs
from vllm.usage.usage_lib import UsageContext
from vllm.utils import Device
from vllm.utils.async_utils import cancel_task_threadsafe
from vllm.utils.collection_utils import as_list
from vllm.utils.func_utils import deprecate_kwargs

View File

@ -14,6 +14,7 @@ from vllm.config import ParallelConfig, VllmConfig
from vllm.distributed import stateless_destroy_torch_distributed_process_group
from vllm.distributed.parallel_state import get_dp_group
from vllm.engine.arg_utils import EngineArgs
from vllm.engine.protocol import Device
from vllm.inputs import PromptType
from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
@ -26,7 +27,6 @@ from vllm.tasks import SupportedTask
from vllm.tracing import init_tracer
from vllm.transformers_utils.tokenizer import AnyTokenizer, init_tokenizer_from_configs
from vllm.usage.usage_lib import UsageContext
from vllm.utils import Device
from vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.core_client import EngineCoreClient
from vllm.v1.engine.output_processor import OutputProcessor