[Misc] Separate out utils.counter and move utils.Device to engine (#27588)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung 2025-10-28 20:20:46 +08:00 committed by GitHub
parent 44b5ce956d
commit f58d9b6404
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 59 additions and 54 deletions

View File

@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import enum
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from collections.abc import AsyncGenerator, Iterable, Mapping from collections.abc import AsyncGenerator, Iterable, Mapping
from typing import Any from typing import Any
@ -15,13 +16,17 @@ from vllm.pooling_params import PoolingParams
from vllm.sampling_params import SamplingParams from vllm.sampling_params import SamplingParams
from vllm.tasks import SupportedTask from vllm.tasks import SupportedTask
from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.utils import Device
from vllm.v1.engine import EngineCoreRequest from vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.processor import Processor from vllm.v1.engine.processor import Processor
logger = init_logger(__name__) logger = init_logger(__name__)
class Device(enum.Enum):
    """Kinds of device that engine calls can target.

    Used, for example, as the optional ``device`` argument of
    ``reset_prefix_cache``.
    """

    # ``enum.auto()`` assigns consecutive integers starting at 1, in
    # definition order.
    GPU = enum.auto()
    CPU = enum.auto()
class EngineClient(ABC): class EngineClient(ABC):
"""Protocol class for Clients to Engine""" """Protocol class for Clients to Engine"""

View File

@ -31,6 +31,7 @@ from vllm.config.model import (
TokenizerMode, TokenizerMode,
) )
from vllm.engine.arg_utils import EngineArgs from vllm.engine.arg_utils import EngineArgs
from vllm.engine.protocol import Device
from vllm.entrypoints.chat_utils import ( from vllm.entrypoints.chat_utils import (
ChatCompletionMessageParam, ChatCompletionMessageParam,
ChatTemplateContentFormatOption, ChatTemplateContentFormatOption,
@ -75,8 +76,8 @@ from vllm.transformers_utils.tokenizer import (
get_cached_tokenizer, get_cached_tokenizer,
) )
from vllm.usage.usage_lib import UsageContext from vllm.usage.usage_lib import UsageContext
from vllm.utils import Counter, Device
from vllm.utils.collection_utils import as_iter, is_list_of from vllm.utils.collection_utils import as_iter, is_list_of
from vllm.utils.counter import Counter
from vllm.v1.engine import EngineCoreRequest from vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.llm_engine import LLMEngine from vllm.v1.engine.llm_engine import LLMEngine
from vllm.v1.sample.logits_processor import LogitsProcessor from vllm.v1.sample.logits_processor import LogitsProcessor
@ -1490,8 +1491,8 @@ class LLM:
def stop_profile(self) -> None: def stop_profile(self) -> None:
self.llm_engine.stop_profile() self.llm_engine.stop_profile()
def reset_prefix_cache(self, device: Device | None = None) -> bool: def reset_prefix_cache(self, device: Device | None = None) -> None:
return self.llm_engine.reset_prefix_cache(device) self.llm_engine.reset_prefix_cache(device)
def sleep(self, level: int = 1): def sleep(self, level: int = 1):
""" """

View File

@ -40,7 +40,7 @@ from typing_extensions import assert_never
import vllm.envs as envs import vllm.envs as envs
from vllm.config import VllmConfig from vllm.config import VllmConfig
from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.protocol import EngineClient from vllm.engine.protocol import Device, EngineClient
from vllm.entrypoints.launcher import serve_http from vllm.entrypoints.launcher import serve_http
from vllm.entrypoints.logger import RequestLogger from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args
@ -108,7 +108,6 @@ from vllm.entrypoints.utils import (
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.reasoning import ReasoningParserManager from vllm.reasoning import ReasoningParserManager
from vllm.usage.usage_lib import UsageContext from vllm.usage.usage_lib import UsageContext
from vllm.utils import Device
from vllm.utils.argparse_utils import FlexibleArgumentParser from vllm.utils.argparse_utils import FlexibleArgumentParser
from vllm.utils.network_utils import is_valid_ipv6_address from vllm.utils.network_utils import is_valid_ipv6_address
from vllm.utils.system_utils import decorate_logs, set_ulimit from vllm.utils.system_utils import decorate_logs, set_ulimit

View File

@ -19,7 +19,7 @@ from vllm.entrypoints.openai.protocol import (
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry
from vllm.utils import AtomicCounter from vllm.utils.counter import AtomicCounter
logger = init_logger(__name__) logger = init_logger(__name__)

View File

@ -3,7 +3,6 @@
import enum import enum
import inspect import inspect
import threading
import uuid import uuid
import warnings import warnings
from functools import wraps from functools import wraps
@ -68,54 +67,11 @@ STR_INVALID_VAL: str = "INVALID"
T = TypeVar("T") T = TypeVar("T")
class Device(enum.Enum):
    """Kinds of device that engine calls can target."""

    # ``enum.auto()`` assigns consecutive integers starting at 1, in
    # definition order.
    GPU = enum.auto()
    CPU = enum.auto()
class LayerBlockType(enum.Enum): class LayerBlockType(enum.Enum):
attention = "attention" attention = "attention"
mamba = "mamba" mamba = "mamba"
class Counter:
    """Integer counter advanced with ``next()``.

    Each ``next(counter)`` call returns the current value and then
    increments it by one. No locking is performed.
    """

    def __init__(self, start: int = 0) -> None:
        """Create a counter whose first ``next()`` result is ``start``."""
        self.counter = start

    def __iter__(self) -> "Counter":
        """Return ``self`` so the counter satisfies the iterator protocol."""
        return self

    def __next__(self) -> int:
        """Return the current value, then advance the counter by one."""
        current = self.counter
        self.counter += 1
        return current

    def reset(self) -> None:
        """Set the counter back to 0.

        The counter restarts from 0 even if it was created with a
        different ``start``.
        """
        self.counter = 0
class AtomicCounter:
    """An atomic, thread-safe counter.

    Updates made through ``inc`` and ``dec`` are serialized by an
    internal ``threading.Lock``.
    """

    def __init__(self, initial: int = 0) -> None:
        """Initialize a new atomic counter to the given initial value."""
        self._value = initial
        self._lock = threading.Lock()

    def inc(self, num: int = 1) -> int:
        """Atomically increment the counter by ``num`` and return the new value."""
        with self._lock:
            self._value += num
            return self._value

    def dec(self, num: int = 1) -> int:
        """Atomically decrement the counter by ``num`` and return the new value."""
        with self._lock:
            self._value -= num
            return self._value

    @property
    def value(self) -> int:
        """Current counter value (read without taking the lock)."""
        return self._value
def random_uuid() -> str: def random_uuid() -> str:
return str(uuid.uuid4().hex) return str(uuid.uuid4().hex)

45
vllm/utils/counter.py Normal file
View File

@ -0,0 +1,45 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import threading
class Counter:
    """Integer counter advanced with ``next()``.

    Each ``next(counter)`` call returns the current value and then
    increments it by one. No locking is performed.
    """

    def __init__(self, start: int = 0) -> None:
        """Create a counter whose first ``next()`` result is ``start``."""
        super().__init__()
        self.counter = start

    def __iter__(self) -> "Counter":
        """Return ``self`` so the counter satisfies the iterator protocol."""
        return self

    def __next__(self) -> int:
        """Return the current value, then advance the counter by one."""
        current = self.counter
        self.counter += 1
        return current

    def reset(self) -> None:
        """Set the counter back to 0.

        The counter restarts from 0 even if it was created with a
        different ``start``.
        """
        self.counter = 0
class AtomicCounter:
    """An atomic, thread-safe counter.

    Updates made through ``inc`` and ``dec`` are serialized by an
    internal ``threading.Lock``.
    """

    def __init__(self, initial: int = 0) -> None:
        """Initialize a new atomic counter to the given initial value."""
        super().__init__()
        self._value = initial
        self._lock = threading.Lock()

    @property
    def value(self) -> int:
        """Current counter value (read without taking the lock)."""
        return self._value

    def inc(self, num: int = 1) -> int:
        """Atomically increment the counter by ``num`` and return the new value."""
        with self._lock:
            self._value += num
            return self._value

    def dec(self, num: int = 1) -> int:
        """Atomically decrement the counter by ``num`` and return the new value."""
        with self._lock:
            self._value -= num
            return self._value

View File

@ -14,7 +14,7 @@ import torch
import vllm.envs as envs import vllm.envs as envs
from vllm.config import VllmConfig from vllm.config import VllmConfig
from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.protocol import EngineClient from vllm.engine.protocol import Device, EngineClient
from vllm.entrypoints.utils import _validate_truncation_size from vllm.entrypoints.utils import _validate_truncation_size
from vllm.inputs import PromptType from vllm.inputs import PromptType
from vllm.logger import init_logger from vllm.logger import init_logger
@ -29,7 +29,6 @@ from vllm.tracing import init_tracer
from vllm.transformers_utils.config import maybe_register_config_serialize_by_value from vllm.transformers_utils.config import maybe_register_config_serialize_by_value
from vllm.transformers_utils.tokenizer import AnyTokenizer, init_tokenizer_from_configs from vllm.transformers_utils.tokenizer import AnyTokenizer, init_tokenizer_from_configs
from vllm.usage.usage_lib import UsageContext from vllm.usage.usage_lib import UsageContext
from vllm.utils import Device
from vllm.utils.async_utils import cancel_task_threadsafe from vllm.utils.async_utils import cancel_task_threadsafe
from vllm.utils.collection_utils import as_list from vllm.utils.collection_utils import as_list
from vllm.utils.func_utils import deprecate_kwargs from vllm.utils.func_utils import deprecate_kwargs

View File

@ -14,6 +14,7 @@ from vllm.config import ParallelConfig, VllmConfig
from vllm.distributed import stateless_destroy_torch_distributed_process_group from vllm.distributed import stateless_destroy_torch_distributed_process_group
from vllm.distributed.parallel_state import get_dp_group from vllm.distributed.parallel_state import get_dp_group
from vllm.engine.arg_utils import EngineArgs from vllm.engine.arg_utils import EngineArgs
from vllm.engine.protocol import Device
from vllm.inputs import PromptType from vllm.inputs import PromptType
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
@ -26,7 +27,6 @@ from vllm.tasks import SupportedTask
from vllm.tracing import init_tracer from vllm.tracing import init_tracer
from vllm.transformers_utils.tokenizer import AnyTokenizer, init_tokenizer_from_configs from vllm.transformers_utils.tokenizer import AnyTokenizer, init_tokenizer_from_configs
from vllm.usage.usage_lib import UsageContext from vllm.usage.usage_lib import UsageContext
from vllm.utils import Device
from vllm.v1.engine import EngineCoreRequest from vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.core_client import EngineCoreClient from vllm.v1.engine.core_client import EngineCoreClient
from vllm.v1.engine.output_processor import OutputProcessor from vllm.v1.engine.output_processor import OutputProcessor