mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 21:15:28 +08:00
[Misc] Separate out utils.counter and move utils.Device to engine (#27588)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
44b5ce956d
commit
f58d9b6404
@ -1,6 +1,7 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
|
|
||||||
|
import enum
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from collections.abc import AsyncGenerator, Iterable, Mapping
|
from collections.abc import AsyncGenerator, Iterable, Mapping
|
||||||
from typing import Any
|
from typing import Any
|
||||||
@ -15,13 +16,17 @@ from vllm.pooling_params import PoolingParams
|
|||||||
from vllm.sampling_params import SamplingParams
|
from vllm.sampling_params import SamplingParams
|
||||||
from vllm.tasks import SupportedTask
|
from vllm.tasks import SupportedTask
|
||||||
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
||||||
from vllm.utils import Device
|
|
||||||
from vllm.v1.engine import EngineCoreRequest
|
from vllm.v1.engine import EngineCoreRequest
|
||||||
from vllm.v1.engine.processor import Processor
|
from vllm.v1.engine.processor import Processor
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class Device(enum.Enum):
|
||||||
|
GPU = enum.auto()
|
||||||
|
CPU = enum.auto()
|
||||||
|
|
||||||
|
|
||||||
class EngineClient(ABC):
|
class EngineClient(ABC):
|
||||||
"""Protocol class for Clients to Engine"""
|
"""Protocol class for Clients to Engine"""
|
||||||
|
|
||||||
|
|||||||
@ -31,6 +31,7 @@ from vllm.config.model import (
|
|||||||
TokenizerMode,
|
TokenizerMode,
|
||||||
)
|
)
|
||||||
from vllm.engine.arg_utils import EngineArgs
|
from vllm.engine.arg_utils import EngineArgs
|
||||||
|
from vllm.engine.protocol import Device
|
||||||
from vllm.entrypoints.chat_utils import (
|
from vllm.entrypoints.chat_utils import (
|
||||||
ChatCompletionMessageParam,
|
ChatCompletionMessageParam,
|
||||||
ChatTemplateContentFormatOption,
|
ChatTemplateContentFormatOption,
|
||||||
@ -75,8 +76,8 @@ from vllm.transformers_utils.tokenizer import (
|
|||||||
get_cached_tokenizer,
|
get_cached_tokenizer,
|
||||||
)
|
)
|
||||||
from vllm.usage.usage_lib import UsageContext
|
from vllm.usage.usage_lib import UsageContext
|
||||||
from vllm.utils import Counter, Device
|
|
||||||
from vllm.utils.collection_utils import as_iter, is_list_of
|
from vllm.utils.collection_utils import as_iter, is_list_of
|
||||||
|
from vllm.utils.counter import Counter
|
||||||
from vllm.v1.engine import EngineCoreRequest
|
from vllm.v1.engine import EngineCoreRequest
|
||||||
from vllm.v1.engine.llm_engine import LLMEngine
|
from vllm.v1.engine.llm_engine import LLMEngine
|
||||||
from vllm.v1.sample.logits_processor import LogitsProcessor
|
from vllm.v1.sample.logits_processor import LogitsProcessor
|
||||||
@ -1490,8 +1491,8 @@ class LLM:
|
|||||||
def stop_profile(self) -> None:
|
def stop_profile(self) -> None:
|
||||||
self.llm_engine.stop_profile()
|
self.llm_engine.stop_profile()
|
||||||
|
|
||||||
def reset_prefix_cache(self, device: Device | None = None) -> bool:
|
def reset_prefix_cache(self, device: Device | None = None) -> None:
|
||||||
return self.llm_engine.reset_prefix_cache(device)
|
self.llm_engine.reset_prefix_cache(device)
|
||||||
|
|
||||||
def sleep(self, level: int = 1):
|
def sleep(self, level: int = 1):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@ -40,7 +40,7 @@ from typing_extensions import assert_never
|
|||||||
import vllm.envs as envs
|
import vllm.envs as envs
|
||||||
from vllm.config import VllmConfig
|
from vllm.config import VllmConfig
|
||||||
from vllm.engine.arg_utils import AsyncEngineArgs
|
from vllm.engine.arg_utils import AsyncEngineArgs
|
||||||
from vllm.engine.protocol import EngineClient
|
from vllm.engine.protocol import Device, EngineClient
|
||||||
from vllm.entrypoints.launcher import serve_http
|
from vllm.entrypoints.launcher import serve_http
|
||||||
from vllm.entrypoints.logger import RequestLogger
|
from vllm.entrypoints.logger import RequestLogger
|
||||||
from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args
|
from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args
|
||||||
@ -108,7 +108,6 @@ from vllm.entrypoints.utils import (
|
|||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.reasoning import ReasoningParserManager
|
from vllm.reasoning import ReasoningParserManager
|
||||||
from vllm.usage.usage_lib import UsageContext
|
from vllm.usage.usage_lib import UsageContext
|
||||||
from vllm.utils import Device
|
|
||||||
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||||
from vllm.utils.network_utils import is_valid_ipv6_address
|
from vllm.utils.network_utils import is_valid_ipv6_address
|
||||||
from vllm.utils.system_utils import decorate_logs, set_ulimit
|
from vllm.utils.system_utils import decorate_logs, set_ulimit
|
||||||
|
|||||||
@ -19,7 +19,7 @@ from vllm.entrypoints.openai.protocol import (
|
|||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.lora.request import LoRARequest
|
from vllm.lora.request import LoRARequest
|
||||||
from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry
|
from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry
|
||||||
from vllm.utils import AtomicCounter
|
from vllm.utils.counter import AtomicCounter
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
|
|||||||
@ -3,7 +3,6 @@
|
|||||||
|
|
||||||
import enum
|
import enum
|
||||||
import inspect
|
import inspect
|
||||||
import threading
|
|
||||||
import uuid
|
import uuid
|
||||||
import warnings
|
import warnings
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
@ -68,54 +67,11 @@ STR_INVALID_VAL: str = "INVALID"
|
|||||||
T = TypeVar("T")
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
|
||||||
class Device(enum.Enum):
|
|
||||||
GPU = enum.auto()
|
|
||||||
CPU = enum.auto()
|
|
||||||
|
|
||||||
|
|
||||||
class LayerBlockType(enum.Enum):
|
class LayerBlockType(enum.Enum):
|
||||||
attention = "attention"
|
attention = "attention"
|
||||||
mamba = "mamba"
|
mamba = "mamba"
|
||||||
|
|
||||||
|
|
||||||
class Counter:
|
|
||||||
def __init__(self, start: int = 0) -> None:
|
|
||||||
self.counter = start
|
|
||||||
|
|
||||||
def __next__(self) -> int:
|
|
||||||
i = self.counter
|
|
||||||
self.counter += 1
|
|
||||||
return i
|
|
||||||
|
|
||||||
def reset(self) -> None:
|
|
||||||
self.counter = 0
|
|
||||||
|
|
||||||
|
|
||||||
class AtomicCounter:
|
|
||||||
"""An atomic, thread-safe counter"""
|
|
||||||
|
|
||||||
def __init__(self, initial=0):
|
|
||||||
"""Initialize a new atomic counter to given initial value"""
|
|
||||||
self._value = initial
|
|
||||||
self._lock = threading.Lock()
|
|
||||||
|
|
||||||
def inc(self, num=1):
|
|
||||||
"""Atomically increment the counter by num and return the new value"""
|
|
||||||
with self._lock:
|
|
||||||
self._value += num
|
|
||||||
return self._value
|
|
||||||
|
|
||||||
def dec(self, num=1):
|
|
||||||
"""Atomically decrement the counter by num and return the new value"""
|
|
||||||
with self._lock:
|
|
||||||
self._value -= num
|
|
||||||
return self._value
|
|
||||||
|
|
||||||
@property
|
|
||||||
def value(self):
|
|
||||||
return self._value
|
|
||||||
|
|
||||||
|
|
||||||
def random_uuid() -> str:
|
def random_uuid() -> str:
|
||||||
return str(uuid.uuid4().hex)
|
return str(uuid.uuid4().hex)
|
||||||
|
|
||||||
|
|||||||
45
vllm/utils/counter.py
Normal file
45
vllm/utils/counter.py
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
|
import threading
|
||||||
|
|
||||||
|
|
||||||
|
class Counter:
|
||||||
|
def __init__(self, start: int = 0) -> None:
|
||||||
|
super().__init__()
|
||||||
|
|
||||||
|
self.counter = start
|
||||||
|
|
||||||
|
def __next__(self) -> int:
|
||||||
|
i = self.counter
|
||||||
|
self.counter += 1
|
||||||
|
return i
|
||||||
|
|
||||||
|
def reset(self) -> None:
|
||||||
|
self.counter = 0
|
||||||
|
|
||||||
|
|
||||||
|
class AtomicCounter:
|
||||||
|
"""An atomic, thread-safe counter"""
|
||||||
|
|
||||||
|
def __init__(self, initial: int = 0) -> None:
|
||||||
|
"""Initialize a new atomic counter to given initial value"""
|
||||||
|
super().__init__()
|
||||||
|
|
||||||
|
self._value = initial
|
||||||
|
self._lock = threading.Lock()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def value(self) -> int:
|
||||||
|
return self._value
|
||||||
|
|
||||||
|
def inc(self, num: int = 1) -> int:
|
||||||
|
"""Atomically increment the counter by num and return the new value"""
|
||||||
|
with self._lock:
|
||||||
|
self._value += num
|
||||||
|
return self._value
|
||||||
|
|
||||||
|
def dec(self, num: int = 1) -> int:
|
||||||
|
"""Atomically decrement the counter by num and return the new value"""
|
||||||
|
with self._lock:
|
||||||
|
self._value -= num
|
||||||
|
return self._value
|
||||||
@ -14,7 +14,7 @@ import torch
|
|||||||
import vllm.envs as envs
|
import vllm.envs as envs
|
||||||
from vllm.config import VllmConfig
|
from vllm.config import VllmConfig
|
||||||
from vllm.engine.arg_utils import AsyncEngineArgs
|
from vllm.engine.arg_utils import AsyncEngineArgs
|
||||||
from vllm.engine.protocol import EngineClient
|
from vllm.engine.protocol import Device, EngineClient
|
||||||
from vllm.entrypoints.utils import _validate_truncation_size
|
from vllm.entrypoints.utils import _validate_truncation_size
|
||||||
from vllm.inputs import PromptType
|
from vllm.inputs import PromptType
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
@ -29,7 +29,6 @@ from vllm.tracing import init_tracer
|
|||||||
from vllm.transformers_utils.config import maybe_register_config_serialize_by_value
|
from vllm.transformers_utils.config import maybe_register_config_serialize_by_value
|
||||||
from vllm.transformers_utils.tokenizer import AnyTokenizer, init_tokenizer_from_configs
|
from vllm.transformers_utils.tokenizer import AnyTokenizer, init_tokenizer_from_configs
|
||||||
from vllm.usage.usage_lib import UsageContext
|
from vllm.usage.usage_lib import UsageContext
|
||||||
from vllm.utils import Device
|
|
||||||
from vllm.utils.async_utils import cancel_task_threadsafe
|
from vllm.utils.async_utils import cancel_task_threadsafe
|
||||||
from vllm.utils.collection_utils import as_list
|
from vllm.utils.collection_utils import as_list
|
||||||
from vllm.utils.func_utils import deprecate_kwargs
|
from vllm.utils.func_utils import deprecate_kwargs
|
||||||
|
|||||||
@ -14,6 +14,7 @@ from vllm.config import ParallelConfig, VllmConfig
|
|||||||
from vllm.distributed import stateless_destroy_torch_distributed_process_group
|
from vllm.distributed import stateless_destroy_torch_distributed_process_group
|
||||||
from vllm.distributed.parallel_state import get_dp_group
|
from vllm.distributed.parallel_state import get_dp_group
|
||||||
from vllm.engine.arg_utils import EngineArgs
|
from vllm.engine.arg_utils import EngineArgs
|
||||||
|
from vllm.engine.protocol import Device
|
||||||
from vllm.inputs import PromptType
|
from vllm.inputs import PromptType
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.lora.request import LoRARequest
|
from vllm.lora.request import LoRARequest
|
||||||
@ -26,7 +27,6 @@ from vllm.tasks import SupportedTask
|
|||||||
from vllm.tracing import init_tracer
|
from vllm.tracing import init_tracer
|
||||||
from vllm.transformers_utils.tokenizer import AnyTokenizer, init_tokenizer_from_configs
|
from vllm.transformers_utils.tokenizer import AnyTokenizer, init_tokenizer_from_configs
|
||||||
from vllm.usage.usage_lib import UsageContext
|
from vllm.usage.usage_lib import UsageContext
|
||||||
from vllm.utils import Device
|
|
||||||
from vllm.v1.engine import EngineCoreRequest
|
from vllm.v1.engine import EngineCoreRequest
|
||||||
from vllm.v1.engine.core_client import EngineCoreClient
|
from vllm.v1.engine.core_client import EngineCoreClient
|
||||||
from vllm.v1.engine.output_processor import OutputProcessor
|
from vllm.v1.engine.output_processor import OutputProcessor
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user