mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-16 14:17:16 +08:00
Remove Python 3.9 support ahead of PyTorch 2.9 in v0.11.1 (#26416)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
parent
4ba8875749
commit
e09d1753ec
@ -55,11 +55,6 @@ repos:
|
|||||||
types_or: [python, pyi]
|
types_or: [python, pyi]
|
||||||
require_serial: true
|
require_serial: true
|
||||||
additional_dependencies: [mypy==1.11.1, regex, types-cachetools, types-setuptools, types-PyYAML, types-requests, types-torch, pydantic]
|
additional_dependencies: [mypy==1.11.1, regex, types-cachetools, types-setuptools, types-PyYAML, types-requests, types-torch, pydantic]
|
||||||
- id: mypy-3.9 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
|
|
||||||
name: Run mypy for Python 3.9
|
|
||||||
entry: python tools/pre_commit/mypy.py 1 "3.9"
|
|
||||||
<<: *mypy_common
|
|
||||||
stages: [manual] # Only run in CI
|
|
||||||
- id: mypy-3.10 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
|
- id: mypy-3.10 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
|
||||||
name: Run mypy for Python 3.10
|
name: Run mypy for Python 3.10
|
||||||
entry: python tools/pre_commit/mypy.py 1 "3.10"
|
entry: python tools/pre_commit/mypy.py 1 "3.10"
|
||||||
@ -75,6 +70,11 @@ repos:
|
|||||||
entry: python tools/pre_commit/mypy.py 1 "3.12"
|
entry: python tools/pre_commit/mypy.py 1 "3.12"
|
||||||
<<: *mypy_common
|
<<: *mypy_common
|
||||||
stages: [manual] # Only run in CI
|
stages: [manual] # Only run in CI
|
||||||
|
- id: mypy-3.13 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
|
||||||
|
name: Run mypy for Python 3.13
|
||||||
|
entry: python tools/pre_commit/mypy.py 1 "3.13"
|
||||||
|
<<: *mypy_common
|
||||||
|
stages: [manual] # Only run in CI
|
||||||
- id: shellcheck
|
- id: shellcheck
|
||||||
name: Lint shell scripts
|
name: Lint shell scripts
|
||||||
entry: tools/shellcheck.sh
|
entry: tools/shellcheck.sh
|
||||||
|
|||||||
@ -34,7 +34,7 @@ install(CODE "set(CMAKE_INSTALL_LOCAL_ONLY TRUE)" ALL_COMPONENTS)
|
|||||||
# Supported python versions. These versions will be searched in order, the
|
# Supported python versions. These versions will be searched in order, the
|
||||||
# first match will be selected. These should be kept in sync with setup.py.
|
# first match will be selected. These should be kept in sync with setup.py.
|
||||||
#
|
#
|
||||||
set(PYTHON_SUPPORTED_VERSIONS "3.9" "3.10" "3.11" "3.12" "3.13")
|
set(PYTHON_SUPPORTED_VERSIONS "3.10" "3.11" "3.12" "3.13")
|
||||||
|
|
||||||
# Supported AMD GPU architectures.
|
# Supported AMD GPU architectures.
|
||||||
set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151")
|
set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151")
|
||||||
|
|||||||
@ -13,7 +13,7 @@ from datetime import datetime
|
|||||||
from enum import Enum
|
from enum import Enum
|
||||||
from http import HTTPStatus
|
from http import HTTPStatus
|
||||||
from statistics import mean
|
from statistics import mean
|
||||||
from typing import NamedTuple, Optional, Union
|
from typing import NamedTuple, Union
|
||||||
|
|
||||||
import aiohttp # type: ignore
|
import aiohttp # type: ignore
|
||||||
import numpy as np # type: ignore
|
import numpy as np # type: ignore
|
||||||
@ -46,9 +46,9 @@ class ConversationSampling(str, Enum):
|
|||||||
|
|
||||||
class ClientArgs(NamedTuple):
|
class ClientArgs(NamedTuple):
|
||||||
seed: int
|
seed: int
|
||||||
max_num_requests: Optional[int]
|
max_num_requests: int | None
|
||||||
skip_first_turn: bool
|
skip_first_turn: bool
|
||||||
max_turns: Optional[int]
|
max_turns: int | None
|
||||||
max_active_conversations: int
|
max_active_conversations: int
|
||||||
verbose: bool
|
verbose: bool
|
||||||
print_content: bool
|
print_content: bool
|
||||||
@ -109,9 +109,9 @@ class RequestStats(NamedTuple):
|
|||||||
|
|
||||||
class MetricStats:
|
class MetricStats:
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self.min: Optional[float] = None
|
self.min: float | None = None
|
||||||
self.max: Optional[float] = None
|
self.max: float | None = None
|
||||||
self.avg: Optional[float] = None
|
self.avg: float | None = None
|
||||||
self.sum = 0.0
|
self.sum = 0.0
|
||||||
self.count = 0
|
self.count = 0
|
||||||
|
|
||||||
@ -143,7 +143,7 @@ class MovingAverage:
|
|||||||
self.index = 0
|
self.index = 0
|
||||||
self.sum = 0.0
|
self.sum = 0.0
|
||||||
self.count = 0
|
self.count = 0
|
||||||
self.avg: Optional[float] = None
|
self.avg: float | None = None
|
||||||
|
|
||||||
def update(self, new_value: float) -> None:
|
def update(self, new_value: float) -> None:
|
||||||
if self.count < self.window_size:
|
if self.count < self.window_size:
|
||||||
@ -198,14 +198,6 @@ class DebugStats:
|
|||||||
self.logger.info("-" * 50)
|
self.logger.info("-" * 50)
|
||||||
|
|
||||||
|
|
||||||
# Must support Python 3.8, we can't use str.removeprefix(prefix)
|
|
||||||
# introduced in Python 3.9
|
|
||||||
def remove_prefix(text: str, prefix: str) -> str:
|
|
||||||
if text.startswith(prefix):
|
|
||||||
return text[len(prefix) :]
|
|
||||||
return text
|
|
||||||
|
|
||||||
|
|
||||||
def nanosec_to_millisec(value: float) -> float:
|
def nanosec_to_millisec(value: float) -> float:
|
||||||
return value / 1000000.0
|
return value / 1000000.0
|
||||||
|
|
||||||
@ -220,8 +212,8 @@ async def send_request(
|
|||||||
chat_url: str,
|
chat_url: str,
|
||||||
model: str,
|
model: str,
|
||||||
stream: bool = True,
|
stream: bool = True,
|
||||||
min_tokens: Optional[int] = None,
|
min_tokens: int | None = None,
|
||||||
max_tokens: Optional[int] = None,
|
max_tokens: int | None = None,
|
||||||
) -> ServerResponse:
|
) -> ServerResponse:
|
||||||
payload = {
|
payload = {
|
||||||
"model": model,
|
"model": model,
|
||||||
@ -250,9 +242,9 @@ async def send_request(
|
|||||||
timeout = aiohttp.ClientTimeout(total=timeout_sec)
|
timeout = aiohttp.ClientTimeout(total=timeout_sec)
|
||||||
|
|
||||||
valid_response = True
|
valid_response = True
|
||||||
ttft: Optional[float] = None
|
ttft: float | None = None
|
||||||
chunk_delay: list[int] = []
|
chunk_delay: list[int] = []
|
||||||
latency: Optional[float] = None
|
latency: float | None = None
|
||||||
first_chunk = ""
|
first_chunk = ""
|
||||||
generated_text = ""
|
generated_text = ""
|
||||||
|
|
||||||
@ -269,7 +261,7 @@ async def send_request(
|
|||||||
if not chunk_bytes:
|
if not chunk_bytes:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
chunk = remove_prefix(chunk_bytes.decode("utf-8"), "data: ")
|
chunk = chunk_bytes.decode("utf-8").removeprefix("data: ")
|
||||||
if chunk == "[DONE]":
|
if chunk == "[DONE]":
|
||||||
# End of stream
|
# End of stream
|
||||||
latency = time.perf_counter_ns() - start_time
|
latency = time.perf_counter_ns() - start_time
|
||||||
@ -364,7 +356,7 @@ async def send_turn(
|
|||||||
req_args: RequestArgs,
|
req_args: RequestArgs,
|
||||||
verbose: bool,
|
verbose: bool,
|
||||||
verify_output: bool,
|
verify_output: bool,
|
||||||
) -> Optional[RequestStats]:
|
) -> RequestStats | None:
|
||||||
assert messages_to_use > 0
|
assert messages_to_use > 0
|
||||||
assert messages_to_use <= len(conversation_messages)
|
assert messages_to_use <= len(conversation_messages)
|
||||||
|
|
||||||
@ -769,7 +761,7 @@ def get_client_config(
|
|||||||
"Number of conversations must be equal or larger than the number of clients"
|
"Number of conversations must be equal or larger than the number of clients"
|
||||||
)
|
)
|
||||||
|
|
||||||
max_req_per_client: Optional[int] = None
|
max_req_per_client: int | None = None
|
||||||
if args.max_num_requests is not None:
|
if args.max_num_requests is not None:
|
||||||
# Max number of requests per client
|
# Max number of requests per client
|
||||||
req_per_client = args.max_num_requests // args.num_clients
|
req_per_client = args.max_num_requests // args.num_clients
|
||||||
@ -1032,7 +1024,7 @@ def process_statistics(
|
|||||||
warmup_percentages: list[float],
|
warmup_percentages: list[float],
|
||||||
test_params: dict,
|
test_params: dict,
|
||||||
verbose: bool,
|
verbose: bool,
|
||||||
gen_conv_args: Optional[GenConvArgs] = None,
|
gen_conv_args: GenConvArgs | None = None,
|
||||||
excel_output: bool = False,
|
excel_output: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
if len(client_metrics) == 0:
|
if len(client_metrics) == 0:
|
||||||
|
|||||||
@ -13,7 +13,7 @@
|
|||||||
# vllm-dev: used for development
|
# vllm-dev: used for development
|
||||||
#
|
#
|
||||||
# Build arguments:
|
# Build arguments:
|
||||||
# PYTHON_VERSION=3.12 (default)|3.11|3.10|3.9
|
# PYTHON_VERSION=3.13|3.12 (default)|3.11|3.10
|
||||||
# VLLM_CPU_DISABLE_AVX512=false (default)|true
|
# VLLM_CPU_DISABLE_AVX512=false (default)|true
|
||||||
# VLLM_CPU_AVX512BF16=false (default)|true
|
# VLLM_CPU_AVX512BF16=false (default)|true
|
||||||
# VLLM_CPU_AVX512VNNI=false (default)|true
|
# VLLM_CPU_AVX512VNNI=false (default)|true
|
||||||
|
|||||||
@ -54,7 +54,7 @@ For more details about installing from source and installing for other hardware,
|
|||||||
For an optimized workflow when iterating on C++/CUDA kernels, see the [Incremental Compilation Workflow](./incremental_build.md) for recommendations.
|
For an optimized workflow when iterating on C++/CUDA kernels, see the [Incremental Compilation Workflow](./incremental_build.md) for recommendations.
|
||||||
|
|
||||||
!!! tip
|
!!! tip
|
||||||
vLLM is compatible with Python versions 3.9 to 3.12. However, vLLM's default [Dockerfile](gh-file:docker/Dockerfile) ships with Python 3.12 and tests in CI (except `mypy`) are run with Python 3.12.
|
vLLM is compatible with Python versions 3.10 to 3.13. However, vLLM's default [Dockerfile](gh-file:docker/Dockerfile) ships with Python 3.12 and tests in CI (except `mypy`) are run with Python 3.12.
|
||||||
|
|
||||||
Therefore, we recommend developing with Python 3.12 to minimise the chance of your local environment clashing with our CI environment.
|
Therefore, we recommend developing with Python 3.12 to minimise the chance of your local environment clashing with our CI environment.
|
||||||
|
|
||||||
@ -83,7 +83,7 @@ vLLM's `pre-commit` hooks will now run automatically every time you commit.
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
pre-commit run --hook-stage manual markdownlint
|
pre-commit run --hook-stage manual markdownlint
|
||||||
pre-commit run --hook-stage manual mypy-3.9
|
pre-commit run --hook-stage manual mypy-3.10
|
||||||
```
|
```
|
||||||
|
|
||||||
### Documentation
|
### Documentation
|
||||||
|
|||||||
@ -20,7 +20,7 @@ vLLM is a Python library that supports the following CPU variants. Select your C
|
|||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
|
|
||||||
- Python: 3.9 -- 3.12
|
- Python: 3.10 -- 3.13
|
||||||
|
|
||||||
=== "Intel/AMD x86"
|
=== "Intel/AMD x86"
|
||||||
|
|
||||||
|
|||||||
@ -17,7 +17,7 @@ vLLM is a Python library that supports the following GPU variants. Select your G
|
|||||||
## Requirements
|
## Requirements
|
||||||
|
|
||||||
- OS: Linux
|
- OS: Linux
|
||||||
- Python: 3.9 -- 3.12
|
- Python: 3.10 -- 3.13
|
||||||
|
|
||||||
!!! note
|
!!! note
|
||||||
vLLM does not support Windows natively. To run vLLM on Windows, you can use the Windows Subsystem for Linux (WSL) with a compatible Linux distribution, or use some community-maintained forks, e.g. [https://github.com/SystemPanic/vllm-windows](https://github.com/SystemPanic/vllm-windows).
|
vLLM does not support Windows natively. To run vLLM on Windows, you can use the Windows Subsystem for Linux (WSL) with a compatible Linux distribution, or use some community-maintained forks, e.g. [https://github.com/SystemPanic/vllm-windows](https://github.com/SystemPanic/vllm-windows).
|
||||||
|
|||||||
@ -8,7 +8,7 @@ This guide will help you quickly get started with vLLM to perform:
|
|||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
- OS: Linux
|
- OS: Linux
|
||||||
- Python: 3.9 -- 3.13
|
- Python: 3.10 -- 3.13
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "examples-online-structured-outputs"
|
name = "examples-online-structured-outputs"
|
||||||
requires-python = ">=3.9, <3.13"
|
requires-python = ">=3.10, <3.14"
|
||||||
dependencies = ["openai==1.78.1", "pydantic==2.11.4"]
|
dependencies = ["openai==1.78.1", "pydantic==2.11.4"]
|
||||||
version = "0.0.0"
|
version = "0.0.0"
|
||||||
|
|
||||||
|
|||||||
@ -20,7 +20,6 @@ license-files = ["LICENSE"]
|
|||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
description = "A high-throughput and memory-efficient inference and serving engine for LLMs"
|
description = "A high-throughput and memory-efficient inference and serving engine for LLMs"
|
||||||
classifiers = [
|
classifiers = [
|
||||||
"Programming Language :: Python :: 3.9",
|
|
||||||
"Programming Language :: Python :: 3.10",
|
"Programming Language :: Python :: 3.10",
|
||||||
"Programming Language :: Python :: 3.11",
|
"Programming Language :: Python :: 3.11",
|
||||||
"Programming Language :: Python :: 3.12",
|
"Programming Language :: Python :: 3.12",
|
||||||
@ -31,7 +30,7 @@ classifiers = [
|
|||||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
||||||
"Topic :: Scientific/Engineering :: Information Analysis",
|
"Topic :: Scientific/Engineering :: Information Analysis",
|
||||||
]
|
]
|
||||||
requires-python = ">=3.9,<3.14"
|
requires-python = ">=3.10,<3.14"
|
||||||
dynamic = [ "version", "dependencies", "optional-dependencies"]
|
dynamic = [ "version", "dependencies", "optional-dependencies"]
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
@ -79,12 +78,18 @@ ignore = [
|
|||||||
"F405", "F403",
|
"F405", "F403",
|
||||||
# lambda expression assignment
|
# lambda expression assignment
|
||||||
"E731",
|
"E731",
|
||||||
|
# zip without `strict=`
|
||||||
|
"B905",
|
||||||
# Loop control variable not used within loop body
|
# Loop control variable not used within loop body
|
||||||
"B007",
|
"B007",
|
||||||
# f-string format
|
# f-string format
|
||||||
"UP032",
|
"UP032",
|
||||||
# Can remove once 3.10+ is the minimum Python version
|
# Can remove once 3.10+ is the minimum Python version
|
||||||
"UP007",
|
"UP007",
|
||||||
|
"UP027",
|
||||||
|
"UP035",
|
||||||
|
"UP038",
|
||||||
|
"UP045",
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.ruff.format]
|
[tool.ruff.format]
|
||||||
|
|||||||
@ -1,8 +1,7 @@
|
|||||||
# Common dependencies
|
# Common dependencies
|
||||||
-r common.txt
|
-r common.txt
|
||||||
|
|
||||||
numba == 0.60.0; python_version == '3.9' and platform_machine != "s390x" # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
|
numba == 0.61.2; platform_machine != "s390x" # Required for N-gram speculative decoding
|
||||||
numba == 0.61.2; python_version > '3.9' and platform_machine != "s390x"
|
|
||||||
|
|
||||||
# Dependencies for CPUs
|
# Dependencies for CPUs
|
||||||
packaging>=24.2
|
packaging>=24.2
|
||||||
|
|||||||
@ -1,8 +1,7 @@
|
|||||||
# Common dependencies
|
# Common dependencies
|
||||||
-r common.txt
|
-r common.txt
|
||||||
|
|
||||||
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
|
numba == 0.61.2 # Required for N-gram speculative decoding
|
||||||
numba == 0.61.2; python_version > '3.9'
|
|
||||||
|
|
||||||
# Dependencies for NVIDIA GPUs
|
# Dependencies for NVIDIA GPUs
|
||||||
ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
|
ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
|
||||||
|
|||||||
@ -40,8 +40,7 @@ buildkite-test-collector==0.1.9
|
|||||||
genai_perf==0.0.8
|
genai_perf==0.0.8
|
||||||
tritonclient==2.51.0
|
tritonclient==2.51.0
|
||||||
|
|
||||||
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
|
numba == 0.61.2 # Required for N-gram speculative decoding
|
||||||
numba == 0.61.2; python_version > '3.9'
|
|
||||||
numpy
|
numpy
|
||||||
runai-model-streamer[s3,gcs]==0.14.0
|
runai-model-streamer[s3,gcs]==0.14.0
|
||||||
fastsafetensors>=0.1.10
|
fastsafetensors>=0.1.10
|
||||||
|
|||||||
@ -1,8 +1,7 @@
|
|||||||
# Common dependencies
|
# Common dependencies
|
||||||
-r common.txt
|
-r common.txt
|
||||||
|
|
||||||
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
|
numba == 0.61.2 # Required for N-gram speculative decoding
|
||||||
numba == 0.61.2; python_version > '3.9'
|
|
||||||
|
|
||||||
# Dependencies for AMD GPUs
|
# Dependencies for AMD GPUs
|
||||||
datasets
|
datasets
|
||||||
|
|||||||
@ -48,8 +48,7 @@ buildkite-test-collector==0.1.9
|
|||||||
genai_perf==0.0.8
|
genai_perf==0.0.8
|
||||||
tritonclient==2.51.0
|
tritonclient==2.51.0
|
||||||
|
|
||||||
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
|
numba == 0.61.2 # Required for N-gram speculative decoding
|
||||||
numba == 0.61.2; python_version > '3.9'
|
|
||||||
numpy
|
numpy
|
||||||
runai-model-streamer[s3,gcs]==0.14.0
|
runai-model-streamer[s3,gcs]==0.14.0
|
||||||
fastsafetensors>=0.1.10
|
fastsafetensors>=0.1.10
|
||||||
|
|||||||
@ -9,7 +9,7 @@ setuptools>=77.0.3,<80.0.0
|
|||||||
wheel
|
wheel
|
||||||
jinja2>=3.1.6
|
jinja2>=3.1.6
|
||||||
datasets # for benchmark scripts
|
datasets # for benchmark scripts
|
||||||
numba == 0.60.0 # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
|
numba == 0.61.2 # Required for N-gram speculative decoding
|
||||||
nixl==0.3.0 # for PD disaggregation
|
nixl==0.3.0 # for PD disaggregation
|
||||||
torch==2.8.0+xpu
|
torch==2.8.0+xpu
|
||||||
torchaudio
|
torchaudio
|
||||||
|
|||||||
@ -8,6 +8,7 @@ and that each field has a docstring.
|
|||||||
import ast
|
import ast
|
||||||
import inspect
|
import inspect
|
||||||
import sys
|
import sys
|
||||||
|
from itertools import pairwise
|
||||||
|
|
||||||
import regex as re
|
import regex as re
|
||||||
|
|
||||||
@ -20,19 +21,6 @@ def get_attr_docs(cls_node: ast.ClassDef) -> dict[str, str]:
|
|||||||
https://davidism.com/mit-license/
|
https://davidism.com/mit-license/
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def pairwise(iterable):
|
|
||||||
"""
|
|
||||||
Manually implement https://docs.python.org/3/library/itertools.html#itertools.pairwise
|
|
||||||
|
|
||||||
Can be removed when Python 3.9 support is dropped.
|
|
||||||
"""
|
|
||||||
iterator = iter(iterable)
|
|
||||||
a = next(iterator, None)
|
|
||||||
|
|
||||||
for b in iterator:
|
|
||||||
yield a, b
|
|
||||||
a = b
|
|
||||||
|
|
||||||
out = {}
|
out = {}
|
||||||
|
|
||||||
# Consider each pair of nodes.
|
# Consider each pair of nodes.
|
||||||
|
|||||||
@ -7,6 +7,7 @@ import inspect
|
|||||||
import textwrap
|
import textwrap
|
||||||
from collections.abc import Iterable
|
from collections.abc import Iterable
|
||||||
from dataclasses import MISSING, Field, field, fields, is_dataclass, replace
|
from dataclasses import MISSING, Field, field, fields, is_dataclass, replace
|
||||||
|
from itertools import pairwise
|
||||||
from typing import TYPE_CHECKING, Any, Protocol, TypeVar
|
from typing import TYPE_CHECKING, Any, Protocol, TypeVar
|
||||||
|
|
||||||
import regex as re
|
import regex as re
|
||||||
@ -102,19 +103,6 @@ def get_attr_docs(cls: type[Any]) -> dict[str, str]:
|
|||||||
https://davidism.com/mit-license/
|
https://davidism.com/mit-license/
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def pairwise(iterable):
|
|
||||||
"""
|
|
||||||
Manually implement https://docs.python.org/3/library/itertools.html#itertools.pairwise
|
|
||||||
|
|
||||||
Can be removed when Python 3.9 support is dropped.
|
|
||||||
"""
|
|
||||||
iterator = iter(iterable)
|
|
||||||
a = next(iterator, None)
|
|
||||||
|
|
||||||
for b in iterator:
|
|
||||||
yield a, b
|
|
||||||
a = b
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
cls_node = ast.parse(textwrap.dedent(inspect.getsource(cls))).body[0]
|
cls_node = ast.parse(textwrap.dedent(inspect.getsource(cls))).body[0]
|
||||||
except (OSError, KeyError, TypeError):
|
except (OSError, KeyError, TypeError):
|
||||||
|
|||||||
@ -15,12 +15,7 @@ plugins_loaded = False
|
|||||||
|
|
||||||
|
|
||||||
def load_plugins_by_group(group: str) -> dict[str, Callable[[], Any]]:
|
def load_plugins_by_group(group: str) -> dict[str, Callable[[], Any]]:
|
||||||
import sys
|
from importlib.metadata import entry_points
|
||||||
|
|
||||||
if sys.version_info < (3, 10):
|
|
||||||
from importlib_metadata import entry_points
|
|
||||||
else:
|
|
||||||
from importlib.metadata import entry_points
|
|
||||||
|
|
||||||
allowed_plugins = envs.VLLM_PLUGINS
|
allowed_plugins = envs.VLLM_PLUGINS
|
||||||
|
|
||||||
|
|||||||
@ -55,12 +55,7 @@ BUILTIN_LOGITS_PROCESSORS: list[type[LogitsProcessor]] = [
|
|||||||
def _load_logitsprocs_plugins() -> list[type[LogitsProcessor]]:
|
def _load_logitsprocs_plugins() -> list[type[LogitsProcessor]]:
|
||||||
"""Load all installed logit processor plugins"""
|
"""Load all installed logit processor plugins"""
|
||||||
|
|
||||||
import sys
|
from importlib.metadata import entry_points
|
||||||
|
|
||||||
if sys.version_info < (3, 10):
|
|
||||||
from importlib_metadata import entry_points
|
|
||||||
else:
|
|
||||||
from importlib.metadata import entry_points
|
|
||||||
|
|
||||||
installed_logitsprocs_plugins = entry_points(group=LOGITSPROCS_GROUP)
|
installed_logitsprocs_plugins = entry_points(group=LOGITSPROCS_GROUP)
|
||||||
if len(installed_logitsprocs_plugins) == 0:
|
if len(installed_logitsprocs_plugins) == 0:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user