Remove Python 3.9 support ahead of PyTorch 2.9 in v0.11.1 (#26416)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor 2025-10-08 18:40:42 +01:00 committed by GitHub
parent 4ba8875749
commit e09d1753ec
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 45 additions and 87 deletions

View File

@ -55,11 +55,6 @@ repos:
types_or: [python, pyi] types_or: [python, pyi]
require_serial: true require_serial: true
additional_dependencies: [mypy==1.11.1, regex, types-cachetools, types-setuptools, types-PyYAML, types-requests, types-torch, pydantic] additional_dependencies: [mypy==1.11.1, regex, types-cachetools, types-setuptools, types-PyYAML, types-requests, types-torch, pydantic]
- id: mypy-3.9 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
name: Run mypy for Python 3.9
entry: python tools/pre_commit/mypy.py 1 "3.9"
<<: *mypy_common
stages: [manual] # Only run in CI
- id: mypy-3.10 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward - id: mypy-3.10 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
name: Run mypy for Python 3.10 name: Run mypy for Python 3.10
entry: python tools/pre_commit/mypy.py 1 "3.10" entry: python tools/pre_commit/mypy.py 1 "3.10"
@ -75,6 +70,11 @@ repos:
entry: python tools/pre_commit/mypy.py 1 "3.12" entry: python tools/pre_commit/mypy.py 1 "3.12"
<<: *mypy_common <<: *mypy_common
stages: [manual] # Only run in CI stages: [manual] # Only run in CI
- id: mypy-3.13 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
name: Run mypy for Python 3.13
entry: python tools/pre_commit/mypy.py 1 "3.13"
<<: *mypy_common
stages: [manual] # Only run in CI
- id: shellcheck - id: shellcheck
name: Lint shell scripts name: Lint shell scripts
entry: tools/shellcheck.sh entry: tools/shellcheck.sh

View File

@ -34,7 +34,7 @@ install(CODE "set(CMAKE_INSTALL_LOCAL_ONLY TRUE)" ALL_COMPONENTS)
# Supported python versions. These versions will be searched in order, the # Supported python versions. These versions will be searched in order, the
# first match will be selected. These should be kept in sync with setup.py. # first match will be selected. These should be kept in sync with setup.py.
# #
set(PYTHON_SUPPORTED_VERSIONS "3.9" "3.10" "3.11" "3.12" "3.13") set(PYTHON_SUPPORTED_VERSIONS "3.10" "3.11" "3.12" "3.13")
# Supported AMD GPU architectures. # Supported AMD GPU architectures.
set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151") set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151")

View File

@ -13,7 +13,7 @@ from datetime import datetime
from enum import Enum from enum import Enum
from http import HTTPStatus from http import HTTPStatus
from statistics import mean from statistics import mean
from typing import NamedTuple, Optional, Union from typing import NamedTuple, Union
import aiohttp # type: ignore import aiohttp # type: ignore
import numpy as np # type: ignore import numpy as np # type: ignore
@ -46,9 +46,9 @@ class ConversationSampling(str, Enum):
class ClientArgs(NamedTuple): class ClientArgs(NamedTuple):
seed: int seed: int
max_num_requests: Optional[int] max_num_requests: int | None
skip_first_turn: bool skip_first_turn: bool
max_turns: Optional[int] max_turns: int | None
max_active_conversations: int max_active_conversations: int
verbose: bool verbose: bool
print_content: bool print_content: bool
@ -109,9 +109,9 @@ class RequestStats(NamedTuple):
class MetricStats: class MetricStats:
def __init__(self) -> None: def __init__(self) -> None:
self.min: Optional[float] = None self.min: float | None = None
self.max: Optional[float] = None self.max: float | None = None
self.avg: Optional[float] = None self.avg: float | None = None
self.sum = 0.0 self.sum = 0.0
self.count = 0 self.count = 0
@ -143,7 +143,7 @@ class MovingAverage:
self.index = 0 self.index = 0
self.sum = 0.0 self.sum = 0.0
self.count = 0 self.count = 0
self.avg: Optional[float] = None self.avg: float | None = None
def update(self, new_value: float) -> None: def update(self, new_value: float) -> None:
if self.count < self.window_size: if self.count < self.window_size:
@ -198,14 +198,6 @@ class DebugStats:
self.logger.info("-" * 50) self.logger.info("-" * 50)
# Must support Python 3.8, we can't use str.removeprefix(prefix)
# introduced in Python 3.9
def remove_prefix(text: str, prefix: str) -> str:
if text.startswith(prefix):
return text[len(prefix) :]
return text
def nanosec_to_millisec(value: float) -> float: def nanosec_to_millisec(value: float) -> float:
return value / 1000000.0 return value / 1000000.0
@ -220,8 +212,8 @@ async def send_request(
chat_url: str, chat_url: str,
model: str, model: str,
stream: bool = True, stream: bool = True,
min_tokens: Optional[int] = None, min_tokens: int | None = None,
max_tokens: Optional[int] = None, max_tokens: int | None = None,
) -> ServerResponse: ) -> ServerResponse:
payload = { payload = {
"model": model, "model": model,
@ -250,9 +242,9 @@ async def send_request(
timeout = aiohttp.ClientTimeout(total=timeout_sec) timeout = aiohttp.ClientTimeout(total=timeout_sec)
valid_response = True valid_response = True
ttft: Optional[float] = None ttft: float | None = None
chunk_delay: list[int] = [] chunk_delay: list[int] = []
latency: Optional[float] = None latency: float | None = None
first_chunk = "" first_chunk = ""
generated_text = "" generated_text = ""
@ -269,7 +261,7 @@ async def send_request(
if not chunk_bytes: if not chunk_bytes:
continue continue
chunk = remove_prefix(chunk_bytes.decode("utf-8"), "data: ") chunk = chunk_bytes.decode("utf-8").removeprefix("data: ")
if chunk == "[DONE]": if chunk == "[DONE]":
# End of stream # End of stream
latency = time.perf_counter_ns() - start_time latency = time.perf_counter_ns() - start_time
@ -364,7 +356,7 @@ async def send_turn(
req_args: RequestArgs, req_args: RequestArgs,
verbose: bool, verbose: bool,
verify_output: bool, verify_output: bool,
) -> Optional[RequestStats]: ) -> RequestStats | None:
assert messages_to_use > 0 assert messages_to_use > 0
assert messages_to_use <= len(conversation_messages) assert messages_to_use <= len(conversation_messages)
@ -769,7 +761,7 @@ def get_client_config(
"Number of conversations must be equal or larger than the number of clients" "Number of conversations must be equal or larger than the number of clients"
) )
max_req_per_client: Optional[int] = None max_req_per_client: int | None = None
if args.max_num_requests is not None: if args.max_num_requests is not None:
# Max number of requests per client # Max number of requests per client
req_per_client = args.max_num_requests // args.num_clients req_per_client = args.max_num_requests // args.num_clients
@ -1032,7 +1024,7 @@ def process_statistics(
warmup_percentages: list[float], warmup_percentages: list[float],
test_params: dict, test_params: dict,
verbose: bool, verbose: bool,
gen_conv_args: Optional[GenConvArgs] = None, gen_conv_args: GenConvArgs | None = None,
excel_output: bool = False, excel_output: bool = False,
) -> None: ) -> None:
if len(client_metrics) == 0: if len(client_metrics) == 0:

View File

@ -13,7 +13,7 @@
# vllm-dev: used for development # vllm-dev: used for development
# #
# Build arguments: # Build arguments:
# PYTHON_VERSION=3.12 (default)|3.11|3.10|3.9 # PYTHON_VERSION=3.13|3.12 (default)|3.11|3.10
# VLLM_CPU_DISABLE_AVX512=false (default)|true # VLLM_CPU_DISABLE_AVX512=false (default)|true
# VLLM_CPU_AVX512BF16=false (default)|true # VLLM_CPU_AVX512BF16=false (default)|true
# VLLM_CPU_AVX512VNNI=false (default)|true # VLLM_CPU_AVX512VNNI=false (default)|true

View File

@ -54,7 +54,7 @@ For more details about installing from source and installing for other hardware,
For an optimized workflow when iterating on C++/CUDA kernels, see the [Incremental Compilation Workflow](./incremental_build.md) for recommendations. For an optimized workflow when iterating on C++/CUDA kernels, see the [Incremental Compilation Workflow](./incremental_build.md) for recommendations.
!!! tip !!! tip
vLLM is compatible with Python versions 3.9 to 3.12. However, vLLM's default [Dockerfile](gh-file:docker/Dockerfile) ships with Python 3.12 and tests in CI (except `mypy`) are run with Python 3.12. vLLM is compatible with Python versions 3.10 to 3.13. However, vLLM's default [Dockerfile](gh-file:docker/Dockerfile) ships with Python 3.12 and tests in CI (except `mypy`) are run with Python 3.12.
Therefore, we recommend developing with Python 3.12 to minimise the chance of your local environment clashing with our CI environment. Therefore, we recommend developing with Python 3.12 to minimise the chance of your local environment clashing with our CI environment.
@ -83,7 +83,7 @@ vLLM's `pre-commit` hooks will now run automatically every time you commit.
```bash ```bash
pre-commit run --hook-stage manual markdownlint pre-commit run --hook-stage manual markdownlint
pre-commit run --hook-stage manual mypy-3.9 pre-commit run --hook-stage manual mypy-3.10
``` ```
### Documentation ### Documentation

View File

@ -20,7 +20,7 @@ vLLM is a Python library that supports the following CPU variants. Select your C
## Requirements ## Requirements
- Python: 3.9 -- 3.12 - Python: 3.10 -- 3.13
=== "Intel/AMD x86" === "Intel/AMD x86"

View File

@ -17,7 +17,7 @@ vLLM is a Python library that supports the following GPU variants. Select your G
## Requirements ## Requirements
- OS: Linux - OS: Linux
- Python: 3.9 -- 3.12 - Python: 3.10 -- 3.13
!!! note !!! note
vLLM does not support Windows natively. To run vLLM on Windows, you can use the Windows Subsystem for Linux (WSL) with a compatible Linux distribution, or use some community-maintained forks, e.g. [https://github.com/SystemPanic/vllm-windows](https://github.com/SystemPanic/vllm-windows). vLLM does not support Windows natively. To run vLLM on Windows, you can use the Windows Subsystem for Linux (WSL) with a compatible Linux distribution, or use some community-maintained forks, e.g. [https://github.com/SystemPanic/vllm-windows](https://github.com/SystemPanic/vllm-windows).

View File

@ -8,7 +8,7 @@ This guide will help you quickly get started with vLLM to perform:
## Prerequisites ## Prerequisites
- OS: Linux - OS: Linux
- Python: 3.9 -- 3.13 - Python: 3.10 -- 3.13
## Installation ## Installation

View File

@ -1,6 +1,6 @@
[project] [project]
name = "examples-online-structured-outputs" name = "examples-online-structured-outputs"
requires-python = ">=3.9, <3.13" requires-python = ">=3.10, <3.14"
dependencies = ["openai==1.78.1", "pydantic==2.11.4"] dependencies = ["openai==1.78.1", "pydantic==2.11.4"]
version = "0.0.0" version = "0.0.0"

View File

@ -20,7 +20,6 @@ license-files = ["LICENSE"]
readme = "README.md" readme = "README.md"
description = "A high-throughput and memory-efficient inference and serving engine for LLMs" description = "A high-throughput and memory-efficient inference and serving engine for LLMs"
classifiers = [ classifiers = [
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.12",
@ -31,7 +30,7 @@ classifiers = [
"Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Scientific/Engineering :: Information Analysis", "Topic :: Scientific/Engineering :: Information Analysis",
] ]
requires-python = ">=3.9,<3.14" requires-python = ">=3.10,<3.14"
dynamic = [ "version", "dependencies", "optional-dependencies"] dynamic = [ "version", "dependencies", "optional-dependencies"]
[project.urls] [project.urls]
@ -79,12 +78,18 @@ ignore = [
"F405", "F403", "F405", "F403",
# lambda expression assignment # lambda expression assignment
"E731", "E731",
# zip without `strict=`
"B905",
# Loop control variable not used within loop body # Loop control variable not used within loop body
"B007", "B007",
# f-string format # f-string format
"UP032", "UP032",
# Can remove once 3.10+ is the minimum Python version # Can remove once 3.10+ is the minimum Python version
"UP007", "UP007",
"UP027",
"UP035",
"UP038",
"UP045",
] ]
[tool.ruff.format] [tool.ruff.format]

View File

@ -1,8 +1,7 @@
# Common dependencies # Common dependencies
-r common.txt -r common.txt
numba == 0.60.0; python_version == '3.9' and platform_machine != "s390x" # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding numba == 0.61.2; platform_machine != "s390x" # Required for N-gram speculative decoding
numba == 0.61.2; python_version > '3.9' and platform_machine != "s390x"
# Dependencies for CPUs # Dependencies for CPUs
packaging>=24.2 packaging>=24.2

View File

@ -1,8 +1,7 @@
# Common dependencies # Common dependencies
-r common.txt -r common.txt
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding numba == 0.61.2 # Required for N-gram speculative decoding
numba == 0.61.2; python_version > '3.9'
# Dependencies for NVIDIA GPUs # Dependencies for NVIDIA GPUs
ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1. ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.

View File

@ -40,8 +40,7 @@ buildkite-test-collector==0.1.9
genai_perf==0.0.8 genai_perf==0.0.8
tritonclient==2.51.0 tritonclient==2.51.0
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding numba == 0.61.2 # Required for N-gram speculative decoding
numba == 0.61.2; python_version > '3.9'
numpy numpy
runai-model-streamer[s3,gcs]==0.14.0 runai-model-streamer[s3,gcs]==0.14.0
fastsafetensors>=0.1.10 fastsafetensors>=0.1.10

View File

@ -1,8 +1,7 @@
# Common dependencies # Common dependencies
-r common.txt -r common.txt
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding numba == 0.61.2 # Required for N-gram speculative decoding
numba == 0.61.2; python_version > '3.9'
# Dependencies for AMD GPUs # Dependencies for AMD GPUs
datasets datasets

View File

@ -48,8 +48,7 @@ buildkite-test-collector==0.1.9
genai_perf==0.0.8 genai_perf==0.0.8
tritonclient==2.51.0 tritonclient==2.51.0
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding numba == 0.61.2 # Required for N-gram speculative decoding
numba == 0.61.2; python_version > '3.9'
numpy numpy
runai-model-streamer[s3,gcs]==0.14.0 runai-model-streamer[s3,gcs]==0.14.0
fastsafetensors>=0.1.10 fastsafetensors>=0.1.10

View File

@ -9,7 +9,7 @@ setuptools>=77.0.3,<80.0.0
wheel wheel
jinja2>=3.1.6 jinja2>=3.1.6
datasets # for benchmark scripts datasets # for benchmark scripts
numba == 0.60.0 # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding numba == 0.61.2 # Required for N-gram speculative decoding
nixl==0.3.0 # for PD disaggregation nixl==0.3.0 # for PD disaggregation
torch==2.8.0+xpu torch==2.8.0+xpu
torchaudio torchaudio

View File

@ -8,6 +8,7 @@ and that each field has a docstring.
import ast import ast
import inspect import inspect
import sys import sys
from itertools import pairwise
import regex as re import regex as re
@ -20,19 +21,6 @@ def get_attr_docs(cls_node: ast.ClassDef) -> dict[str, str]:
https://davidism.com/mit-license/ https://davidism.com/mit-license/
""" """
def pairwise(iterable):
"""
Manually implement https://docs.python.org/3/library/itertools.html#itertools.pairwise
Can be removed when Python 3.9 support is dropped.
"""
iterator = iter(iterable)
a = next(iterator, None)
for b in iterator:
yield a, b
a = b
out = {} out = {}
# Consider each pair of nodes. # Consider each pair of nodes.

View File

@ -7,6 +7,7 @@ import inspect
import textwrap import textwrap
from collections.abc import Iterable from collections.abc import Iterable
from dataclasses import MISSING, Field, field, fields, is_dataclass, replace from dataclasses import MISSING, Field, field, fields, is_dataclass, replace
from itertools import pairwise
from typing import TYPE_CHECKING, Any, Protocol, TypeVar from typing import TYPE_CHECKING, Any, Protocol, TypeVar
import regex as re import regex as re
@ -102,19 +103,6 @@ def get_attr_docs(cls: type[Any]) -> dict[str, str]:
https://davidism.com/mit-license/ https://davidism.com/mit-license/
""" """
def pairwise(iterable):
"""
Manually implement https://docs.python.org/3/library/itertools.html#itertools.pairwise
Can be removed when Python 3.9 support is dropped.
"""
iterator = iter(iterable)
a = next(iterator, None)
for b in iterator:
yield a, b
a = b
try: try:
cls_node = ast.parse(textwrap.dedent(inspect.getsource(cls))).body[0] cls_node = ast.parse(textwrap.dedent(inspect.getsource(cls))).body[0]
except (OSError, KeyError, TypeError): except (OSError, KeyError, TypeError):

View File

@ -15,12 +15,7 @@ plugins_loaded = False
def load_plugins_by_group(group: str) -> dict[str, Callable[[], Any]]: def load_plugins_by_group(group: str) -> dict[str, Callable[[], Any]]:
import sys from importlib.metadata import entry_points
if sys.version_info < (3, 10):
from importlib_metadata import entry_points
else:
from importlib.metadata import entry_points
allowed_plugins = envs.VLLM_PLUGINS allowed_plugins = envs.VLLM_PLUGINS

View File

@ -55,12 +55,7 @@ BUILTIN_LOGITS_PROCESSORS: list[type[LogitsProcessor]] = [
def _load_logitsprocs_plugins() -> list[type[LogitsProcessor]]: def _load_logitsprocs_plugins() -> list[type[LogitsProcessor]]:
"""Load all installed logit processor plugins""" """Load all installed logit processor plugins"""
import sys from importlib.metadata import entry_points
if sys.version_info < (3, 10):
from importlib_metadata import entry_points
else:
from importlib.metadata import entry_points
installed_logitsprocs_plugins = entry_points(group=LOGITSPROCS_GROUP) installed_logitsprocs_plugins = entry_points(group=LOGITSPROCS_GROUP)
if len(installed_logitsprocs_plugins) == 0: if len(installed_logitsprocs_plugins) == 0: