update to 2.9.1 PyTorch release

tests

xformers

xformers_release

torchao_pin_advance

Update conv.py

Update env_override.py

[Release 2.10] Test Torch 2.10 RC

triton

release_210_testing

release_210_testing

release_210_testing

release_210_testing

update_to_210

fix_210_resr

update_291_test

python_only_compile
This commit is contained in:
atalman 2025-11-11 14:15:01 -08:00
parent 09dc7c690c
commit a1abb57636
16 changed files with 48 additions and 47 deletions

View File

@ -38,7 +38,7 @@ repos:
rev: 0.9.1
hooks:
- id: pip-compile
args: [requirements/test.in, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --torch-backend, cu129, --python-platform, x86_64-manylinux_2_28, --python-version, "3.12"]
args: [requirements/test.in, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --extra-index-url, https://download.pytorch.org/whl/test/cu129, --python-platform, x86_64-manylinux_2_28, --python-version, "3.12"]
files: ^requirements/test\.(in|txt)$
- repo: local
hooks:

View File

@ -56,8 +56,8 @@ endif()
# requirements.txt files and should be kept consistent. The ROCm torch
# versions are derived from docker/Dockerfile.rocm
#
set(TORCH_SUPPORTED_VERSION_CUDA "2.9.1")
set(TORCH_SUPPORTED_VERSION_ROCM "2.9.1")
set(TORCH_SUPPORTED_VERSION_CUDA "2.10.0")
set(TORCH_SUPPORTED_VERSION_ROCM "2.10.0")
#
# Try to find python package with an executable that exactly matches
@ -432,7 +432,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
list(APPEND VLLM_EXT_SRC ${MARLIN_TEMPLATE_BF16_KERNEL_SRC})
endif()
if (MARLIN_SM75_ARCHS)
if (MARLIN_SM75_ARCHS)
file(GLOB MARLIN_TEMPLATE_SM75_KERNEL_SRC "csrc/quantization/gptq_marlin/sm75_kernel_*.cu")
set_gencode_flags_for_srcs(
SRCS "${MARLIN_TEMPLATE_SM75_KERNEL_SRC}"
@ -444,7 +444,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
list(APPEND VLLM_EXT_SRC ${MARLIN_TEMPLATE_SM75_KERNEL_SRC})
endif()
if (MARLIN_FP8_ARCHS)
if (MARLIN_FP8_ARCHS)
file(GLOB MARLIN_TEMPLATE_FP8_KERNEL_SRC "csrc/quantization/gptq_marlin/sm89_kernel_*.cu")
set_gencode_flags_for_srcs(
SRCS "${MARLIN_TEMPLATE_FP8_KERNEL_SRC}"
@ -1042,7 +1042,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
list(APPEND VLLM_MOE_EXT_SRC ${MARLIN_MOE_SRC})
endif()
if (MARLIN_MOE_SM75_ARCHS)
if (MARLIN_MOE_SM75_ARCHS)
file(GLOB MARLIN_MOE_SM75_SRC "csrc/moe/marlin_moe_wna16/sm75_kernel_*.cu")
set_gencode_flags_for_srcs(
SRCS "${MARLIN_MOE_SM75_SRC}"

View File

@ -55,7 +55,7 @@ ARG UV_INDEX_URL=${PIP_INDEX_URL}
ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
# PyTorch provides its own indexes for standard and nightly builds
ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl
ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl/test
# PIP supports multiple authentication schemes, including keyring
# By parameterizing the PIP_KEYRING_PROVIDER variable and setting it to
@ -135,7 +135,7 @@ WORKDIR /workspace
COPY requirements/common.txt requirements/common.txt
COPY requirements/cuda.txt requirements/cuda.txt
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \
uv pip install --python /opt/venv/bin/python3 --prerelease=allow -r requirements/cuda.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
# CUDA arch list used by torch
@ -303,7 +303,7 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
ENV UV_LINK_MODE=copy
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
uv pip install --python /opt/venv/bin/python3 --prerelease=allow -r requirements/build.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
WORKDIR /workspace
@ -367,7 +367,7 @@ COPY requirements/lint.txt requirements/lint.txt
COPY requirements/test.txt requirements/test.txt
COPY requirements/dev.txt requirements/dev.txt
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --python /opt/venv/bin/python3 -r requirements/dev.txt \
uv pip install --python /opt/venv/bin/python3 --prerelease=allow -r requirements/dev.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
#################### DEV IMAGE ####################
#################### vLLM installation IMAGE ####################
@ -465,7 +465,7 @@ ARG PYTORCH_CUDA_INDEX_BASE_URL
COPY requirements/common.txt /tmp/common.txt
COPY requirements/cuda.txt /tmp/requirements-cuda.txt
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system -r /tmp/requirements-cuda.txt \
uv pip install --system --prerelease=allow -r /tmp/requirements-cuda.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && \
rm /tmp/requirements-cuda.txt /tmp/common.txt
@ -522,9 +522,10 @@ ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER
# Install vllm wheel first, so that torch etc will be installed.
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
--mount=type=cache,target=/root/.cache/uv \
uv pip install --system dist/*.whl --verbose \
uv pip install --prerelease=allow --system dist/*.whl --verbose \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
RUN --mount=type=cache,target=/root/.cache/uv \
. /etc/environment && \
uv pip list
@ -544,7 +545,7 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
# Install EP kernels wheels (pplx-kernels and DeepEP) that have been built in the `build` stage
RUN --mount=type=bind,from=build,src=/tmp/ep_kernels_workspace/dist,target=/vllm-workspace/ep_kernels/dist \
--mount=type=cache,target=/root/.cache/uv \
uv pip install --system ep_kernels/dist/*.whl --verbose \
uv pip install --prerelease=allow --system ep_kernels/dist/*.whl --verbose \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
# CUDA image changed from /usr/local/nvidia to /usr/local/cuda in 12.8 but will
@ -587,7 +588,7 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
RUN --mount=type=cache,target=/root/.cache/uv \
CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
if [ "$CUDA_MAJOR" -ge 12 ]; then \
uv pip install --system -r requirements/dev.txt \
uv pip install --prerelease=allow --system -r requirements/dev.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
fi

View File

@ -26,7 +26,7 @@ FROM ubuntu:22.04 AS base-common
WORKDIR /workspace/
ARG PYTHON_VERSION=3.12
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/test/cpu"
# Install minimal dependencies and uv
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \

View File

@ -6,7 +6,7 @@ requires = [
"packaging>=24.2",
"setuptools>=77.0.3,<81.0.0",
"setuptools-scm>=8.0",
"torch == 2.9.1",
"torch == 2.10.0",
"wheel",
"jinja2",
]

View File

@ -4,7 +4,7 @@ ninja
packaging>=24.2
setuptools>=77.0.3,<81.0.0
setuptools-scm>=8
torch==2.9.1
torch==2.10.0
wheel
jinja2>=3.1.6
regex

View File

@ -5,9 +5,9 @@ numba == 0.61.2 # Required for N-gram speculative decoding
# Dependencies for NVIDIA GPUs
ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
torch==2.9.1
torchaudio==2.9.1
torch==2.10.0
torchaudio==2.10.0
# These must be updated alongside torch
torchvision==0.24.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
torchvision==0.25.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
# FlashInfer should be updated together with the Dockerfile
flashinfer-python==0.5.3

View File

@ -1,12 +1,11 @@
# Common dependencies
-r common.txt
--extra-index-url https://download.pytorch.org/whl/rocm6.4
torch==2.9.1
torchvision==0.24.1
torchaudio==2.9.1
triton==3.5.1
--extra-index-url https://download.pytorch.org/whl/test/rocm7.0
torch==2.10.0
torchvision==0.25.0
torchaudio==2.10.0
triton==3.6.0
cmake>=3.26.1,<4
packaging>=24.2
setuptools>=77.0.3,<80.0.0

View File

@ -24,9 +24,9 @@ soundfile # required for audio tests
jiwer # required for audio tests
tblib # for pickling test exceptions
timm >=1.0.17 # required for internvl and gemma3n-mm test
torch==2.9.1
torchaudio==2.9.1
torchvision==0.24.1
torch==2.10.0
torchaudio==2.10.0
torchvision==0.25.0
transformers_stream_generator # required for qwen-vl test
matplotlib # required for qwen-vl test
mistral_common[image,audio] >= 1.8.5 # required for voxtral test

View File

@ -608,7 +608,7 @@ nvidia-nvjitlink-cu12==12.9.86
# nvidia-cusolver-cu12
# nvidia-cusparse-cu12
# torch
nvidia-nvshmem-cu12==3.3.20
nvidia-nvshmem-cu12==3.4.5
# via torch
nvidia-nvtx-cu12==12.9.79
# via torch
@ -1123,7 +1123,7 @@ tomli==2.2.1
# via schemathesis
tomli-w==1.2.0
# via schemathesis
torch==2.9.1+cu129
torch==2.10.0+cu129
# via
# -r requirements/test.in
# accelerate
@ -1152,7 +1152,7 @@ torch==2.9.1+cu129
# torchvision
# vector-quantize-pytorch
# vocos
torchaudio==2.9.1+cu129
torchaudio==2.10.0+cu129
# via
# -r requirements/test.in
# encodec
@ -1165,7 +1165,7 @@ torchmetrics==1.7.4
# pytorch-lightning
# terratorch
# torchgeo
torchvision==0.24.1+cu129
torchvision==0.25.0+cu129
# via
# -r requirements/test.in
# lightly
@ -1206,7 +1206,7 @@ transformers==4.57.3
# transformers-stream-generator
transformers-stream-generator==0.0.5
# via -r requirements/test.in
triton==3.5.1
triton==3.6.0
# via torch
tritonclient==2.51.0
# via

View File

@ -56,7 +56,7 @@ def use_vllm_config(vllm_config: VllmConfig):
@pytest.mark.skipif(
not is_torch_equal_or_newer("2.10.0.dev"), reason="requires torch 2.10"
not is_torch_equal_or_newer("2.10.0"), reason="requires torch 2.10"
)
def test_no_dynamo_cache_entry(monkeypatch: pytest.MonkeyPatch):
with monkeypatch.context() as m:
@ -80,7 +80,7 @@ def test_no_dynamo_cache_entry(monkeypatch: pytest.MonkeyPatch):
@pytest.mark.skipif(
not is_torch_equal_or_newer("2.10.0.dev"), reason="requires torch 2.10"
not is_torch_equal_or_newer("2.10.0"), reason="requires torch 2.10"
)
def test_force_aot_load(monkeypatch: pytest.MonkeyPatch):
with tempfile.TemporaryDirectory() as tmpdirname, monkeypatch.context() as m:
@ -94,7 +94,7 @@ def test_force_aot_load(monkeypatch: pytest.MonkeyPatch):
@pytest.mark.skipif(
not is_torch_equal_or_newer("2.10.0.dev"), reason="requires torch 2.10"
not is_torch_equal_or_newer("2.10.0"), reason="requires torch 2.10"
)
def test_save_and_load(monkeypatch: pytest.MonkeyPatch):
with monkeypatch.context() as m:
@ -116,7 +116,7 @@ def test_save_and_load(monkeypatch: pytest.MonkeyPatch):
@pytest.mark.skipif(
not is_torch_equal_or_newer("2.10.0.dev"), reason="requires torch 2.10"
not is_torch_equal_or_newer("2.10.0"), reason="requires torch 2.10"
)
def test_shape_env(monkeypatch: pytest.MonkeyPatch):
"""
@ -149,7 +149,7 @@ def test_shape_env(monkeypatch: pytest.MonkeyPatch):
@pytest.mark.skipif(
not is_torch_equal_or_newer("2.10.0.dev"), reason="requires torch 2.10"
not is_torch_equal_or_newer("2.10.0"), reason="requires torch 2.10"
)
@create_new_process_for_each_test("spawn")
def test_gpt2_cache_hit(monkeypatch: pytest.MonkeyPatch):

View File

@ -40,7 +40,7 @@ def get_test_models():
@pytest.mark.parametrize("use_bytecode_hook", [True, False])
@pytest.mark.parametrize("evaluate_guards", [False, True])
@pytest.mark.skipif(
not is_torch_equal_or_newer("2.10.0.dev"), reason="requires torch 2.10"
not is_torch_equal_or_newer("2.10.0"), reason="requires torch 2.10"
)
def test_dynamic_shapes_compilation(
monkeypatch,

View File

@ -67,7 +67,8 @@ apt autoremove -y
echo 'import os; os.system("touch /tmp/changed.file")' >> vllm/__init__.py
VLLM_PRECOMPILED_WHEEL_COMMIT=$merge_base_commit VLLM_USE_PRECOMPILED=1 pip3 install -vvv -e .
VLLM_PRECOMPILED_WHEEL_COMMIT=$merge_base_commit VLLM_USE_PRECOMPILED=1 pip3 install -vvv -e . --extra-index-url https://download.pytorch.org/whl/test/cu129/
# Run the script
python3 -c 'import vllm'

View File

@ -316,7 +316,7 @@ def _support_torch_compile(
def _mark_dynamic_inputs(mod, type, *args, **kwargs):
def mark_dynamic(arg, dims):
if type == DynamicShapesType.UNBACKED:
if is_torch_equal_or_newer("2.10.0.dev"):
if is_torch_equal_or_newer("2.10.0"):
for dim in dims:
torch._dynamo.decorators.mark_unbacked(
arg, dim, hint_override=arg.size()[dim]
@ -356,7 +356,7 @@ def _support_torch_compile(
if isinstance(arg, torch.Tensor):
# In case dims is specified with negative indexing
dims = [arg.ndim + dim if dim < 0 else dim for dim in dims]
if is_torch_equal_or_newer("2.10.0.dev"):
if is_torch_equal_or_newer("2.10.0"):
for dim in dims:
torch._dynamo.decorators.mark_unbacked(
arg, dim, hint_override=arg.size()[dim]
@ -496,9 +496,9 @@ def _support_torch_compile(
fx_config_patches["backed_size_oblivious"] = True
# Prepare inductor config patches
# assume_32bit_indexing is only available in torch 2.10.0.dev+
# assume_32bit_indexing is only available in torch 2.10.0+
inductor_config_patches = {}
if is_torch_equal_or_newer("2.10.0.dev"):
if is_torch_equal_or_newer("2.10.0"):
inductor_config_patches["assume_32bit_indexing"] = True
with (

View File

@ -286,7 +286,7 @@ def use_aot_compile() -> bool:
default_value = (
"1"
if is_torch_equal_or_newer("2.10.0.dev") and not disable_compile_cache()
if is_torch_equal_or_newer("2.10.0") and not disable_compile_cache()
else "0"
)

View File

@ -974,7 +974,7 @@ def enable_batch_invariant_mode():
)
reduced_precision_val = (
(False, False) if is_torch_equal_or_newer("2.10.0.dev") else False
(False, False) if is_torch_equal_or_newer("2.10.0") else False
)
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = (
reduced_precision_val