mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-31 02:37:04 +08:00
update to 2.9.1 PyTorch release
tests xformers xformers_release torchao_pin_advance Update conv.py Update env_override.py [Release 2.10] Test Torch 2.10 RC triton release_210_testing release_210_testing release_210_testing release_210_testing update_to_210 fix_210_resr update_291_test python_only_compile
This commit is contained in:
parent
09dc7c690c
commit
a1abb57636
@ -38,7 +38,7 @@ repos:
|
|||||||
rev: 0.9.1
|
rev: 0.9.1
|
||||||
hooks:
|
hooks:
|
||||||
- id: pip-compile
|
- id: pip-compile
|
||||||
args: [requirements/test.in, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --torch-backend, cu129, --python-platform, x86_64-manylinux_2_28, --python-version, "3.12"]
|
args: [requirements/test.in, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --extra-index-url, https://download.pytorch.org/whl/test/cu129, --python-platform, x86_64-manylinux_2_28, --python-version, "3.12"]
|
||||||
files: ^requirements/test\.(in|txt)$
|
files: ^requirements/test\.(in|txt)$
|
||||||
- repo: local
|
- repo: local
|
||||||
hooks:
|
hooks:
|
||||||
|
|||||||
@ -56,8 +56,8 @@ endif()
|
|||||||
# requirements.txt files and should be kept consistent. The ROCm torch
|
# requirements.txt files and should be kept consistent. The ROCm torch
|
||||||
# versions are derived from docker/Dockerfile.rocm
|
# versions are derived from docker/Dockerfile.rocm
|
||||||
#
|
#
|
||||||
set(TORCH_SUPPORTED_VERSION_CUDA "2.9.1")
|
set(TORCH_SUPPORTED_VERSION_CUDA "2.10.0")
|
||||||
set(TORCH_SUPPORTED_VERSION_ROCM "2.9.1")
|
set(TORCH_SUPPORTED_VERSION_ROCM "2.10.0")
|
||||||
|
|
||||||
#
|
#
|
||||||
# Try to find python package with an executable that exactly matches
|
# Try to find python package with an executable that exactly matches
|
||||||
@ -432,7 +432,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
|
|||||||
list(APPEND VLLM_EXT_SRC ${MARLIN_TEMPLATE_BF16_KERNEL_SRC})
|
list(APPEND VLLM_EXT_SRC ${MARLIN_TEMPLATE_BF16_KERNEL_SRC})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (MARLIN_SM75_ARCHS)
|
if (MARLIN_SM75_ARCHS)
|
||||||
file(GLOB MARLIN_TEMPLATE_SM75_KERNEL_SRC "csrc/quantization/gptq_marlin/sm75_kernel_*.cu")
|
file(GLOB MARLIN_TEMPLATE_SM75_KERNEL_SRC "csrc/quantization/gptq_marlin/sm75_kernel_*.cu")
|
||||||
set_gencode_flags_for_srcs(
|
set_gencode_flags_for_srcs(
|
||||||
SRCS "${MARLIN_TEMPLATE_SM75_KERNEL_SRC}"
|
SRCS "${MARLIN_TEMPLATE_SM75_KERNEL_SRC}"
|
||||||
@ -444,7 +444,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
|
|||||||
list(APPEND VLLM_EXT_SRC ${MARLIN_TEMPLATE_SM75_KERNEL_SRC})
|
list(APPEND VLLM_EXT_SRC ${MARLIN_TEMPLATE_SM75_KERNEL_SRC})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (MARLIN_FP8_ARCHS)
|
if (MARLIN_FP8_ARCHS)
|
||||||
file(GLOB MARLIN_TEMPLATE_FP8_KERNEL_SRC "csrc/quantization/gptq_marlin/sm89_kernel_*.cu")
|
file(GLOB MARLIN_TEMPLATE_FP8_KERNEL_SRC "csrc/quantization/gptq_marlin/sm89_kernel_*.cu")
|
||||||
set_gencode_flags_for_srcs(
|
set_gencode_flags_for_srcs(
|
||||||
SRCS "${MARLIN_TEMPLATE_FP8_KERNEL_SRC}"
|
SRCS "${MARLIN_TEMPLATE_FP8_KERNEL_SRC}"
|
||||||
@ -1042,7 +1042,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
|
|||||||
list(APPEND VLLM_MOE_EXT_SRC ${MARLIN_MOE_SRC})
|
list(APPEND VLLM_MOE_EXT_SRC ${MARLIN_MOE_SRC})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (MARLIN_MOE_SM75_ARCHS)
|
if (MARLIN_MOE_SM75_ARCHS)
|
||||||
file(GLOB MARLIN_MOE_SM75_SRC "csrc/moe/marlin_moe_wna16/sm75_kernel_*.cu")
|
file(GLOB MARLIN_MOE_SM75_SRC "csrc/moe/marlin_moe_wna16/sm75_kernel_*.cu")
|
||||||
set_gencode_flags_for_srcs(
|
set_gencode_flags_for_srcs(
|
||||||
SRCS "${MARLIN_MOE_SM75_SRC}"
|
SRCS "${MARLIN_MOE_SM75_SRC}"
|
||||||
|
|||||||
@ -55,7 +55,7 @@ ARG UV_INDEX_URL=${PIP_INDEX_URL}
|
|||||||
ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
|
ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
|
||||||
|
|
||||||
# PyTorch provides its own indexes for standard and nightly builds
|
# PyTorch provides its own indexes for standard and nightly builds
|
||||||
ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl
|
ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl/test
|
||||||
|
|
||||||
# PIP supports multiple authentication schemes, including keyring
|
# PIP supports multiple authentication schemes, including keyring
|
||||||
# By parameterizing the PIP_KEYRING_PROVIDER variable and setting it to
|
# By parameterizing the PIP_KEYRING_PROVIDER variable and setting it to
|
||||||
@ -135,7 +135,7 @@ WORKDIR /workspace
|
|||||||
COPY requirements/common.txt requirements/common.txt
|
COPY requirements/common.txt requirements/common.txt
|
||||||
COPY requirements/cuda.txt requirements/cuda.txt
|
COPY requirements/cuda.txt requirements/cuda.txt
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \
|
uv pip install --python /opt/venv/bin/python3 --prerelease=allow -r requirements/cuda.txt \
|
||||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
||||||
|
|
||||||
# CUDA arch list used by torch
|
# CUDA arch list used by torch
|
||||||
@ -303,7 +303,7 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
|
|||||||
ENV UV_LINK_MODE=copy
|
ENV UV_LINK_MODE=copy
|
||||||
|
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
|
uv pip install --python /opt/venv/bin/python3 --prerelease=allow -r requirements/build.txt \
|
||||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
||||||
|
|
||||||
WORKDIR /workspace
|
WORKDIR /workspace
|
||||||
@ -367,7 +367,7 @@ COPY requirements/lint.txt requirements/lint.txt
|
|||||||
COPY requirements/test.txt requirements/test.txt
|
COPY requirements/test.txt requirements/test.txt
|
||||||
COPY requirements/dev.txt requirements/dev.txt
|
COPY requirements/dev.txt requirements/dev.txt
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
uv pip install --python /opt/venv/bin/python3 -r requirements/dev.txt \
|
uv pip install --python /opt/venv/bin/python3 --prerelease=allow -r requirements/dev.txt \
|
||||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
||||||
#################### DEV IMAGE ####################
|
#################### DEV IMAGE ####################
|
||||||
#################### vLLM installation IMAGE ####################
|
#################### vLLM installation IMAGE ####################
|
||||||
@ -465,7 +465,7 @@ ARG PYTORCH_CUDA_INDEX_BASE_URL
|
|||||||
COPY requirements/common.txt /tmp/common.txt
|
COPY requirements/common.txt /tmp/common.txt
|
||||||
COPY requirements/cuda.txt /tmp/requirements-cuda.txt
|
COPY requirements/cuda.txt /tmp/requirements-cuda.txt
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
uv pip install --system -r /tmp/requirements-cuda.txt \
|
uv pip install --system --prerelease=allow -r /tmp/requirements-cuda.txt \
|
||||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && \
|
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && \
|
||||||
rm /tmp/requirements-cuda.txt /tmp/common.txt
|
rm /tmp/requirements-cuda.txt /tmp/common.txt
|
||||||
|
|
||||||
@ -522,9 +522,10 @@ ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER
|
|||||||
# Install vllm wheel first, so that torch etc will be installed.
|
# Install vllm wheel first, so that torch etc will be installed.
|
||||||
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
|
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
|
||||||
--mount=type=cache,target=/root/.cache/uv \
|
--mount=type=cache,target=/root/.cache/uv \
|
||||||
uv pip install --system dist/*.whl --verbose \
|
uv pip install --prerelease=allow --system dist/*.whl --verbose \
|
||||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
||||||
|
|
||||||
|
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
. /etc/environment && \
|
. /etc/environment && \
|
||||||
uv pip list
|
uv pip list
|
||||||
@ -544,7 +545,7 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
|
|||||||
# Install EP kernels wheels (pplx-kernels and DeepEP) that have been built in the `build` stage
|
# Install EP kernels wheels (pplx-kernels and DeepEP) that have been built in the `build` stage
|
||||||
RUN --mount=type=bind,from=build,src=/tmp/ep_kernels_workspace/dist,target=/vllm-workspace/ep_kernels/dist \
|
RUN --mount=type=bind,from=build,src=/tmp/ep_kernels_workspace/dist,target=/vllm-workspace/ep_kernels/dist \
|
||||||
--mount=type=cache,target=/root/.cache/uv \
|
--mount=type=cache,target=/root/.cache/uv \
|
||||||
uv pip install --system ep_kernels/dist/*.whl --verbose \
|
uv pip install --prerelease=allow --system ep_kernels/dist/*.whl --verbose \
|
||||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
||||||
|
|
||||||
# CUDA image changed from /usr/local/nvidia to /usr/local/cuda in 12.8 but will
|
# CUDA image changed from /usr/local/nvidia to /usr/local/cuda in 12.8 but will
|
||||||
@ -587,7 +588,7 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
|
|||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
|
CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
|
||||||
if [ "$CUDA_MAJOR" -ge 12 ]; then \
|
if [ "$CUDA_MAJOR" -ge 12 ]; then \
|
||||||
uv pip install --system -r requirements/dev.txt \
|
uv pip install --prerelease=allow --system -r requirements/dev.txt \
|
||||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
|
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|||||||
@ -26,7 +26,7 @@ FROM ubuntu:22.04 AS base-common
|
|||||||
WORKDIR /workspace/
|
WORKDIR /workspace/
|
||||||
|
|
||||||
ARG PYTHON_VERSION=3.12
|
ARG PYTHON_VERSION=3.12
|
||||||
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
|
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/test/cpu"
|
||||||
|
|
||||||
# Install minimal dependencies and uv
|
# Install minimal dependencies and uv
|
||||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||||
|
|||||||
@ -6,7 +6,7 @@ requires = [
|
|||||||
"packaging>=24.2",
|
"packaging>=24.2",
|
||||||
"setuptools>=77.0.3,<81.0.0",
|
"setuptools>=77.0.3,<81.0.0",
|
||||||
"setuptools-scm>=8.0",
|
"setuptools-scm>=8.0",
|
||||||
"torch == 2.9.1",
|
"torch == 2.10.0",
|
||||||
"wheel",
|
"wheel",
|
||||||
"jinja2",
|
"jinja2",
|
||||||
]
|
]
|
||||||
|
|||||||
@ -4,7 +4,7 @@ ninja
|
|||||||
packaging>=24.2
|
packaging>=24.2
|
||||||
setuptools>=77.0.3,<81.0.0
|
setuptools>=77.0.3,<81.0.0
|
||||||
setuptools-scm>=8
|
setuptools-scm>=8
|
||||||
torch==2.9.1
|
torch==2.10.0
|
||||||
wheel
|
wheel
|
||||||
jinja2>=3.1.6
|
jinja2>=3.1.6
|
||||||
regex
|
regex
|
||||||
|
|||||||
@ -5,9 +5,9 @@ numba == 0.61.2 # Required for N-gram speculative decoding
|
|||||||
|
|
||||||
# Dependencies for NVIDIA GPUs
|
# Dependencies for NVIDIA GPUs
|
||||||
ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
|
ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
|
||||||
torch==2.9.1
|
torch==2.10.0
|
||||||
torchaudio==2.9.1
|
torchaudio==2.10.0
|
||||||
# These must be updated alongside torch
|
# These must be updated alongside torch
|
||||||
torchvision==0.24.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
|
torchvision==0.25.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
|
||||||
# FlashInfer should be updated together with the Dockerfile
|
# FlashInfer should be updated together with the Dockerfile
|
||||||
flashinfer-python==0.5.3
|
flashinfer-python==0.5.3
|
||||||
|
|||||||
@ -1,12 +1,11 @@
|
|||||||
# Common dependencies
|
# Common dependencies
|
||||||
-r common.txt
|
-r common.txt
|
||||||
|
|
||||||
--extra-index-url https://download.pytorch.org/whl/rocm6.4
|
--extra-index-url https://download.pytorch.org/whl/test/rocm7.0
|
||||||
torch==2.9.1
|
torch==2.10.0
|
||||||
torchvision==0.24.1
|
torchvision==0.25.0
|
||||||
torchaudio==2.9.1
|
torchaudio==2.10.0
|
||||||
|
triton==3.6.0
|
||||||
triton==3.5.1
|
|
||||||
cmake>=3.26.1,<4
|
cmake>=3.26.1,<4
|
||||||
packaging>=24.2
|
packaging>=24.2
|
||||||
setuptools>=77.0.3,<80.0.0
|
setuptools>=77.0.3,<80.0.0
|
||||||
|
|||||||
@ -24,9 +24,9 @@ soundfile # required for audio tests
|
|||||||
jiwer # required for audio tests
|
jiwer # required for audio tests
|
||||||
tblib # for pickling test exceptions
|
tblib # for pickling test exceptions
|
||||||
timm >=1.0.17 # required for internvl and gemma3n-mm test
|
timm >=1.0.17 # required for internvl and gemma3n-mm test
|
||||||
torch==2.9.1
|
torch==2.10.0
|
||||||
torchaudio==2.9.1
|
torchaudio==2.10.0
|
||||||
torchvision==0.24.1
|
torchvision==0.25.0
|
||||||
transformers_stream_generator # required for qwen-vl test
|
transformers_stream_generator # required for qwen-vl test
|
||||||
matplotlib # required for qwen-vl test
|
matplotlib # required for qwen-vl test
|
||||||
mistral_common[image,audio] >= 1.8.5 # required for voxtral test
|
mistral_common[image,audio] >= 1.8.5 # required for voxtral test
|
||||||
|
|||||||
@ -608,7 +608,7 @@ nvidia-nvjitlink-cu12==12.9.86
|
|||||||
# nvidia-cusolver-cu12
|
# nvidia-cusolver-cu12
|
||||||
# nvidia-cusparse-cu12
|
# nvidia-cusparse-cu12
|
||||||
# torch
|
# torch
|
||||||
nvidia-nvshmem-cu12==3.3.20
|
nvidia-nvshmem-cu12==3.4.5
|
||||||
# via torch
|
# via torch
|
||||||
nvidia-nvtx-cu12==12.9.79
|
nvidia-nvtx-cu12==12.9.79
|
||||||
# via torch
|
# via torch
|
||||||
@ -1123,7 +1123,7 @@ tomli==2.2.1
|
|||||||
# via schemathesis
|
# via schemathesis
|
||||||
tomli-w==1.2.0
|
tomli-w==1.2.0
|
||||||
# via schemathesis
|
# via schemathesis
|
||||||
torch==2.9.1+cu129
|
torch==2.10.0+cu129
|
||||||
# via
|
# via
|
||||||
# -r requirements/test.in
|
# -r requirements/test.in
|
||||||
# accelerate
|
# accelerate
|
||||||
@ -1152,7 +1152,7 @@ torch==2.9.1+cu129
|
|||||||
# torchvision
|
# torchvision
|
||||||
# vector-quantize-pytorch
|
# vector-quantize-pytorch
|
||||||
# vocos
|
# vocos
|
||||||
torchaudio==2.9.1+cu129
|
torchaudio==2.10.0+cu129
|
||||||
# via
|
# via
|
||||||
# -r requirements/test.in
|
# -r requirements/test.in
|
||||||
# encodec
|
# encodec
|
||||||
@ -1165,7 +1165,7 @@ torchmetrics==1.7.4
|
|||||||
# pytorch-lightning
|
# pytorch-lightning
|
||||||
# terratorch
|
# terratorch
|
||||||
# torchgeo
|
# torchgeo
|
||||||
torchvision==0.24.1+cu129
|
torchvision==0.25.0+cu129
|
||||||
# via
|
# via
|
||||||
# -r requirements/test.in
|
# -r requirements/test.in
|
||||||
# lightly
|
# lightly
|
||||||
@ -1206,7 +1206,7 @@ transformers==4.57.3
|
|||||||
# transformers-stream-generator
|
# transformers-stream-generator
|
||||||
transformers-stream-generator==0.0.5
|
transformers-stream-generator==0.0.5
|
||||||
# via -r requirements/test.in
|
# via -r requirements/test.in
|
||||||
triton==3.5.1
|
triton==3.6.0
|
||||||
# via torch
|
# via torch
|
||||||
tritonclient==2.51.0
|
tritonclient==2.51.0
|
||||||
# via
|
# via
|
||||||
|
|||||||
@ -56,7 +56,7 @@ def use_vllm_config(vllm_config: VllmConfig):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
not is_torch_equal_or_newer("2.10.0.dev"), reason="requires torch 2.10"
|
not is_torch_equal_or_newer("2.10.0"), reason="requires torch 2.10"
|
||||||
)
|
)
|
||||||
def test_no_dynamo_cache_entry(monkeypatch: pytest.MonkeyPatch):
|
def test_no_dynamo_cache_entry(monkeypatch: pytest.MonkeyPatch):
|
||||||
with monkeypatch.context() as m:
|
with monkeypatch.context() as m:
|
||||||
@ -80,7 +80,7 @@ def test_no_dynamo_cache_entry(monkeypatch: pytest.MonkeyPatch):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
not is_torch_equal_or_newer("2.10.0.dev"), reason="requires torch 2.10"
|
not is_torch_equal_or_newer("2.10.0"), reason="requires torch 2.10"
|
||||||
)
|
)
|
||||||
def test_force_aot_load(monkeypatch: pytest.MonkeyPatch):
|
def test_force_aot_load(monkeypatch: pytest.MonkeyPatch):
|
||||||
with tempfile.TemporaryDirectory() as tmpdirname, monkeypatch.context() as m:
|
with tempfile.TemporaryDirectory() as tmpdirname, monkeypatch.context() as m:
|
||||||
@ -94,7 +94,7 @@ def test_force_aot_load(monkeypatch: pytest.MonkeyPatch):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
not is_torch_equal_or_newer("2.10.0.dev"), reason="requires torch 2.10"
|
not is_torch_equal_or_newer("2.10.0"), reason="requires torch 2.10"
|
||||||
)
|
)
|
||||||
def test_save_and_load(monkeypatch: pytest.MonkeyPatch):
|
def test_save_and_load(monkeypatch: pytest.MonkeyPatch):
|
||||||
with monkeypatch.context() as m:
|
with monkeypatch.context() as m:
|
||||||
@ -116,7 +116,7 @@ def test_save_and_load(monkeypatch: pytest.MonkeyPatch):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
not is_torch_equal_or_newer("2.10.0.dev"), reason="requires torch 2.10"
|
not is_torch_equal_or_newer("2.10.0"), reason="requires torch 2.10"
|
||||||
)
|
)
|
||||||
def test_shape_env(monkeypatch: pytest.MonkeyPatch):
|
def test_shape_env(monkeypatch: pytest.MonkeyPatch):
|
||||||
"""
|
"""
|
||||||
@ -149,7 +149,7 @@ def test_shape_env(monkeypatch: pytest.MonkeyPatch):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
not is_torch_equal_or_newer("2.10.0.dev"), reason="requires torch 2.10"
|
not is_torch_equal_or_newer("2.10.0"), reason="requires torch 2.10"
|
||||||
)
|
)
|
||||||
@create_new_process_for_each_test("spawn")
|
@create_new_process_for_each_test("spawn")
|
||||||
def test_gpt2_cache_hit(monkeypatch: pytest.MonkeyPatch):
|
def test_gpt2_cache_hit(monkeypatch: pytest.MonkeyPatch):
|
||||||
|
|||||||
@ -40,7 +40,7 @@ def get_test_models():
|
|||||||
@pytest.mark.parametrize("use_bytecode_hook", [True, False])
|
@pytest.mark.parametrize("use_bytecode_hook", [True, False])
|
||||||
@pytest.mark.parametrize("evaluate_guards", [False, True])
|
@pytest.mark.parametrize("evaluate_guards", [False, True])
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
not is_torch_equal_or_newer("2.10.0.dev"), reason="requires torch 2.10"
|
not is_torch_equal_or_newer("2.10.0"), reason="requires torch 2.10"
|
||||||
)
|
)
|
||||||
def test_dynamic_shapes_compilation(
|
def test_dynamic_shapes_compilation(
|
||||||
monkeypatch,
|
monkeypatch,
|
||||||
|
|||||||
@ -67,7 +67,8 @@ apt autoremove -y
|
|||||||
|
|
||||||
echo 'import os; os.system("touch /tmp/changed.file")' >> vllm/__init__.py
|
echo 'import os; os.system("touch /tmp/changed.file")' >> vllm/__init__.py
|
||||||
|
|
||||||
VLLM_PRECOMPILED_WHEEL_COMMIT=$merge_base_commit VLLM_USE_PRECOMPILED=1 pip3 install -vvv -e .
|
|
||||||
|
VLLM_PRECOMPILED_WHEEL_COMMIT=$merge_base_commit VLLM_USE_PRECOMPILED=1 pip3 install -vvv -e . --extra-index-url https://download.pytorch.org/whl/test/cu129/
|
||||||
|
|
||||||
# Run the script
|
# Run the script
|
||||||
python3 -c 'import vllm'
|
python3 -c 'import vllm'
|
||||||
|
|||||||
@ -316,7 +316,7 @@ def _support_torch_compile(
|
|||||||
def _mark_dynamic_inputs(mod, type, *args, **kwargs):
|
def _mark_dynamic_inputs(mod, type, *args, **kwargs):
|
||||||
def mark_dynamic(arg, dims):
|
def mark_dynamic(arg, dims):
|
||||||
if type == DynamicShapesType.UNBACKED:
|
if type == DynamicShapesType.UNBACKED:
|
||||||
if is_torch_equal_or_newer("2.10.0.dev"):
|
if is_torch_equal_or_newer("2.10.0"):
|
||||||
for dim in dims:
|
for dim in dims:
|
||||||
torch._dynamo.decorators.mark_unbacked(
|
torch._dynamo.decorators.mark_unbacked(
|
||||||
arg, dim, hint_override=arg.size()[dim]
|
arg, dim, hint_override=arg.size()[dim]
|
||||||
@ -356,7 +356,7 @@ def _support_torch_compile(
|
|||||||
if isinstance(arg, torch.Tensor):
|
if isinstance(arg, torch.Tensor):
|
||||||
# In case dims is specified with negative indexing
|
# In case dims is specified with negative indexing
|
||||||
dims = [arg.ndim + dim if dim < 0 else dim for dim in dims]
|
dims = [arg.ndim + dim if dim < 0 else dim for dim in dims]
|
||||||
if is_torch_equal_or_newer("2.10.0.dev"):
|
if is_torch_equal_or_newer("2.10.0"):
|
||||||
for dim in dims:
|
for dim in dims:
|
||||||
torch._dynamo.decorators.mark_unbacked(
|
torch._dynamo.decorators.mark_unbacked(
|
||||||
arg, dim, hint_override=arg.size()[dim]
|
arg, dim, hint_override=arg.size()[dim]
|
||||||
@ -496,9 +496,9 @@ def _support_torch_compile(
|
|||||||
fx_config_patches["backed_size_oblivious"] = True
|
fx_config_patches["backed_size_oblivious"] = True
|
||||||
|
|
||||||
# Prepare inductor config patches
|
# Prepare inductor config patches
|
||||||
# assume_32bit_indexing is only available in torch 2.10.0.dev+
|
# assume_32bit_indexing is only available in torch 2.10.0+
|
||||||
inductor_config_patches = {}
|
inductor_config_patches = {}
|
||||||
if is_torch_equal_or_newer("2.10.0.dev"):
|
if is_torch_equal_or_newer("2.10.0"):
|
||||||
inductor_config_patches["assume_32bit_indexing"] = True
|
inductor_config_patches["assume_32bit_indexing"] = True
|
||||||
|
|
||||||
with (
|
with (
|
||||||
|
|||||||
@ -286,7 +286,7 @@ def use_aot_compile() -> bool:
|
|||||||
|
|
||||||
default_value = (
|
default_value = (
|
||||||
"1"
|
"1"
|
||||||
if is_torch_equal_or_newer("2.10.0.dev") and not disable_compile_cache()
|
if is_torch_equal_or_newer("2.10.0") and not disable_compile_cache()
|
||||||
else "0"
|
else "0"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -974,7 +974,7 @@ def enable_batch_invariant_mode():
|
|||||||
)
|
)
|
||||||
|
|
||||||
reduced_precision_val = (
|
reduced_precision_val = (
|
||||||
(False, False) if is_torch_equal_or_newer("2.10.0.dev") else False
|
(False, False) if is_torch_equal_or_newer("2.10.0") else False
|
||||||
)
|
)
|
||||||
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = (
|
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = (
|
||||||
reduced_precision_val
|
reduced_precision_val
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user