mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-23 07:45:01 +08:00
[Build/CI] Enabling AMD Entrypoints Test (#4834)
Co-authored-by: Alexey Kondratiev <alexey.kondratiev@amd.com>
This commit is contained in:
parent
546a97ef69
commit
943e72ca56
@ -60,7 +60,8 @@ steps:
|
|||||||
command: pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py
|
command: pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py
|
||||||
|
|
||||||
- label: Entrypoints Test
|
- label: Entrypoints Test
|
||||||
#mirror_hardwares: [amd]
|
mirror_hardwares: [amd]
|
||||||
|
|
||||||
commands:
|
commands:
|
||||||
# these tests have to be separated, because each one will allocate all possible GPU memory
|
# these tests have to be separated, because each one will allocate all possible GPU memory
|
||||||
- pytest -v -s entrypoints --ignore=entrypoints/test_server_oot_registration.py
|
- pytest -v -s entrypoints --ignore=entrypoints/test_server_oot_registration.py
|
||||||
|
|||||||
@ -92,19 +92,23 @@ RUN if [ "$BUILD_TRITON" = "1" ]; then \
|
|||||||
WORKDIR /vllm-workspace
|
WORKDIR /vllm-workspace
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
|
#RUN python3 -m pip install pynvml # to be removed eventually
|
||||||
RUN python3 -m pip install --upgrade pip numba
|
RUN python3 -m pip install --upgrade pip numba
|
||||||
|
|
||||||
# make sure punica kernels are built (for LoRA)
|
# make sure punica kernels are built (for LoRA)
|
||||||
ENV VLLM_INSTALL_PUNICA_KERNELS=1
|
ENV VLLM_INSTALL_PUNICA_KERNELS=1
|
||||||
|
# Workaround for ray >= 2.10.0
|
||||||
|
ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
|
||||||
|
|
||||||
|
ENV VLLM_NCCL_SO_PATH=/opt/rocm/lib/librccl.so
|
||||||
|
|
||||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||||
pip install -U -r requirements-rocm.txt \
|
pip install -U -r requirements-rocm.txt \
|
||||||
&& patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h ./rocm_patch/rocm_bf16.patch \
|
&& patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h ./rocm_patch/rocm_bf16.patch \
|
||||||
&& python3 setup.py install \
|
&& python3 setup.py install \
|
||||||
&& cp build/lib.linux-x86_64-cpython-39/vllm/_C.cpython-39-x86_64-linux-gnu.so vllm/ \
|
&& cp build/lib.linux-x86_64-cpython-39/vllm/_C.cpython-39-x86_64-linux-gnu.so vllm/ \
|
||||||
|
&& cp build/lib.linux-x86_64-cpython-39/vllm/_punica_C.cpython-39-x86_64-linux-gnu.so vllm/ \
|
||||||
&& cd ..
|
&& cd ..
|
||||||
|
|
||||||
RUN python3 -m pip install --upgrade pip
|
|
||||||
RUN python3 -m pip install --no-cache-dir ray[all]==2.9.3
|
|
||||||
|
|
||||||
CMD ["/bin/bash"]
|
CMD ["/bin/bash"]
|
||||||
|
|||||||
@ -2,4 +2,5 @@
|
|||||||
-r requirements-common.txt
|
-r requirements-common.txt
|
||||||
|
|
||||||
# Dependencies for AMD GPUs
|
# Dependencies for AMD GPUs
|
||||||
ray == 2.9.3
|
ray >= 2.10.0
|
||||||
|
pytest-asyncio
|
||||||
|
|||||||
@ -6,8 +6,12 @@ from typing import Dict, List, Optional, Tuple, Union
|
|||||||
import pytest
|
import pytest
|
||||||
import ray
|
import ray
|
||||||
import torch
|
import torch
|
||||||
from pynvml import (nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo,
|
|
||||||
nvmlInit)
|
from vllm.utils import is_hip
|
||||||
|
|
||||||
|
if (not is_hip()):
|
||||||
|
from pynvml import (nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo,
|
||||||
|
nvmlInit)
|
||||||
|
|
||||||
from vllm import LLM
|
from vllm import LLM
|
||||||
from vllm.engine.arg_utils import AsyncEngineArgs
|
from vllm.engine.arg_utils import AsyncEngineArgs
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user