mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-16 14:07:13 +08:00
updated
Signed-off-by: Robert Shaw <robshaw@redhat.com>
This commit is contained in:
parent
550f8a052c
commit
13729ad0af
18
Dockerfile
18
Dockerfile
@ -1,12 +1,20 @@
|
||||
ARG CUDA_VERSION=12.8.1
|
||||
from nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
|
||||
|
||||
RUN wget -qO- https://astral.sh/uv/install.sh | sh
|
||||
# Install OS packages: git (used by the later `git clone`) and curl.
# - apt-get is used instead of apt, whose CLI is not intended for scripts.
# - update + install + list cleanup share one layer so the package index is
#   never stale and the apt lists do not bloat the image.
# - ca-certificates is listed explicitly because --no-install-recommends would
#   otherwise not pull it in, and HTTPS clones/downloads need it.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git && \
    rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /workspace
|
||||
RUN git clone https://github.com/vllm-project/vllm.git && \
|
||||
VLLM_USE_PRECOMPILED=1 uv pip install -e .
|
||||
RUN git clone https://github.com/vllm-project/vllm.git
|
||||
|
||||
# Copy the uv and uvx binaries from the ghcr.io/astral-sh/uv image into /bin.
# NOTE(review): the :latest tag is unpinned, so a rebuild may pick up a
# different uv release — consider pinning a specific version tag or digest.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
|
||||
|
||||
# Install vllm.
|
||||
WORKDIR /workspace/vllm
|
||||
ENV VLLM_SHA=8ce3cad72fbd0dc6524e495ecddbbc58fd8fd09e
|
||||
RUN uv venv .vllm --python 3.12
|
||||
RUN . .vllm/bin/activate && VLLM_USE_PRECOMPILED=1 uv pip install -e .
|
||||
|
||||
# Checkout a specific commit.
|
||||
ENV VLLM_SHA=550f8a052cae03c7e14a46767f689ab09c1cc28d
|
||||
RUN git fetch && git checkout ${VLLM_SHA}
|
||||
|
||||
ENTRYPOINT ["/bin/bash"]
|
||||
|
||||
53
benchmarks/kernels/Justfile
Normal file
53
benchmarks/kernels/Justfile
Normal file
@ -0,0 +1,53 @@
|
||||
# Run benchmark_moe.py in --tune mode for Llama-4-Scout (tp=1, ep=8).
# No --dtype is passed, so the script's default dtype applies.
llama-scout-bf16:
    python3 benchmark_moe.py \
        --model meta-llama/Llama-4-Scout-17B-16E-Instruct \
        --tp-size 1 \
        --ep-size 8 \
        --tune
|
||||
|
||||
# Run benchmark_moe.py in --tune mode for Llama-4-Scout (tp=1, ep=8)
# with --dtype fp8_w8a8.
llama-scout-fp8:
    python3 benchmark_moe.py \
        --model meta-llama/Llama-4-Scout-17B-16E-Instruct \
        --tp-size 1 \
        --ep-size 8 \
        --dtype fp8_w8a8 \
        --tune
|
||||
|
||||
# Run benchmark_moe.py in --tune mode for Llama-4-Maverick (tp=1, ep=8)
# with --dtype fp8_w8a8.
llama-maverick:
    python3 benchmark_moe.py \
        --model meta-llama/Llama-4-Maverick-17B-128E-Instruct \
        --tp-size 1 \
        --ep-size 8 \
        --dtype fp8_w8a8 \
        --tune
|
||||
|
||||
# Run benchmark_moe.py in --tune mode for Qwen3-30B-A3B (tp=1, ep=8).
# No --dtype is passed, so the script's default dtype applies.
qwen-30b:
    python3 benchmark_moe.py \
        --model Qwen/Qwen3-30B-A3B \
        --tp-size 1 \
        --ep-size 8 \
        --tune
|
||||
|
||||
# Run benchmark_moe.py in --tune mode for the Qwen3-30B-A3B-FP8 checkpoint
# (tp=1, ep=8) with --dtype fp8_w8a8.
qwen-30b-fp8:
    python3 benchmark_moe.py \
        --model Qwen/Qwen3-30B-A3B-FP8 \
        --tp-size 1 \
        --ep-size 8 \
        --dtype fp8_w8a8 \
        --tune
|
||||
|
||||
# Run benchmark_moe.py in --tune mode for Qwen3-235B-A22B (tp=1, ep=8)
# with --dtype fp8_w8a8.
# NOTE(review): the model id is not an "-FP8" checkpoint although fp8_w8a8 is
# requested — confirm this pairing is intended.
qwen-235b:
    python3 benchmark_moe.py \
        --model Qwen/Qwen3-235B-A22B \
        --tp-size 1 \
        --ep-size 8 \
        --dtype fp8_w8a8 \
        --tune
|
||||
|
||||
# Run benchmark_moe.py in --tune mode for DeepSeek-R1-0528 (tp=1, ep=8)
# with --dtype fp8_w8a8.
deepseek-r1:
    python3 benchmark_moe.py \
        --model deepseek-ai/DeepSeek-R1-0528 \
        --tp-size 1 \
        --ep-size 8 \
        --dtype fp8_w8a8 \
        --tune
|
||||
Loading…
x
Reference in New Issue
Block a user