Signed-off-by: Robert Shaw <robshaw@redhat.com>
This commit is contained in:
Robert Shaw 2025-07-12 21:57:16 +00:00
parent 550f8a052c
commit 13729ad0af
2 changed files with 66 additions and 5 deletions

View File

@@ -1,12 +1,20 @@
# NOTE(review): this span is a unified-diff rendering whose +/- markers were
# lost during extraction, so removed (old) and added (new) lines are
# interleaved. It is NOT a valid Dockerfile as-is — do not build from it;
# recover the post-commit file from the repository instead.
ARG CUDA_VERSION=12.8.1
# NOTE(review): lowercase `from` vs uppercase `FROM` below — presumably the
# old line and its replacement from the diff; only one belongs in the file.
from nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
# NOTE(review): curl|sh-style installer for uv — appears superseded by the
# `COPY --from=ghcr.io/astral-sh/uv` line further down; confirm which side of
# the diff this is on. Also uses `apt` (hadolint DL3027 prefers `apt-get`).
RUN wget -qO- https://astral.sh/uv/install.sh | sh
RUN apt update && apt install git -y && apt install curl -y
WORKDIR /workspace
# NOTE(review): two clone steps for the same repo — the combined clone+install
# and the bare clone are presumably the old and new variants of one step.
RUN git clone https://github.com/vllm-project/vllm.git && \
VLLM_USE_PRECOMPILED=1 uv pip install -e .
RUN git clone https://github.com/vllm-project/vllm.git
# Copy the uv/uvx binaries from the official uv image (pin a version tag
# rather than :latest for reproducibility — TODO confirm).
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# Install vllm.
WORKDIR /workspace/vllm
# NOTE(review): two conflicting VLLM_SHA values appear below — one per diff
# side. The second (550f8a05…) matches this commit's recorded parent; verify
# against the repository which value the final file keeps.
ENV VLLM_SHA=8ce3cad72fbd0dc6524e495ecddbbc58fd8fd09e
# Create a Python 3.12 virtualenv and install vllm editable, using the
# precompiled-wheel path to skip local CUDA kernel compilation.
RUN uv venv .vllm --python 3.12
RUN . .vllm/bin/activate && VLLM_USE_PRECOMPILED=1 uv pip install -e .
# Checkout a specific commit.
ENV VLLM_SHA=550f8a052cae03c7e14a46767f689ab09c1cc28d
RUN git fetch && git checkout ${VLLM_SHA}
# Interactive shell as the container entrypoint (exec form, as recommended).
ENTRYPOINT ["/bin/bash"]

View File

@@ -0,0 +1,53 @@
# Tune MoE kernels for Llama-4 Scout in default (bf16) precision: TP=1, EP=8.
llama-scout-bf16:
	python3 benchmark_moe.py --model meta-llama/Llama-4-Scout-17B-16E-Instruct --tp-size 1 --ep-size 8 --tune
# Tune MoE kernels for Llama-4 Scout quantized to fp8 (w8a8): TP=1, EP=8.
llama-scout-fp8:
	python3 benchmark_moe.py --model meta-llama/Llama-4-Scout-17B-16E-Instruct --tp-size 1 --ep-size 8 --dtype fp8_w8a8 --tune
# Tune MoE kernels for Llama-4 Maverick in fp8 (w8a8): TP=1, EP=8.
llama-maverick:
	python3 benchmark_moe.py --model meta-llama/Llama-4-Maverick-17B-128E-Instruct --tp-size 1 --ep-size 8 --dtype fp8_w8a8 --tune
# Tune MoE kernels for Qwen3-30B-A3B in default precision: TP=1, EP=8.
qwen-30b:
	python3 benchmark_moe.py --model Qwen/Qwen3-30B-A3B --tp-size 1 --ep-size 8 --tune
# Tune MoE kernels for the pre-quantized Qwen3-30B-A3B-FP8 checkpoint
# using the fp8 (w8a8) kernel path: TP=1, EP=8.
qwen-30b-fp8:
	python3 benchmark_moe.py --model Qwen/Qwen3-30B-A3B-FP8 --tp-size 1 --ep-size 8 --dtype fp8_w8a8 --tune
# Tune MoE kernels for Qwen3-235B-A22B in fp8 (w8a8): TP=1, EP=8.
qwen-235b:
	python3 benchmark_moe.py --model Qwen/Qwen3-235B-A22B --tp-size 1 --ep-size 8 --dtype fp8_w8a8 --tune
# Tune MoE kernels for DeepSeek-R1-0528 in fp8 (w8a8): TP=1, EP=8.
deepseek-r1:
	python3 benchmark_moe.py --model deepseek-ai/DeepSeek-R1-0528 --tp-size 1 --ep-size 8 --dtype fp8_w8a8 --tune