diff --git a/Dockerfile b/Dockerfile index d66dbba91d376..692d5059b7f49 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,20 @@ ARG CUDA_VERSION=12.8.1 -from nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 +FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 -RUN wget -qO- https://astral.sh/uv/install.sh | sh +RUN apt-get update && apt-get install -y --no-install-recommends curl git && rm -rf /var/lib/apt/lists/* WORKDIR /workspace -RUN git clone https://github.com/vllm-project/vllm.git && \ - VLLM_USE_PRECOMPILED=1 uv pip install -e . +RUN git clone https://github.com/vllm-project/vllm.git +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ + +# Install vllm. WORKDIR /workspace/vllm -ENV VLLM_SHA=8ce3cad72fbd0dc6524e495ecddbbc58fd8fd09e +RUN uv venv .vllm --python 3.12 + +# Checkout a specific commit. +ENV VLLM_SHA=550f8a052cae03c7e14a46767f689ab09c1cc28d RUN git fetch && git checkout ${VLLM_SHA} +RUN . .vllm/bin/activate && VLLM_USE_PRECOMPILED=1 uv pip install -e . + +ENTRYPOINT ["/bin/bash"] diff --git a/benchmarks/kernels/Justfile b/benchmarks/kernels/Justfile new file mode 100644 index 0000000000000..6b2a8c67357bb --- /dev/null +++ b/benchmarks/kernels/Justfile @@ -0,0 +1,53 @@ +llama-scout-bf16: + python3 benchmark_moe.py \ + --model meta-llama/Llama-4-Scout-17B-16E-Instruct \ + --tp-size 1 \ + --ep-size 8 \ + --tune + +llama-scout-fp8: + python3 benchmark_moe.py \ + --model meta-llama/Llama-4-Scout-17B-16E-Instruct \ + --tp-size 1 \ + --ep-size 8 \ + --dtype fp8_w8a8 \ + --tune + +llama-maverick: + python3 benchmark_moe.py \ + --model meta-llama/Llama-4-Maverick-17B-128E-Instruct \ + --tp-size 1 \ + --ep-size 8 \ + --dtype fp8_w8a8 \ + --tune + +qwen-30b: + python3 benchmark_moe.py \ + --model Qwen/Qwen3-30B-A3B \ + --tp-size 1 \ + --ep-size 8 \ + --tune + +qwen-30b-fp8: + python3 benchmark_moe.py \ + --model Qwen/Qwen3-30B-A3B-FP8 \ + --tp-size 1 \ + --ep-size 8 \ + --dtype fp8_w8a8 \ + --tune + +qwen-235b: + python3 benchmark_moe.py \ + --model Qwen/Qwen3-235B-A22B \ + --tp-size 1 \ + --ep-size 
8 \ + --dtype fp8_w8a8 \ + --tune + +deepseek-r1: + python3 benchmark_moe.py \ + --model deepseek-ai/DeepSeek-R1-0528 \ + --tp-size 1 \ + --ep-size 8 \ + --dtype fp8_w8a8 \ + --tune