mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-11 09:55:38 +08:00
[CI][CPU] Smoke test for Apple Silicon using GHA MacOS runner (#28688)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent
5f3cd7f7f2
commit
d54a18a47e
73
.github/workflows/macos-smoke-test.yml
vendored
Normal file
73
.github/workflows/macos-smoke-test.yml
vendored
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
# Manually triggered smoke test for vLLM on a GitHub-hosted macOS runner.
# It installs the CPU requirements with uv, builds vLLM as an editable
# install, starts `vllm serve`, and checks the health and completions
# endpoints over HTTP.
name: macOS Apple Silicon Smoke Test

on:
  workflow_dispatch:  # Manual trigger

# The steps below only check out the repository and run local commands,
# so the workflow token is limited to read-only access.
permissions:
  contents: read

jobs:
  macos-m1-smoke-test:
    runs-on: macos-latest
    timeout-minutes: 20

    steps:
      - uses: actions/checkout@v4

      - uses: astral-sh/setup-uv@v4
        with:
          enable-cache: true
          # Quoted so the version stays a string rather than a float.
          python-version: '3.12'

      - name: Install dependencies
        run: |
          uv pip install -r requirements/cpu-build.txt
          uv pip install -r requirements/cpu.txt

      - name: Build vLLM
        run: uv pip install -v -e .
        env:
          # Environment variables are strings; quote to avoid implicit typing.
          CMAKE_BUILD_PARALLEL_LEVEL: '4'

      - name: Verify installation
        run: |
          python -c "import vllm; print(f'vLLM version: {vllm.__version__}')"
          python -c "import torch; print(f'PyTorch: {torch.__version__}')"

      - name: Smoke test vllm serve
        timeout-minutes: 10
        run: |
          # Start server in background
          vllm serve Qwen/Qwen3-0.6B \
            --max-model-len=2048 \
            --load-format=dummy \
            --enforce-eager \
            --port 8000 &

          SERVER_PID=$!

          # Stop the server on every exit path. The step runs under
          # `bash -e`, so a failing curl below aborts the script before any
          # trailing cleanup command could run.
          trap 'kill "$SERVER_PID" 2>/dev/null || true' EXIT

          # Wait for server to start (30 attempts, 2 seconds apart)
          for i in {1..30}; do
            # -f makes curl fail on HTTP error statuses, so an error
            # response from /health is not mistaken for readiness.
            if curl -sf http://localhost:8000/health > /dev/null; then
              echo "Server started successfully"
              break
            fi
            # Fail fast if the server process has already exited.
            if ! kill -0 "$SERVER_PID" 2>/dev/null; then
              echo "Server process exited before becoming healthy"
              exit 1
            fi
            if [ "$i" -eq 30 ]; then
              echo "Server failed to start"
              exit 1
            fi
            sleep 2
          done

          # Test health endpoint
          curl -f http://localhost:8000/health

          # Test completion
          curl -f http://localhost:8000/v1/completions \
            -H "Content-Type: application/json" \
            -d '{
              "model": "Qwen/Qwen3-0.6B",
              "prompt": "Hello",
              "max_tokens": 5
            }'

          # Cleanup is handled by the EXIT trap set above.
|
||||||
Loading…
x
Reference in New Issue
Block a user