mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 17:45:19 +08:00
[CI][CPU] Smoke test for Apple Silicon using GHA MacOS runner (#28688)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent
5f3cd7f7f2
commit
d54a18a47e
73
.github/workflows/macos-smoke-test.yml
vendored
Normal file
73
.github/workflows/macos-smoke-test.yml
vendored
Normal file
@ -0,0 +1,73 @@
|
||||
name: macOS Apple Silicon Smoke Test
|
||||
|
||||
on:
|
||||
workflow_dispatch: # Manual trigger
|
||||
|
||||
jobs:
|
||||
macos-m1-smoke-test:
|
||||
runs-on: macos-latest
|
||||
timeout-minutes: 20
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: astral-sh/setup-uv@v4
|
||||
with:
|
||||
enable-cache: true
|
||||
python-version: '3.12'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
uv pip install -r requirements/cpu-build.txt
|
||||
uv pip install -r requirements/cpu.txt
|
||||
|
||||
- name: Build vLLM
|
||||
run: uv pip install -v -e .
|
||||
env:
|
||||
CMAKE_BUILD_PARALLEL_LEVEL: 4
|
||||
|
||||
- name: Verify installation
|
||||
run: |
|
||||
python -c "import vllm; print(f'vLLM version: {vllm.__version__}')"
|
||||
python -c "import torch; print(f'PyTorch: {torch.__version__}')"
|
||||
|
||||
- name: Smoke test vllm serve
|
||||
timeout-minutes: 10
|
||||
run: |
|
||||
# Start server in background
|
||||
vllm serve Qwen/Qwen3-0.6B \
|
||||
--max-model-len=2048 \
|
||||
--load-format=dummy \
|
||||
--enforce-eager \
|
||||
--port 8000 &
|
||||
|
||||
SERVER_PID=$!
|
||||
|
||||
# Wait for server to start
|
||||
for i in {1..30}; do
|
||||
if curl -s http://localhost:8000/health > /dev/null; then
|
||||
echo "Server started successfully"
|
||||
break
|
||||
fi
|
||||
if [ "$i" -eq 30 ]; then
|
||||
echo "Server failed to start"
|
||||
kill "$SERVER_PID"
|
||||
exit 1
|
||||
fi
|
||||
sleep 2
|
||||
done
|
||||
|
||||
# Test health endpoint
|
||||
curl -f http://localhost:8000/health
|
||||
|
||||
# Test completion
|
||||
curl -f http://localhost:8000/v1/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "Qwen/Qwen3-0.6B",
|
||||
"prompt": "Hello",
|
||||
"max_tokens": 5
|
||||
}'
|
||||
|
||||
# Cleanup
|
||||
kill "$SERVER_PID"
|
||||
Loading…
x
Reference in New Issue
Block a user