[CI][CPU] Smoke test for Apple Silicon using GHA MacOS runner (#28688)

Signed-off-by: mgoin <mgoin64@gmail.com>
2026-03-16 10:07:11 +08:00 · 2025-11-14 11:37:18 -05:00 · 2025-11-14 11:37:18 -05:00 · d54a18a47e
commit d54a18a47e
parent 5f3cd7f7f2
1 changed files with 73 additions and 0 deletions
--- a/.github/workflows/macos-smoke-test.yml
+++ b/.github/workflows/macos-smoke-test.yml
@ -0,0 +1,73 @@
+name: macOS Apple Silicon Smoke Test
+
+on:
+  workflow_dispatch:  # Manual trigger
+
+jobs:
+  macos-m1-smoke-test:
+    runs-on: macos-latest
+    timeout-minutes: 20
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: astral-sh/setup-uv@v4
+        with:
+          enable-cache: true
+          python-version: '3.12'
+
+      - name: Install dependencies
+        run: |
+          uv pip install -r requirements/cpu-build.txt
+          uv pip install -r requirements/cpu.txt
+
+      - name: Build vLLM
+        run: uv pip install -v -e .
+        env:
+          CMAKE_BUILD_PARALLEL_LEVEL: 4
+
+      - name: Verify installation
+        run: |
+          python -c "import vllm; print(f'vLLM version: {vllm.__version__}')"
+          python -c "import torch; print(f'PyTorch: {torch.__version__}')"
+
+      - name: Smoke test vllm serve
+        timeout-minutes: 10
+        run: |
+          # Start server in background
+          vllm serve Qwen/Qwen3-0.6B \
+            --max-model-len=2048 \
+            --load-format=dummy \
+            --enforce-eager \
+            --port 8000 &
+
+          SERVER_PID=$!
+
+          # Wait for server to start
+          for i in {1..30}; do
+            if curl -s http://localhost:8000/health > /dev/null; then
+              echo "Server started successfully"
+              break
+            fi
+            if [ "$i" -eq 30 ]; then
+              echo "Server failed to start"
+              kill "$SERVER_PID"
+              exit 1
+            fi
+            sleep 2
+          done
+
+          # Test health endpoint
+          curl -f http://localhost:8000/health
+
+          # Test completion
+          curl -f http://localhost:8000/v1/completions \
+            -H "Content-Type: application/json" \
+            -d '{
+              "model": "Qwen/Qwen3-0.6B",
+              "prompt": "Hello",
+              "max_tokens": 5
+            }'
+
+          # Cleanup
+          kill "$SERVER_PID"