# Smoke test: builds vLLM from source on a macOS runner, starts `vllm serve`
# with a tiny dummy-weight model, and checks the /health and /v1/completions
# HTTP endpoints respond successfully.
name: macOS Apple Silicon Smoke Test

on:
  push:
    branches:
      - main
  workflow_dispatch:  # Manual trigger

# The steps below only read the repository, so keep the token read-only.
permissions:
  contents: read

jobs:
  macos-m1-smoke-test:
    # NOTE(review): the workflow name implies Apple Silicon; this relies on
    # `macos-latest` resolving to an arm64 runner image - confirm.
    runs-on: macos-latest
    timeout-minutes: 30

    steps:
      - uses: actions/checkout@v6.0.1

      # Installs uv and Python 3.12; the uv cache is keyed on the
      # dependency files matched by the glob below.
      - uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          cache-dependency-glob: |
            requirements/**/*.txt
            pyproject.toml
          python-version: '3.12'

      - name: Create virtual environment
        run: |
          uv venv
          # Put the venv first on PATH for all subsequent steps.
          echo "$GITHUB_WORKSPACE/.venv/bin" >> "$GITHUB_PATH"

      - name: Install dependencies and build vLLM
        run: |
          uv pip install -r requirements/cpu.txt --index-strategy unsafe-best-match
          uv pip install -e .
        env:
          # Environment values are strings; quote to avoid integer typing.
          CMAKE_BUILD_PARALLEL_LEVEL: '4'

      - name: Verify installation
        run: |
          python -c "import vllm; print(f'vLLM version: {vllm.__version__}')"

      - name: Smoke test vllm serve
        run: |
          # Start server in background. The model is shrunk to 2 hidden
          # layers via --hf-overrides and uses the "dummy" load format
          # (presumably so no real weights are needed - confirm in vLLM docs).
          vllm serve Qwen/Qwen3-0.6B \
            --max-model-len=2K \
            --load-format=dummy \
            --hf-overrides '{"num_hidden_layers": 2}' \
            --enforce-eager \
            --port 8000 &

          SERVER_PID=$!

          # Always stop the server when this script exits, whether it
          # succeeds, times out, or aborts on a failed curl check.
          trap 'kill "$SERVER_PID" 2>/dev/null || true' EXIT

          # Wait for server to start: up to 30 attempts, 2 seconds apart.
          for i in {1..30}; do
            # -f makes HTTP error statuses count as "not ready yet".
            if curl -sf http://localhost:8000/health > /dev/null; then
              echo "Server started successfully"
              break
            fi
            # Fail fast if the server process died during startup.
            if ! kill -0 "$SERVER_PID" 2>/dev/null; then
              echo "Server process exited before becoming healthy"
              exit 1
            fi
            if [ "$i" -eq 30 ]; then
              echo "Server failed to start"
              exit 1
            fi
            sleep 2
          done

          # Test health endpoint
          curl -f http://localhost:8000/health

          # Test completion
          curl -f http://localhost:8000/v1/completions \
            -H "Content-Type: application/json" \
            -d '{
              "model": "Qwen/Qwen3-0.6B",
              "prompt": "Hello",
              "max_tokens": 5
            }'