[CI] Shard tests for LoRA and Kernels to speed up (#3445)

This commit is contained in:
Simon Mo 2024-03-17 14:56:30 -07:00 committed by GitHub
parent abfc4f3387
commit 93348d9458
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 10 additions and 5 deletions

View File

@@ -33,9 +33,9 @@ steps:
- label: Entrypoints Test - label: Entrypoints Test
command: pytest -v -s entrypoints command: pytest -v -s entrypoints
- label: Kernels Test - label: Kernels Test %N
command: pytest -v -s kernels command: pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
soft_fail: true parallelism: 4
- label: Models Test - label: Models Test
commands: commands:
@@ -55,8 +55,9 @@ steps:
- label: Speculative decoding tests - label: Speculative decoding tests
command: pytest -v -s spec_decode command: pytest -v -s spec_decode
- label: LoRA Test - label: LoRA Test %N
command: pytest -v -s lora --forked command: pytest -v -s lora --forked --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
parallelism: 4
- label: Metrics Test - label: Metrics Test
command: pytest -v -s metrics command: pytest -v -s metrics

View File

@@ -20,6 +20,9 @@ steps:
agents: agents:
queue: kubernetes queue: kubernetes
soft_fail: {{ step.soft_fail or false }} soft_fail: {{ step.soft_fail or false }}
{% if step.parallelism %}
parallelism: {{ step.parallelism }}
{% endif %}
retry: retry:
automatic: automatic:
- exit_status: -1 # Agent was lost - exit_status: -1 # Agent was lost

View File

@@ -16,6 +16,7 @@ pytest
pytest-forked pytest-forked
pytest-asyncio pytest-asyncio
pytest-rerunfailures pytest-rerunfailures
pytest-shard
httpx httpx
einops # required for MPT einops # required for MPT
openai openai