diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index b1602dd9496ba..95cd5b1989ee2 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -184,6 +184,7 @@ steps:
 
 - label: Distributed Tests (A100)
   gpu: a100
+  num_gpus: 4  # GPUs requested for this step; test-template-aws.j2 reads it as step.num_gpus
   commands: 
   # NOTE: don't test llama model here, it seems hf implementation is buggy
   # see https://github.com/vllm-project/vllm/pull/5689 for details
diff --git a/.buildkite/test-template-aws.j2 b/.buildkite/test-template-aws.j2
index 08146bf4454cc..fb34b787e0cbd 100644
--- a/.buildkite/test-template-aws.j2
+++ b/.buildkite/test-template-aws.j2
@@ -75,7 +75,7 @@ steps:
             - "'cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}'"
             resources:
               limits:
-                nvidia.com/gpu: 8
+                nvidia.com/gpu: {{ step.num_gpus or 1 }}  # per-step GPU limit; falls back to 1 when num_gpus is unset
             volumeMounts:
             - name: devshm
               mountPath: /dev/shm