[Doc] Polish example for torchrun dp (#25899)

This commit is contained in:
Zhuohan Li 2025-09-29 14:31:34 -07:00 committed by GitHub
parent fea3e476aa
commit 8eb0a1d906
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -4,6 +4,11 @@
experimental support for data-parallel inference with torchrun experimental support for data-parallel inference with torchrun
Note the data load balancing and distribution is done out of the vllm engine, Note the data load balancing and distribution is done out of the vllm engine,
no internal lb supported in external_launcher mode. no internal lb supported in external_launcher mode.
To run this example:
```bash
$ torchrun --nproc-per-node=2 examples/offline_inference/torchrun_dp_example.py
```
""" """
from vllm import LLM, SamplingParams from vllm import LLM, SamplingParams
@@ -14,7 +19,7 @@ prompts = [
"The president of the United States is", "The president of the United States is",
"The capital of France is", "The capital of France is",
"The future of AI is", "The future of AI is",
] * 50 ]
# Create sampling parameters, the same across all ranks # Create sampling parameters, the same across all ranks
sampling_params = SamplingParams(temperature=0.8, top_p=0.95) sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
@@ -45,14 +50,13 @@ prompts = [
outputs = llm.generate(prompts, sampling_params) outputs = llm.generate(prompts, sampling_params)
# all ranks will have the same outputs
print("-" * 50)
for output in outputs: for output in outputs:
prompt = output.prompt prompt = output.prompt
generated_text = output.outputs[0].text generated_text = output.outputs[0].text
print(f"Prompt: {prompt!r}\nGenerated text: {generated_text!r}\n") print(
print("-" * 50) f"DP Rank: {dp_rank} Prompt: {prompt!r}\nGenerated text: {generated_text!r}\n"
)
""" """
Further tips: Further tips: