From 4deb9c88cac5768ddb0124f4cfe9a6766541e828 Mon Sep 17 00:00:00 2001
From: Zhuohan Li
Date: Mon, 29 Sep 2025 14:31:34 -0700
Subject: [PATCH] [Doc] Polish example for torchrun dp (#25899)

Signed-off-by: yewentao256
---
 .../offline_inference/torchrun_dp_example.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/examples/offline_inference/torchrun_dp_example.py b/examples/offline_inference/torchrun_dp_example.py
index 8e888a100254e..295d1637528cd 100644
--- a/examples/offline_inference/torchrun_dp_example.py
+++ b/examples/offline_inference/torchrun_dp_example.py
@@ -4,6 +4,11 @@
 experimental support for data-parallel inference with torchrun
 Note the data load balancing and distribution is done out of the vllm engine,
 no internal lb supported in external_launcher mode.
+
+To run this example:
+```bash
+$ torchrun --nproc-per-node=2 examples/offline_inference/torchrun_dp_example.py
+```
 """
 
 from vllm import LLM, SamplingParams
@@ -14,7 +19,7 @@ prompts = [
     "The president of the United States is",
     "The capital of France is",
     "The future of AI is",
-] * 50
+]
 
 # Create sampling parameters, the same across all ranks
 sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
@@ -45,14 +50,13 @@ prompts = [
 
 outputs = llm.generate(prompts, sampling_params)
 
-
-# all ranks will have the same outputs
-print("-" * 50)
 for output in outputs:
     prompt = output.prompt
     generated_text = output.outputs[0].text
-    print(f"Prompt: {prompt!r}\nGenerated text: {generated_text!r}\n")
-    print("-" * 50)
+    print(
+        f"DP Rank: {dp_rank} Prompt: {prompt!r}\nGenerated text: {generated_text!r}\n"
+    )
+
 """
 Further tips:
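
The docstring added by this patch notes that data load balancing happens outside the vLLM engine when `distributed_executor_backend="external_launcher"` is used, so each torchrun rank is expected to pick its own slice of the work. The following is a minimal, hypothetical sketch of that idea, not the patched example file itself: the model name and the round-robin sharding are illustrative, and it only assumes the standard `RANK`/`WORLD_SIZE` environment variables that torchrun sets for each worker.

```python
# Illustrative sketch (not the patched file): each torchrun rank shards the
# prompt list itself, since external_launcher mode leaves data distribution
# to the launcher.
import os

from vllm import LLM, SamplingParams

prompts = [
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]

# torchrun exports RANK and WORLD_SIZE for every worker process.
dp_rank = int(os.environ.get("RANK", "0"))
dp_size = int(os.environ.get("WORLD_SIZE", "1"))

# Round-robin shard: rank i takes prompts i, i + dp_size, i + 2 * dp_size, ...
local_prompts = prompts[dp_rank::dp_size]

sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

# The model name is a placeholder; the real example configures its own model
# and parallelism. external_launcher tells vLLM that torchrun owns the
# worker processes instead of the engine spawning them.
llm = LLM(
    model="facebook/opt-125m",
    distributed_executor_backend="external_launcher",
)

outputs = llm.generate(local_prompts, sampling_params)
for output in outputs:
    print(
        f"DP Rank: {dp_rank} Prompt: {output.prompt!r}\n"
        f"Generated text: {output.outputs[0].text!r}\n"
    )
```

Launched with the command shown in the added docstring (`torchrun --nproc-per-node=2 ...`), each rank then generates and prints only its own shard of the prompts, which is why the patch drops the `* 50` duplication and prefixes the output with the DP rank.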