[BugFix] Fix torchrun DP with LLM class (#27395)

Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com>
Author: 22quinn, committed by GitHub
Date: 2025-10-24 01:11:37 -07:00
Parent: 42efe609ba
Commit: e0ef8a2920


@@ -286,10 +286,11 @@ class LLM:
             structured_outputs_instance = StructuredOutputsConfig()
 
         # warn about single-process data parallel usage.
-        _dps = int(kwargs.get("data_parallel_size", 1))
-        if _dps > 1:
+        _dp_size = int(kwargs.get("data_parallel_size", 1))
+        _distributed_executor_backend = kwargs.get("distributed_executor_backend")
+        if _dp_size > 1 and not _distributed_executor_backend == "external_launcher":
             raise ValueError(
-                f"LLM(data_parallel_size={_dps}) is not supported for single-"
+                f"LLM(data_parallel_size={_dp_size}) is not supported for single-"
                 "process usage and may hang. Please use "
                 "the explicit multi-process data-parallel example at "
                 "'examples/offline_inference/data_parallel.py'."
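The guard previously rejected any data_parallel_size > 1 inside a single process. With this fix it is skipped when distributed_executor_backend="external_launcher", the backend used when an external launcher such as torchrun has already spawned one process per rank, so only the truly single-process case still raises. A minimal sketch of the now-supported usage, to be launched as e.g. `torchrun --nproc-per-node=2 script.py` (the model, prompt, and sampling settings below are illustrative placeholders):

    # Sketch: torchrun-launched data parallelism via the LLM class.
    # Each torchrun process constructs its own LLM; vLLM does not
    # spawn workers itself because torchrun owns process creation.
    from vllm import LLM, SamplingParams

    llm = LLM(
        model="facebook/opt-125m",  # illustrative model
        data_parallel_size=2,  # one DP rank per torchrun process
        distributed_executor_backend="external_launcher",
    )
    outputs = llm.generate(
        ["Hello, my name is"],
        SamplingParams(temperature=0.8, max_tokens=16),
    )
    for output in outputs:
        print(output.outputs[0].text)

Single-process callers with data_parallel_size > 1 still get the ValueError pointing them to the multi-process example at 'examples/offline_inference/data_parallel.py'.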