[bugfix] add seed in torchrun_example.py (#15980)

Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
youkaichao 2025-04-03 12:25:01 +08:00 committed by GitHub
parent 37bfee92bf
commit 8b664706aa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 10 additions and 0 deletions

View File

@ -23,10 +23,14 @@ sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
# Use `distributed_executor_backend="external_launcher"` so that
# this llm engine/instance only creates one worker.
# it is important to set an explicit seed to make sure that
# all ranks have the same random seed, so that sampling can be
# deterministic across ranks.
llm = LLM(
model="facebook/opt-125m",
tensor_parallel_size=2,
distributed_executor_backend="external_launcher",
seed=0,
)
outputs = llm.generate(prompts, sampling_params)

View File

@ -761,6 +761,12 @@ class ModelConfig:
self,
parallel_config: "ParallelConfig",
) -> None:
if parallel_config.distributed_executor_backend == "external_launcher":
assert self.seed is not None, (
"Seed must be set when using external launcher backend to "
"make sure sampling results are the same across workers.")
total_num_attention_heads = getattr(self.hf_text_config,
"num_attention_heads", 0)
tensor_parallel_size = parallel_config.tensor_parallel_size