[BugFix] Set default random seed to 0 for V1 (#17929)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
2026-03-20 03:07:57 +08:00 · 2025-05-13 00:52:19 -07:00 · 2025-05-13 00:52:19 -07:00 · 2ff297dce9
commit 2ff297dce9
parent 8dd0671bac
1 changed file with 20 additions and 1 deletion
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -261,7 +261,8 @@ class ModelConfig:
    - "float" is shorthand for FP32 precision.\n
    - "float32" for FP32 precision."""
    seed: Optional[int] = None
-    """Random seed for reproducibility."""
+    """Random seed for reproducibility. Initialized to None in V0, but
+    initialized to 0 in V1."""
    hf_config_path: Optional[str] = None
    """Name or path of the Hugging Face config to use. If unspecified, model
    name or path will be used."""
@@ -441,6 +442,24 @@ class ModelConfig:
        return hashlib.sha256(str(factors).encode()).hexdigest()

    def __post_init__(self) -> None:
+        # Set the default seed to 0 in V1.
+        # NOTE(woosuk): In V0, we set the default seed to None because the
+        # driver worker shares the same process as the user process, and thus
+        # setting a seed affects the user process as well.
+        # In V1, we use separate processes for workers (unless
+        # VLLM_ENABLE_V1_MULTIPROCESSING=0), so setting a seed here
+        # doesn't affect the user process. However, without a consistent seed,
+        # different tensor parallel workers would sample different tokens,
+        # leading to inconsistent results.
+        if envs.VLLM_USE_V1 and self.seed is None:
+            self.seed = 0
+            if not envs.VLLM_ENABLE_V1_MULTIPROCESSING:
+                logger.warning(
+                    "The global random seed is set to %d. Since "
+                    "VLLM_ENABLE_V1_MULTIPROCESSING is set to False, this may "
+                    "affect the random state of the Python process that "
+                    "launched vLLM.", self.seed)
+
        self.model = maybe_model_redirect(self.model)
        # The tokenizer is consistent with the model by default.
        if self.tokenizer is None: