[Misc] Set default value of seed to None (#14274)
Signed-off-by: மனோஜ்குமார் பழனிச்சாமி <smartmanoj42857@gmail.com>
This commit is contained in:
parent 05fb6718f0 · commit cc10281498
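With this change the engine no longer seeds itself to 0 by default; callers who want run-to-run reproducible sampling must pass a seed explicitly. A minimal sketch of the user-facing effect (the model name is only an example):

    from vllm import LLM, SamplingParams

    # The default is now seed=None (no fixed seed); pass one explicitly
    # if you want reproducible sampling across runs.
    llm = LLM(model="facebook/opt-125m", seed=0)

    params = SamplingParams(temperature=0.8, max_tokens=32)
    outputs = llm.generate(["Hello, my name is"], params)
    print(outputs[0].outputs[0].text)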
@@ -25,7 +25,8 @@ llm = LLM(model="facebook/opt-125m",
           tensor_parallel_size=2,
           distributed_executor_backend="external_launcher",
           gpu_memory_utilization=random.uniform(0.7, 0.9),
-          swap_space=random.randint(1, 4))
+          swap_space=random.randint(1, 4),
+          seed=0)

 outputs = llm.generate(prompts, sampling_params)

@@ -34,7 +34,8 @@ def llm():
               max_num_batched_tokens=32768,
               tensor_parallel_size=1,
               gpu_memory_utilization=0.75,
-              enforce_eager=True)
+              enforce_eager=True,
+              seed=0)

     with llm.deprecate_legacy_api():
         yield weakref.proxy(llm)
@@ -21,7 +21,7 @@ GUIDED_DECODING_BACKENDS = ["outlines", "lm-format-enforcer", "xgrammar"]
 def llm():
     # pytest caches the fixture so we use weakref.proxy to
     # enable garbage collection
-    llm = LLM(model=MODEL_NAME, max_model_len=1024)
+    llm = LLM(model=MODEL_NAME, max_model_len=1024, seed=0)

     with llm.deprecate_legacy_api():
         yield weakref.proxy(llm)
@@ -24,6 +24,8 @@ def server():
         "4080",
         "--chat-template",
         DUMMY_CHAT_TEMPLATE,
+        "--seed",
+        "0",
     ]

     with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
@@ -47,6 +47,8 @@ def default_server_args():
         "--enforce-eager",
         "--max-num-seqs",
         "128",
+        "--seed",
+        "0",
     ]

@@ -30,6 +30,8 @@ def server():
         "/" + ROOT_PATH,
         "--chat-template",
         DUMMY_CHAT_TEMPLATE,
+        "--seed",
+        "0",
     ]
     envs = os.environ.copy()

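The OpenAI-compatible server tests now pin the seed on the command line because the engine default is no longer 0. A sketch of the fixture pattern these hunks modify, using only flags that appear in the diff (the import path for the test helper and the scope are assumptions):

    import pytest

    # RemoteOpenAIServer is a vLLM test utility; this import path is assumed
    # for illustration.
    from tests.utils import RemoteOpenAIServer

    MODEL_NAME = "facebook/opt-125m"  # placeholder model for this sketch

    @pytest.fixture(scope="module")
    def server():
        args = [
            "--enforce-eager",
            "--max-num-seqs",
            "128",
            # Passed explicitly now that the engine default is seed=None.
            "--seed",
            "0",
        ]
        with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
            yield remote_server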
@@ -104,7 +104,7 @@ class EngineArgs:
     config_format: ConfigFormat = ConfigFormat.AUTO
     dtype: str = 'auto'
     kv_cache_dtype: str = 'auto'
-    seed: int = 0
+    seed: Optional[int] = None
     max_model_len: Optional[int] = None
     # Note: Specifying a custom executor backend by passing a class
     # is intended for expert use only. The API may change without
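The same default flows through the engine-level configuration, so a fixed seed has to be requested there as well. A small illustrative use, with field names taken from the hunk above (the import path is the usual location of EngineArgs in vLLM):

    from vllm.engine.arg_utils import EngineArgs

    # seed now defaults to None; set it explicitly for a deterministic config.
    engine_args = EngineArgs(model="facebook/opt-125m", seed=0)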
@@ -169,7 +169,7 @@ class LLM:
         quantization: Optional[str] = None,
         revision: Optional[str] = None,
         tokenizer_revision: Optional[str] = None,
-        seed: int = 0,
+        seed: Optional[int] = None,
         gpu_memory_utilization: float = 0.9,
         swap_space: float = 4,
         cpu_offload_gb: float = 0,
@@ -644,7 +644,7 @@ def create_kv_caches_with_random_flash(
     head_size: int,
     cache_dtype: Optional[Union[str, torch.dtype]],
     model_dtype: Optional[Union[str, torch.dtype]] = None,
-    seed: int = 0,
+    seed: Optional[int] = None,
     device: Optional[str] = "cuda",
 ) -> tuple[list[torch.Tensor], list[torch.Tensor]]:
     from vllm.platforms import current_platform
@@ -681,7 +681,7 @@ def create_kv_caches_with_random(
     head_size: int,
     cache_dtype: Optional[Union[str, torch.dtype]],
     model_dtype: Optional[Union[str, torch.dtype]] = None,
-    seed: int = 0,
+    seed: Optional[int] = None,
     device: Optional[str] = "cuda",
 ) -> tuple[list[torch.Tensor], list[torch.Tensor]]:

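For helpers like these, an Optional[int] seed is typically consumed by seeding the RNGs only when a value is given. A generic sketch of that pattern, not the actual vLLM implementation (the helper name is hypothetical):

    import random
    from typing import Optional

    import torch

    def maybe_seed(seed: Optional[int]) -> None:
        # Only touch RNG state when a seed is actually provided; None leaves
        # the generators in their default (non-deterministic) state.
        if seed is None:
            return
        random.seed(seed)
        torch.manual_seed(seed)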