[Misc] Set default value of seed to None (#14274)
Signed-off-by: மனோஜ்குமார் பழனிச்சாமி <smartmanoj42857@gmail.com>
This commit is contained in:
parent 05fb6718f0 · commit cc10281498
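With this change the engine no longer seeds itself to 0 by default; callers who want run-to-run reproducible sampling must pass a seed explicitly. A minimal sketch of the user-facing effect (the model name is only an example):

    from vllm import LLM, SamplingParams

    # The default is now seed=None (no fixed seed); pass one explicitly
    # if you want reproducible sampling across runs.
    llm = LLM(model="facebook/opt-125m", seed=0)

    params = SamplingParams(temperature=0.8, max_tokens=32)
    outputs = llm.generate(["Hello, my name is"], params)
    print(outputs[0].outputs[0].text)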
@@ -25,7 +25,8 @@ llm = LLM(model="facebook/opt-125m",
           tensor_parallel_size=2,
           distributed_executor_backend="external_launcher",
           gpu_memory_utilization=random.uniform(0.7, 0.9),
-          swap_space=random.randint(1, 4))
+          swap_space=random.randint(1, 4),
+          seed=0)

 outputs = llm.generate(prompts, sampling_params)

@@ -34,7 +34,8 @@ def llm():
               max_num_batched_tokens=32768,
               tensor_parallel_size=1,
               gpu_memory_utilization=0.75,
-              enforce_eager=True)
+              enforce_eager=True,
+              seed=0)

     with llm.deprecate_legacy_api():
         yield weakref.proxy(llm)
@@ -21,7 +21,7 @@ GUIDED_DECODING_BACKENDS = ["outlines", "lm-format-enforcer", "xgrammar"]
 def llm():
     # pytest caches the fixture so we use weakref.proxy to
     # enable garbage collection
-    llm = LLM(model=MODEL_NAME, max_model_len=1024)
+    llm = LLM(model=MODEL_NAME, max_model_len=1024, seed=0)

     with llm.deprecate_legacy_api():
         yield weakref.proxy(llm)
@@ -24,6 +24,8 @@ def server():
         "4080",
         "--chat-template",
         DUMMY_CHAT_TEMPLATE,
+        "--seed",
+        "0",
     ]

     with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
@@ -47,6 +47,8 @@ def default_server_args():
         "--enforce-eager",
         "--max-num-seqs",
         "128",
+        "--seed",
+        "0",
     ]

@@ -30,6 +30,8 @@ def server():
         "/" + ROOT_PATH,
         "--chat-template",
         DUMMY_CHAT_TEMPLATE,
+        "--seed",
+        "0",
     ]
     envs = os.environ.copy()

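The OpenAI-compatible server tests now pin the seed on the command line because the engine default is no longer 0. A sketch of the fixture pattern these hunks modify, using only flags that appear in the diff (the import path for the test helper and the scope are assumptions):

    import pytest

    # RemoteOpenAIServer is a vLLM test utility; this import path is assumed
    # for illustration.
    from tests.utils import RemoteOpenAIServer

    MODEL_NAME = "facebook/opt-125m"  # placeholder model for this sketch

    @pytest.fixture(scope="module")
    def server():
        args = [
            "--enforce-eager",
            "--max-num-seqs",
            "128",
            # Passed explicitly now that the engine default is seed=None.
            "--seed",
            "0",
        ]
        with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
            yield remote_server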
@@ -104,7 +104,7 @@ class EngineArgs:
     config_format: ConfigFormat = ConfigFormat.AUTO
     dtype: str = 'auto'
     kv_cache_dtype: str = 'auto'
-    seed: int = 0
+    seed: Optional[int] = None
     max_model_len: Optional[int] = None
     # Note: Specifying a custom executor backend by passing a class
     # is intended for expert use only. The API may change without
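The same default flows through the engine-level configuration, so a fixed seed has to be requested there as well. A small illustrative use, with field names taken from the hunk above (the import path is the usual location of EngineArgs in vLLM):

    from vllm.engine.arg_utils import EngineArgs

    # seed now defaults to None; set it explicitly for a deterministic config.
    engine_args = EngineArgs(model="facebook/opt-125m", seed=0)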
@@ -169,7 +169,7 @@ class LLM:
         quantization: Optional[str] = None,
         revision: Optional[str] = None,
         tokenizer_revision: Optional[str] = None,
-        seed: int = 0,
+        seed: Optional[int] = None,
         gpu_memory_utilization: float = 0.9,
         swap_space: float = 4,
         cpu_offload_gb: float = 0,
@@ -644,7 +644,7 @@ def create_kv_caches_with_random_flash(
     head_size: int,
     cache_dtype: Optional[Union[str, torch.dtype]],
     model_dtype: Optional[Union[str, torch.dtype]] = None,
-    seed: int = 0,
+    seed: Optional[int] = None,
     device: Optional[str] = "cuda",
 ) -> tuple[list[torch.Tensor], list[torch.Tensor]]:
     from vllm.platforms import current_platform
@@ -681,7 +681,7 @@ def create_kv_caches_with_random(
     head_size: int,
     cache_dtype: Optional[Union[str, torch.dtype]],
     model_dtype: Optional[Union[str, torch.dtype]] = None,
-    seed: int = 0,
+    seed: Optional[int] = None,
     device: Optional[str] = "cuda",
 ) -> tuple[list[torch.Tensor], list[torch.Tensor]]:

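For helpers like these, an Optional[int] seed is typically consumed by seeding the RNGs only when a value is given. A generic sketch of that pattern, not the actual vLLM implementation (the helper name is hypothetical):

    import random
    from typing import Optional

    import torch

    def maybe_seed(seed: Optional[int]) -> None:
        # Only touch RNG state when a seed is actually provided; None leaves
        # the generators in their default (non-deterministic) state.
        if seed is None:
            return
        random.seed(seed)
        torch.manual_seed(seed)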