mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-23 05:44:28 +08:00
[Bug] Fix Test in Batch Invariant (#26128)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent
6273fe8d3d
commit
4ba8875749
@@ -292,8 +292,11 @@ def LLM_with_max_seqs(
|
||||
# Allow some CPU offload if needed.
|
||||
swap_space=swap_space,
|
||||
# Keep things lean and CI-friendly.
|
||||
-dtype="float16",
|
||||
+dtype="auto",
|
||||
# Single-GPU by default; override externally if desired.
|
||||
tensor_parallel_size=int(os.getenv("VLLM_TP_SIZE", "1")),
|
||||
trust_remote_code=os.getenv("VLLM_TRUST_REMOTE_CODE", "0") == "1",
|
||||
enable_prefix_caching=False,
|
||||
# Enable for MOE models
|
||||
# enable_expert_parallel=True,
|
||||
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user