mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-13 19:17:02 +08:00
Fix CI issue in the distributed 4-GPU test (#20204)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent
a29e62ea34
commit
d45417b804
@@ -64,6 +64,18 @@ def parse_args():
|
||||
parser.add_argument(
|
||||
"--trust-remote-code", action="store_true", help="Trust remote code."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-num-seqs",
|
||||
type=int,
|
||||
default=64,
|
||||
help=("Maximum number of sequences to be processed in a single iteration."),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--gpu-memory-utilization",
|
||||
type=float,
|
||||
default=0.8,
|
||||
help=("Fraction of GPU memory vLLM is allowed to allocate (0.0, 1.0]."),
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
@@ -77,6 +89,8 @@ def main(
|
||||
GPUs_per_dp_rank,
|
||||
enforce_eager,
|
||||
trust_remote_code,
|
||||
max_num_seqs,
|
||||
gpu_memory_utilization,
|
||||
):
|
||||
os.environ["VLLM_DP_RANK"] = str(global_dp_rank)
|
||||
os.environ["VLLM_DP_RANK_LOCAL"] = str(local_dp_rank)
|
||||
@@ -127,6 +141,8 @@ def main(
|
||||
enforce_eager=enforce_eager,
|
||||
enable_expert_parallel=True,
|
||||
trust_remote_code=trust_remote_code,
|
||||
max_num_seqs=max_num_seqs,
|
||||
gpu_memory_utilization=gpu_memory_utilization,
|
||||
)
|
||||
outputs = llm.generate(prompts, sampling_params)
|
||||
# Print the outputs.
|
||||
@@ -181,6 +197,8 @@ if __name__ == "__main__":
|
||||
tp_size,
|
||||
args.enforce_eager,
|
||||
args.trust_remote_code,
|
||||
args.max_num_seqs,
|
||||
args.gpu_memory_utilization,
|
||||
),
|
||||
)
|
||||
proc.start()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user