mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-14 05:21:19 +08:00
[TPU][Bugfix] fix OOM issue in CI test (#21550)
Signed-off-by: Chengji Yao <chengjiyao@google.com>
This commit is contained in:
parent
85d051f026
commit
40d86ee412
@@ -59,7 +59,7 @@ def test_basic(
 # actually test chunked prompt
 max_num_batched_tokens=1024,
 max_model_len=8192,
-gpu_memory_utilization=0.7,
+gpu_memory_utilization=0.95,
 max_num_seqs=max_num_seqs,
 tensor_parallel_size=tensor_parallel_size) as vllm_model:
 vllm_outputs = vllm_model.generate_greedy(example_prompts,
Loading…
x
Reference in New Issue
Block a user