mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-09 01:57:02 +08:00
[CI Fix] Try fixing eagle e2e test OOM by reducing block allocation (#20213)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent
4d36693687
commit
7b1895e6ce
@ -370,6 +370,10 @@ def test_llama2_eagle_e2e_greedy_correctness(vllm_runner, common_llm_kwargs,
|
||||
@pytest.mark.parametrize(
|
||||
"common_llm_kwargs",
|
||||
[{
|
||||
# 2 blocks for the small prompt, 256 // 16 blocks for the generated tokens.
|
||||
"num_gpu_blocks_override": 2 + 256 // 16,
|
||||
"max_model_len": (2 + 256 // 16) * 16,
|
||||
|
||||
# Skip CUDA graph recording to keep the test fast.
|
||||
"enforce_eager": True,
|
||||
|
||||
@ -420,6 +424,10 @@ def test_llama3_eagle_e2e_greedy_correctness(vllm_runner, common_llm_kwargs,
|
||||
@pytest.mark.parametrize(
|
||||
"common_llm_kwargs",
|
||||
[{
|
||||
# 2 blocks for the small prompt, 256 // 16 blocks for the generated tokens.
|
||||
"num_gpu_blocks_override": 2 + 256 // 16,
|
||||
"max_model_len": (2 + 256 // 16) * 16,
|
||||
|
||||
# Skip CUDA graph recording to keep the test fast.
|
||||
"enforce_eager": True,
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user