mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 09:06:03 +08:00
[Kernel][ROCM] Upstream prefix prefill speed up for vLLM V1 (#13305)
Signed-off-by: Sage Moore <sage@neuralmagic.com> Signed-off-by: root <root@banff-cyxtera-s73-5.ctr.dcgpu> Signed-off-by: Aleksandr Malyshev <maleksan@amd.com> Signed-off-by: root <root@banff-cyxtera-s65-4.amd.com> Signed-off-by: maleksan85 <maleksan@amd.com> Signed-off-by: <> Co-authored-by: Sage Moore <sage@neuralmagic.com> Co-authored-by: root <root@banff-cyxtera-s73-5.ctr.dcgpu> Co-authored-by: Aleksandr Malyshev <maleksan@amd.com> Co-authored-by: qli88 <qiang.li2@amd.com> Co-authored-by: root <root@banff-cyxtera-s65-4.amd.com>
This commit is contained in:
parent
f67e9e9f22
commit
bc7c4d206b
@ -195,15 +195,15 @@ def test_lookahead_greedy_equality_with_preemption(baseline_llm_generator,
|
||||
])
|
||||
@pytest.mark.parametrize("per_test_common_llm_kwargs",
|
||||
[{
|
||||
"block_size": 8,
|
||||
"block_size": 16,
|
||||
"max_num_batched_tokens": 2,
|
||||
"max_num_seqs": 2,
|
||||
}, {
|
||||
"block_size": 8,
|
||||
"block_size": 16,
|
||||
"max_num_batched_tokens": 3,
|
||||
"max_num_seqs": 2,
|
||||
}, {
|
||||
"block_size": 8,
|
||||
"block_size": 16,
|
||||
"max_num_batched_tokens": 256,
|
||||
"max_num_seqs": 10,
|
||||
}])
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user