[V1][CI] Fix failed v1-test because of min_p (#13316)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
commit e7eea5a520
parent a12934d3ec
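For context on what the test now has to cover: min-p sampling keeps only the tokens whose probability is at least `min_p` times the probability of the most likely token, so `min_p = 0.0` disables the filter entirely. The sketch below is a minimal illustration of that rule, not vLLM's sampler code; `apply_min_p` is a hypothetical helper name.

```python
# Minimal illustration of min-p filtering (hypothetical helper, not
# vLLM's implementation). A token survives if its probability is at
# least min_p * P(most likely token); min_p == 0.0 keeps everything.
import torch


def apply_min_p(logits: torch.Tensor, min_p: torch.Tensor) -> torch.Tensor:
    # logits: [num_reqs, vocab_size]; min_p: [num_reqs]
    probs = torch.softmax(logits, dim=-1)
    top_probs = probs.max(dim=-1, keepdim=True).values
    # Per-request threshold scales with the top token's probability.
    threshold = min_p.unsqueeze(-1) * top_probs
    return logits.masked_fill(probs < threshold, float("-inf"))
```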
```diff
@@ -62,6 +62,7 @@ def _construct_expected_sampling_metadata(
     repetition_penalties = [1.0 for _ in range(num_reqs)]
     top_k = [0 for _ in range(num_reqs)]
     top_p = [0.0 for _ in range(num_reqs)]
+    min_p = [0.0 for _ in range(num_reqs)]
     temperature = [0.0 for _ in range(num_reqs)]
     stop_token_ids: List[Set[int]] = [set() for _ in range(num_reqs)]
     min_tokens = [0 for _ in range(num_reqs)]
@@ -80,12 +81,12 @@ def _construct_expected_sampling_metadata(
             req.sampling_params.repetition_penalty)
         top_k[index_in_input_batch] = req.sampling_params.top_k
         top_p[index_in_input_batch] = req.sampling_params.top_p
+        min_p[index_in_input_batch] = req.sampling_params.min_p
         temperature[index_in_input_batch] = req.sampling_params.temperature
         stop_token_ids[
             index_in_input_batch] = req.sampling_params.all_stop_token_ids
         min_tokens[index_in_input_batch] = req.sampling_params.min_tokens
         logit_bias[index_in_input_batch] = req.sampling_params.logit_bias
-
     return SamplingMetadata(
         temperature=torch.tensor(temperature, dtype=torch.float,
                                  device=device),
@@ -95,6 +96,8 @@ def _construct_expected_sampling_metadata(
         top_k=torch.tensor(top_k, dtype=torch.int, device=device),
         no_top_p=all(x == 1.0 for x in top_p),
         no_top_k=all(x == 0 for x in top_k),
+        min_p=torch.tensor(min_p, dtype=torch.float, device=device),
+        no_min_p=all(x == 0.0 for x in min_p),
         generators={},
         max_num_logprobs=0,
         prompt_token_ids=make_tensor_with_pad(
```
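Read alongside the new `no_min_p` flag, the defaults above make the invariant easy to check. A standalone sketch of the relationship the expected metadata relies on, using only the values built in the diff:

```python
# When no request overrides min_p, the expected tensor stays all-zero
# and no_min_p is True, so a sampler may skip the min-p pass entirely.
import torch

num_reqs = 4
min_p = [0.0 for _ in range(num_reqs)]
no_min_p = all(x == 0.0 for x in min_p)
assert no_min_p
assert torch.all(torch.tensor(min_p, dtype=torch.float) == 0.0)
```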