mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-20 07:45:01 +08:00
check input length of sonnet samples (#16423)
Signed-off-by: alexey-belyakov <alexey.belyakov@intel.com>
This commit is contained in:
parent
268c325078
commit
3e397a9484
@ -489,7 +489,7 @@ class SonnetDataset(BenchmarkDataset):
|
|||||||
prefix_lines = self.data[:num_prefix_lines]
|
prefix_lines = self.data[:num_prefix_lines]
|
||||||
|
|
||||||
samples = []
|
samples = []
|
||||||
for _ in range(num_requests):
|
while len(samples) < num_requests:
|
||||||
extra_lines = random.choices(self.data,
|
extra_lines = random.choices(self.data,
|
||||||
k=num_input_lines - num_prefix_lines)
|
k=num_input_lines - num_prefix_lines)
|
||||||
prompt = f"{base_prompt}{''.join(prefix_lines + extra_lines)}"
|
prompt = f"{base_prompt}{''.join(prefix_lines + extra_lines)}"
|
||||||
@ -497,6 +497,7 @@ class SonnetDataset(BenchmarkDataset):
|
|||||||
prompt_formatted = tokenizer.apply_chat_template(
|
prompt_formatted = tokenizer.apply_chat_template(
|
||||||
msg, add_generation_prompt=True, tokenize=False)
|
msg, add_generation_prompt=True, tokenize=False)
|
||||||
prompt_len = len(tokenizer(prompt_formatted).input_ids)
|
prompt_len = len(tokenizer(prompt_formatted).input_ids)
|
||||||
|
if prompt_len <= input_len:
|
||||||
samples.append(
|
samples.append(
|
||||||
SampleRequest(
|
SampleRequest(
|
||||||
prompt=prompt_formatted
|
prompt=prompt_formatted
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user