diff --git a/benchmarks/benchmark_dataset.py b/benchmarks/benchmark_dataset.py
index 80a9246aa0b79..5d2a26cd443c0 100644
--- a/benchmarks/benchmark_dataset.py
+++ b/benchmarks/benchmark_dataset.py
@@ -865,7 +865,15 @@ class InstructCoderDataset(HuggingFaceDataset):
         for item in self.data:
             if len(sampled_requests) >= num_requests:
                 break
-            prompt = f"{item['instruction']}:\n{item['input']}"
+            prompt = (f"{item['input']}\n\n{item['instruction']} Just output "
+                      "the code, do not include any explanation.")
+
+            # apply template
+            prompt = tokenizer.apply_chat_template(
+                [{"role": "user", "content": prompt}],
+                add_generation_prompt=True,
+                tokenize=False,
+            )
             prompt_len = len(tokenizer(prompt).input_ids)
             sampled_requests.append(
                 SampleRequest(
diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py
index 0ef3e0254cc4f..f795a12568e05 100644
--- a/vllm/benchmarks/datasets.py
+++ b/vllm/benchmarks/datasets.py
@@ -880,7 +880,19 @@ class InstructCoderDataset(HuggingFaceDataset):
         for item in self.data:
             if len(sampled_requests) >= num_requests:
                 break
-            prompt = f"{item['instruction']}:\n{item['input']}"
+            prompt = (f"{item['input']}\n\n{item['instruction']} Just output "
+                      "the code, do not include any explanation.")
+
+            # apply template
+            prompt = tokenizer.apply_chat_template(
+                [{
+                    "role": "user",
+                    "content": prompt
+                }],
+                add_generation_prompt=True,
+                tokenize=False,
+            )
+
             prompt_len = len(tokenizer(prompt).input_ids)
             sampled_requests.append(
                 SampleRequest(