From 135cf55cd1d83cd4e18266e343a59e6d9f87856f Mon Sep 17 00:00:00 2001
From: Ekagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com>
Date: Tue, 3 Jun 2025 18:26:33 -0400
Subject: [PATCH] [V1][Spec Decode][Ngram] 1.35x gain -> 1.95x gain on
 InstructCoder with prompt fix (#18971)

---
 benchmarks/benchmark_dataset.py | 10 +++++++++-
 vllm/benchmarks/datasets.py     | 14 +++++++++++++-
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/benchmarks/benchmark_dataset.py b/benchmarks/benchmark_dataset.py
index 80a9246aa0b79..5d2a26cd443c0 100644
--- a/benchmarks/benchmark_dataset.py
+++ b/benchmarks/benchmark_dataset.py
@@ -865,7 +865,15 @@ class InstructCoderDataset(HuggingFaceDataset):
         for item in self.data:
             if len(sampled_requests) >= num_requests:
                 break
-            prompt = f"{item['instruction']}:\n{item['input']}"
+            prompt = f"{item['input']}\n\n{item['instruction']} Just output \
+                the code, do not include any explanation."
+
+            # apply template
+            prompt = tokenizer.apply_chat_template(
+                [{"role": "user", "content": prompt}],
+                add_generation_prompt=True,
+                tokenize=False,
+            )
             prompt_len = len(tokenizer(prompt).input_ids)
             sampled_requests.append(
                 SampleRequest(
diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py
index 0ef3e0254cc4f..f795a12568e05 100644
--- a/vllm/benchmarks/datasets.py
+++ b/vllm/benchmarks/datasets.py
@@ -880,7 +880,19 @@ class InstructCoderDataset(HuggingFaceDataset):
         for item in self.data:
             if len(sampled_requests) >= num_requests:
                 break
-            prompt = f"{item['instruction']}:\n{item['input']}"
+            prompt = f"{item['input']}\n\n{item['instruction']} Just output \
+                the code, do not include any explanation."
+
+            # apply template
+            prompt = tokenizer.apply_chat_template(
+                [{
+                    "role": "user",
+                    "content": prompt
+                }],
+                add_generation_prompt=True,
+                tokenize=False,
+            )
+
            prompt_len = len(tokenizer(prompt).input_ids)
             sampled_requests.append(
                 SampleRequest(
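
For context, the snippet below is a minimal standalone sketch of the prompt construction this patch introduces. The tokenizer checkpoint and the sample record are illustrative assumptions, not values from the patch; the benchmark uses whatever tokenizer belongs to the model under test.

# Standalone sketch of the InstructCoder prompt fix (illustrative only).
from transformers import AutoTokenizer

# Hypothetical instruction-tuned checkpoint; any chat model's tokenizer works.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-7B-Instruct")

# Hypothetical InstructCoder-style record.
item = {
    "instruction": "Rename the variable x to count.",
    "input": "x = 0\nfor i in range(10):\n    x += 1",
}

# New layout: code first, instruction second, plus a directive to emit only
# code. Presumably this makes the output overlap the prompt more closely,
# which is what the n-gram speculator exploits for the reported speedup.
prompt = (
    f"{item['input']}\n\n{item['instruction']} "
    "Just output the code, do not include any explanation."
)

# Wrap the prompt in the model's chat template so the benchmark sends the
# same format the model was instruction-tuned on.
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": prompt}],
    add_generation_prompt=True,
    tokenize=False,
)

prompt_len = len(tokenizer(prompt).input_ids)
print(prompt_len)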