[Spec Decode][Benchmark] Add Spec Bench Dataset for benchmarking (#23563)

Signed-off-by: Ekagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com>
2026-07-23 17:57:31 +08:00 · 2025-09-08 13:32:42 -04:00 · 2025-09-08 13:32:42 -04:00 · 3feeeb9fea
commit 3feeeb9fea
parent 6f4a82f8b5
1 changed files with 79 additions and 1 deletions
--- a/vllm/benchmarks/datasets.py
+++ b/vllm/benchmarks/datasets.py
@ -1020,7 +1020,7 @@ def add_dataset_parser(parser: FlexibleArgumentParser):
        default="random",
        choices=[
            "sharegpt", "burstgpt", "sonnet", "random", "random-mm", "hf", 
-            "custom", "prefix_repetition"
+            "custom", "prefix_repetition", "spec_bench"
        ],
        help="Name of the dataset to benchmark on.",
    )
@ -1053,6 +1053,22 @@ def add_dataset_parser(parser: FlexibleArgumentParser):
        "Skip applying chat template to prompt, used only for custom dataset.",
    )
    spec_bench_group = parser.add_argument_group("spec bench dataset options")
    spec_bench_group.add_argument(
        "--spec-bench-output-len",
        type=int,
        default=256,
        help=
        "Num of output tokens per request, used only for spec bench dataset.",
    )
    spec_bench_group.add_argument(
        "--spec-bench-category",
        type=str,
        default=None,
        help=
        "Category for spec bench dataset. If None, use all categories.",
    )
    sonnet_group = parser.add_argument_group("sonnet dataset options")
    sonnet_group.add_argument(
        "--sonnet-input-len",
@ -1404,6 +1420,14 @@ def get_samples(args, tokenizer) -> list[SampleRequest]:
    else:
        # For datasets that follow a similar structure, use a mapping.
        dataset_mapping = {
            "spec_bench":
            lambda: SpecBench(dataset_path=args.dataset_path, 
                              category=args.spec_bench_category).sample(
                num_requests=args.num_prompts,
                tokenizer=tokenizer,
                output_len=args.spec_bench_output_len,
                request_id_prefix=args.request_id_prefix,
            ),
            "sharegpt": lambda: ShareGPTDataset(
                random_seed=args.seed, dataset_path=args.dataset_path
            ).sample(
@ -1541,6 +1565,14 @@ class CustomDataset(BenchmarkDataset):
        request_id_prefix: str = "",
        **kwargs,
    ) -> list:
        # load all data if needed
        self.num_available_samples = len(self.data)
        if num_requests <= 0:
            num_requests = self.num_available_samples
            logger.info("num_requests is set to 0 or negative, "
                        "so using all available samples: %d",
                        num_requests)
        sampled_requests = []
        for i, item in enumerate(self.data):
            if len(sampled_requests) >= num_requests:
@ -1572,6 +1604,52 @@ class CustomDataset(BenchmarkDataset):
        return sampled_requests
 # -----------------------------------------------------------------------------
 # Spec Bench Dataset Implementation
 # -----------------------------------------------------------------------------
 class SpecBench(CustomDataset):
    """
    Implements the SpecBench dataset: https://github.com/hemingkx/Spec-Bench
    Download the dataset using: 
    wget https://raw.githubusercontent.com/hemingkx/Spec-Bench/refs/heads/main/data/spec_bench/question.jsonl
    """ # noqa: E501
    def __init__(self, **kwargs) -> None:
        self.category = kwargs.pop("category", None)
        super().__init__(**kwargs)
        self.load_data()
    def load_data(self) -> None:
        if self.dataset_path is None:
            raise ValueError("dataset_path must be provided for loading data.")
        self.data = []
        # Load the JSONL file
        jsonl_data = pd.read_json(path_or_buf=self.dataset_path,
                                    lines=True)
        # check if the JSONL file has a 'turns' column
        if "turns" not in jsonl_data.columns:
            raise ValueError("JSONL file must contain a 'turns' column.")
        for _, row in jsonl_data.iterrows():
            # sample only from a specific category if specified
            if (not self.category) or (self.category == row['category']):
                prompt = row["turns"][0]
                self.data.append({"prompt": prompt})
        random.seed(self.random_seed)
        random.shuffle(self.data)
    def sample(self, **kwargs) -> list:
        # leverage CustomDataset sample
        kwargs["skip_chat_template"] = False
        return super().sample(**kwargs)
 # -----------------------------------------------------------------------------
 # Sonnet Dataset Implementation
 # -----------------------------------------------------------------------------