[Benchmarks] Throw usage error when using dataset-name random and dataset-path together (#24819)

Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com>
2026-07-11 14:27:19 +08:00 · 2025-09-14 10:50:01 -07:00 · 2025-09-14 10:50:01 -07:00 · ff68035932
commit ff68035932
parent 1177dd53e9
1 changed files with 22 additions and 0 deletions
--- a/vllm/benchmarks/datasets.py
+++ b/vllm/benchmarks/datasets.py
@ -11,6 +11,7 @@ generation. Supported dataset types include:
  - HuggingFace
  - VisionArena
 """
+import argparse
 import ast
 import base64
 import io
@ -1019,6 +1020,25 @@ class ShareGPTDataset(BenchmarkDataset):
        return samples


+class _ValidateDatasetArgs(argparse.Action):
+    """Store an option value, then reject 'random' combined with a path.
+
+    The action writes ``values`` onto ``namespace`` under ``self.dest`` and
+    then reads ``dataset_name`` and ``dataset_path`` back from that namespace.
+    When the name is ``"random"`` and a path is set, a usage error is raised
+    through ``parser.error``.
+    """
+
+    def __call__(self, parser, namespace, values, option_string=None):
+        # Record the parsed value first so the check below can see it.
+        setattr(namespace, self.dest, values)
+
+        # NOTE(review): this check runs every time an option using this action
+        # is parsed, and it only sees what the namespace holds at that moment.
+        # If --dataset-path is parsed before --dataset-name, the name is
+        # presumably still its default here -- confirm that ordering is
+        # handled as intended.
+        name = getattr(namespace, 'dataset_name', 'random')
+        path = getattr(namespace, 'dataset_path', None)
+
+        # Nothing to report unless 'random' is paired with an explicit path.
+        if path is None or name != "random":
+            return
+
+        message = (
+            "Cannot use 'random' dataset with --dataset-path. "
+            "Please specify the appropriate --dataset-name (e.g., "
+            "'sharegpt', 'custom', 'sonnet') for your dataset file: "
+            f"{path}"
+        )
+        parser.error(message)
+
+
 def add_dataset_parser(parser: FlexibleArgumentParser):
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument(
@ -1031,6 +1051,7 @@ def add_dataset_parser(parser: FlexibleArgumentParser):
        "--dataset-name",
        type=str,
        default="random",
+        action=_ValidateDatasetArgs,
        choices=[
            "sharegpt", "burstgpt", "sonnet", "random", "random-mm", "hf", 
            "custom", "prefix_repetition", "spec_bench"
@ -1046,6 +1067,7 @@ def add_dataset_parser(parser: FlexibleArgumentParser):
        "--dataset-path",
        type=str,
        default=None,
+        action=_ValidateDatasetArgs,
        help="Path to the sharegpt/sonnet dataset. "
        "Or the huggingface dataset ID if using HF dataset.",
    )