[Benchmark]Fix error message (#15866)

Signed-off-by: wangli <wangli858794774@gmail.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com>
2025-12-10 03:15:20 +08:00 · 2025-04-02 16:32:24 +08:00 · 2025-04-02 16:32:24 +08:00 · aa557e6422
commit aa557e6422
parent 0e00d40e4f
2 changed files with 14 additions and 12 deletions
--- a/benchmarks/benchmark_dataset.py
+++ b/benchmarks/benchmark_dataset.py
@ -582,15 +582,6 @@ class HuggingFaceDataset(BenchmarkDataset):
    ) -> None:
        super().__init__(dataset_path=dataset_path, **kwargs)

-        # Validate dataset path
-        if self.SUPPORTED_DATASET_PATHS and \
-            self.dataset_path not in self.SUPPORTED_DATASET_PATHS:
-            raise ValueError(
-                f"{self.__class__.__name__} "
-                f"only supports: {', '.join(self.SUPPORTED_DATASET_PATHS)}. "
-                "Please consider contributing if you would "
-                "like to add support for additional dataset formats.")
-
        self.dataset_split = dataset_split
        self.dataset_subset = dataset_subset
        self.load_data()
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@ -50,9 +50,9 @@ except ImportError:
    from argparse import ArgumentParser as FlexibleArgumentParser

 from benchmark_dataset import (BurstGPTDataset, ConversationDataset,
-                               InstructCoderDataset, RandomDataset,
-                               SampleRequest, ShareGPTDataset, SonnetDataset,
-                               VisionArenaDataset)
+                               HuggingFaceDataset, InstructCoderDataset,
+                               RandomDataset, SampleRequest, ShareGPTDataset,
+                               SonnetDataset, VisionArenaDataset)
 from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json

 MILLISECONDS_TO_SECONDS_CONVERSION = 1000
@ -595,6 +595,17 @@ def main(args: argparse.Namespace):
            args.hf_split = "train"
        elif args.dataset_path in ConversationDataset.SUPPORTED_DATASET_PATHS:
            dataset_class = ConversationDataset
+        else:
+            supported_datasets = set([
+                dataset_name for cls in HuggingFaceDataset.__subclasses__()
+                for dataset_name in cls.SUPPORTED_DATASET_PATHS
+            ])
+            raise ValueError(
+                f"Unsupported dataset path: {args.dataset_path}. "
+                "Huggingface dataset only supports dataset_path"
+                f" from one of following: {supported_datasets}. "
+                "Please consider contributing if you would "
+                "like to add support for additional dataset formats.")
        input_requests = dataset_class(
            dataset_path=args.dataset_path,
            dataset_subset=args.hf_subset,