[Misc] Add packages for benchmark as extra dependency (#19089)

Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
Isotr0py 2025-06-04 19:18:48 +08:00 committed by GitHub
parent 2669a0d7b5
commit 8711bc5e68
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 20 additions and 22 deletions

View File

@ -77,6 +77,8 @@ vllm complete --quick "The future of AI is"
Run benchmark tests for latency, online serving throughput, and offline inference throughput.
To use benchmark commands, please install with extra dependencies using `pip install vllm[bench]`.
Available Commands:
```bash

View File

@ -688,6 +688,7 @@ setup(
ext_modules=ext_modules,
install_requires=get_requirements(),
extras_require={
"bench": ["pandas", "datasets"],
"tensorizer": ["tensorizer>=2.9.0"],
"fastsafetensors": ["fastsafetensors >= 0.1.10"],
"runai": ["runai-model-streamer", "runai-model-streamer-s3", "boto3"],

View File

@ -24,7 +24,6 @@ from io import BytesIO
from typing import Any, Callable, Optional, Union
import numpy as np
import pandas as pd
from PIL import Image
from transformers import PreTrainedTokenizerBase
@ -33,6 +32,23 @@ from vllm.lora.utils import get_adapter_absolute_path
from vllm.multimodal import MultiModalDataDict
from vllm.multimodal.image import convert_image_mode
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_lora_tokenizer
from vllm.utils import PlaceholderModule
try:
from datasets import load_dataset
except ImportError:
datasets = PlaceholderModule("datasets")
load_dataset = datasets.placeholder_attr("load_dataset")
try:
import pandas as pd
except ImportError:
pd = PlaceholderModule("pandas")
try:
import librosa
except ImportError:
librosa = PlaceholderModule("librosa")
logger = logging.getLogger(__name__)
@ -636,13 +652,6 @@ class BurstGPTDataset(BenchmarkDataset):
if self.dataset_path is None:
raise ValueError("dataset_path must be provided for loading data.")
try:
import pandas as pd
except ImportError as e:
raise ImportError(
"Pandas is required for BurstGPTDataset. Please install it "
"using `pip install pandas`.") from e
df = pd.read_csv(self.dataset_path)
# Filter to keep only GPT-4 rows.
gpt4_df = df[df["Model"] == "GPT-4"]
@ -717,13 +726,6 @@ class HuggingFaceDataset(BenchmarkDataset):
def load_data(self) -> None:
"""Load data from HuggingFace datasets."""
try:
from datasets import load_dataset
except ImportError as e:
raise ImportError(
"Hugging Face datasets library is required for this dataset. "
"Please install it using `pip install datasets`.") from e
self.data = load_dataset(
self.dataset_path,
name=self.dataset_subset,
@ -1147,13 +1149,6 @@ class ASRDataset(HuggingFaceDataset):
output_len: Optional[int] = None,
**kwargs,
) -> list:
try:
import librosa
except ImportError as e:
raise ImportError(
"librosa is required for ASRDataset. Please install it "
"using `pip install librosa`.") from e
output_len = (output_len
if output_len is not None else self.DEFAULT_OUTPUT_LEN)
prompt = ASRDataset.TRANSCRIPTION_PREAMBLE