Pre-commit

Signed-off-by: Reagan <reaganjlee@gmail.com>
This commit is contained in:
Reagan 2025-12-16 16:46:06 -08:00
parent e3dd9108cb
commit 033e8b20ab
4 changed files with 18 additions and 22 deletions

View File

@@ -223,7 +223,9 @@ def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
"run-batch": create_parser(openai_run_batch.make_arg_parser), "run-batch": create_parser(openai_run_batch.make_arg_parser),
# Benchmark CLI # Benchmark CLI
"bench_latency": create_parser(bench_latency.add_cli_args), "bench_latency": create_parser(bench_latency.add_cli_args),
"bench_multimodal_processor": create_parser(bench_multimodal_processor.add_cli_args), "bench_multimodal_processor": create_parser(
bench_multimodal_processor.add_cli_args
),
"bench_serve": create_parser(bench_serve.add_cli_args), "bench_serve": create_parser(bench_serve.add_cli_args),
"bench_sweep_plot": create_parser(bench_sweep_plot.add_cli_args), "bench_sweep_plot": create_parser(bench_sweep_plot.add_cli_args),
"bench_sweep_plot_pareto": create_parser(bench_sweep_plot_pareto.add_cli_args), "bench_sweep_plot_pareto": create_parser(bench_sweep_plot_pareto.add_cli_args),

View File

@@ -86,7 +86,8 @@ def collect_mm_processor_stats(
if debug and not any(stats_by_stage.values()): if debug and not any(stats_by_stage.values()):
print( print(
"Warning: No MM processor stats found. Ensure --enable-mm-processor-stats is set." "Warning: No MM processor stats found. "
"Ensure --enable-mm-processor-stats is set."
) )
return stats_by_stage return stats_by_stage
@@ -136,28 +137,27 @@ def generate_random_multimodal_prompts(
) -> tuple[list[list[dict]], list[int]]: ) -> tuple[list[list[dict]], list[int]]:
""" """
Generate random multimodal prompts with synthetic images and text tokens. Generate random multimodal prompts with synthetic images and text tokens.
Returns: Returns:
tuple: (prompts, expected_output_lens) tuple: (prompts, expected_output_lens)
- prompts: List of OpenAI chat format messages with text and images - prompts: List of OpenAI chat format messages with text and images
- expected_output_lens: List of expected output lengths - expected_output_lens: List of expected output lengths
""" """
from PIL import Image from PIL import Image
from vllm.benchmarks.datasets import process_image from vllm.benchmarks.datasets import process_image
rng = np.random.default_rng(seed) rng = np.random.default_rng(seed)
prompts = [] prompts = []
expected_output_lens = [] expected_output_lens = []
for i in range(num_prompts): for i in range(num_prompts):
vocab_size = tokenizer.vocab_size vocab_size = tokenizer.vocab_size
prompt_token_ids = rng.integers( prompt_token_ids = rng.integers(0, vocab_size, size=input_len).tolist()
0, vocab_size, size=input_len
).tolist()
text_prompt = tokenizer.decode(prompt_token_ids) text_prompt = tokenizer.decode(prompt_token_ids)
mm_items = [] mm_items = []
for _ in range(num_images): for _ in range(num_images):
# Generate random RGB image # Generate random RGB image
@@ -168,13 +168,13 @@ def generate_random_multimodal_prompts(
# Process to OpenAI format # Process to OpenAI format
mm_item = process_image(image) mm_item = process_image(image)
mm_items.append(mm_item) mm_items.append(mm_item)
# Create chat format: text + images # Create chat format: text + images
content = [{"type": "text", "text": text_prompt}] content = [{"type": "text", "text": text_prompt}]
content.extend(mm_items) content.extend(mm_items)
prompts.append([{"role": "user", "content": content}]) prompts.append([{"role": "user", "content": content}])
expected_output_lens.append(output_len) expected_output_lens.append(output_len)
return prompts, expected_output_lens return prompts, expected_output_lens
@@ -382,7 +382,6 @@ def add_cli_args(parser: argparse.ArgumentParser) -> None:
def main(args: argparse.Namespace) -> None: def main(args: argparse.Namespace) -> None:
"""Main entry point for the multimodal processor benchmark.""" """Main entry point for the multimodal processor benchmark."""
from datetime import datetime
print("Starting multimodal processor benchmark...") print("Starting multimodal processor benchmark...")
result = benchmark_multimodal_processor(args) result = benchmark_multimodal_processor(args)

View File

@@ -19,4 +19,3 @@ class BenchmarkMultimodalProcessorSubcommand(BenchmarkSubcommandBase):
@staticmethod @staticmethod
def cmd(args: argparse.Namespace) -> None: def cmd(args: argparse.Namespace) -> None:
main(args) main(args)

View File

@@ -25,7 +25,6 @@ import regex as re
import torch import torch
from typing_extensions import TypeVar, assert_never from typing_extensions import TypeVar, assert_never
import vllm.envs as envs
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike from vllm.tokenizers import TokenizerLike
from vllm.transformers_utils.processor import cached_processor_from_config from vllm.transformers_utils.processor import cached_processor_from_config
@@ -126,7 +125,9 @@ class MultiModalProcessorTimingStats:
} }
def get_timing_stats_from_engine_client(engine_client: Any) -> dict[str, dict[str, float]]: def get_timing_stats_from_engine_client(
engine_client: Any,
) -> dict[str, dict[str, float]]:
""" """
Get all timing stats from the context associated with the engine client. Get all timing stats from the context associated with the engine client.
@@ -2334,11 +2335,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
if request_id is not None: if request_id is not None:
self.info.ctx.create_timing_stats(request_id) self.info.ctx.create_timing_stats(request_id)
stats = (
self.info.ctx.get_timing_stats(request_id)
if request_id is not None
else None
)
mm_items = self._to_mm_items(mm_data) mm_items = self._to_mm_items(mm_data)
if tokenization_kwargs is None: if tokenization_kwargs is None: