diff --git a/docs/mkdocs/hooks/generate_argparse.py b/docs/mkdocs/hooks/generate_argparse.py index db8151ffaa392..9bdb1a0bb2d11 100644 --- a/docs/mkdocs/hooks/generate_argparse.py +++ b/docs/mkdocs/hooks/generate_argparse.py @@ -223,7 +223,9 @@ def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool): "run-batch": create_parser(openai_run_batch.make_arg_parser), # Benchmark CLI "bench_latency": create_parser(bench_latency.add_cli_args), - "bench_multimodal_processor": create_parser(bench_multimodal_processor.add_cli_args), + "bench_multimodal_processor": create_parser( + bench_multimodal_processor.add_cli_args + ), "bench_serve": create_parser(bench_serve.add_cli_args), "bench_sweep_plot": create_parser(bench_sweep_plot.add_cli_args), "bench_sweep_plot_pareto": create_parser(bench_sweep_plot_pareto.add_cli_args), diff --git a/vllm/benchmarks/multimodal_processor.py b/vllm/benchmarks/multimodal_processor.py index 3e3645fc14841..eee33ff66c6f4 100644 --- a/vllm/benchmarks/multimodal_processor.py +++ b/vllm/benchmarks/multimodal_processor.py @@ -86,7 +86,8 @@ def collect_mm_processor_stats( if debug and not any(stats_by_stage.values()): print( - "Warning: No MM processor stats found. Ensure --enable-mm-processor-stats is set." + "Warning: No MM processor stats found. " + "Ensure --enable-mm-processor-stats is set." ) return stats_by_stage @@ -136,28 +137,27 @@ def generate_random_multimodal_prompts( ) -> tuple[list[list[dict]], list[int]]: """ Generate random multimodal prompts with synthetic images and text tokens. - + Returns: tuple: (prompts, expected_output_lens) - prompts: List of OpenAI chat format messages with text and images - expected_output_lens: List of expected output lengths """ from PIL import Image + from vllm.benchmarks.datasets import process_image - + rng = np.random.default_rng(seed) - + prompts = [] expected_output_lens = [] - + for i in range(num_prompts): vocab_size = tokenizer.vocab_size - prompt_token_ids = rng.integers( - 0, vocab_size, size=input_len - ).tolist() - + prompt_token_ids = rng.integers(0, vocab_size, size=input_len).tolist() + text_prompt = tokenizer.decode(prompt_token_ids) - + mm_items = [] for _ in range(num_images): # Generate random RGB image @@ -168,13 +168,13 @@ def generate_random_multimodal_prompts( # Process to OpenAI format mm_item = process_image(image) mm_items.append(mm_item) - + # Create chat format: text + images content = [{"type": "text", "text": text_prompt}] content.extend(mm_items) prompts.append([{"role": "user", "content": content}]) expected_output_lens.append(output_len) - + return prompts, expected_output_lens @@ -382,7 +382,6 @@ def add_cli_args(parser: argparse.ArgumentParser) -> None: def main(args: argparse.Namespace) -> None: """Main entry point for the multimodal processor benchmark.""" - from datetime import datetime print("Starting multimodal processor benchmark...") result = benchmark_multimodal_processor(args) diff --git a/vllm/entrypoints/cli/benchmark/multimodal_processor.py b/vllm/entrypoints/cli/benchmark/multimodal_processor.py index 97f1a18e7bed3..0bfa3595e2efc 100644 --- a/vllm/entrypoints/cli/benchmark/multimodal_processor.py +++ b/vllm/entrypoints/cli/benchmark/multimodal_processor.py @@ -19,4 +19,3 @@ class BenchmarkMultimodalProcessorSubcommand(BenchmarkSubcommandBase): @staticmethod def cmd(args: argparse.Namespace) -> None: main(args) - diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py index 2664423294e82..d6acaef3168aa 100644 --- a/vllm/multimodal/processing.py +++ b/vllm/multimodal/processing.py @@ -25,7 +25,6 @@ import regex as re import torch from typing_extensions import TypeVar, assert_never -import vllm.envs as envs from vllm.logger import init_logger from vllm.tokenizers import TokenizerLike from vllm.transformers_utils.processor import cached_processor_from_config @@ -126,7 +125,9 @@ class MultiModalProcessorTimingStats: } -def get_timing_stats_from_engine_client(engine_client: Any) -> dict[str, dict[str, float]]: +def get_timing_stats_from_engine_client( + engine_client: Any, +) -> dict[str, dict[str, float]]: """ Get all timing stats from the context associated with the engine client. @@ -2334,11 +2335,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]): if request_id is not None: self.info.ctx.create_timing_stats(request_id) - stats = ( - self.info.ctx.get_timing_stats(request_id) - if request_id is not None - else None - ) mm_items = self._to_mm_items(mm_data) if tokenization_kwargs is None: