Pre-commit

Signed-off-by: Reagan <reaganjlee@gmail.com>
2026-07-21 04:27:09 +08:00 · 2025-12-16 16:46:06 -08:00 · 2025-12-16 16:46:06 -08:00 · 033e8b20ab
commit 033e8b20ab
parent e3dd9108cb
4 changed files with 18 additions and 22 deletions
--- a/docs/mkdocs/hooks/generate_argparse.py
+++ b/docs/mkdocs/hooks/generate_argparse.py
@@ -223,7 +223,9 @@ def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
        "run-batch": create_parser(openai_run_batch.make_arg_parser),
        # Benchmark CLI
        "bench_latency": create_parser(bench_latency.add_cli_args),
-        "bench_multimodal_processor": create_parser(bench_multimodal_processor.add_cli_args),
+        "bench_multimodal_processor": create_parser(
+            bench_multimodal_processor.add_cli_args
+        ),
        "bench_serve": create_parser(bench_serve.add_cli_args),
        "bench_sweep_plot": create_parser(bench_sweep_plot.add_cli_args),
        "bench_sweep_plot_pareto": create_parser(bench_sweep_plot_pareto.add_cli_args),
--- a/vllm/benchmarks/multimodal_processor.py
+++ b/vllm/benchmarks/multimodal_processor.py
@@ -86,7 +86,8 @@ def collect_mm_processor_stats(
    if debug and not any(stats_by_stage.values()):
        print(
-            "Warning: No MM processor stats found. Ensure --enable-mm-processor-stats is set."
+            "Warning: No MM processor stats found. "
+            "Ensure --enable-mm-processor-stats is set."
        )
    return stats_by_stage
@@ -136,28 +137,27 @@ def generate_random_multimodal_prompts(
 ) -> tuple[list[list[dict]], list[int]]:
    """
    Generate random multimodal prompts with synthetic images and text tokens.
-    
+
    Returns:
        tuple: (prompts, expected_output_lens)
            - prompts: List of OpenAI chat format messages with text and images
            - expected_output_lens: List of expected output lengths
    """
    from PIL import Image
    from vllm.benchmarks.datasets import process_image
-    
+
    rng = np.random.default_rng(seed)
-    
+
    prompts = []
    expected_output_lens = []
-    
+
    for i in range(num_prompts):
        vocab_size = tokenizer.vocab_size
-        prompt_token_ids = rng.integers(
+        prompt_token_ids = rng.integers(0, vocab_size, size=input_len).tolist()
-            0, vocab_size, size=input_len
+
-        ).tolist()
        text_prompt = tokenizer.decode(prompt_token_ids)
-        
+
        mm_items = []
        for _ in range(num_images):
            # Generate random RGB image
@@ -168,13 +168,13 @@ def generate_random_multimodal_prompts(
            # Process to OpenAI format
            mm_item = process_image(image)
            mm_items.append(mm_item)
-        
+
        # Create chat format: text + images
        content = [{"type": "text", "text": text_prompt}]
        content.extend(mm_items)
        prompts.append([{"role": "user", "content": content}])
        expected_output_lens.append(output_len)
-    
+
    return prompts, expected_output_lens
@@ -382,7 +382,6 @@ def add_cli_args(parser: argparse.ArgumentParser) -> None:
 def main(args: argparse.Namespace) -> None:
    """Main entry point for the multimodal processor benchmark."""
    from datetime import datetime
    print("Starting multimodal processor benchmark...")
    result = benchmark_multimodal_processor(args)
--- a/vllm/entrypoints/cli/benchmark/multimodal_processor.py
+++ b/vllm/entrypoints/cli/benchmark/multimodal_processor.py
@@ -19,4 +19,3 @@ class BenchmarkMultimodalProcessorSubcommand(BenchmarkSubcommandBase):
    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        main(args)
--- a/vllm/multimodal/processing.py
+++ b/vllm/multimodal/processing.py
@@ -25,7 +25,6 @@ import regex as re
 import torch
 from typing_extensions import TypeVar, assert_never
 import vllm.envs as envs
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.transformers_utils.processor import cached_processor_from_config
@@ -126,7 +125,9 @@ class MultiModalProcessorTimingStats:
        }
-def get_timing_stats_from_engine_client(engine_client: Any) -> dict[str, dict[str, float]]:
+def get_timing_stats_from_engine_client(
+    engine_client: Any,
+) -> dict[str, dict[str, float]]:
    """
    Get all timing stats from the context associated with the engine client.
@@ -2334,11 +2335,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
        if request_id is not None:
            self.info.ctx.create_timing_stats(request_id)
        stats = (
            self.info.ctx.get_timing_stats(request_id)
            if request_id is not None
            else None
        )
        mm_items = self._to_mm_items(mm_data)
        if tokenization_kwargs is None: