From 169be31b01381ba3a935c8080bb53804ec3de9a2 Mon Sep 17 00:00:00 2001
From: Reagan <reaganjlee@gmail.com>
Date: Tue, 16 Dec 2025 16:50:56 -0800
Subject: [PATCH] Remove redundant inline comments from multimodal processor benchmark and processing (no functional change)

Signed-off-by: Reagan <reaganjlee@gmail.com>
---
 vllm/benchmarks/multimodal_processor.py | 14 +-------------
 vllm/multimodal/processing.py           |  1 -
 2 files changed, 1 insertion(+), 14 deletions(-)

diff --git a/vllm/benchmarks/multimodal_processor.py b/vllm/benchmarks/multimodal_processor.py
index eee33ff66c6f4..889e2272c3cb8 100644
--- a/vllm/benchmarks/multimodal_processor.py
+++ b/vllm/benchmarks/multimodal_processor.py
@@ -160,16 +160,13 @@ def generate_random_multimodal_prompts(
 
         mm_items = []
         for _ in range(num_images):
-            # Generate random RGB image
             random_pixels = rng.integers(
                 0, 256, (image_height, image_width, 3), dtype=np.uint8
             )
             image = Image.fromarray(random_pixels)
-            # Process to OpenAI format
             mm_item = process_image(image)
             mm_items.append(mm_item)
 
-        # Create chat format: text + images
         content = [{"type": "text", "text": text_prompt}]
         content.extend(mm_items)
         prompts.append([{"role": "user", "content": content}])
@@ -189,7 +186,6 @@ def benchmark_multimodal_processor(
     engine_args = EngineArgs.from_cli_args(args)
     llm = LLM(**dataclasses.asdict(engine_args))
 
-    # Validate max_model_len
     assert llm.llm_engine.model_config.max_model_len >= (
         args.input_len + args.output_len
     ), (
@@ -197,7 +193,6 @@ def benchmark_multimodal_processor(
         "the sum of input_len and output_len."
     )
 
-    # Generate random multimodal prompts
     seed = getattr(args, "seed", 0)
     tokenizer = llm.get_tokenizer()
     prompts, expected_output_lens = generate_random_multimodal_prompts(
@@ -211,11 +206,10 @@ def benchmark_multimodal_processor(
         seed=seed,
     )
 
-    # Create sampling params
     sampling_params = [
         SamplingParams(
             n=1,
-            temperature=0.0,  # Greedy sampling for deterministic speed benchmarks
+            temperature=0.0,
             max_tokens=output_len,
             detokenize=True,
         )
@@ -228,9 +222,6 @@ def benchmark_multimodal_processor(
 
     freeze_gc_heap()
 
-    # MM processor stats are automatically enabled via set_defaults
-    # No need to check or raise error
-
     debug = getattr(args, "debug_mm_stats", False)
 
     print(f"Processing {len(prompts)} requests...")
@@ -312,13 +303,10 @@ def add_cli_args(parser: argparse.ArgumentParser) -> None:
     """Add CLI arguments for the multimodal processor benchmark."""
     from vllm.engine.arg_utils import EngineArgs
 
-    # Add EngineArgs (no conflict since we removed dataset parser)
     EngineArgs.add_cli_args(parser)
 
-    # Automatically enable MM processor stats (required for this benchmark)
     parser.set_defaults(enable_mm_processor_stats=True)
 
-    # Random generation arguments (similar to latency.py)
     parser.add_argument(
         "--num-prompts",
         type=int,
diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py
index d6acaef3168aa..fbfe48436d681 100644
--- a/vllm/multimodal/processing.py
+++ b/vllm/multimodal/processing.py
@@ -74,7 +74,6 @@ logger = init_logger(__name__)
 
 _S = TypeVar("_S", str, list[int])
 
-# Context variable to store the current request_id during preprocessing
 _request_id_context: contextvars.ContextVar[str | None] = contextvars.ContextVar(
     "_request_id_context", default=None
 )