From 169be31b01381ba3a935c8080bb53804ec3de9a2 Mon Sep 17 00:00:00 2001 From: Reagan Date: Tue, 16 Dec 2025 16:50:56 -0800 Subject: [PATCH] cleanup Signed-off-by: Reagan --- vllm/benchmarks/multimodal_processor.py | 14 +------------- vllm/multimodal/processing.py | 1 - 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/vllm/benchmarks/multimodal_processor.py b/vllm/benchmarks/multimodal_processor.py index eee33ff66c6f4..889e2272c3cb8 100644 --- a/vllm/benchmarks/multimodal_processor.py +++ b/vllm/benchmarks/multimodal_processor.py @@ -160,16 +160,13 @@ def generate_random_multimodal_prompts( mm_items = [] for _ in range(num_images): - # Generate random RGB image random_pixels = rng.integers( 0, 256, (image_height, image_width, 3), dtype=np.uint8 ) image = Image.fromarray(random_pixels) - # Process to OpenAI format mm_item = process_image(image) mm_items.append(mm_item) - # Create chat format: text + images content = [{"type": "text", "text": text_prompt}] content.extend(mm_items) prompts.append([{"role": "user", "content": content}]) @@ -189,7 +186,6 @@ def benchmark_multimodal_processor( engine_args = EngineArgs.from_cli_args(args) llm = LLM(**dataclasses.asdict(engine_args)) - # Validate max_model_len assert llm.llm_engine.model_config.max_model_len >= ( args.input_len + args.output_len ), ( @@ -197,7 +193,6 @@ def benchmark_multimodal_processor( "the sum of input_len and output_len." ) - # Generate random multimodal prompts seed = getattr(args, "seed", 0) tokenizer = llm.get_tokenizer() prompts, expected_output_lens = generate_random_multimodal_prompts( @@ -211,11 +206,10 @@ def benchmark_multimodal_processor( seed=seed, ) - # Create sampling params sampling_params = [ SamplingParams( n=1, - temperature=0.0, # Greedy sampling for deterministic speed benchmarks + temperature=0.0, max_tokens=output_len, detokenize=True, ) @@ -228,9 +222,6 @@ def benchmark_multimodal_processor( freeze_gc_heap() - # MM processor stats are automatically enabled via set_defaults - # No need to check or raise error - debug = getattr(args, "debug_mm_stats", False) print(f"Processing {len(prompts)} requests...") @@ -312,13 +303,10 @@ def add_cli_args(parser: argparse.ArgumentParser) -> None: """Add CLI arguments for the multimodal processor benchmark.""" from vllm.engine.arg_utils import EngineArgs - # Add EngineArgs (no conflict since we removed dataset parser) EngineArgs.add_cli_args(parser) - # Automatically enable MM processor stats (required for this benchmark) parser.set_defaults(enable_mm_processor_stats=True) - # Random generation arguments (similar to latency.py) parser.add_argument( "--num-prompts", type=int, diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py index d6acaef3168aa..fbfe48436d681 100644 --- a/vllm/multimodal/processing.py +++ b/vllm/multimodal/processing.py @@ -74,7 +74,6 @@ logger = init_logger(__name__) _S = TypeVar("_S", str, list[int]) -# Context variable to store the current request_id during preprocessing _request_id_context: contextvars.ContextVar[str | None] = contextvars.ContextVar( "_request_id_context", default=None )