cleaner

Signed-off-by: Reagan <reaganjlee@gmail.com>
2026-07-06 10:07:09 +08:00 · 2025-12-18 18:36:58 -08:00 · 2025-12-18 18:36:58 -08:00 · ae0c59e57e
commit ae0c59e57e
parent a81128725c
4 changed files with 8 additions and 32 deletions
--- a/vllm/benchmarks/mm_processor.py
+++ b/vllm/benchmarks/mm_processor.py
@@ -2,12 +2,12 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 r"""Benchmark multimodal processor latency.

-This benchmark measures the latency of the multimodal processor module
+This benchmark measures the latency of the mm processor module
 using randomly generated multimodal prompts with synthetic images.
 MM processor stats are automatically enabled.

 Run:
-    vllm bench multimodal-processor \
+    vllm bench mm-processor \
        --model <your_model> \
        --num-prompts 10 \
        --input-len 1024 \
@@ -55,7 +55,6 @@ class MultimodalProcessorBenchmarkMetrics:

 def collect_mm_processor_stats(
    llm_engine: Any,
-    debug: bool = False,
 ) -> dict[str, list[float]]:
    """
    Collect multimodal processor timing stats.
@@ -84,12 +83,6 @@ def collect_mm_processor_stats(
        )
        stats_by_stage["total_time"].append(stats_dict.get("total_time", 0.0))

-    if debug and not any(stats_by_stage.values()):
-        print(
-            "Warning: No MM processor stats found. "
-            "Ensure --enable-mm-processor-stats is set."
-        )
-
    return stats_by_stage


@@ -222,8 +215,6 @@ def benchmark_multimodal_processor(

    freeze_gc_heap()

-    debug = getattr(args, "debug_mm_stats", False)
-
    print(f"Processing {len(prompts)} requests...")
    start_time = time.perf_counter()

@@ -236,7 +227,6 @@ def benchmark_multimodal_processor(

    mm_stats_by_stage = collect_mm_processor_stats(
        llm.llm_engine,
-        debug=debug,
    )

    if not any(mm_stats_by_stage.values()):
@@ -350,11 +340,6 @@ def add_cli_args(parser: argparse.ArgumentParser) -> None:
        default=None,
        help="Path to save the benchmark results in JSON format.",
    )
-    parser.add_argument(
-        "--debug-mm-stats",
-        action="store_true",
-        help="Enable debug logging for MM processor stats collection.",
-    )
    parser.add_argument(
        "--metric-percentiles",
        type=str,
@@ -444,7 +429,7 @@ def main(args: argparse.Namespace) -> None:

 if __name__ == "__main__":
    parser = argparse.ArgumentParser(
-        description="Benchmark multimodal processor latency"
+        description="Benchmark mm processor latency"
    )
    add_cli_args(parser)
    args = parser.parse_args()
--- a/vllm/config/observability.py
+++ b/vllm/config/observability.py
@@ -66,8 +66,8 @@ class ObservabilityConfig:

    enable_mm_processor_stats: bool = False
    """Enable collection of timing statistics for multimodal processor operations.
-    This can be useful for performance analysis and debugging. Defaults to `False`
-    (disabled)."""
+    This is for internal use only (e.g., benchmarks) and is not exposed as a CLI
+    argument. Defaults to `False` (disabled)."""

    @cached_property
    def collect_model_forward_time(self) -> bool:
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1041,10 +1041,9 @@ class EngineArgs:
            "--enable-layerwise-nvtx-tracing",
            **observability_kwargs["enable_layerwise_nvtx_tracing"],
        )
-        observability_group.add_argument(
-            "--enable-mm-processor-stats",
-            **observability_kwargs["enable_mm_processor_stats"],
-        )
+        # Note: --enable-mm-processor-stats is intentionally not exposed as a CLI
+        # argument. It can be set programmatically via parser.set_defaults() for
+        # internal use (e.g., benchmarks), but is not part of the public API.

        # Scheduler arguments
        scheduler_kwargs = get_kwargs(SchedulerConfig)
--- a/vllm/multimodal/registry.py
+++ b/vllm/multimodal/registry.py
@@ -158,8 +158,6 @@ class MultiModalRegistry:
        if not model_config.is_multimodal_model:
            return {}

-        if observability_config is None:
-            observability_config = ObservabilityConfig()
        processor = self.create_processor(
            model_config, observability_config, cache=cache
        )
@@ -189,8 +187,6 @@ class MultiModalRegistry:
        if not model_config.is_multimodal_model:
            return {}

-        if observability_config is None:
-            observability_config = ObservabilityConfig()
        processor = self.create_processor(
            model_config, observability_config, cache=cache
        )
@@ -299,8 +295,6 @@ class MultiModalRegistry:

        The model is identified by `model_config`.
        """
-        if observability_config is None:
-            observability_config = ObservabilityConfig()
        processor = self.create_processor(
            model_config, observability_config, cache=cache
        )
@@ -337,8 +331,6 @@ class MultiModalRegistry:

        The model is identified by `model_config`.
        """
-        if observability_config is None:
-            observability_config = ObservabilityConfig()
        processor = self.create_processor(
            model_config, observability_config, cache=cache
        )