mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-25 10:53:41 +08:00
cleaner
Signed-off-by: Reagan <reaganjlee@gmail.com>
This commit is contained in:
parent
a81128725c
commit
ae0c59e57e
@@ -2,12 +2,12 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
r"""Benchmark multimodal processor latency.
|
||||
|
||||
This benchmark measures the latency of the multimodal processor module
|
||||
This benchmark measures the latency of the mm processor module
|
||||
using randomly generated multimodal prompts with synthetic images.
|
||||
MM processor stats are automatically enabled.
|
||||
|
||||
Run:
|
||||
vllm bench multimodal-processor \
|
||||
vllm bench mm-processor \
|
||||
--model <your_model> \
|
||||
--num-prompts 10 \
|
||||
--input-len 1024 \
|
||||
@@ -55,7 +55,6 @@ class MultimodalProcessorBenchmarkMetrics:
|
||||
|
||||
def collect_mm_processor_stats(
|
||||
llm_engine: Any,
|
||||
debug: bool = False,
|
||||
) -> dict[str, list[float]]:
|
||||
"""
|
||||
Collect multimodal processor timing stats.
|
||||
@@ -84,12 +83,6 @@ def collect_mm_processor_stats(
|
||||
)
|
||||
stats_by_stage["total_time"].append(stats_dict.get("total_time", 0.0))
|
||||
|
||||
if debug and not any(stats_by_stage.values()):
|
||||
print(
|
||||
"Warning: No MM processor stats found. "
|
||||
"Ensure --enable-mm-processor-stats is set."
|
||||
)
|
||||
|
||||
return stats_by_stage
|
||||
|
||||
|
||||
@@ -222,8 +215,6 @@ def benchmark_multimodal_processor(
|
||||
|
||||
freeze_gc_heap()
|
||||
|
||||
debug = getattr(args, "debug_mm_stats", False)
|
||||
|
||||
print(f"Processing {len(prompts)} requests...")
|
||||
start_time = time.perf_counter()
|
||||
|
||||
@@ -236,7 +227,6 @@ def benchmark_multimodal_processor(
|
||||
|
||||
mm_stats_by_stage = collect_mm_processor_stats(
|
||||
llm.llm_engine,
|
||||
debug=debug,
|
||||
)
|
||||
|
||||
if not any(mm_stats_by_stage.values()):
|
||||
@@ -350,11 +340,6 @@ def add_cli_args(parser: argparse.ArgumentParser) -> None:
|
||||
default=None,
|
||||
help="Path to save the benchmark results in JSON format.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--debug-mm-stats",
|
||||
action="store_true",
|
||||
help="Enable debug logging for MM processor stats collection.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--metric-percentiles",
|
||||
type=str,
|
||||
@@ -444,7 +429,7 @@ def main(args: argparse.Namespace) -> None:
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Benchmark multimodal processor latency"
|
||||
description="Benchmark mm processor latency"
|
||||
)
|
||||
add_cli_args(parser)
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -66,8 +66,8 @@ class ObservabilityConfig:
|
||||
|
||||
enable_mm_processor_stats: bool = False
|
||||
"""Enable collection of timing statistics for multimodal processor operations.
|
||||
This can be useful for performance analysis and debugging. Defaults to `False`
|
||||
(disabled)."""
|
||||
This is for internal use only (e.g., benchmarks) and is not exposed as a CLI
|
||||
argument. Defaults to `False` (disabled)."""
|
||||
|
||||
@cached_property
|
||||
def collect_model_forward_time(self) -> bool:
|
||||
|
||||
@@ -1041,10 +1041,9 @@ class EngineArgs:
|
||||
"--enable-layerwise-nvtx-tracing",
|
||||
**observability_kwargs["enable_layerwise_nvtx_tracing"],
|
||||
)
|
||||
observability_group.add_argument(
|
||||
"--enable-mm-processor-stats",
|
||||
**observability_kwargs["enable_mm_processor_stats"],
|
||||
)
|
||||
# Note: --enable-mm-processor-stats is intentionally not exposed as a CLI
|
||||
# argument. It can be set programmatically via parser.set_defaults() for
|
||||
# internal use (e.g., benchmarks), but is not part of the public API.
|
||||
|
||||
# Scheduler arguments
|
||||
scheduler_kwargs = get_kwargs(SchedulerConfig)
|
||||
|
||||
@@ -158,8 +158,6 @@ class MultiModalRegistry:
|
||||
if not model_config.is_multimodal_model:
|
||||
return {}
|
||||
|
||||
if observability_config is None:
|
||||
observability_config = ObservabilityConfig()
|
||||
processor = self.create_processor(
|
||||
model_config, observability_config, cache=cache
|
||||
)
|
||||
@@ -189,8 +187,6 @@ class MultiModalRegistry:
|
||||
if not model_config.is_multimodal_model:
|
||||
return {}
|
||||
|
||||
if observability_config is None:
|
||||
observability_config = ObservabilityConfig()
|
||||
processor = self.create_processor(
|
||||
model_config, observability_config, cache=cache
|
||||
)
|
||||
@@ -299,8 +295,6 @@ class MultiModalRegistry:
|
||||
|
||||
The model is identified by `model_config`.
|
||||
"""
|
||||
if observability_config is None:
|
||||
observability_config = ObservabilityConfig()
|
||||
processor = self.create_processor(
|
||||
model_config, observability_config, cache=cache
|
||||
)
|
||||
@@ -337,8 +331,6 @@ class MultiModalRegistry:
|
||||
|
||||
The model is identified by `model_config`.
|
||||
"""
|
||||
if observability_config is None:
|
||||
observability_config = ObservabilityConfig()
|
||||
processor = self.create_processor(
|
||||
model_config, observability_config, cache=cache
|
||||
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user