mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-20 04:27:08 +08:00
Pre-commit
Signed-off-by: Reagan <reaganjlee@gmail.com>
This commit is contained in:
parent
e3dd9108cb
commit
033e8b20ab
@ -223,7 +223,9 @@ def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
|
|||||||
"run-batch": create_parser(openai_run_batch.make_arg_parser),
|
"run-batch": create_parser(openai_run_batch.make_arg_parser),
|
||||||
# Benchmark CLI
|
# Benchmark CLI
|
||||||
"bench_latency": create_parser(bench_latency.add_cli_args),
|
"bench_latency": create_parser(bench_latency.add_cli_args),
|
||||||
"bench_multimodal_processor": create_parser(bench_multimodal_processor.add_cli_args),
|
"bench_multimodal_processor": create_parser(
|
||||||
|
bench_multimodal_processor.add_cli_args
|
||||||
|
),
|
||||||
"bench_serve": create_parser(bench_serve.add_cli_args),
|
"bench_serve": create_parser(bench_serve.add_cli_args),
|
||||||
"bench_sweep_plot": create_parser(bench_sweep_plot.add_cli_args),
|
"bench_sweep_plot": create_parser(bench_sweep_plot.add_cli_args),
|
||||||
"bench_sweep_plot_pareto": create_parser(bench_sweep_plot_pareto.add_cli_args),
|
"bench_sweep_plot_pareto": create_parser(bench_sweep_plot_pareto.add_cli_args),
|
||||||
|
|||||||
@ -86,7 +86,8 @@ def collect_mm_processor_stats(
|
|||||||
|
|
||||||
if debug and not any(stats_by_stage.values()):
|
if debug and not any(stats_by_stage.values()):
|
||||||
print(
|
print(
|
||||||
"Warning: No MM processor stats found. Ensure --enable-mm-processor-stats is set."
|
"Warning: No MM processor stats found. "
|
||||||
|
"Ensure --enable-mm-processor-stats is set."
|
||||||
)
|
)
|
||||||
|
|
||||||
return stats_by_stage
|
return stats_by_stage
|
||||||
@ -136,28 +137,27 @@ def generate_random_multimodal_prompts(
|
|||||||
) -> tuple[list[list[dict]], list[int]]:
|
) -> tuple[list[list[dict]], list[int]]:
|
||||||
"""
|
"""
|
||||||
Generate random multimodal prompts with synthetic images and text tokens.
|
Generate random multimodal prompts with synthetic images and text tokens.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
tuple: (prompts, expected_output_lens)
|
tuple: (prompts, expected_output_lens)
|
||||||
- prompts: List of OpenAI chat format messages with text and images
|
- prompts: List of OpenAI chat format messages with text and images
|
||||||
- expected_output_lens: List of expected output lengths
|
- expected_output_lens: List of expected output lengths
|
||||||
"""
|
"""
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
from vllm.benchmarks.datasets import process_image
|
from vllm.benchmarks.datasets import process_image
|
||||||
|
|
||||||
rng = np.random.default_rng(seed)
|
rng = np.random.default_rng(seed)
|
||||||
|
|
||||||
prompts = []
|
prompts = []
|
||||||
expected_output_lens = []
|
expected_output_lens = []
|
||||||
|
|
||||||
for i in range(num_prompts):
|
for i in range(num_prompts):
|
||||||
vocab_size = tokenizer.vocab_size
|
vocab_size = tokenizer.vocab_size
|
||||||
prompt_token_ids = rng.integers(
|
prompt_token_ids = rng.integers(0, vocab_size, size=input_len).tolist()
|
||||||
0, vocab_size, size=input_len
|
|
||||||
).tolist()
|
|
||||||
|
|
||||||
text_prompt = tokenizer.decode(prompt_token_ids)
|
text_prompt = tokenizer.decode(prompt_token_ids)
|
||||||
|
|
||||||
mm_items = []
|
mm_items = []
|
||||||
for _ in range(num_images):
|
for _ in range(num_images):
|
||||||
# Generate random RGB image
|
# Generate random RGB image
|
||||||
@ -168,13 +168,13 @@ def generate_random_multimodal_prompts(
|
|||||||
# Process to OpenAI format
|
# Process to OpenAI format
|
||||||
mm_item = process_image(image)
|
mm_item = process_image(image)
|
||||||
mm_items.append(mm_item)
|
mm_items.append(mm_item)
|
||||||
|
|
||||||
# Create chat format: text + images
|
# Create chat format: text + images
|
||||||
content = [{"type": "text", "text": text_prompt}]
|
content = [{"type": "text", "text": text_prompt}]
|
||||||
content.extend(mm_items)
|
content.extend(mm_items)
|
||||||
prompts.append([{"role": "user", "content": content}])
|
prompts.append([{"role": "user", "content": content}])
|
||||||
expected_output_lens.append(output_len)
|
expected_output_lens.append(output_len)
|
||||||
|
|
||||||
return prompts, expected_output_lens
|
return prompts, expected_output_lens
|
||||||
|
|
||||||
|
|
||||||
@ -382,7 +382,6 @@ def add_cli_args(parser: argparse.ArgumentParser) -> None:
|
|||||||
|
|
||||||
def main(args: argparse.Namespace) -> None:
|
def main(args: argparse.Namespace) -> None:
|
||||||
"""Main entry point for the multimodal processor benchmark."""
|
"""Main entry point for the multimodal processor benchmark."""
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
print("Starting multimodal processor benchmark...")
|
print("Starting multimodal processor benchmark...")
|
||||||
result = benchmark_multimodal_processor(args)
|
result = benchmark_multimodal_processor(args)
|
||||||
|
|||||||
@ -19,4 +19,3 @@ class BenchmarkMultimodalProcessorSubcommand(BenchmarkSubcommandBase):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def cmd(args: argparse.Namespace) -> None:
|
def cmd(args: argparse.Namespace) -> None:
|
||||||
main(args)
|
main(args)
|
||||||
|
|
||||||
|
|||||||
@ -25,7 +25,6 @@ import regex as re
|
|||||||
import torch
|
import torch
|
||||||
from typing_extensions import TypeVar, assert_never
|
from typing_extensions import TypeVar, assert_never
|
||||||
|
|
||||||
import vllm.envs as envs
|
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.tokenizers import TokenizerLike
|
from vllm.tokenizers import TokenizerLike
|
||||||
from vllm.transformers_utils.processor import cached_processor_from_config
|
from vllm.transformers_utils.processor import cached_processor_from_config
|
||||||
@ -126,7 +125,9 @@ class MultiModalProcessorTimingStats:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_timing_stats_from_engine_client(engine_client: Any) -> dict[str, dict[str, float]]:
|
def get_timing_stats_from_engine_client(
|
||||||
|
engine_client: Any,
|
||||||
|
) -> dict[str, dict[str, float]]:
|
||||||
"""
|
"""
|
||||||
Get all timing stats from the context associated with the engine client.
|
Get all timing stats from the context associated with the engine client.
|
||||||
|
|
||||||
@ -2334,11 +2335,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
|
|||||||
if request_id is not None:
|
if request_id is not None:
|
||||||
self.info.ctx.create_timing_stats(request_id)
|
self.info.ctx.create_timing_stats(request_id)
|
||||||
|
|
||||||
stats = (
|
|
||||||
self.info.ctx.get_timing_stats(request_id)
|
|
||||||
if request_id is not None
|
|
||||||
else None
|
|
||||||
)
|
|
||||||
mm_items = self._to_mm_items(mm_data)
|
mm_items = self._to_mm_items(mm_data)
|
||||||
|
|
||||||
if tokenization_kwargs is None:
|
if tokenization_kwargs is None:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user