mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-25 07:15:50 +08:00
cleanup
Signed-off-by: Reagan <reaganjlee@gmail.com>
This commit is contained in:
parent
fd6c862ba1
commit
169be31b01
@ -160,16 +160,13 @@ def generate_random_multimodal_prompts(
|
||||
|
||||
mm_items = []
|
||||
for _ in range(num_images):
|
||||
# Generate random RGB image
|
||||
random_pixels = rng.integers(
|
||||
0, 256, (image_height, image_width, 3), dtype=np.uint8
|
||||
)
|
||||
image = Image.fromarray(random_pixels)
|
||||
# Process to OpenAI format
|
||||
mm_item = process_image(image)
|
||||
mm_items.append(mm_item)
|
||||
|
||||
# Create chat format: text + images
|
||||
content = [{"type": "text", "text": text_prompt}]
|
||||
content.extend(mm_items)
|
||||
prompts.append([{"role": "user", "content": content}])
|
||||
@ -189,7 +186,6 @@ def benchmark_multimodal_processor(
|
||||
engine_args = EngineArgs.from_cli_args(args)
|
||||
llm = LLM(**dataclasses.asdict(engine_args))
|
||||
|
||||
# Validate max_model_len
|
||||
assert llm.llm_engine.model_config.max_model_len >= (
|
||||
args.input_len + args.output_len
|
||||
), (
|
||||
@ -197,7 +193,6 @@ def benchmark_multimodal_processor(
|
||||
"the sum of input_len and output_len."
|
||||
)
|
||||
|
||||
# Generate random multimodal prompts
|
||||
seed = getattr(args, "seed", 0)
|
||||
tokenizer = llm.get_tokenizer()
|
||||
prompts, expected_output_lens = generate_random_multimodal_prompts(
|
||||
@ -211,11 +206,10 @@ def benchmark_multimodal_processor(
|
||||
seed=seed,
|
||||
)
|
||||
|
||||
# Create sampling params
|
||||
sampling_params = [
|
||||
SamplingParams(
|
||||
n=1,
|
||||
temperature=0.0, # Greedy sampling for deterministic speed benchmarks
|
||||
temperature=0.0,
|
||||
max_tokens=output_len,
|
||||
detokenize=True,
|
||||
)
|
||||
@ -228,9 +222,6 @@ def benchmark_multimodal_processor(
|
||||
|
||||
freeze_gc_heap()
|
||||
|
||||
# MM processor stats are automatically enabled via set_defaults
|
||||
# No need to check or raise error
|
||||
|
||||
debug = getattr(args, "debug_mm_stats", False)
|
||||
|
||||
print(f"Processing {len(prompts)} requests...")
|
||||
@ -312,13 +303,10 @@ def add_cli_args(parser: argparse.ArgumentParser) -> None:
|
||||
"""Add CLI arguments for the multimodal processor benchmark."""
|
||||
from vllm.engine.arg_utils import EngineArgs
|
||||
|
||||
# Add EngineArgs (no conflict since we removed dataset parser)
|
||||
EngineArgs.add_cli_args(parser)
|
||||
|
||||
# Automatically enable MM processor stats (required for this benchmark)
|
||||
parser.set_defaults(enable_mm_processor_stats=True)
|
||||
|
||||
# Random generation arguments (similar to latency.py)
|
||||
parser.add_argument(
|
||||
"--num-prompts",
|
||||
type=int,
|
||||
|
||||
@ -74,7 +74,6 @@ logger = init_logger(__name__)
|
||||
|
||||
_S = TypeVar("_S", str, list[int])
|
||||
|
||||
# Context variable to store the current request_id during preprocessing
|
||||
_request_id_context: contextvars.ContextVar[str | None] = contextvars.ContextVar(
|
||||
"_request_id_context", default=None
|
||||
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user