From 34868b106a8a1c3f2f1e1f0cff7e48d2d0d29a35 Mon Sep 17 00:00:00 2001 From: Roger Wang <136131678+ywang96@users.noreply.github.com> Date: Wed, 19 Mar 2025 21:46:06 -0700 Subject: [PATCH] [Doc] Update Mistral Small 3.1/Pixtral example (#15184) Signed-off-by: Roger Wang --- .../offline_inference/{pixtral.py => mistral-small.py} | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) rename examples/offline_inference/{pixtral.py => mistral-small.py} (92%) diff --git a/examples/offline_inference/pixtral.py b/examples/offline_inference/mistral-small.py similarity index 92% rename from examples/offline_inference/pixtral.py rename to examples/offline_inference/mistral-small.py index 5379f4562162f..43be2aa80773f 100644 --- a/examples/offline_inference/pixtral.py +++ b/examples/offline_inference/mistral-small.py @@ -6,14 +6,16 @@ import argparse from vllm import LLM from vllm.sampling_params import SamplingParams -# This script is an offline demo for running Mistral-Small-3 +# This script is an offline demo for running Mistral-Small-3.1 # # If you want to run a server/client setup, please follow this code: # # - Server: # # ```bash -# vllm serve mistralai/Mistral-Small-3.1-24B-Instruct-2503 --tokenizer-mode mistral --limit-mm-per-prompt 'image=4' --max-model-len 16384 +# vllm serve mistralai/Mistral-Small-3.1-24B-Instruct-2503 \ +# --tokenizer-mode mistral --config-format mistral --load-format mistral \ +# --limit-mm-per-prompt 'image=4' --max-model-len 16384 # ``` # # - Client: @@ -51,6 +53,8 @@ def run_simple_demo(args: argparse.Namespace): llm = LLM( model=model_name, tokenizer_mode="mistral", + config_format="mistral", + load_format="mistral", max_model_len=4096, max_num_seqs=2, disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache, @@ -91,6 +95,8 @@ def run_advanced_demo(args: argparse.Namespace): llm = LLM( model=model_name, tokenizer_mode="mistral", + config_format="mistral", + load_format="mistral", limit_mm_per_prompt={"image": max_img_per_msg}, max_model_len=max_img_per_msg * max_tokens_per_img, disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,