From 34868b106a8a1c3f2f1e1f0cff7e48d2d0d29a35 Mon Sep 17 00:00:00 2001
From: Roger Wang <136131678+ywang96@users.noreply.github.com>
Date: Wed, 19 Mar 2025 21:46:06 -0700
Subject: [PATCH] [Doc] Update Mistral Small 3.1/Pixtral example (#15184)

Signed-off-by: Roger Wang <ywang@roblox.com>
---
 .../offline_inference/{pixtral.py => mistral-small.py} | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)
 rename examples/offline_inference/{pixtral.py => mistral-small.py} (92%)

diff --git a/examples/offline_inference/pixtral.py b/examples/offline_inference/mistral-small.py
similarity index 92%
rename from examples/offline_inference/pixtral.py
rename to examples/offline_inference/mistral-small.py
index 5379f4562162f..43be2aa80773f 100644
--- a/examples/offline_inference/pixtral.py
+++ b/examples/offline_inference/mistral-small.py
@@ -6,14 +6,16 @@ import argparse
 from vllm import LLM
 from vllm.sampling_params import SamplingParams
 
-# This script is an offline demo for running Mistral-Small-3
+# This script is an offline demo for running Mistral-Small-3.1
 #
 # If you want to run a server/client setup, please follow this code:
 #
 # - Server:
 #
 # ```bash
-# vllm serve mistralai/Mistral-Small-3.1-24B-Instruct-2503 --tokenizer-mode mistral --limit-mm-per-prompt 'image=4' --max-model-len 16384
+# vllm serve mistralai/Mistral-Small-3.1-24B-Instruct-2503 \
+#   --tokenizer-mode mistral --config-format mistral --load-format mistral \
+#   --limit-mm-per-prompt 'image=4' --max-model-len 16384
 # ```
 #
 # - Client:
@@ -51,6 +53,8 @@ def run_simple_demo(args: argparse.Namespace):
     llm = LLM(
         model=model_name,
         tokenizer_mode="mistral",
+        config_format="mistral",
+        load_format="mistral",
         max_model_len=4096,
         max_num_seqs=2,
         disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
@@ -91,6 +95,8 @@ def run_advanced_demo(args: argparse.Namespace):
     llm = LLM(
         model=model_name,
         tokenizer_mode="mistral",
+        config_format="mistral",
+        load_format="mistral",
         limit_mm_per_prompt={"image": max_img_per_msg},
         max_model_len=max_img_per_msg * max_tokens_per_img,
         disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,