From 57430fc95c8a94a7c68b3d525e3b8823b0f2433f Mon Sep 17 00:00:00 2001
From: Julien Denize <40604584+juliendenize@users.noreply.github.com>
Date: Fri, 21 Nov 2025 22:58:59 +0100
Subject: [PATCH] Default model load/config/tokenizer to `mistral` format if
 relevant files exist (#28659)

Signed-off-by: Julien Denize <julien.denize@mistral.ai>
Signed-off-by: Julien Denize <40604584+juliendenize@users.noreply.github.com>
Signed-off-by: mgoin <mgoin64@gmail.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: mgoin <mgoin64@gmail.com>
---
 docs/features/tool_calling.md                 | 23 +++++--
 .../language/generation/test_mistral.py       |  2 +-
 tests/models/multimodal/test_mapping.py       | 14 ++++-
 .../models/quantization/test_bitsandbytes.py  |  3 +
 tests/tool_use/utils.py                       |  6 ++
 tests/transformers_utils/test_config.py       | 62 +++++++++++++++++++
 tests/transformers_utils/test_utils.py        |  6 +-
 .../llm/test_struct_output_generate.py        | 14 ++++-
 vllm/config/model.py                          |  9 +--
 vllm/model_executor/model_loader/__init__.py  |  2 +
 .../model_loader/default_loader.py            | 20 +++++-
 vllm/transformers_utils/config.py             | 46 +++++++++++++-
 vllm/transformers_utils/configs/mistral.py    |  2 +-
 vllm/transformers_utils/tokenizer.py          | 30 +++++----
 vllm/v1/engine/processor.py                   | 25 +++++++-
 15 files changed, 230 insertions(+), 34 deletions(-)
 create mode 100644 tests/transformers_utils/test_config.py

diff --git a/docs/features/tool_calling.md b/docs/features/tool_calling.md
index 7e6c69e717dba..dd79ba19b7247 100644
--- a/docs/features/tool_calling.md
+++ b/docs/features/tool_calling.md
@@ -142,7 +142,7 @@ Flags: `--tool-call-parser hermes`
 Supported models:
 
 * `mistralai/Mistral-7B-Instruct-v0.3` (confirmed)
-* Additional mistral function-calling models are compatible as well.
+* Additional Mistral function-calling models are compatible as well.
 
 Known issues:
 
@@ -158,12 +158,25 @@ Known issues:
 
 Recommended flags:
 
-1. To use [mistral-common](https://github.com/mistralai/mistral-common) the official Mistral tokenization backend:
+1. To use Mistral AI's official format:
 
-    `--tokenizer_mode mistral --config_format mistral --load_format mistral --tool-call-parser mistral`
+    `--tool-call-parser mistral`
 
-2. To use the default Transformers tokenization backend:
-    `--tool-call-parser mistral --chat-template examples/tool_chat_template_mistral_parallel.jinja`
+2. To use the Transformers format when available:
+
+    `--tokenizer_mode hf --config_format hf --load_format hf --tool-call-parser mistral --chat-template examples/tool_chat_template_mistral_parallel.jinja`
+
+!!! note
+    Models officially released by Mistral AI have two possible formats:
+
+    1. The official format that is used by default with `auto` or `mistral` arguments:
+
+        `--tokenizer_mode mistral --config_format mistral --load_format mistral`
+        This format uses [mistral-common](https://github.com/mistralai/mistral-common), Mistral AI's tokenizer backend.
+
+    2. The Transformers format, when available, that is used with `hf` arguments:
+
+        `--tokenizer_mode hf --config_format hf --load_format hf --chat-template examples/tool_chat_template_mistral_parallel.jinja`
 
 ### Llama Models (`llama3_json`)
 
diff --git a/tests/models/language/generation/test_mistral.py b/tests/models/language/generation/test_mistral.py
index 0ae83ec16020a..80e337d570a36 100644
--- a/tests/models/language/generation/test_mistral.py
+++ b/tests/models/language/generation/test_mistral.py
@@ -208,7 +208,7 @@ def test_mistral_format(
     with vllm_runner(
         model,
         dtype=dtype,
-        tokenizer_mode="auto",
+        tokenizer_mode="hf",
         load_format="safetensors",
         config_format="hf",
     ) as hf_format_model:
diff --git a/tests/models/multimodal/test_mapping.py b/tests/models/multimodal/test_mapping.py
index 2f38dc450ef96..0d2eaca95504e 100644
--- a/tests/models/multimodal/test_mapping.py
+++ b/tests/models/multimodal/test_mapping.py
@@ -50,12 +50,24 @@ def test_hf_model_weights_mapper(model_arch: str):
     model_info.check_available_online(on_fail="skip")
     model_info.check_transformers_version(on_fail="skip")
 
+    is_mistral_model = model_arch in [
+        "Mistral3ForConditionalGeneration",
+        "PixtralForConditionalGeneration",
+        "VoxtralForConditionalGeneration",
+    ]
+
+    if not is_mistral_model or model_info.tokenizer_mode == "mistral":
+        tokenizer_mode = model_info.tokenizer_mode
+    else:
+        tokenizer_mode = "hf"
+
     model_id = model_info.default
 
     model_config = ModelConfig(
         model_id,
         tokenizer=model_info.tokenizer or model_id,
-        tokenizer_mode=model_info.tokenizer_mode,
+        tokenizer_mode=tokenizer_mode,
+        config_format="hf",
         revision=model_info.revision,
         trust_remote_code=model_info.trust_remote_code,
         hf_overrides=model_info.hf_overrides,
diff --git a/tests/models/quantization/test_bitsandbytes.py b/tests/models/quantization/test_bitsandbytes.py
index dc4b4546e451b..5b8aaa299fdc1 100644
--- a/tests/models/quantization/test_bitsandbytes.py
+++ b/tests/models/quantization/test_bitsandbytes.py
@@ -259,6 +259,9 @@ def validate_generated_texts(
         tensor_parallel_size=vllm_tp_size,
         enforce_eager=False,
         default_torch_num_threads=1,
+        tokenizer_mode="hf",
+        load_format="hf",
+        config_format="hf",
     ) as llm:
         vllm_outputs = llm.generate_greedy(prompts, max_tokens)
         vllm_logs = log_generated_texts(prompts, vllm_outputs, "VllmRunner")
diff --git a/tests/tool_use/utils.py b/tests/tool_use/utils.py
index d188b21863812..7584b903156b7 100644
--- a/tests/tool_use/utils.py
+++ b/tests/tool_use/utils.py
@@ -128,6 +128,12 @@ CONFIGS: dict[str, ServerConfig] = {
         "arguments": [
             "--enforce-eager",
             "--no-enable-prefix-caching",
+            "--tokenizer_mode",
+            "hf",
+            "--load_format",
+            "hf",
+            "--config_format",
+            "hf",
             "--tool-call-parser",
             "mistral",
             "--chat-template",
diff --git a/tests/transformers_utils/test_config.py b/tests/transformers_utils/test_config.py
new file mode 100644
index 0000000000000..de28ab5f99e8c
--- /dev/null
+++ b/tests/transformers_utils/test_config.py
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+
+import tempfile
+from pathlib import Path
+from unittest.mock import MagicMock, call, patch
+
+import pytest
+
+from vllm.transformers_utils.config import list_filtered_repo_files
+
+
+@pytest.mark.parametrize(
+    "allow_patterns,expected_relative_files",
+    [
+        (
+            ["*.json", "correct*.txt"],
+            ["json_file.json", "subfolder/correct.txt", "correct_2.txt"],
+        ),
+    ],
+)
+def test_list_filtered_repo_files(
+    allow_patterns: list[str], expected_relative_files: list[str]
+):
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        # Build a fake repo: matching and non-matching files, at the root and nested.
+        path_tmp_dir = Path(tmp_dir)
+        subfolder = path_tmp_dir / "subfolder"
+        subfolder.mkdir()
+        (path_tmp_dir / "json_file.json").touch()
+        (path_tmp_dir / "correct_2.txt").touch()
+        (path_tmp_dir / "uncorrect.txt").touch()
+        (path_tmp_dir / "uncorrect.jpeg").touch()
+        (subfolder / "correct.txt").touch()
+        (subfolder / "uncorrect_sub.txt").touch()
+
+        def _glob_path() -> list[str]:
+            return [
+                str(file.relative_to(path_tmp_dir))
+                for file in path_tmp_dir.glob("**/*")
+                if file.is_file()
+            ]
+
+        # Mock the repo listing so list_filtered_repo_files filters the files above.
+        with patch(
+            "vllm.transformers_utils.config.list_repo_files",
+            MagicMock(return_value=_glob_path()),
+        ) as mock_list_repo_files:
+            out_files = sorted(
+                list_filtered_repo_files(
+                    tmp_dir, allow_patterns, "revision", "model", "token"
+                )
+            )
+        assert out_files == sorted(expected_relative_files)
+        assert mock_list_repo_files.call_count == 1
+        assert mock_list_repo_files.call_args_list[0] == call(
+            repo_id=tmp_dir,
+            revision="revision",
+            repo_type="model",
+            token="token",
+        )
diff --git a/tests/transformers_utils/test_utils.py b/tests/transformers_utils/test_utils.py
index beaef04d766bf..bfe1cec76c138 100644
--- a/tests/transformers_utils/test_utils.py
+++ b/tests/transformers_utils/test_utils.py
@@ -2,7 +2,11 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 
-from vllm.transformers_utils.utils import is_cloud_storage, is_gcs, is_s3
+from vllm.transformers_utils.utils import (
+    is_cloud_storage,
+    is_gcs,
+    is_s3,
+)
 
 
 def test_is_gcs():
diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py
index a00600b87eca1..d1b037b7956cf 100644
--- a/tests/v1/entrypoints/llm/test_struct_output_generate.py
+++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -46,11 +46,15 @@ EAGLE_SPEC_CONFIG = {
 
 PARAMS_MODELS_BACKENDS_TOKENIZER_MODE = [
     ("mistralai/Ministral-8B-Instruct-2410", "xgrammar", "auto", None),
-    ("mistralai/Ministral-8B-Instruct-2410", "guidance", "auto", None),
+    # FIXME: Since "auto" will use Mistral tokenizer and these backends do not support
+    # it, we skip these tests for now.
+    # ("mistralai/Ministral-8B-Instruct-2410", "guidance", "auto", None),
+    # ("mistralai/Ministral-8B-Instruct-2410", "lm-format-enforcer", "auto", None),
+    ("mistralai/Ministral-8B-Instruct-2410", "guidance", "hf", None),
     pytest.param(
         "mistralai/Ministral-8B-Instruct-2410",
         "lm-format-enforcer",
-        "auto",
+        "hf",
         None,
         marks=pytest.mark.skip(
             reason=(
@@ -80,7 +84,7 @@ PARAMS_MODELS_BACKENDS_TOKENIZER_MODE = [
     # ("mistralai/Ministral-8B-Instruct-2410", "outlines", "mistral", None),
     # ("Qwen/Qwen2.5-1.5B-Instruct", "guidance", "auto"),
     ("mistralai/Ministral-8B-Instruct-2410", "outlines", "auto", NGRAM_SPEC_CONFIG),
-    ("mistralai/Ministral-8B-Instruct-2410", "guidance", "auto", NGRAM_SPEC_CONFIG),
+    ("mistralai/Ministral-8B-Instruct-2410", "guidance", "hf", NGRAM_SPEC_CONFIG),
     ("Qwen/Qwen2.5-1.5B-Instruct", "xgrammar", "auto", NGRAM_SPEC_CONFIG),
     ("meta-llama/Meta-Llama-3.1-8B-Instruct", "xgrammar", "auto", EAGLE_SPEC_CONFIG),
 ]
@@ -151,6 +155,8 @@ def test_structured_output(
         ),
         seed=120,
         tokenizer_mode=tokenizer_mode,
+        load_format="auto" if not model_name.startswith("mistralai/") else "hf",
+        config_format="auto" if not model_name.startswith("mistralai/") else "hf",
         speculative_config=speculative_config,
     )
 
@@ -720,6 +726,8 @@ def test_structured_output_auto_mode(
         max_model_len=1024,
         structured_outputs_config=dict(backend="auto"),
         tokenizer_mode=tokenizer_mode,
+        load_format="auto",
+        config_format="auto",
     )
 
     sampling_params = SamplingParams(
diff --git a/vllm/config/model.py b/vllm/config/model.py
index 8f59673f4e1c3..49688e17cf932 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -81,7 +81,7 @@ TaskOption = Literal[
     "transcription",
     "draft",
 ]
-TokenizerMode = Literal["auto", "slow", "mistral", "custom"]
+TokenizerMode = Literal["auto", "hf", "slow", "mistral", "custom"]
 ModelDType = Literal["auto", "half", "float16", "bfloat16", "float", "float32"]
 LogprobsMode = Literal[
     "raw_logits", "raw_logprobs", "processed_logits", "processed_logprobs"
@@ -130,7 +130,8 @@ class ModelConfig:
     name or path will be used."""
     tokenizer_mode: TokenizerMode = "auto"
     """Tokenizer mode:\n
-    - "auto" will use the fast tokenizer if available.\n
+    - "auto" will use "hf" tokenizer if Mistral's tokenizer is not available.\n
+    - "hf" will use the fast tokenizer if available.\n
     - "slow" will always use the slow tokenizer.\n
     - "mistral" will always use the tokenizer from `mistral_common`.\n
     - "custom" will use --tokenizer to select the preregistered tokenizer."""
@@ -241,8 +242,8 @@ class ModelConfig:
     first one."""
     config_format: str | ConfigFormat = "auto"
     """The format of the model config to load:\n
-    - "auto" will try to load the config in hf format if available else it
-    will try to load in mistral format.\n
+    - "auto" will try to load the config in mistral format if available, else it
+    will try to load in hf format.\n
     - "hf" will load the config in hf format.\n
     - "mistral" will load the config in mistral format."""
     hf_token: bool | str | None = None
diff --git a/vllm/model_executor/model_loader/__init__.py b/vllm/model_executor/model_loader/__init__.py
index 301f2d00bf404..052d2cfc1099e 100644
--- a/vllm/model_executor/model_loader/__init__.py
+++ b/vllm/model_executor/model_loader/__init__.py
@@ -30,6 +30,7 @@ logger = init_logger(__name__)
 # if a new load format is added here
 LoadFormats = Literal[
     "auto",
+    "hf",
     "bitsandbytes",
     "dummy",
     "fastsafetensors",
@@ -45,6 +46,7 @@ LoadFormats = Literal[
 ]
 _LOAD_FORMAT_TO_MODEL_LOADER: dict[str, type[BaseModelLoader]] = {
     "auto": DefaultModelLoader,
+    "hf": DefaultModelLoader,
     "bitsandbytes": BitsAndBytesModelLoader,
     "dummy": DummyModelLoader,
     "fastsafetensors": DefaultModelLoader,
diff --git a/vllm/model_executor/model_loader/default_loader.py b/vllm/model_executor/model_loader/default_loader.py
index 67aa584c6bda2..7401a7a0e2dbb 100644
--- a/vllm/model_executor/model_loader/default_loader.py
+++ b/vllm/model_executor/model_loader/default_loader.py
@@ -31,6 +31,7 @@ from vllm.model_executor.model_loader.weight_utils import (
     safetensors_weights_iterator,
 )
 from vllm.platforms import current_platform
+from vllm.transformers_utils.config import list_filtered_repo_files
 
 logger = init_logger(__name__)
 
@@ -96,8 +97,25 @@ class DefaultModelLoader(BaseModelLoader):
         load_format = self.load_config.load_format
         use_safetensors = False
         index_file = SAFE_WEIGHTS_INDEX_NAME
-        # Some quantized models use .pt files for storing the weights.
+
+        # For 'auto', first check whether Mistral-format weight files are present,
+        # so that Mistral models are loaded in their official format by default.
         if load_format == "auto":
+            load_format = (
+                "mistral"
+                if len(
+                    list_filtered_repo_files(
+                        model_name_or_path=model_name_or_path,
+                        allow_patterns=["consolidated*.safetensors"],
+                        revision=revision,
+                    )
+                )
+                > 0
+                else "hf"
+            )
+
+        # Some quantized models use .pt files for storing the weights.
+        if load_format == "hf":
             allow_patterns = ["*.safetensors", "*.bin"]
         elif load_format == "safetensors" or load_format == "fastsafetensors":
             use_safetensors = True
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index df24738477e76..9eac7bb50afa6 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import fnmatch
 import json
 import os
 import time
@@ -355,6 +356,41 @@ def list_repo_files(
     return with_retry(lookup_files, "Error retrieving file list")
 
 
+def list_filtered_repo_files(
+    model_name_or_path: str,
+    allow_patterns: list[str],
+    revision: str | None = None,
+    repo_type: str | None = None,
+    token: str | bool | None = None,
+) -> list[str]:
+    """List repo files whose basename matches any of `allow_patterns`.
+
+    Patterns are `fnmatch` globs tested against the basename only. Best effort:
+    if the listing fails, the error is logged and an empty list is returned.
+    """
+    try:
+        all_files = list_repo_files(
+            repo_id=model_name_or_path,
+            revision=revision,
+            token=token,
+            repo_type=repo_type,
+        )
+    except Exception:
+        logger.error(
+            "Error retrieving file list. Please ensure your `model_name_or_path`, "
+            "`repo_type`, `token` and `revision` arguments are correctly set. "
+            "Returning an empty list."
+        )
+        return []
+
+    # One pass over the files so a file matching several patterns is listed once.
+    return [
+        file
+        for file in all_files
+        if any(fnmatch.fnmatch(os.path.basename(file), p) for p in allow_patterns)
+    ]
+
+
 def file_exists(
     repo_id: str,
     file_name: str,
@@ -619,10 +655,14 @@ def get_config(
 
     if config_format == "auto":
         try:
-            if is_gguf or file_or_path_exists(model, HF_CONFIG_NAME, revision=revision):
-                config_format = "hf"
-            elif file_or_path_exists(model, MISTRAL_CONFIG_NAME, revision=revision):
+            # First check for Mistral to avoid defaulting to
+            # Transformers implementation.
+            if file_or_path_exists(model, MISTRAL_CONFIG_NAME, revision=revision):
                 config_format = "mistral"
+            elif is_gguf or file_or_path_exists(
+                model, HF_CONFIG_NAME, revision=revision
+            ):
+                config_format = "hf"
             else:
                 raise ValueError(
                     "Could not detect config format for no config file found. "
diff --git a/vllm/transformers_utils/configs/mistral.py b/vllm/transformers_utils/configs/mistral.py
index fe202b2ed1568..8da4ab35c56c3 100644
--- a/vllm/transformers_utils/configs/mistral.py
+++ b/vllm/transformers_utils/configs/mistral.py
@@ -118,7 +118,7 @@ def _remap_general_mistral_args(config: dict) -> dict:
         "model_type": ("model_type", "transformer"),
         "hidden_act": ("activation", "silu"),
         "tie_word_embeddings": ("tied_embeddings", False),
-        "max_seq_len": ("max_seq_len", 128_000),
+        "max_seq_len": ("max_seq_len", config.get("max_position_embeddings", 128_000)),
         "max_position_embeddings": ("max_position_embeddings", 128_000),
     }
 
diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index a393568909d27..233076741503d 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -3,8 +3,8 @@
 
 import contextlib
 import copy
+import importlib.util
 import os
-import warnings
 from functools import lru_cache
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, TypeAlias
@@ -15,7 +15,10 @@ from typing_extensions import assert_never
 
 from vllm import envs
 from vllm.logger import init_logger
-from vllm.transformers_utils.config import get_sentence_transformer_tokenizer_config
+from vllm.transformers_utils.config import (
+    get_sentence_transformer_tokenizer_config,
+    list_filtered_repo_files,
+)
 from vllm.transformers_utils.tokenizers import MistralTokenizer
 from vllm.transformers_utils.utils import check_gguf_file
 
@@ -182,25 +185,29 @@ def get_tokenizer(
         kwargs["gguf_file"] = Path(tokenizer_name).name
         tokenizer_name = Path(tokenizer_name).parent
 
-    # if tokenizer is from official mistral org
-    is_from_mistral_org = str(tokenizer_name).split("/")[0] == "mistralai"
-    if is_from_mistral_org and tokenizer_mode != "mistral":
-        warnings.warn(
-            "It is strongly recommended to run mistral models with "
-            '`--tokenizer-mode "mistral"` to ensure correct '
-            "encoding and decoding.",
-            FutureWarning,
-            stacklevel=2,
+    # if `tokenizer_mode` == "auto", check if tokenizer can be loaded via Mistral format
+    # first to use official Mistral tokenizer if possible.
+    mistral_common_installed = importlib.util.find_spec("mistral_common") is not None
+    if tokenizer_mode == "auto" and mistral_common_installed:
+        allow_patterns = ["tekken.json", "tokenizer.model.v*"]
+        files_list = list_filtered_repo_files(
+            model_name_or_path=str(tokenizer_name),
+            allow_patterns=allow_patterns,
+            revision=revision,
         )
+        if len(files_list) > 0:
+            tokenizer_mode = "mistral"
 
     tokenizer: AnyTokenizer
     if tokenizer_mode == "mistral":
+        logger.debug_once(f"Loading MistralTokenizer from {tokenizer_name}")
         tokenizer = MistralTokenizer.from_pretrained(
             str(tokenizer_name), revision=revision
         )
     elif tokenizer_mode == "custom":
         from vllm.transformers_utils.tokenizer_base import TokenizerRegistry
 
+        logger.debug_once(f"Loading CustomTokenizer from {tokenizer_name}")
         tokenizer = TokenizerRegistry.get_tokenizer(
             str(tokenizer_name),
             *args,
@@ -210,6 +217,7 @@ def get_tokenizer(
         )
     else:
         try:
+            logger.debug_once(f"Loading AutoTokenizer from {tokenizer_name}")
             tokenizer = AutoTokenizer.from_pretrained(
                 tokenizer_name,
                 *args,
diff --git a/vllm/v1/engine/processor.py b/vllm/v1/engine/processor.py
index 905ad406b307e..af4f0e410e253 100644
--- a/vllm/v1/engine/processor.py
+++ b/vllm/v1/engine/processor.py
@@ -20,6 +20,7 @@ from vllm.multimodal.utils import argsort_mm_positions
 from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import SamplingParams
 from vllm.transformers_utils.tokenizer import AnyTokenizer
+from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer
 from vllm.utils import length_from_prompt_token_ids_or_embeds
 from vllm.v1.engine import EngineCoreRequest
 from vllm.v1.metrics.stats import MultiModalCacheStats
@@ -300,12 +301,24 @@ class Processor:
             # allows <|special_token|> and similar, see
             # https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md#special-tokens
             # Without tokenizer these are disallowed in grammars.
+            if isinstance(self.tokenizer, MistralTokenizer):
+                raise ValueError(
+                    "Mistral tokenizer is not supported for the 'guidance' "
+                    "structured output backend. Please use ['xgrammar', 'outlines'] "
+                    "backends or tokenizer_mode='hf' instead."
+                )
             validate_guidance_grammar(params, tokenizer=None)
         elif backend == "outlines":
             # outlines backend
             validate_structured_output_request_outlines(params)
         elif backend == "lm-format-enforcer":
             # lm format enforcer backend
+            if isinstance(self.tokenizer, MistralTokenizer):
+                raise ValueError(
+                    "Mistral tokenizer is not supported for the 'lm-format-enforcer' "
+                    "structured output backend. Please use ['xgrammar', 'outlines'] "
+                    "backends or tokenizer_mode='hf' instead."
+                )
             validate_structured_output_request_lm_format_enforcer(params)
         else:
             # NOTE: backend must be "auto" here, because we have
@@ -320,9 +333,15 @@ class Processor:
             except ValueError:
                 # The request either failed validation
                 # or includes some jsonschema feature(s) that
-                # are not supported in xgrammar. Fall back to guidance.
-                validate_guidance_grammar(params, tokenizer=None)
-                params.structured_outputs._backend = "guidance"
+                # are not supported in xgrammar.
+                if isinstance(self.tokenizer, MistralTokenizer):
+                    # Fall back to outlines if the tokenizer is Mistral
+                    validate_structured_output_request_outlines(params)
+                    params.structured_outputs._backend = "outlines"
+                else:
+                    # Fall back to guidance by default.
+                    validate_guidance_grammar(params, tokenizer=None)
+                    params.structured_outputs._backend = "guidance"
             # Remember that this backend was set automatically
             params.structured_outputs._backend_was_auto = True