From 1c2bec0f8215f57ae3fd40286d944b7201948430 Mon Sep 17 00:00:00 2001 From: wwl2755 Date: Sat, 22 Mar 2025 00:36:43 -0500 Subject: [PATCH] [Doc] add load_format items in docs (#14804) Signed-off-by: wwl2755 --- vllm/config.py | 6 ++++++ vllm/engine/arg_utils.py | 10 ++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index 1f7147f7cfd41..181fa803c620b 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -1294,6 +1294,12 @@ class LoadConfig: "tensorizer" will use CoreWeave's tensorizer library for fast weight loading. "bitsandbytes" will load nf4 type weights. + "sharded_state" will load weights from pre-sharded checkpoint files, + supporting efficient loading of tensor-parallel models. + "gguf" will load weights from GGUF format files. + "mistral" will load weights from consolidated safetensors files used + by Mistral models. + "runai_streamer" will load weights from RunAI streamer format files. model_loader_extra_config: The extra config for the model loader. ignore_patterns: The list of patterns to ignore when loading the model. Default to "original/**/*" to avoid repeated loading of llama's diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index edfa748b82d7b..e396e68f823d9 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -339,9 +339,15 @@ class EngineArgs: 'CoreWeave. 
See the Tensorize vLLM Model script in the Examples ' 'section for more information.\n' '* "runai_streamer" will load the Safetensors weights using Run:ai' - 'Model Streamer \n' + ' Model Streamer.\n' '* "bitsandbytes" will load the weights using bitsandbytes ' - 'quantization.\n') + 'quantization.\n' + '* "sharded_state" will load weights from pre-sharded checkpoint ' + 'files, supporting efficient loading of tensor-parallel models.\n' + '* "gguf" will load weights from GGUF format files (details ' + 'specified in https://github.com/ggml-org/ggml/blob/master/docs/gguf.md).\n' + '* "mistral" will load weights from consolidated safetensors files ' + 'used by Mistral models.\n') parser.add_argument( '--config-format', default=EngineArgs.config_format,