From 1c2bec0f8215f57ae3fd40286d944b7201948430 Mon Sep 17 00:00:00 2001 From: wwl2755 Date: Sat, 22 Mar 2025 00:36:43 -0500 Subject: [PATCH] [Doc] add load_format items in docs (#14804) Signed-off-by: wwl2755 --- vllm/config.py | 6 ++++++ vllm/engine/arg_utils.py | 10 ++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index 1f7147f7cfd41..181fa803c620b 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -1294,6 +1294,12 @@ class LoadConfig: "tensorizer" will use CoreWeave's tensorizer library for fast weight loading. "bitsandbytes" will load nf4 type weights. + "sharded_state" will load weights from pre-sharded checkpoint files, + supporting efficient loading of tensor-parallel models. + "gguf" will load weights from GGUF format files. + "mistral" will load weights from consolidated safetensors files used + by Mistral models. + "runai_streamer" will load weights from RunAI streamer format files. model_loader_extra_config: The extra config for the model loader. ignore_patterns: The list of patterns to ignore when loading the model. Default to "original/**/*" to avoid repeated loading of llama's diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index edfa748b82d7b..e396e68f823d9 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -339,9 +339,15 @@ class EngineArgs: 'CoreWeave. 
See the Tensorize vLLM Model script in the Examples ' 'section for more information.\n' '* "runai_streamer" will load the Safetensors weights using Run:ai' - 'Model Streamer \n' + ' Model Streamer.\n' '* "bitsandbytes" will load the weights using bitsandbytes ' - 'quantization.\n') + 'quantization.\n' + '* "sharded_state" will load weights from pre-sharded checkpoint ' + 'files, supporting efficient loading of tensor-parallel models.\n' + '* "gguf" will load weights from GGUF format files (details ' + 'specified in https://github.com/ggml-org/ggml/blob/master/docs/gguf.md).\n' + '* "mistral" will load weights from consolidated safetensors files ' + 'used by Mistral models.\n') parser.add_argument( '--config-format', default=EngineArgs.config_format,