From 350c94deb30747f84536ee34d91c6fca564667ce Mon Sep 17 00:00:00 2001
From: "rongfu.leng"
Date: Thu, 18 Sep 2025 15:47:43 +0800
Subject: [PATCH] [Bugfix] when use s3 model cannot use default load_format
 (#24435)

Signed-off-by: rongfu.leng
Co-authored-by: 22quinn <33176974+22quinn@users.noreply.github.com>
---
 vllm/config/__init__.py  | 12 ++++++++++++
 vllm/engine/arg_utils.py |  1 -
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py
index 64be2f38c6a31..631618d427d42 100644
--- a/vllm/config/__init__.py
+++ b/vllm/config/__init__.py
@@ -3029,6 +3029,18 @@ class VllmConfig:
                     SequenceClassificationConfig)
                 SequenceClassificationConfig.verify_and_update_config(self)
 
+        if hasattr(self.model_config, "model_weights") and is_runai_obj_uri(
+                self.model_config.model_weights):
+            if self.load_config.load_format == "auto":
+                logger.info("Detected Run:ai model config. "
+                            "Overriding `load_format` to 'runai_streamer'")
+                self.load_config.load_format = "runai_streamer"
+            elif self.load_config.load_format != "runai_streamer":
+                raise ValueError(f"To load a model from S3, 'load_format' "
+                                 f"must be 'runai_streamer', "
+                                 f"but got '{self.load_config.load_format}'. "
+                                 f"Model: {self.model_config.model}")
+
     def __str__(self):
         return (
             f"model={self.model_config.model!r}, "
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 4831cb5348c77..e2a1ec68e6f53 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -959,7 +959,6 @@ class EngineArgs:
         if (not isinstance(self, AsyncEngineArgs) and envs.VLLM_CI_USE_S3
                 and self.model in MODELS_ON_S3 and self.load_format == "auto"):
             self.model = f"{MODEL_WEIGHTS_S3_BUCKET}/{self.model}"
-            self.load_format = "runai_streamer"
 
         if self.disable_mm_preprocessor_cache:
             logger.warning(