[Small] Prevent bypassing media domain restriction via HTTP redirects (#26035)

Signed-off-by: Chenheli Hua <huachenheli@outlook.com> Signed-off-by: simon-mo <simon.mo@hey.com>
2025-12-10 05:34:57 +08:00 · 2025-10-02 10:27:10 -07:00 · 2025-10-02 10:27:10 -07:00 · 9d9a2b77f1
commit 9d9a2b77f1
parent 6040e0b6c0
5 changed files with 39 additions and 7 deletions
--- a/docs/features/multimodal_inputs.md
+++ b/docs/features/multimodal_inputs.md
@ -8,6 +8,9 @@ This page teaches you how to pass multi-modal inputs to [multi-modal models][sup
 !!! tip
    When serving multi-modal models, consider setting `--allowed-media-domains` to restrict the domains that vLLM can access, so that it cannot be used to reach arbitrary endpoints through Server-Side Request Forgery (SSRF) attacks. You can provide a list of domains for this argument. For example: `--allowed-media-domains upload.wikimedia.org github.com www.bogotobogo.com`
    Also, consider setting `VLLM_MEDIA_URL_ALLOW_REDIRECTS=0` to prevent HTTP redirects from being followed to bypass domain restrictions.
    This restriction is especially important if you run vLLM in a containerized environment where the vLLM pods may have unrestricted access to internal networks.
 ## Offline Inference
--- a/docs/usage/security.md
+++ b/docs/usage/security.md
@ -66,6 +66,9 @@ Restrict domains that vLLM can access for media URLs by setting
 `--allowed-media-domains` to prevent Server-Side Request Forgery (SSRF) attacks.
 (e.g. `--allowed-media-domains upload.wikimedia.org github.com www.bogotobogo.com`)
 Also, consider setting `VLLM_MEDIA_URL_ALLOW_REDIRECTS=0` to prevent HTTP
 redirects from being followed to bypass domain restrictions.
 ## Security and Firewalls: Protecting Exposed vLLM Systems
 While vLLM is designed to allow unsafe network services to be isolated to
--- a/vllm/connections.py
+++ b/vllm/connections.py
@ -54,6 +54,7 @@ class HTTPConnection:
        stream: bool = False,
        timeout: Optional[float] = None,
        extra_headers: Optional[Mapping[str, str]] = None,
        allow_redirects: bool = True,
    ):
        self._validate_http_url(url)
@ -63,7 +64,8 @@ class HTTPConnection:
        return client.get(url,
                          headers=self._headers(**extra_headers),
                          stream=stream,
-                          timeout=timeout)
+                          timeout=timeout,
                          allow_redirects=allow_redirects)
    async def get_async_response(
        self,
@ -71,6 +73,7 @@ class HTTPConnection:
        *,
        timeout: Optional[float] = None,
        extra_headers: Optional[Mapping[str, str]] = None,
        allow_redirects: bool = True,
    ):
        self._validate_http_url(url)
@ -79,10 +82,17 @@ class HTTPConnection:
        return client.get(url,
                          headers=self._headers(**extra_headers),
-                          timeout=timeout)
+                          timeout=timeout,
                          allow_redirects=allow_redirects)
-    def get_bytes(self, url: str, *, timeout: Optional[float] = None) -> bytes:
+    def get_bytes(self,
-        with self.get_response(url, timeout=timeout) as r:
+                  url: str,
                  *,
                  timeout: Optional[float] = None,
                  allow_redirects: bool = True) -> bytes:
        with self.get_response(url,
                               timeout=timeout,
                               allow_redirects=allow_redirects) as r:
            r.raise_for_status()
            return r.content
@ -92,8 +102,10 @@ class HTTPConnection:
        url: str,
        *,
        timeout: Optional[float] = None,
        allow_redirects: bool = True,
    ) -> bytes:
-        async with await self.get_async_response(url, timeout=timeout) as r:
+        async with await self.get_async_response(
                url, timeout=timeout, allow_redirects=allow_redirects) as r:
            r.raise_for_status()
            return await r.read()
--- a/vllm/envs.py
+++ b/vllm/envs.py
@ -68,6 +68,7 @@ if TYPE_CHECKING:
    VLLM_IMAGE_FETCH_TIMEOUT: int = 5
    VLLM_VIDEO_FETCH_TIMEOUT: int = 30
    VLLM_AUDIO_FETCH_TIMEOUT: int = 10
    VLLM_MEDIA_URL_ALLOW_REDIRECTS: bool = True
    VLLM_MEDIA_LOADING_THREAD_COUNT: int = 8
    VLLM_MAX_AUDIO_CLIP_FILESIZE_MB: int = 25
    VLLM_VIDEO_LOADER_BACKEND: str = "opencv"
@ -725,6 +726,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
    "VLLM_AUDIO_FETCH_TIMEOUT":
    lambda: int(os.getenv("VLLM_AUDIO_FETCH_TIMEOUT", "10")),
    # Whether to allow HTTP redirects when fetching from media URLs.
    # Defaults to True ("1"); set to "0" to disable following redirects.
    "VLLM_MEDIA_URL_ALLOW_REDIRECTS":
    lambda: bool(int(os.getenv("VLLM_MEDIA_URL_ALLOW_REDIRECTS", "1"))),
    # Max number of workers for the thread pool handling
    # media bytes loading. Set to 1 to disable parallel processing.
    # Default is 8
--- a/vllm/multimodal/utils.py
+++ b/vllm/multimodal/utils.py
@ -140,7 +140,11 @@ class MediaConnector:
            self._assert_url_in_allowed_media_domains(url_spec)
            connection = self.connection
-            data = connection.get_bytes(url, timeout=fetch_timeout)
+            data = connection.get_bytes(
                url,
                timeout=fetch_timeout,
                allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS,
            )
            return media_io.load_bytes(data)
@ -167,7 +171,11 @@ class MediaConnector:
            self._assert_url_in_allowed_media_domains(url_spec)
            connection = self.connection
-            data = await connection.async_get_bytes(url, timeout=fetch_timeout)
+            data = await connection.async_get_bytes(
                url,
                timeout=fetch_timeout,
                allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS,
            )
            future = loop.run_in_executor(global_thread_pool,
                                          media_io.load_bytes, data)
            return await future