From 9d9a2b77f19f68262d5e469c4e82c0f6365ad72d Mon Sep 17 00:00:00 2001 From: Chenheli Hua Date: Thu, 2 Oct 2025 10:27:10 -0700 Subject: [PATCH] [Small] Prevent bypassing media domain restriction via HTTP redirects (#26035) Signed-off-by: Chenheli Hua Signed-off-by: simon-mo --- docs/features/multimodal_inputs.md | 3 +++ docs/usage/security.md | 3 +++ vllm/connections.py | 22 +++++++++++++++++----- vllm/envs.py | 6 ++++++ vllm/multimodal/utils.py | 12 ++++++++++-- 5 files changed, 39 insertions(+), 7 deletions(-) diff --git a/docs/features/multimodal_inputs.md b/docs/features/multimodal_inputs.md index bcc48e756046..4438c398d342 100644 --- a/docs/features/multimodal_inputs.md +++ b/docs/features/multimodal_inputs.md @@ -8,6 +8,9 @@ This page teaches you how to pass multi-modal inputs to [multi-modal models][sup !!! tip When serving multi-modal models, consider setting `--allowed-media-domains` to restrict domain that vLLM can access to prevent it from accessing arbitrary endpoints that can potentially be vulnerable to Server-Side Request Forgery (SSRF) attacks. You can provide a list of domains for this arg. For example: `--allowed-media-domains upload.wikimedia.org github.com www.bogotobogo.com` + + Also, consider setting `VLLM_MEDIA_URL_ALLOW_REDIRECTS=0` to prevent HTTP redirects from being followed to bypass domain restrictions. + This restriction is especially important if you run vLLM in a containerized environment where the vLLM pods may have unrestricted access to internal networks. ## Offline Inference diff --git a/docs/usage/security.md b/docs/usage/security.md index 5d85e889c80c..9d10b66a5a97 100644 --- a/docs/usage/security.md +++ b/docs/usage/security.md @@ -66,6 +66,9 @@ Restrict domains that vLLM can access for media URLs by setting `--allowed-media-domains` to prevent Server-Side Request Forgery (SSRF) attacks. (e.g. `--allowed-media-domains upload.wikimedia.org github.com www.bogotobogo.com`) +Also, consider setting `VLLM_MEDIA_URL_ALLOW_REDIRECTS=0` to prevent HTTP +redirects from being followed to bypass domain restrictions. + ## Security and Firewalls: Protecting Exposed vLLM Systems While vLLM is designed to allow unsafe network services to be isolated to diff --git a/vllm/connections.py b/vllm/connections.py index 103505eb3d81..1f341719ae30 100644 --- a/vllm/connections.py +++ b/vllm/connections.py @@ -54,6 +54,7 @@ class HTTPConnection: stream: bool = False, timeout: Optional[float] = None, extra_headers: Optional[Mapping[str, str]] = None, + allow_redirects: bool = True, ): self._validate_http_url(url) @@ -63,7 +64,8 @@ class HTTPConnection: return client.get(url, headers=self._headers(**extra_headers), stream=stream, - timeout=timeout) + timeout=timeout, + allow_redirects=allow_redirects) async def get_async_response( self, @@ -71,6 +73,7 @@ class HTTPConnection: *, timeout: Optional[float] = None, extra_headers: Optional[Mapping[str, str]] = None, + allow_redirects: bool = True, ): self._validate_http_url(url) @@ -79,10 +82,17 @@ class HTTPConnection: return client.get(url, headers=self._headers(**extra_headers), - timeout=timeout) + timeout=timeout, + allow_redirects=allow_redirects) - def get_bytes(self, url: str, *, timeout: Optional[float] = None) -> bytes: - with self.get_response(url, timeout=timeout) as r: + def get_bytes(self, + url: str, + *, + timeout: Optional[float] = None, + allow_redirects: bool = True) -> bytes: + with self.get_response(url, + timeout=timeout, + allow_redirects=allow_redirects) as r: r.raise_for_status() return r.content @@ -92,8 +102,10 @@ class HTTPConnection: url: str, *, timeout: Optional[float] = None, + allow_redirects: bool = True, ) -> bytes: - async with await self.get_async_response(url, timeout=timeout) as r: + async with await self.get_async_response( + url, timeout=timeout, allow_redirects=allow_redirects) as r: r.raise_for_status() return await r.read() diff --git a/vllm/envs.py b/vllm/envs.py index 832d031f998e..e2ba31f3a7da 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -68,6 +68,7 @@ if TYPE_CHECKING: VLLM_IMAGE_FETCH_TIMEOUT: int = 5 VLLM_VIDEO_FETCH_TIMEOUT: int = 30 VLLM_AUDIO_FETCH_TIMEOUT: int = 10 + VLLM_MEDIA_URL_ALLOW_REDIRECTS: bool = True VLLM_MEDIA_LOADING_THREAD_COUNT: int = 8 VLLM_MAX_AUDIO_CLIP_FILESIZE_MB: int = 25 VLLM_VIDEO_LOADER_BACKEND: str = "opencv" @@ -725,6 +726,11 @@ environment_variables: dict[str, Callable[[], Any]] = { "VLLM_AUDIO_FETCH_TIMEOUT": lambda: int(os.getenv("VLLM_AUDIO_FETCH_TIMEOUT", "10")), + # Whether to allow HTTP redirects when fetching from media URLs. + # Default to True + "VLLM_MEDIA_URL_ALLOW_REDIRECTS": + lambda: bool(int(os.getenv("VLLM_MEDIA_URL_ALLOW_REDIRECTS", "1"))), + # Max number of workers for the thread pool handling # media bytes loading. Set to 1 to disable parallel processing. # Default is 8 diff --git a/vllm/multimodal/utils.py b/vllm/multimodal/utils.py index 1f1eea6bfee7..bab12fd1681a 100644 --- a/vllm/multimodal/utils.py +++ b/vllm/multimodal/utils.py @@ -140,7 +140,11 @@ class MediaConnector: self._assert_url_in_allowed_media_domains(url_spec) connection = self.connection - data = connection.get_bytes(url, timeout=fetch_timeout) + data = connection.get_bytes( + url, + timeout=fetch_timeout, + allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS, + ) return media_io.load_bytes(data) @@ -167,7 +171,11 @@ class MediaConnector: self._assert_url_in_allowed_media_domains(url_spec) connection = self.connection - data = await connection.async_get_bytes(url, timeout=fetch_timeout) + data = await connection.async_get_bytes( + url, + timeout=fetch_timeout, + allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS, + ) future = loop.run_in_executor(global_thread_pool, media_io.load_bytes, data) return await future