Limit HTTP header count and size (#23267)

Signed-off-by: Taneem Ibrahim <taneem.ibrahim@gmail.com>
Signed-off-by: Russell Bryant <rbryant@redhat.com>
Co-authored-by: Taneem Ibrahim <taneem.ibrahim@gmail.com>
This commit is contained in:
Russell Bryant 2025-08-20 13:57:37 -04:00 committed by GitHub
parent c4477f55e5
commit f77a0802b7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 41 additions and 0 deletions

View File

@ -0,0 +1,10 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Shared constants for vLLM entrypoints.
"""
# HTTP header limits for h11 parser
# These constants help mitigate header abuse attacks
H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT = 4194304 # 4 MB
H11_MAX_HEADER_COUNT_DEFAULT = 256

View File

@ -14,6 +14,8 @@ from vllm import envs
from vllm.engine.async_llm_engine import AsyncEngineDeadError from vllm.engine.async_llm_engine import AsyncEngineDeadError
from vllm.engine.multiprocessing import MQEngineDeadError from vllm.engine.multiprocessing import MQEngineDeadError
from vllm.engine.protocol import EngineClient from vllm.engine.protocol import EngineClient
from vllm.entrypoints.constants import (H11_MAX_HEADER_COUNT_DEFAULT,
H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT)
from vllm.entrypoints.ssl import SSLCertRefresher from vllm.entrypoints.ssl import SSLCertRefresher
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.utils import find_process_using_port from vllm.utils import find_process_using_port
@ -26,6 +28,11 @@ async def serve_http(app: FastAPI,
sock: Optional[socket.socket], sock: Optional[socket.socket],
enable_ssl_refresh: bool = False, enable_ssl_refresh: bool = False,
**uvicorn_kwargs: Any): **uvicorn_kwargs: Any):
"""
Start a FastAPI app using Uvicorn, with support for custom Uvicorn config
options. Supports http header limits via h11_max_incomplete_event_size and
h11_max_header_count.
"""
logger.info("Available routes are:") logger.info("Available routes are:")
for route in app.routes: for route in app.routes:
methods = getattr(route, "methods", None) methods = getattr(route, "methods", None)
@ -36,7 +43,21 @@ async def serve_http(app: FastAPI,
logger.info("Route: %s, Methods: %s", path, ', '.join(methods)) logger.info("Route: %s, Methods: %s", path, ', '.join(methods))
# Extract header limit options if present
h11_max_incomplete_event_size = uvicorn_kwargs.pop(
"h11_max_incomplete_event_size", None)
h11_max_header_count = uvicorn_kwargs.pop("h11_max_header_count", None)
# Set safe defaults if not provided
if h11_max_incomplete_event_size is None:
h11_max_incomplete_event_size = H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT
if h11_max_header_count is None:
h11_max_header_count = H11_MAX_HEADER_COUNT_DEFAULT
config = uvicorn.Config(app, **uvicorn_kwargs) config = uvicorn.Config(app, **uvicorn_kwargs)
# Set header limits
config.h11_max_incomplete_event_size = h11_max_incomplete_event_size
config.h11_max_header_count = h11_max_header_count
config.load() config.load()
server = uvicorn.Server(config) server = uvicorn.Server(config)
_add_shutdown_handlers(app, server) _add_shutdown_handlers(app, server)

View File

@ -1922,6 +1922,8 @@ async def run_server_worker(listen_address,
ssl_certfile=args.ssl_certfile, ssl_certfile=args.ssl_certfile,
ssl_ca_certs=args.ssl_ca_certs, ssl_ca_certs=args.ssl_ca_certs,
ssl_cert_reqs=args.ssl_cert_reqs, ssl_cert_reqs=args.ssl_cert_reqs,
h11_max_incomplete_event_size=args.h11_max_incomplete_event_size,
h11_max_header_count=args.h11_max_header_count,
**uvicorn_kwargs, **uvicorn_kwargs,
) )

View File

@ -20,6 +20,8 @@ from vllm.config import config
from vllm.engine.arg_utils import AsyncEngineArgs, optional_type from vllm.engine.arg_utils import AsyncEngineArgs, optional_type
from vllm.entrypoints.chat_utils import (ChatTemplateContentFormatOption, from vllm.entrypoints.chat_utils import (ChatTemplateContentFormatOption,
validate_chat_template) validate_chat_template)
from vllm.entrypoints.constants import (H11_MAX_HEADER_COUNT_DEFAULT,
H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT)
from vllm.entrypoints.openai.serving_models import LoRAModulePath from vllm.entrypoints.openai.serving_models import LoRAModulePath
from vllm.entrypoints.openai.tool_parsers import ToolParserManager from vllm.entrypoints.openai.tool_parsers import ToolParserManager
from vllm.logger import init_logger from vllm.logger import init_logger
@ -172,6 +174,12 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
enable_log_outputs: bool = False enable_log_outputs: bool = False
"""If set to True, enable logging of model outputs (generations) """If set to True, enable logging of model outputs (generations)
in addition to the input logging that is enabled by default.""" in addition to the input logging that is enabled by default."""
h11_max_incomplete_event_size: int = H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT
"""Maximum size (bytes) of an incomplete HTTP event (header or body) for
h11 parser. Helps mitigate header abuse. Default: 4194304 (4 MB)."""
h11_max_header_count: int = H11_MAX_HEADER_COUNT_DEFAULT
"""Maximum number of HTTP headers allowed in a request for h11 parser.
Helps mitigate header abuse. Default: 256."""
@staticmethod @staticmethod
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: