mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-29 06:34:32 +08:00
[2/N] API Server: Avoid ulimit footgun (#11530)
This commit is contained in:
parent
2072924d14
commit
55fb97f7bd
@ -21,7 +21,7 @@ from vllm.entrypoints.utils import with_cancellation
|
||||
from vllm.logger import init_logger
|
||||
from vllm.sampling_params import SamplingParams
|
||||
from vllm.usage.usage_lib import UsageContext
|
||||
from vllm.utils import FlexibleArgumentParser, random_uuid
|
||||
from vllm.utils import FlexibleArgumentParser, random_uuid, set_ulimit
|
||||
from vllm.version import __version__ as VLLM_VERSION
|
||||
|
||||
logger = init_logger("vllm.entrypoints.api_server")
|
||||
@ -119,6 +119,8 @@ async def run_server(args: Namespace,
|
||||
logger.info("vLLM API server version %s", VLLM_VERSION)
|
||||
logger.info("args: %s", args)
|
||||
|
||||
set_ulimit()
|
||||
|
||||
app = await init_app(args, llm_engine)
|
||||
assert engine is not None
|
||||
|
||||
|
||||
@ -68,7 +68,7 @@ from vllm.entrypoints.utils import with_cancellation
|
||||
from vllm.logger import init_logger
|
||||
from vllm.usage.usage_lib import UsageContext
|
||||
from vllm.utils import (FlexibleArgumentParser, get_open_zmq_ipc_path,
|
||||
is_valid_ipv6_address)
|
||||
is_valid_ipv6_address, set_ulimit)
|
||||
from vllm.version import __version__ as VLLM_VERSION
|
||||
|
||||
TIMEOUT_KEEP_ALIVE = 5 # seconds
|
||||
@ -727,6 +727,10 @@ async def run_server(args, **uvicorn_kwargs) -> None:
|
||||
sock_addr = (args.host or "", args.port)
|
||||
sock = create_server_socket(sock_addr)
|
||||
|
||||
# workaround to avoid footguns where uvicorn drops requests with too
|
||||
# many concurrent requests active
|
||||
set_ulimit()
|
||||
|
||||
def signal_handler(*_) -> None:
|
||||
# Interrupt server on sigterm while initializing
|
||||
raise KeyboardInterrupt("terminated")
|
||||
|
||||
@ -12,6 +12,7 @@ import inspect
|
||||
import ipaddress
|
||||
import os
|
||||
import re
|
||||
import resource
|
||||
import signal
|
||||
import socket
|
||||
import subprocess
|
||||
@ -1818,3 +1819,20 @@ def memory_profiling(
|
||||
result.non_torch_increase_in_bytes = current_cuda_memory_bytes - baseline_memory_in_bytes - weights_memory_in_bytes - diff.torch_memory_in_bytes # noqa
|
||||
result.profile_time = diff.timestamp
|
||||
result.non_kv_cache_memory_in_bytes = result.non_torch_increase_in_bytes + result.torch_peak_increase_in_bytes + result.weights_memory_in_bytes # noqa
|
||||
|
||||
|
||||
# Adapted from: https://github.com/sgl-project/sglang/blob/f46f394f4d4dbe4aae85403dec006199b34d2840/python/sglang/srt/utils.py#L630 # noqa: E501
|
||||
def set_ulimit(target_soft_limit=65535):
    """Best-effort raise of this process's soft open-file limit.

    Reads the current ``RLIMIT_NOFILE`` soft and hard limits and, when the
    soft limit is below ``target_soft_limit``, tries to raise the soft limit
    to that value while leaving the hard limit unchanged. A failure to raise
    the limit is logged as a warning and never propagated to the caller.

    Args:
        target_soft_limit: Desired minimum soft limit on the number of open
            file descriptors.
    """
    resource_type = resource.RLIMIT_NOFILE
    current_soft, current_hard = resource.getrlimit(resource_type)

    # The numeric value of RLIM_INFINITY is platform-dependent and may be
    # negative, so a plain "<" comparison could mistake "unlimited" for a
    # tiny limit and end up lowering it. Never touch an unlimited soft limit.
    if current_soft == resource.RLIM_INFINITY:
        return

    # Already high enough: nothing to do.
    if current_soft >= target_soft_limit:
        return

    try:
        resource.setrlimit(resource_type, (target_soft_limit, current_hard))
    except (ValueError, OSError) as e:
        # ValueError: e.g. the requested soft limit exceeds the hard limit.
        # OSError: the underlying system call itself failed.
        # Either way this helper is best-effort, so warn and carry on.
        logger.warning(
            "Found ulimit of %s and failed to automatically increase "
            "with error %s. This can cause fd limit errors like "
            "`OSError: [Errno 24] Too many open files`. Consider "
            "increasing with ulimit -n", current_soft, e)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user