mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-11 01:45:01 +08:00
[Frontend] Expose do_log_stats interval to env (#22905)
Signed-off-by: Csrayz <jover@cmbchina.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
parent
e8b40c7fa2
commit
a0632a3e03
@ -35,6 +35,7 @@ You can check if this is happening by trying the old defaults with `--generation
|
|||||||
If other strategies don't solve the problem, it's likely that the vLLM instance is stuck somewhere. You can use the following environment variables to help debug the issue:
|
If other strategies don't solve the problem, it's likely that the vLLM instance is stuck somewhere. You can use the following environment variables to help debug the issue:
|
||||||
|
|
||||||
- `export VLLM_LOGGING_LEVEL=DEBUG` to turn on more logging.
|
- `export VLLM_LOGGING_LEVEL=DEBUG` to turn on more logging.
|
||||||
|
- `export VLLM_LOG_STATS_INTERVAL=1.` to log statistics every second instead of the default 10 seconds, which helps track the running queue, waiting queue, and cache hit state.
|
||||||
- `export CUDA_LAUNCH_BLOCKING=1` to identify which CUDA kernel is causing the problem.
|
- `export CUDA_LAUNCH_BLOCKING=1` to identify which CUDA kernel is causing the problem.
|
||||||
- `export NCCL_DEBUG=TRACE` to turn on more logging for NCCL.
|
- `export NCCL_DEBUG=TRACE` to turn on more logging for NCCL.
|
||||||
- `export VLLM_TRACE_FUNCTION=1` to record all function calls for inspection in the log files to tell which function crashes or hangs. Do not use this flag unless absolutely needed for debugging, as it will cause significant delays in startup time.
|
- `export VLLM_TRACE_FUNCTION=1` to record all function calls for inspection in the log files to tell which function crashes or hangs. Do not use this flag unless absolutely needed for debugging, as it will cause significant delays in startup time.
|
||||||
|
|||||||
@ -126,7 +126,7 @@ async def lifespan(app: FastAPI):
|
|||||||
|
|
||||||
async def _force_log():
|
async def _force_log():
|
||||||
while True:
|
while True:
|
||||||
await asyncio.sleep(10.)
|
await asyncio.sleep(envs.VLLM_LOG_STATS_INTERVAL)
|
||||||
await engine_client.do_log_stats()
|
await engine_client.do_log_stats()
|
||||||
|
|
||||||
task = asyncio.create_task(_force_log())
|
task = asyncio.create_task(_force_log())
|
||||||
|
|||||||
@ -38,6 +38,7 @@ if TYPE_CHECKING:
|
|||||||
VLLM_LOGGING_PREFIX: str = ""
|
VLLM_LOGGING_PREFIX: str = ""
|
||||||
VLLM_LOGGING_CONFIG_PATH: Optional[str] = None
|
VLLM_LOGGING_CONFIG_PATH: Optional[str] = None
|
||||||
VLLM_LOGITS_PROCESSOR_THREADS: Optional[int] = None
|
VLLM_LOGITS_PROCESSOR_THREADS: Optional[int] = None
|
||||||
|
VLLM_LOG_STATS_INTERVAL: float = 10.
|
||||||
VLLM_TRACE_FUNCTION: int = 0
|
VLLM_TRACE_FUNCTION: int = 0
|
||||||
VLLM_ATTENTION_BACKEND: Optional[str] = None
|
VLLM_ATTENTION_BACKEND: Optional[str] = None
|
||||||
VLLM_USE_FLASHINFER_SAMPLER: Optional[bool] = None
|
VLLM_USE_FLASHINFER_SAMPLER: Optional[bool] = None
|
||||||
@ -436,6 +437,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
|||||||
lambda: int(os.getenv("VLLM_LOGITS_PROCESSOR_THREADS", "0"))
|
lambda: int(os.getenv("VLLM_LOGITS_PROCESSOR_THREADS", "0"))
|
||||||
if "VLLM_LOGITS_PROCESSOR_THREADS" in os.environ else None,
|
if "VLLM_LOGITS_PROCESSOR_THREADS" in os.environ else None,
|
||||||
|
|
||||||
|
# If set to a positive value, vllm will log stats at this interval in seconds.
|
||||||
|
# If not set (or not positive), vllm will log stats every 10 seconds.
|
||||||
|
"VLLM_LOG_STATS_INTERVAL":
|
||||||
|
lambda: val if (val := float(os.getenv("VLLM_LOG_STATS_INTERVAL", "10.")))
|
||||||
|
> 0. else 10.,
|
||||||
|
|
||||||
# Trace function calls
|
# Trace function calls
|
||||||
# If set to 1, vllm will trace function calls
|
# If set to 1, vllm will trace function calls
|
||||||
# Useful for debugging
|
# Useful for debugging
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user