mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 15:06:58 +08:00
133 lines
4.3 KiB
Python
133 lines
4.3 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
import gc
|
|
import json
|
|
import time
|
|
from collections import Counter
|
|
from contextlib import suppress
|
|
from typing import Any
|
|
|
|
import vllm.envs as envs
|
|
from vllm.logger import init_logger
|
|
|
|
logger = init_logger(__name__)
|
|
|
|
|
|
class GCDebugConfig:
|
|
"""
|
|
Config for GC Debugger.
|
|
- 0: disable GC debugger
|
|
- 1: enable GC debugger with gc.collect elpased times
|
|
- '{"top_objects":5}': enable GC debugger with top 5 collected objects
|
|
"""
|
|
|
|
def __init__(self, gc_debug_conf: str | None = None) -> None:
|
|
self.enabled: bool = False
|
|
self.top_objects: int = -1
|
|
|
|
if not gc_debug_conf or gc_debug_conf == "0":
|
|
pass
|
|
elif gc_debug_conf == "1":
|
|
self.enabled = True
|
|
else:
|
|
try:
|
|
json_conf = json.loads(gc_debug_conf)
|
|
self.enabled = True
|
|
self.top_objects = json_conf.get("top_objects", -1)
|
|
except Exception:
|
|
self.enabled = False
|
|
logger.error("Failed to parse VLLM_GC_DEBUG(%s)", envs.VLLM_GC_DEBUG)
|
|
logger.debug("GC Debug Config. %s", str(self))
|
|
|
|
def __repr__(self) -> str:
|
|
return f"enabled:{self.enabled},top_objects:{self.top_objects}"
|
|
|
|
|
|
class GCDebugger:
    """
    Debugger for GC which logs helpful information for GC understanding.

    To enable, you should call maybe_attach_gc_debug_callback in the process.
    """

    def __init__(self, config: GCDebugConfig) -> None:
        self.config = config
        # Start time of the current GC cycle, in nanoseconds
        # (taken from time.monotonic_ns()).
        self.start_time_ns: int = time.monotonic_ns()
        # Per-type summary of the objects in the generation being collected;
        # only computed when config.top_objects is positive.
        self.gc_top_collected_objects: str = ""

    def handle(self, phase: str, info: dict[str, int]) -> None:
        """
        Handles a GC event (e.g. GC start or GC finish).

        Args:
            phase: "start" or "stop"; any other value is ignored.
            info: GC event details. Events without a "generation" entry
                are ignored; "collected" is reported on "stop" if present.
        """
        generation = info.get("generation")
        if generation is None:
            return
        if phase == "start":
            if self.config.top_objects > 0:
                # Snapshot the generation only when the per-type report was
                # requested: gc.get_objects() builds a list of every tracked
                # object, which is wasted work otherwise.
                self.gc_top_collected_objects = _compute_top_gc_collected_objects(
                    gc.get_objects(generation), self.config.top_objects
                )
            else:
                self.gc_top_collected_objects = ""
            # Take the timestamp last so the snapshot above is not counted
            # as part of the GC duration logged on "stop".
            self.start_time_ns = time.monotonic_ns()
        elif phase == "stop":
            # After GC finished, record GC elapsed time and
            # optionally top collected objects
            elapsed_ms = (time.monotonic_ns() - self.start_time_ns) / 1e6
            logger.info(
                "GC took %.3fms to complete. "
                "Collected %s objects in GC generation %d.%s",
                elapsed_ms,
                str(info.get("collected", "?")),
                generation,
                (
                    f" Top collected objects: \n{self.gc_top_collected_objects}"
                    if self.gc_top_collected_objects
                    else ""
                ),
            )
|
|
|
|
|
|
def maybe_attach_gc_debug_callback() -> None:
    """
    Register a GC debug callback when VLLM_GC_DEBUG enables the debugger.

    A GCDebugConfig is built from envs.VLLM_GC_DEBUG. If it is not enabled
    nothing happens; otherwise a callback that forwards each GC event to a
    GCDebugger is appended to gc.callbacks.
    """
    gc_debug_config = GCDebugConfig(envs.VLLM_GC_DEBUG)
    if not gc_debug_config.enabled:
        return

    gc_debugger: GCDebugger = GCDebugger(gc_debug_config)

    def gc_callback(phase: str, info: dict[str, int]) -> None:
        # Thin adapter between the gc.callbacks signature and the debugger.
        gc_debugger.handle(phase, info)

    gc.callbacks.append(gc_callback)
|
|
|
|
|
|
def _compute_detailed_type(o: Any) -> str:
|
|
"""
|
|
Detailed object type.
|
|
|
|
TODO(Jialin): Further enhance the detailed type with element types for
|
|
easier debugging. We tried but occasionally it would run into signals
|
|
which kills the engine.
|
|
"""
|
|
size_str: str = ""
|
|
# Object doesn't support len() - this can happen with type objects
|
|
# or other objects that don't implement __len__ properly
|
|
with suppress(Exception):
|
|
size_str = f"(size:{len(o)})"
|
|
return f"{str(type(o))}{size_str}"
|
|
|
|
|
|
def _compute_top_gc_collected_objects(objects: list[Any], top: int) -> str:
|
|
"""
|
|
Group collected objects by types.
|
|
"""
|
|
if top <= 0:
|
|
return ""
|
|
object_types = [_compute_detailed_type(o) for o in objects]
|
|
return "\n".join(
|
|
f"{count:>5}:{object_type}"
|
|
for object_type, count in Counter(object_types).most_common(top)
|
|
)
|