diff --git a/docs/contributing/profiling.md b/docs/contributing/profiling.md index 5b83d93274f0d..a1b7927a95d16 100644 --- a/docs/contributing/profiling.md +++ b/docs/contributing/profiling.md @@ -208,3 +208,11 @@ One example is [snakeviz](https://jiffyclub.github.io/snakeviz/). pip install snakeviz snakeviz expensive_function.prof ``` + +### Analyzing Garbage Collection Costs + +Leverage VLLM_GC_DEBUG environment variable to debug GC costs. + +- VLLM_GC_DEBUG=1: enable GC debugger with gc.collect elpased times +- VLLM_GC_DEBUG='{"top_objects":5}': enable GC debugger to log top 5 + collected objects for each gc.collect diff --git a/tests/utils_/test_gc_utils.py b/tests/utils_/test_gc_utils.py new file mode 100644 index 0000000000000..265761b069ca7 --- /dev/null +++ b/tests/utils_/test_gc_utils.py @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from dataclasses import dataclass +from typing import Any + +from vllm.utils.gc_utils import (GCDebugConfig, _compute_detailed_type, + _compute_top_gc_collected_objects) + + +@dataclass +class Normal: + v: int + + +@dataclass +class ListWrapper: + vs: list[int] + + def __len__(self) -> int: + return len(self.vs) + + +def test_compute_detailed_type(): + assert _compute_detailed_type( + Normal(v=8)) == "" + + assert _compute_detailed_type([1, 2, 3]) == "(size:3)" + assert _compute_detailed_type({4, 5}) == "(size:2)" + assert _compute_detailed_type({6: 7}) == "(size:1)" + assert _compute_detailed_type(ListWrapper( + vs=[])) == "(size:0)" + + +def test_compute_top_gc_collected_objects(): + objects: list[Any] = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], + {13, 14}, { + 15: 16, + 17: 18 + }, + Normal(v=19), + Normal(v=20), + Normal(v=21)] + assert _compute_top_gc_collected_objects(objects, top=-1) == "" + assert _compute_top_gc_collected_objects(objects, top=0) == "" + assert _compute_top_gc_collected_objects( + objects, top=1) == " 4:(size:3)" + assert _compute_top_gc_collected_objects(objects, top=2) == "\n".join([ + " 4:(size:3)", + " 3:" + ]) + assert _compute_top_gc_collected_objects(objects, top=3) == "\n".join([ + " 4:(size:3)", + " 3:", + " 1:(size:2)" + ]) + + +def test_gc_debug_config(): + assert not GCDebugConfig(None).enabled + assert not GCDebugConfig("").enabled + assert not GCDebugConfig("0").enabled + + config = GCDebugConfig("1") + assert config.enabled + assert config.top_objects == -1 + + config = GCDebugConfig("{\"top_objects\":5}") + assert config.enabled + assert config.top_objects == 5 diff --git a/vllm/envs.py b/vllm/envs.py index 8543280443047..f06c860b8297c 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -205,6 +205,7 @@ if TYPE_CHECKING: VLLM_USE_NCCL_SYMM_MEM: bool = False VLLM_NCCL_INCLUDE_PATH: Optional[str] = None VLLM_USE_FBGEMM: bool = False + VLLM_GC_DEBUG: str = "" def get_default_cache_root(): @@ -1475,6 +1476,13 @@ environment_variables: dict[str, Callable[[], Any]] = { lambda: os.environ.get("VLLM_NCCL_INCLUDE_PATH", None), # Flag to enable FBGemm kernels on model execution "VLLM_USE_FBGEMM": lambda: bool(int(os.getenv("VLLM_USE_FBGEMM", "0"))), + + # GC debug config + # - VLLM_GC_DEBUG=0: disable GC debugger + # - VLLM_GC_DEBUG=1: enable GC debugger with gc.collect elpased times + # - VLLM_GC_DEBUG='{"top_objects":5}': enable GC debugger with + # top 5 collected objects + "VLLM_GC_DEBUG": lambda: os.getenv("VLLM_GC_DEBUG", ""), } # --8<-- [end:env-vars-definition] diff --git a/vllm/utils/gc_utils.py b/vllm/utils/gc_utils.py new file mode 100644 index 0000000000000..8ce2c200e2996 --- /dev/null +++ b/vllm/utils/gc_utils.py @@ -0,0 +1,128 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import gc +import json +import time +from collections import Counter +from contextlib import suppress +from typing import Any, Optional + +from vllm.envs import VLLM_GC_DEBUG +from vllm.logger import init_logger + +logger = init_logger(__name__) + + +class GCDebugConfig: + """ + Config for GC Debugger. + - 0: disable GC debugger + - 1: enable GC debugger with gc.collect elpased times + - '{"top_objects":5}': enable GC debugger with top 5 collected objects + """ + + def __init__(self, gc_debug_conf: Optional[str] = None) -> None: + self.enabled: bool = False + self.top_objects: int = -1 + + if not gc_debug_conf or gc_debug_conf == "0": + pass + elif gc_debug_conf == "1": + self.enabled = True + else: + try: + json_conf = json.loads(gc_debug_conf) + self.enabled = True + self.top_objects = json_conf.get("top_objects", -1) + except Exception: + self.enabled = False + logger.error("Failed to parse VLLM_GC_DEBUG(%s)", + VLLM_GC_DEBUG) + logger.info("GC Debug Config. %s", str(self)) + + def __repr__(self) -> str: + return f"enabled:{self.enabled},top_objects:{self.top_objects}" + + +class GCDebugger: + """ + Debugger for GC which logs helpful information for GC understanding. + To enable, you should call maybe_attach_gc_debug_callback in the process. + """ + + def __init__(self, config: GCDebugConfig) -> None: + self.config = config + # Start time in micro second of this GC cycle + self.start_time_ns: int = time.monotonic_ns() + # If config.top_objects is positive, + # compute top collected objects by object types + self.gc_top_collected_objects: str = "" + + def handle(self, phase: str, info: dict[str, int]) -> None: + """ + Handles a GC event (e.g. GC start or GC finish) + """ + generation = info.get("generation") + if generation is None: + return + if phase == "start": + # Before GC started, record GC start time + # and top collected objects + self.start_time_ns = time.monotonic_ns() + self.gc_top_collected_objects = _compute_top_gc_collected_objects( + gc.get_objects(generation), self.config.top_objects) + elif phase == "stop": + # After GC finished, Record GC elapsed time and + # optionally top collected objects + elpased_ms = (time.monotonic_ns() - self.start_time_ns) / 1e6 + logger.info( + "GC took %.3fms to complete. " + "Collected %s objects in GC generation %d.%s", + elpased_ms, + str(info.get("collected", "?")), + generation, + (f" Top collected objects: \n{self.gc_top_collected_objects}" + if self.gc_top_collected_objects else ""), + ) + + +def maybe_attach_gc_debug_callback() -> None: + """ + Attached a callback for GC debug when VLLM_GC_DEBUG is enabled. + """ + config = GCDebugConfig(VLLM_GC_DEBUG) + if config.enabled: + debugger: GCDebugger = GCDebugger(config) + + def gc_callback(phase: str, info: dict[str, int]) -> None: + debugger.handle(phase, info) + + gc.callbacks.append(gc_callback) + + +def _compute_detailed_type(o: Any) -> str: + """ + Detailed object type. + + TODO(Jialin): Further enhance the detailed type with element types for + easier debugging. We tried but occasionally it would run into signals + which kills the engine. + """ + size_str: str = "" + # Object doesn't support len() - this can happen with type objects + # or other objects that don't implement __len__ properly + with suppress(Exception): + size_str = f"(size:{len(o)})" + return f"{str(type(o))}{size_str}" + + +def _compute_top_gc_collected_objects(objects: list[Any], top: int) -> str: + """ + Group collected objects by types. + """ + if top <= 0: + return "" + object_types = [_compute_detailed_type(o) for o in objects] + return "\n".join( + f"{count:>5}:{object_type}" + for object_type, count in Counter(object_types).most_common(top)) diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index a43042a5510a8..3ee804f10c172 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -29,6 +29,7 @@ from vllm.transformers_utils.config import ( maybe_register_config_serialize_by_value) from vllm.utils import (decorate_logs, get_hash_fn_by_name, make_zmq_socket, resolve_obj_by_qualname, set_process_title) +from vllm.utils.gc_utils import maybe_attach_gc_debug_callback from vllm.v1.core.kv_cache_utils import (BlockHash, generate_scheduler_kv_cache_config, get_kv_cache_configs, @@ -532,6 +533,9 @@ class EngineCoreProc(EngineCore): gc.collect() gc.freeze() + # If enable, attach GC debugger after static variable freeze. + maybe_attach_gc_debug_callback() + @contextmanager def _perform_handshakes( self,