mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-28 01:25:18 +08:00
[Core] GC Debug callback (#24829)
Signed-off-by: Jialin Ouyang <jialino@meta.com> Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com> Co-authored-by: Jialin Ouyang <jialino@meta.com> Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent
942fba3823
commit
04384cb9da
@ -208,3 +208,11 @@ One example is [snakeviz](https://jiffyclub.github.io/snakeviz/).
|
||||
pip install snakeviz
|
||||
snakeviz expensive_function.prof
|
||||
```
|
||||
|
||||
### Analyzing Garbage Collection Costs
|
||||
|
||||
Leverage VLLM_GC_DEBUG environment variable to debug GC costs.
|
||||
|
||||
- VLLM_GC_DEBUG=1: enable GC debugger with gc.collect elpased times
|
||||
- VLLM_GC_DEBUG='{"top_objects":5}': enable GC debugger to log top 5
|
||||
collected objects for each gc.collect
|
||||
|
||||
69
tests/utils_/test_gc_utils.py
Normal file
69
tests/utils_/test_gc_utils.py
Normal file
@ -0,0 +1,69 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
from vllm.utils.gc_utils import (GCDebugConfig, _compute_detailed_type,
|
||||
_compute_top_gc_collected_objects)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Normal:
|
||||
v: int
|
||||
|
||||
|
||||
@dataclass
|
||||
class ListWrapper:
|
||||
vs: list[int]
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self.vs)
|
||||
|
||||
|
||||
def test_compute_detailed_type():
|
||||
assert _compute_detailed_type(
|
||||
Normal(v=8)) == "<class 'tests.utils_.test_gc_utils.Normal'>"
|
||||
|
||||
assert _compute_detailed_type([1, 2, 3]) == "<class 'list'>(size:3)"
|
||||
assert _compute_detailed_type({4, 5}) == "<class 'set'>(size:2)"
|
||||
assert _compute_detailed_type({6: 7}) == "<class 'dict'>(size:1)"
|
||||
assert _compute_detailed_type(ListWrapper(
|
||||
vs=[])) == "<class 'tests.utils_.test_gc_utils.ListWrapper'>(size:0)"
|
||||
|
||||
|
||||
def test_compute_top_gc_collected_objects():
|
||||
objects: list[Any] = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12],
|
||||
{13, 14}, {
|
||||
15: 16,
|
||||
17: 18
|
||||
},
|
||||
Normal(v=19),
|
||||
Normal(v=20),
|
||||
Normal(v=21)]
|
||||
assert _compute_top_gc_collected_objects(objects, top=-1) == ""
|
||||
assert _compute_top_gc_collected_objects(objects, top=0) == ""
|
||||
assert _compute_top_gc_collected_objects(
|
||||
objects, top=1) == " 4:<class 'list'>(size:3)"
|
||||
assert _compute_top_gc_collected_objects(objects, top=2) == "\n".join([
|
||||
" 4:<class 'list'>(size:3)",
|
||||
" 3:<class 'tests.utils_.test_gc_utils.Normal'>"
|
||||
])
|
||||
assert _compute_top_gc_collected_objects(objects, top=3) == "\n".join([
|
||||
" 4:<class 'list'>(size:3)",
|
||||
" 3:<class 'tests.utils_.test_gc_utils.Normal'>",
|
||||
" 1:<class 'set'>(size:2)"
|
||||
])
|
||||
|
||||
|
||||
def test_gc_debug_config():
|
||||
assert not GCDebugConfig(None).enabled
|
||||
assert not GCDebugConfig("").enabled
|
||||
assert not GCDebugConfig("0").enabled
|
||||
|
||||
config = GCDebugConfig("1")
|
||||
assert config.enabled
|
||||
assert config.top_objects == -1
|
||||
|
||||
config = GCDebugConfig("{\"top_objects\":5}")
|
||||
assert config.enabled
|
||||
assert config.top_objects == 5
|
||||
@ -205,6 +205,7 @@ if TYPE_CHECKING:
|
||||
VLLM_USE_NCCL_SYMM_MEM: bool = False
|
||||
VLLM_NCCL_INCLUDE_PATH: Optional[str] = None
|
||||
VLLM_USE_FBGEMM: bool = False
|
||||
VLLM_GC_DEBUG: str = ""
|
||||
|
||||
|
||||
def get_default_cache_root():
|
||||
@ -1475,6 +1476,13 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
||||
lambda: os.environ.get("VLLM_NCCL_INCLUDE_PATH", None),
|
||||
# Flag to enable FBGemm kernels on model execution
|
||||
"VLLM_USE_FBGEMM": lambda: bool(int(os.getenv("VLLM_USE_FBGEMM", "0"))),
|
||||
|
||||
# GC debug config
|
||||
# - VLLM_GC_DEBUG=0: disable GC debugger
|
||||
# - VLLM_GC_DEBUG=1: enable GC debugger with gc.collect elpased times
|
||||
# - VLLM_GC_DEBUG='{"top_objects":5}': enable GC debugger with
|
||||
# top 5 collected objects
|
||||
"VLLM_GC_DEBUG": lambda: os.getenv("VLLM_GC_DEBUG", ""),
|
||||
}
|
||||
|
||||
# --8<-- [end:env-vars-definition]
|
||||
|
||||
128
vllm/utils/gc_utils.py
Normal file
128
vllm/utils/gc_utils.py
Normal file
@ -0,0 +1,128 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import gc
|
||||
import json
|
||||
import time
|
||||
from collections import Counter
|
||||
from contextlib import suppress
|
||||
from typing import Any, Optional
|
||||
|
||||
from vllm.envs import VLLM_GC_DEBUG
|
||||
from vllm.logger import init_logger
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
class GCDebugConfig:
|
||||
"""
|
||||
Config for GC Debugger.
|
||||
- 0: disable GC debugger
|
||||
- 1: enable GC debugger with gc.collect elpased times
|
||||
- '{"top_objects":5}': enable GC debugger with top 5 collected objects
|
||||
"""
|
||||
|
||||
def __init__(self, gc_debug_conf: Optional[str] = None) -> None:
|
||||
self.enabled: bool = False
|
||||
self.top_objects: int = -1
|
||||
|
||||
if not gc_debug_conf or gc_debug_conf == "0":
|
||||
pass
|
||||
elif gc_debug_conf == "1":
|
||||
self.enabled = True
|
||||
else:
|
||||
try:
|
||||
json_conf = json.loads(gc_debug_conf)
|
||||
self.enabled = True
|
||||
self.top_objects = json_conf.get("top_objects", -1)
|
||||
except Exception:
|
||||
self.enabled = False
|
||||
logger.error("Failed to parse VLLM_GC_DEBUG(%s)",
|
||||
VLLM_GC_DEBUG)
|
||||
logger.info("GC Debug Config. %s", str(self))
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"enabled:{self.enabled},top_objects:{self.top_objects}"
|
||||
|
||||
|
||||
class GCDebugger:
|
||||
"""
|
||||
Debugger for GC which logs helpful information for GC understanding.
|
||||
To enable, you should call maybe_attach_gc_debug_callback in the process.
|
||||
"""
|
||||
|
||||
def __init__(self, config: GCDebugConfig) -> None:
|
||||
self.config = config
|
||||
# Start time in micro second of this GC cycle
|
||||
self.start_time_ns: int = time.monotonic_ns()
|
||||
# If config.top_objects is positive,
|
||||
# compute top collected objects by object types
|
||||
self.gc_top_collected_objects: str = ""
|
||||
|
||||
def handle(self, phase: str, info: dict[str, int]) -> None:
|
||||
"""
|
||||
Handles a GC event (e.g. GC start or GC finish)
|
||||
"""
|
||||
generation = info.get("generation")
|
||||
if generation is None:
|
||||
return
|
||||
if phase == "start":
|
||||
# Before GC started, record GC start time
|
||||
# and top collected objects
|
||||
self.start_time_ns = time.monotonic_ns()
|
||||
self.gc_top_collected_objects = _compute_top_gc_collected_objects(
|
||||
gc.get_objects(generation), self.config.top_objects)
|
||||
elif phase == "stop":
|
||||
# After GC finished, Record GC elapsed time and
|
||||
# optionally top collected objects
|
||||
elpased_ms = (time.monotonic_ns() - self.start_time_ns) / 1e6
|
||||
logger.info(
|
||||
"GC took %.3fms to complete. "
|
||||
"Collected %s objects in GC generation %d.%s",
|
||||
elpased_ms,
|
||||
str(info.get("collected", "?")),
|
||||
generation,
|
||||
(f" Top collected objects: \n{self.gc_top_collected_objects}"
|
||||
if self.gc_top_collected_objects else ""),
|
||||
)
|
||||
|
||||
|
||||
def maybe_attach_gc_debug_callback() -> None:
|
||||
"""
|
||||
Attached a callback for GC debug when VLLM_GC_DEBUG is enabled.
|
||||
"""
|
||||
config = GCDebugConfig(VLLM_GC_DEBUG)
|
||||
if config.enabled:
|
||||
debugger: GCDebugger = GCDebugger(config)
|
||||
|
||||
def gc_callback(phase: str, info: dict[str, int]) -> None:
|
||||
debugger.handle(phase, info)
|
||||
|
||||
gc.callbacks.append(gc_callback)
|
||||
|
||||
|
||||
def _compute_detailed_type(o: Any) -> str:
|
||||
"""
|
||||
Detailed object type.
|
||||
|
||||
TODO(Jialin): Further enhance the detailed type with element types for
|
||||
easier debugging. We tried but occasionally it would run into signals
|
||||
which kills the engine.
|
||||
"""
|
||||
size_str: str = ""
|
||||
# Object doesn't support len() - this can happen with type objects
|
||||
# or other objects that don't implement __len__ properly
|
||||
with suppress(Exception):
|
||||
size_str = f"(size:{len(o)})"
|
||||
return f"{str(type(o))}{size_str}"
|
||||
|
||||
|
||||
def _compute_top_gc_collected_objects(objects: list[Any], top: int) -> str:
|
||||
"""
|
||||
Group collected objects by types.
|
||||
"""
|
||||
if top <= 0:
|
||||
return ""
|
||||
object_types = [_compute_detailed_type(o) for o in objects]
|
||||
return "\n".join(
|
||||
f"{count:>5}:{object_type}"
|
||||
for object_type, count in Counter(object_types).most_common(top))
|
||||
@ -29,6 +29,7 @@ from vllm.transformers_utils.config import (
|
||||
maybe_register_config_serialize_by_value)
|
||||
from vllm.utils import (decorate_logs, get_hash_fn_by_name, make_zmq_socket,
|
||||
resolve_obj_by_qualname, set_process_title)
|
||||
from vllm.utils.gc_utils import maybe_attach_gc_debug_callback
|
||||
from vllm.v1.core.kv_cache_utils import (BlockHash,
|
||||
generate_scheduler_kv_cache_config,
|
||||
get_kv_cache_configs,
|
||||
@ -532,6 +533,9 @@ class EngineCoreProc(EngineCore):
|
||||
gc.collect()
|
||||
gc.freeze()
|
||||
|
||||
# If enable, attach GC debugger after static variable freeze.
|
||||
maybe_attach_gc_debug_callback()
|
||||
|
||||
@contextmanager
|
||||
def _perform_handshakes(
|
||||
self,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user