From b9968373c3d95db7abc58f2c71b8b136373a4779 Mon Sep 17 00:00:00 2001
From: Kelvin Velasquez
Date: Tue, 23 Dec 2025 10:30:20 -0600
Subject: [PATCH] fix(core): break circular reference in Request using weakref to prevent memory leak

Signed-off-by: Kelvin Velasquez
---
 vllm/v1/request.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/vllm/v1/request.py b/vllm/v1/request.py
index f33059b80b894..91f97cb828a94 100644
--- a/vllm/v1/request.py
+++ b/vllm/v1/request.py
@@ -3,6 +3,7 @@
 
 import enum
 import time
+import weakref
 from collections.abc import Callable, Mapping
 from functools import partial
 from typing import TYPE_CHECKING, Any, Optional
@@ -132,7 +133,9 @@ class Request:
         self.block_hashes: list[BlockHash] = []
         self.get_hash_new_full_blocks: Callable[[], list[BlockHash]] | None = None
         if block_hasher is not None:
-            self.get_hash_new_full_blocks = partial(block_hasher, self)
+            # Use weakref to avoid circular reference: Request -> partial -> Request
+            # This allows immediate reclamation by refcounting without waiting for GC.
+            self.get_hash_new_full_blocks = partial(block_hasher, weakref.proxy(self))
             self.block_hashes = self.get_hash_new_full_blocks()
 
         self.skip_reading_prefix_cache = self.get_skip_reading_prefix_cache()