From 71c4b40562eb308d5fd93091373e4f913463a9eb Mon Sep 17 00:00:00 2001
From: youkaichao
Date: Tue, 4 Mar 2025 18:54:19 +0800
Subject: [PATCH] [sleep mode] error out with expandable_segments (#14189)

Signed-off-by: youkaichao
---
 vllm/device_allocator/cumem.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/vllm/device_allocator/cumem.py b/vllm/device_allocator/cumem.py
index 7f63fc1437872..0291fd9e1c88f 100644
--- a/vllm/device_allocator/cumem.py
+++ b/vllm/device_allocator/cumem.py
@@ -8,6 +8,7 @@
 # not sure why, they are created from a different context.
 # the only successful approach is to call cuda driver API in C.
 import dataclasses
+import os
 from contextlib import contextmanager
 from typing import Any, Callable, Dict, Optional, Tuple, Union
 
@@ -140,6 +141,12 @@ class CuMemAllocator:
         return CuMemAllocator.instance
 
     def __init__(self):
+        conf = os.environ.get("PYTORCH_CUDA_ALLOC_CONF", "")
+        assert "expandable_segments:True" not in conf, \
+            ("Expandable segments are not compatible with memory pool. "
+            "Please track https://github.com/pytorch/pytorch/issues/147851 "
+            "for the latest updates.")
+
         self.pointer_to_data: Dict[int, AllocationData] = {}
         self.current_tag: str = CuMemAllocator.default_tag
         self.allocator_and_pools: Dict[str, Any] = {}