From 71c4b40562eb308d5fd93091373e4f913463a9eb Mon Sep 17 00:00:00 2001
From: youkaichao <youkaichao@gmail.com>
Date: Tue, 4 Mar 2025 18:54:19 +0800
Subject: [PATCH] [sleep mode] error out with expandable_segments (#14189)

Signed-off-by: youkaichao <youkaichao@gmail.com>
---
 vllm/device_allocator/cumem.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/vllm/device_allocator/cumem.py b/vllm/device_allocator/cumem.py
index 7f63fc1437872..0291fd9e1c88f 100644
--- a/vllm/device_allocator/cumem.py
+++ b/vllm/device_allocator/cumem.py
@@ -8,6 +8,7 @@
 # not sure why, they are created from a different context.
 # the only successful approach is to call cuda driver API in C.
 import dataclasses
+import os
 from contextlib import contextmanager
 from typing import Any, Callable, Dict, Optional, Tuple, Union
 
@@ -140,6 +141,12 @@ class CuMemAllocator:
         return CuMemAllocator.instance
 
     def __init__(self):
+        conf = os.environ.get("PYTORCH_CUDA_ALLOC_CONF", "")
+        assert "expandable_segments:True" not in conf, \
+            ("Expandable segments are not compatible with memory pool. "
+            "Please track https://github.com/pytorch/pytorch/issues/147851 "
+            "for the latest updates.")
+
         self.pointer_to_data: Dict[int, AllocationData] = {}
         self.current_tag: str = CuMemAllocator.default_tag
         self.allocator_and_pools: Dict[str, Any] = {}