[sleep mode] error out with expandable_segments (#14189)

Signed-off-by: youkaichao <youkaichao@gmail.com>
2026-01-06 03:01:49 +08:00 · 2025-03-04 18:54:19 +08:00 · 2025-03-04 18:54:19 +08:00 · 71c4b40562
commit 71c4b40562
parent ac65bc92df
1 changed files with 7 additions and 0 deletions
--- a/vllm/device_allocator/cumem.py
+++ b/vllm/device_allocator/cumem.py
@@ -8,6 +8,7 @@
 # not sure why, they are created from a different context.
 # the only successful approach is to call cuda driver API in C.
 import dataclasses
+import os
 from contextlib import contextmanager
 from typing import Any, Callable, Dict, Optional, Tuple, Union

@@ -140,6 +141,12 @@ class CuMemAllocator:
        return CuMemAllocator.instance

    def __init__(self):
+        conf = os.environ.get("PYTORCH_CUDA_ALLOC_CONF", "")
+        assert "expandable_segments:True" not in conf, \
+            ("Expandable segments are not compatible with memory pool. "
+            "Please track https://github.com/pytorch/pytorch/issues/147851 "
+            "for the latest updates.")
+
        self.pointer_to_data: Dict[int, AllocationData] = {}
        self.current_tag: str = CuMemAllocator.default_tag
        self.allocator_and_pools: Dict[str, Any] = {}