diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py index d61177d4245dd..f83a4f4faeb5e 100644 --- a/vllm/v1/worker/gpu_worker.py +++ b/vllm/v1/worker/gpu_worker.py @@ -292,7 +292,6 @@ class Worker(WorkerBase): allocator = CuMemAllocator.get_instance() context = allocator.use_memory_pool(tag="kv_cache") else: - from contextlib import nullcontext context = nullcontext() with context: self.model_runner.initialize_kv_cache(kv_cache_config) diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py index 7a01e585ba6d0..fc24d95b80f2c 100644 --- a/vllm/worker/worker.py +++ b/vllm/worker/worker.py @@ -3,6 +3,7 @@ """A GPU worker class.""" import gc import os +from contextlib import nullcontext from typing import Dict, List, Optional, Set, Tuple, Type, Union import torch @@ -206,7 +207,6 @@ class Worker(LocalOrDistributedWorkerBase): "used for one instance per process.") context = allocator.use_memory_pool(tag="weights") else: - from contextlib import nullcontext context = nullcontext() with context: self.model_runner.load_model() @@ -330,7 +330,6 @@ class Worker(LocalOrDistributedWorkerBase): allocator = CuMemAllocator.get_instance() context = allocator.use_memory_pool(tag="kv_cache") else: - from contextlib import nullcontext context = nullcontext() with context: self._init_cache_engine()