From bef68163a0797f580abc889cd8cca81905145080 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Thu, 26 Dec 2024 10:48:29 -0800
Subject: [PATCH] Minor

Signed-off-by: Woosuk Kwon
---
 vllm/utils.py                     | 9 +++++++++
 vllm/v1/worker/gpu_block_table.py | 7 +++----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/vllm/utils.py b/vllm/utils.py
index b9f6889f14112..6e7e7be153531 100644
--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -677,6 +677,14 @@ def is_pin_memory_available() -> bool:
     return current_platform.is_pin_memory_available()
 
 
+@lru_cache(maxsize=None)
+def is_uva_available() -> bool:
+    """Check if Unified Virtual Addressing (UVA) is available."""
+    # UVA requires pinned memory.
+    # TODO(woosuk): Add more requirements for UVA.
+    return is_pin_memory_available()
+
+
 class DeviceMemoryProfiler:
 
     def __init__(self, device: Optional[torch.types.Device] = None):
@@ -1529,6 +1537,7 @@ def get_cuda_view_from_cpu_tensor(cpu_tensor: torch.Tensor) -> torch.Tensor:
     """
     Get a CUDA view of a CPU tensor using Unified Virtual Addressing (UVA).
     """
+    assert cpu_tensor.is_pinned(), "CPU tensor must be pinned"
     return torch.ops._C.get_cuda_view_from_cpu_tensor(cpu_tensor)
 
 
diff --git a/vllm/v1/worker/gpu_block_table.py b/vllm/v1/worker/gpu_block_table.py
index 25a927c371566..f44d62f91b4cb 100644
--- a/vllm/v1/worker/gpu_block_table.py
+++ b/vllm/v1/worker/gpu_block_table.py
@@ -4,7 +4,7 @@ import numpy as np
 import torch
 
 from vllm import _custom_ops as ops
-from vllm.utils import get_cuda_view_from_cpu_tensor
+from vllm.utils import get_cuda_view_from_cpu_tensor, is_uva_available
 
 
 class BlockTable:
@@ -36,10 +36,9 @@ class BlockTable:
         )
         self.block_table_np = self.block_table_cpu.numpy()
 
-        # Pinned memory is required to use UVA.
-        # TODO(woosuk): Add other requirements for UVA.
-        self.use_uva = pin_memory
+        self.use_uva = is_uva_available()
         if self.use_uva:
+            # Pinned memory is required to use UVA.
             self.block_table_diff = torch.zeros((max_num_reqs, 2),
                                                 dtype=torch.int32,
                                                 device="cpu",