Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Woosuk Kwon 2024-12-26 10:48:29 -08:00
parent ff5b1033dc
commit bef68163a0
2 changed files with 12 additions and 4 deletions

View File

@ -677,6 +677,14 @@ def is_pin_memory_available() -> bool:
return current_platform.is_pin_memory_available()
@lru_cache(maxsize=None)
def is_uva_available() -> bool:
    """Report whether Unified Virtual Addressing (UVA) can be used.

    The answer is memoized by ``lru_cache``, so repeated calls reuse the
    result of the first check.

    Returns:
        Whatever ``is_pin_memory_available()`` reports; pinned-memory
        support is currently the only condition that is checked.
    """
    # TODO(woosuk): Add more requirements for UVA.
    # UVA depends on pinned (page-locked) host memory, so for now its
    # availability simply mirrors pinned-memory availability.
    pinned_memory_supported = is_pin_memory_available()
    return pinned_memory_supported
class DeviceMemoryProfiler:
def __init__(self, device: Optional[torch.types.Device] = None):
@ -1529,6 +1537,7 @@ def get_cuda_view_from_cpu_tensor(cpu_tensor: torch.Tensor) -> torch.Tensor:
"""
Get a CUDA view of a CPU tensor using Unified Virtual Addressing (UVA).
"""
assert cpu_tensor.is_pinned(), "CPU tensor must be pinned"
return torch.ops._C.get_cuda_view_from_cpu_tensor(cpu_tensor)

View File

@ -4,7 +4,7 @@ import numpy as np
import torch
from vllm import _custom_ops as ops
from vllm.utils import get_cuda_view_from_cpu_tensor
from vllm.utils import get_cuda_view_from_cpu_tensor, is_uva_available
class BlockTable:
@ -36,10 +36,9 @@ class BlockTable:
)
self.block_table_np = self.block_table_cpu.numpy()
# Pinned memory is required to use UVA.
# TODO(woosuk): Add other requirements for UVA.
self.use_uva = pin_memory
self.use_uva = is_uva_available()
if self.use_uva:
# Pinned memory is required to use UVA.
self.block_table_diff = torch.zeros((max_num_reqs, 2),
dtype=torch.int32,
device="cpu",