Mirror of https://git.datalinker.icu/vllm-project/vllm.git, synced 2026-03-30 16:37:04 +08:00.
Minor
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in: parent ff5b1033dc, commit bef68163a0.
@ -677,6 +677,14 @@ def is_pin_memory_available() -> bool:
|
||||
return current_platform.is_pin_memory_available()
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
def is_uva_available() -> bool:
    """Report whether Unified Virtual Addressing (UVA) can be used.

    UVA lets the GPU address pinned (page-locked) host memory directly, so
    for now availability is defined exactly as pinned-memory availability.
    The result is computed once and cached for the process lifetime.
    """
    # TODO(woosuk): Add more requirements for UVA beyond pinned memory.
    return is_pin_memory_available()
|
||||
|
||||
|
||||
class DeviceMemoryProfiler:
|
||||
|
||||
def __init__(self, device: Optional[torch.types.Device] = None):
|
||||
@ -1529,6 +1537,7 @@ def get_cuda_view_from_cpu_tensor(cpu_tensor: torch.Tensor) -> torch.Tensor:
|
||||
"""
|
||||
Get a CUDA view of a CPU tensor using Unified Virtual Addressing (UVA).
|
||||
"""
|
||||
assert cpu_tensor.is_pinned(), "CPU tensor must be pinned"
|
||||
return torch.ops._C.get_cuda_view_from_cpu_tensor(cpu_tensor)
|
||||
|
||||
|
||||
|
||||
@ -4,7 +4,7 @@ import numpy as np
|
||||
import torch
|
||||
|
||||
from vllm import _custom_ops as ops
|
||||
from vllm.utils import get_cuda_view_from_cpu_tensor
|
||||
from vllm.utils import get_cuda_view_from_cpu_tensor, is_uva_available
|
||||
|
||||
|
||||
class BlockTable:
|
||||
@ -36,10 +36,9 @@ class BlockTable:
|
||||
)
|
||||
self.block_table_np = self.block_table_cpu.numpy()
|
||||
|
||||
# Pinned memory is required to use UVA.
|
||||
# TODO(woosuk): Add other requirements for UVA.
|
||||
self.use_uva = pin_memory
|
||||
self.use_uva = is_uva_available()
|
||||
if self.use_uva:
|
||||
# Pinned memory is required to use UVA.
|
||||
self.block_table_diff = torch.zeros((max_num_reqs, 2),
|
||||
dtype=torch.int32,
|
||||
device="cpu",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user