mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-31 18:07:05 +08:00
Minor
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
parent
34d6cc2aea
commit
6ba31aa5f6
@ -55,7 +55,6 @@ def test_gpu_write(device):
|
||||
cuda_view[4, 5] = -1
|
||||
cuda_view.mul_(2)
|
||||
|
||||
torch.cuda.synchronize()
|
||||
assert cpu_tensor[0, 0] == 2
|
||||
assert cpu_tensor[2, 3] == 4
|
||||
assert cpu_tensor[4, 5] == -2
|
||||
|
||||
@ -5,6 +5,9 @@ import torch
|
||||
|
||||
from vllm import _custom_ops as ops
|
||||
from vllm.utils import get_cuda_view_from_cpu_tensor, is_uva_available
|
||||
from vllm.logger import init_logger
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
class BlockTable:
|
||||
@ -39,6 +42,8 @@ class BlockTable:
|
||||
# UVA requires pinned memory.
|
||||
self.use_uva = is_uva_available() and pin_memory
|
||||
if self.use_uva:
|
||||
logger.info("Using Unified Virtual Addressing (UVA) for block "
|
||||
"table transfer.")
|
||||
self.block_table_diff = torch.zeros((max_num_reqs, 2),
|
||||
dtype=torch.int32,
|
||||
device="cpu",
|
||||
@ -49,6 +54,10 @@ class BlockTable:
|
||||
self.block_table_cpu)
|
||||
self.block_table_diff_cuda_view = get_cuda_view_from_cpu_tensor(
|
||||
self.block_table_diff)
|
||||
else:
|
||||
logger.warning("Unified Virtual Addressing (UVA) is not supported "
|
||||
"in the current environment. This may result in "
|
||||
"lower performance.")
|
||||
|
||||
def add_row(self, row_idx: int, block_ids: List[int]) -> None:
|
||||
num_blocks = len(block_ids)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user