Minor: log whether UVA is used for block table transfer; drop synchronize in UVA test

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
2026-07-01 07:17:11 +08:00 · 2024-12-26 19:03:59 -08:00 · 2024-12-26 19:03:59 -08:00 · 6ba31aa5f6
commit 6ba31aa5f6
parent 34d6cc2aea
2 changed files with 9 additions and 1 deletion
--- a/tests/kernels/test_uva.py
+++ b/tests/kernels/test_uva.py
@@ -55,7 +55,6 @@ def test_gpu_write(device):
    cuda_view[4, 5] = -1
    cuda_view.mul_(2)

-    torch.cuda.synchronize()
    assert cpu_tensor[0, 0] == 2
    assert cpu_tensor[2, 3] == 4
    assert cpu_tensor[4, 5] == -2
--- a/vllm/v1/worker/gpu_block_table.py
+++ b/vllm/v1/worker/gpu_block_table.py
@@ -5,6 +5,9 @@ import torch

 from vllm import _custom_ops as ops
 from vllm.utils import get_cuda_view_from_cpu_tensor, is_uva_available
+from vllm.logger import init_logger
+
+logger = init_logger(__name__)


 class BlockTable:
@@ -39,6 +42,8 @@ class BlockTable:
        # UVA requires pinned memory.
        self.use_uva = is_uva_available() and pin_memory
        if self.use_uva:
+            logger.info("Using Unified Virtual Addressing (UVA) for block "
+                        "table transfer.")
            self.block_table_diff = torch.zeros((max_num_reqs, 2),
                                                dtype=torch.int32,
                                                device="cpu",
@@ -49,6 +54,10 @@ class BlockTable:
                self.block_table_cpu)
            self.block_table_diff_cuda_view = get_cuda_view_from_cpu_tensor(
                self.block_table_diff)
+        else:
+            logger.warning("Unified Virtual Addressing (UVA) is not supported "
+                           "in the current environment. This may result in "
+                           "lower performance.")

    def add_row(self, row_idx: int, block_ids: List[int]) -> None:
        num_blocks = len(block_ids)