diff --git a/tests/v1/worker/test_gpu_block_table.py b/tests/v1/worker/test_gpu_block_table.py
new file mode 100644
index 0000000000000..bb29ea7b1d2ba
--- /dev/null
+++ b/tests/v1/worker/test_gpu_block_table.py
@@ -0,0 +1,61 @@
+import random
+import time
+
+import pytest
+import torch
+
+from vllm.v1.worker.gpu_block_table import BlockTable
+
+MAX_NUM_REQS = 1024
+MAX_MODEL_LEN = 128 * 1024
+BLOCK_SIZE = 16
+MAX_NUM_BLOCKS_PER_REQ = MAX_MODEL_LEN // BLOCK_SIZE
+
+
+@pytest.mark.parametrize("do_wait", [False, True])
+def test_block_table(do_wait: bool):
+    random.seed(0)
+    torch.manual_seed(0)
+    torch.cuda.manual_seed_all(0)
+
+    block_table = BlockTable(
+        max_num_reqs=MAX_NUM_REQS,
+        max_model_len=MAX_MODEL_LEN,
+        max_num_blocks_per_req=MAX_NUM_BLOCKS_PER_REQ,
+        pin_memory=True,
+        device=torch.device(0),
+    )
+
+    # Add two rows and commit them to the GPU block table.
+    num_blocks = random.randint(1, MAX_NUM_BLOCKS_PER_REQ - 1)
+    block_ids = torch.randint(0, MAX_NUM_BLOCKS_PER_REQ, (num_blocks,),
+                              dtype=torch.int32, device="cpu")
+    block_table.add_row(0, block_ids)
+    num_blocks = random.randint(1, MAX_NUM_BLOCKS_PER_REQ - 100)
+    block_ids = torch.randint(0, MAX_NUM_BLOCKS_PER_REQ, (num_blocks,),
+                              dtype=torch.int32, device="cpu")
+    block_table.add_row(1, block_ids)
+    block_table.commit(2)
+
+    # Optionally give any in-flight copies extra time to finish.
+    torch.cuda.synchronize()
+    if do_wait:
+        time.sleep(1)
+
+    # Append 100 blocks to row 1, move it to row 0, and commit again.
+    block_ids = torch.randint(0, MAX_NUM_BLOCKS_PER_REQ, (100,),
+                              dtype=torch.int32, device="cpu")
+    block_table.append_row(1, num_blocks, block_ids)
+    block_table.move_row(1, 0)
+    block_table.commit(2)
+
+    torch.cuda.synchronize()
+    if do_wait:
+        time.sleep(1)
+
+    # The committed GPU row 0 should match row 0 of the CPU block table.
+    torch.testing.assert_close(block_table.block_table[:1].cpu(),
+                               block_table.block_table_cpu[:1])
+
+
+if __name__ == "__main__":
+    test_block_table(do_wait=False)