"""Consistency test for ``BlockTable`` between its two table copies.

Requires a CUDA device: the table is built with ``pin_memory=True`` on
``torch.device(0)`` and the test calls ``torch.cuda.synchronize()``.
"""
import random
import time

import pytest
import torch

from vllm.v1.worker.gpu_block_table import BlockTable

MAX_NUM_REQS = 1024
MAX_MODEL_LEN = 128 * 1024
BLOCK_SIZE = 16
MAX_NUM_BLOCKS_PER_REQ = MAX_MODEL_LEN // BLOCK_SIZE


def _random_block_ids(num_blocks: int) -> torch.Tensor:
    """Return ``num_blocks`` random int32 block ids as a CPU tensor."""
    return torch.randint(
        0,
        MAX_NUM_BLOCKS_PER_REQ,
        (num_blocks,),
        dtype=torch.int32,
        device="cpu",
    )


# Without parametrization pytest treats ``do_wait`` as a fixture name and
# errors out with "fixture 'do_wait' not found" instead of running the test.
@pytest.mark.parametrize("do_wait", [False, True])
def test_block_table(do_wait: bool):
    """Check that row 0 of the table matches between both copies.

    Two rows are added and committed, then row 1 is extended by 100 blocks
    and moved onto row 0 before a second commit. Finally the first row of
    ``block_table.block_table`` (copied to CPU) is compared against the
    first row of ``block_table.block_table_cpu``.

    Args:
        do_wait: When true, sleep for one second after each
            ``torch.cuda.synchronize()`` call.
    """
    # Fixed seeds keep the random row lengths and block ids reproducible.
    random.seed(0)
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)

    block_table = BlockTable(
        max_num_reqs=MAX_NUM_REQS,
        max_model_len=MAX_MODEL_LEN,
        max_num_blocks_per_req=MAX_NUM_BLOCKS_PER_REQ,
        pin_memory=True,
        device=torch.device(0),
    )

    # Row 0: random length of up to MAX_NUM_BLOCKS_PER_REQ - 1 blocks.
    num_blocks = random.randint(1, MAX_NUM_BLOCKS_PER_REQ - 1)
    block_table.add_row(0, _random_block_ids(num_blocks))

    # Row 1: capped 100 blocks short of the maximum so that the 100 blocks
    # appended below cannot push it past MAX_NUM_BLOCKS_PER_REQ.
    num_blocks = random.randint(1, MAX_NUM_BLOCKS_PER_REQ - 100)
    block_table.add_row(1, _random_block_ids(num_blocks))
    block_table.commit(2)

    torch.cuda.synchronize()
    if do_wait:
        time.sleep(1)

    # ``num_blocks`` still holds row 1's length here.
    # NOTE(review): presumably the second argument of append_row is the
    # offset at which the new ids are written -- confirm against BlockTable.
    block_table.append_row(1, num_blocks, _random_block_ids(100))
    block_table.move_row(1, 0)
    block_table.commit(2)

    torch.cuda.synchronize()
    if do_wait:
        time.sleep(1)

    # Only the first row is compared.
    torch.testing.assert_close(
        block_table.block_table[:1].cpu(),
        block_table.block_table_cpu[:1],
    )


if __name__ == "__main__":
    test_block_table(do_wait=False)