import random

import pytest
import torch

from vllm import _custom_ops as ops
from vllm.platforms import current_platform

SEEDS = [0]
# One device when exactly one GPU is visible, otherwise the first two.
CUDA_DEVICES = [
    f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)
]


@pytest.mark.parametrize("seed", SEEDS)
@pytest.mark.parametrize("device", CUDA_DEVICES)
def test_copy_subranges(seed, device):
    """Check that ``ops.copy_subranges`` makes the target match the source.

    Every row of a zero-initialised source matrix gets one random contiguous
    subrange filled with non-zero values. The ``(start, length)`` pair of
    that subrange is recorded per row and handed to the op together with a
    zero-initialised target matrix. After the call the target is expected to
    be element-wise identical to the source.

    Args:
        seed: Seed passed to ``current_platform.seed_everything``.
        device: Device string (e.g. ``"cuda:0"``) on which all tensors live.
    """
    torch.set_default_device(device)
    current_platform.seed_everything(seed)

    num_rows = 1024
    num_cols = 1024
    src_matrix = torch.zeros(num_rows,
                             num_cols,
                             device=device,
                             dtype=torch.int32)
    dst_matrix = torch.zeros(num_rows,
                             num_cols,
                             device=device,
                             dtype=torch.int32)

    # Collect the (start, length) pairs on the host and upload them once
    # instead of issuing two scalar device writes per row.
    subranges = []
    for row in range(num_rows):
        start_idx = random.randint(0, num_cols - 1)
        # ``end_idx`` is an exclusive bound, so it may equal ``num_cols`` to
        # let a subrange reach the last column. It may also equal
        # ``start_idx``, which yields an empty subrange.
        end_idx = random.randint(start_idx, num_cols)
        num_diffs = end_idx - start_idx

        # Draw from [1, 100) so every value that must be copied differs from
        # the zero-initialised target; a skipped element is then detectable.
        src_matrix[row, start_idx:end_idx] = torch.randint(
            1, 100, (num_diffs, ), device=device, dtype=torch.int32)
        subranges.append((start_idx, num_diffs))

    diff_matrix = torch.tensor(subranges, device=device, dtype=torch.int32)

    ops.copy_subranges(src_matrix, diff_matrix, dst_matrix, num_rows)

    # Integer data: require exact equality rather than a tolerance check.
    assert torch.equal(src_matrix, dst_matrix)