Fix eager mode performance (#2377)

This commit is contained in:
Woosuk Kwon 2024-01-08 10:11:06 -08:00 committed by GitHub
parent 05921a9a7a
commit c884819135
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -235,9 +235,11 @@ class ModelRunner:
input_block_tables[i, :len(block_table)] = block_table
block_tables = torch.tensor(input_block_tables, device="cuda")
else:
+max_block_table_len = (max_context_len + self.block_size -
+1) // self.block_size
block_tables = _make_tensor_with_pad(
block_tables,
-max_len=max_context_len,
+max_len=max_block_table_len,
pad=0,
dtype=torch.int,
device="cuda",