mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 07:15:01 +08:00
Fix eager mode performance (#2377)
This commit is contained in:
parent
05921a9a7a
commit
c884819135
@@ -235,9 +235,11 @@ class ModelRunner:
|
||||
input_block_tables[i, :len(block_table)] = block_table
|
||||
block_tables = torch.tensor(input_block_tables, device="cuda")
|
||||
else:
|
||||
+ max_block_table_len = (max_context_len + self.block_size -
|
||||
+ 1) // self.block_size
|
||||
block_tables = _make_tensor_with_pad(
|
||||
block_tables,
|
||||
- max_len=max_context_len,
|
||||
+ max_len=max_block_table_len,
|
||||
pad=0,
|
||||
dtype=torch.int,
|
||||
device="cuda",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user