mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-12 05:00:07 +08:00
Merge 8756b5ed1568cf3676d7be5d39d022c11a36fbad into 254f6b986720c92ddf97fbb1a6a6465da8e87e29
This commit is contained in:
commit
143590cdc6
@ -122,7 +122,11 @@ class AiterMLAMetadataBuilder(MLACommonMetadataBuilder[AiterMLAMetadata]):
|
|||||||
).unsqueeze(0) < seq_lens_device.unsqueeze(1)
|
).unsqueeze(0) < seq_lens_device.unsqueeze(1)
|
||||||
paged_kv_indices = block_table_tensor[mask]
|
paged_kv_indices = block_table_tensor[mask]
|
||||||
|
|
||||||
paged_kv_last_page_len = torch.where(seq_lens_device == 0, 1, seq_lens_device)
|
# kernel block size is always 1, so each page has exactly 1 token.
|
||||||
|
# last_page_len should always be 1 regardless of sequence length.
|
||||||
|
paged_kv_last_page_len = torch.ones(
|
||||||
|
num_reqs, dtype=seq_lens_device.dtype, device=device
|
||||||
|
)
|
||||||
|
|
||||||
paged_kv_indptr = torch.cat(
|
paged_kv_indptr = torch.cat(
|
||||||
[
|
[
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user