mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-26 16:09:42 +08:00
[Perf] Remove blocking copy in GDN Attention (#31167)
Signed-off-by: Benjamin Chislett <bchislett@nvidia.com>
This commit is contained in:
parent
5312a7284e
commit
85aff45e24
@@ -143,7 +143,7 @@ class GDNAttentionMetadataBuilder(AttentionMetadataBuilder[GDNAttentionMetadata]
|
||||
|
||||
query_start_loc = m.query_start_loc
|
||||
context_lens = m.num_computed_tokens_cpu
|
||||
- context_lens_tensor = context_lens.to(query_start_loc.device)
+ context_lens_tensor = context_lens.to(query_start_loc.device, non_blocking=True)
|
||||
nums_dict, batch_ptr, token_chunk_offset_ptr = None, None, None
|
||||
|
||||
if (
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user