[Perf] Remove blocking copy in GDN Attention (#31167)

Signed-off-by: Benjamin Chislett <bchislett@nvidia.com>
This commit is contained in:
Benjamin Chislett 2025-12-22 17:25:22 -05:00 committed by GitHub
parent 5312a7284e
commit 85aff45e24
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -143,7 +143,7 @@ class GDNAttentionMetadataBuilder(AttentionMetadataBuilder[GDNAttentionMetadata]
query_start_loc = m.query_start_loc
context_lens = m.num_computed_tokens_cpu
-context_lens_tensor = context_lens.to(query_start_loc.device)
+context_lens_tensor = context_lens.to(query_start_loc.device, non_blocking=True)
nums_dict, batch_ptr, token_chunk_offset_ptr = None, None, None
if (