From 85aff45e24de7af96d30baa1d7d0fc7aec43c28a Mon Sep 17 00:00:00 2001 From: Benjamin Chislett Date: Mon, 22 Dec 2025 17:25:22 -0500 Subject: [PATCH] [Perf] Remove blocking copy in GDN Attention (#31167) Signed-off-by: Benjamin Chislett --- vllm/v1/attention/backends/gdn_attn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/attention/backends/gdn_attn.py b/vllm/v1/attention/backends/gdn_attn.py index ace2cbb0564c8..fcde986f48d46 100644 --- a/vllm/v1/attention/backends/gdn_attn.py +++ b/vllm/v1/attention/backends/gdn_attn.py @@ -143,7 +143,7 @@ class GDNAttentionMetadataBuilder(AttentionMetadataBuilder[GDNAttentionMetadata] query_start_loc = m.query_start_loc context_lens = m.num_computed_tokens_cpu - context_lens_tensor = context_lens.to(query_start_loc.device) + context_lens_tensor = context_lens.to(query_start_loc.device, non_blocking=True) nums_dict, batch_ptr, token_chunk_offset_ptr = None, None, None if (