diff --git a/vllm/v1/attention/backends/gdn_attn.py b/vllm/v1/attention/backends/gdn_attn.py index ace2cbb0564c8..fcde986f48d46 100644 --- a/vllm/v1/attention/backends/gdn_attn.py +++ b/vllm/v1/attention/backends/gdn_attn.py @@ -143,7 +143,7 @@ class GDNAttentionMetadataBuilder(AttentionMetadataBuilder[GDNAttentionMetadata] query_start_loc = m.query_start_loc context_lens = m.num_computed_tokens_cpu - context_lens_tensor = context_lens.to(query_start_loc.device) + context_lens_tensor = context_lens.to(query_start_loc.device, non_blocking=True) nums_dict, batch_ptr, token_chunk_offset_ptr = None, None, None if (