[Minor] Remove unused code in attention (#2384)

This commit is contained in:
Woosuk Kwon 2024-01-08 13:13:08 -08:00 committed by GitHub
parent c884819135
commit 28c3f12104
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -156,20 +156,15 @@ class PagedAttention(nn.Module):
            output = out.view_as(query)
        else:
            # Decoding run.
-           if key_cache is not None and value_cache is not None:
-               output = _paged_attention(
-                   query,
-                   key_cache,
-                   value_cache,
-                   input_metadata,
-                   self.num_kv_heads,
-                   self.scale,
-                   self.alibi_slopes,
-               )
-           else:
-               # This happens during the initial memory profiling run for
-               # CUDA graphs.
-               output = torch.zeros_like(query)
+           output = _paged_attention(
+               query,
+               key_cache,
+               value_cache,
+               input_metadata,
+               self.num_kv_heads,
+               self.scale,
+               self.alibi_slopes,
+           )
        # Reshape the output tensor.
        return output.view(batch_size, seq_len, hidden_size)