Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-14 23:35:01 +08:00)
[Minor] Remove unused code in attention (#2384)
commit 28c3f12104
parent c884819135
@@ -156,7 +156,6 @@ class PagedAttention(nn.Module):
             output = out.view_as(query)
         else:
             # Decoding run.
-            if key_cache is not None and value_cache is not None:
             output = _paged_attention(
                 query,
                 key_cache,
@@ -166,10 +165,6 @@ class PagedAttention(nn.Module):
                 self.scale,
                 self.alibi_slopes,
             )
-            else:
-                # This happens during the initial memory profiling run for
-                # CUDA graphs.
-                output = torch.zeros_like(query)
 
         # Reshape the output tensor.
         return output.view(batch_size, seq_len, hidden_size)
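
The hunks above appear to be rendered with whitespace-only changes hidden, so the re-indentation of the _paged_attention(...) call does not show up as changed lines. For readability, here is a sketch of the decode branch before this commit, reconstructed from the removed lines; the indentation is an assumption (the mirror's diff view flattened it), and the unchanged argument lines that fall between the two hunks are elided rather than guessed:

        else:
            # Decoding run.
            if key_cache is not None and value_cache is not None:
                output = _paged_attention(
                    query,
                    key_cache,
                    ...,  # unchanged argument lines elided between the hunks
                    self.scale,
                    self.alibi_slopes,
                )
            else:
                # This happens during the initial memory profiling run for
                # CUDA graphs.
                output = torch.zeros_like(query)

After the commit, the key_cache/value_cache guard and the torch.zeros_like(query) fallback are removed as unused: the decoding run calls _paged_attention unconditionally, and the result is reshaped with output.view(batch_size, seq_len, hidden_size) as before.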