mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 07:05:02 +08:00
Add comments on accessing kv_cache and attn_metadata (#13887)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
parent
7b700ec8c8
commit
0ecdd98031
@@ -47,6 +47,10 @@ class Attention(nn.Module):
|
|||||||
attn_type: str = AttentionType.DECODER,
|
attn_type: str = AttentionType.DECODER,
|
||||||
**extra_impl_args,
|
**extra_impl_args,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
"""
|
||||||
|
The KV cache is stored inside this class and is accessed via
|
||||||
|
`self.kv_cache`.
|
||||||
|
"""
|
||||||
super().__init__()
|
super().__init__()
|
||||||
if per_layer_sliding_window is not None:
|
if per_layer_sliding_window is not None:
|
||||||
# per-layer sliding window
|
# per-layer sliding window
|
||||||
@@ -155,6 +159,15 @@ class Attention(nn.Module):
|
|||||||
key: torch.Tensor,
|
key: torch.Tensor,
|
||||||
value: torch.Tensor,
|
value: torch.Tensor,
|
||||||
) -> torch.Tensor:
|
) -> torch.Tensor:
|
||||||
|
"""
|
||||||
|
The KV cache is stored inside this class and is accessed via
|
||||||
|
`self.kv_cache`.
|
||||||
|
|
||||||
|
Attention metadata (`attn_metadata`) is set using a context manager in
|
||||||
|
the model runner's `execute_model` method. It is accessed via forward
|
||||||
|
context using
|
||||||
|
`vllm.forward_context.get_forward_context().attn_metadata`.
|
||||||
|
"""
|
||||||
if self.calculate_kv_scales:
|
if self.calculate_kv_scales:
|
||||||
attn_metadata = get_forward_context().attn_metadata
|
attn_metadata = get_forward_context().attn_metadata
|
||||||
if attn_metadata.enable_kv_scales_calculation:
|
if attn_metadata.enable_kv_scales_calculation:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user