mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-02 00:44:31 +08:00
Fix pre-commit
Signed-off-by: yuantao <2422264527@qq.com>
This commit is contained in:
parent
93a7afcab3
commit
378e20833f
@ -855,7 +855,8 @@ class SinkFullAttentionManager(FullAttentionManager):
|
|||||||
req_blocks = self.req_to_blocks[request_id]
|
req_blocks = self.req_to_blocks[request_id]
|
||||||
assert len(req_blocks) == 0
|
assert len(req_blocks) == 0
|
||||||
# Append both the sink blocks and the prefix-cache hit blocks
|
# Append both the sink blocks and the prefix-cache hit blocks
|
||||||
req_blocks.extend(self.sink_blocks + new_computed_blocks)
|
req_blocks.extend(self.sink_blocks)
|
||||||
|
req_blocks.extend(new_computed_blocks)
|
||||||
self.num_cached_block[request_id] = len(new_computed_blocks)
|
self.num_cached_block[request_id] = len(new_computed_blocks)
|
||||||
else:
|
else:
|
||||||
# A running request. Should not have new computed blocks.
|
# A running request. Should not have new computed blocks.
|
||||||
|
|||||||
@ -102,8 +102,8 @@ def _reshape_kv_cache(
|
|||||||
|
|
||||||
attn_backend = attn_backends[layer_name]
|
attn_backend = attn_backends[layer_name]
|
||||||
if (
|
if (
|
||||||
getattr(kv_cache_spec, "head_size_v", kv_cache_spec.head_size)
|
hasattr(kv_cache_spec, "head_size_v")
|
||||||
!= kv_cache_spec.head_size
|
and kv_cache_spec.head_size_v != kv_cache_spec.head_size
|
||||||
):
|
):
|
||||||
kwargs = {"head_size_v": kv_cache_spec.head_size_v}
|
kwargs = {"head_size_v": kv_cache_spec.head_size_v}
|
||||||
stride_kwargs = {"diff_kv": True}
|
stride_kwargs = {"diff_kv": True}
|
||||||
|
|||||||
@ -5207,8 +5207,8 @@ class GPUModelRunner(
|
|||||||
kernel_num_blocks = num_blocks * num_blocks_per_kv_block
|
kernel_num_blocks = num_blocks * num_blocks_per_kv_block
|
||||||
|
|
||||||
if (
|
if (
|
||||||
getattr(kv_cache_spec, "head_size_v", kv_cache_spec.head_size)
|
hasattr(kv_cache_spec, "head_size_v")
|
||||||
!= kv_cache_spec.head_size
|
and kv_cache_spec.head_size_v != kv_cache_spec.head_size
|
||||||
):
|
):
|
||||||
kwargs = {"head_size_v": kv_cache_spec.head_size_v}
|
kwargs = {"head_size_v": kv_cache_spec.head_size_v}
|
||||||
stride_kwargs = {"diff_kv": True}
|
stride_kwargs = {"diff_kv": True}
|
||||||
|
|||||||
@ -191,8 +191,8 @@ class KVConnectorModelRunnerMixin:
|
|||||||
|
|
||||||
attn_backend = attn_group.backend
|
attn_backend = attn_group.backend
|
||||||
if (
|
if (
|
||||||
getattr(kv_cache_spec, "head_size_v", kv_cache_spec.head_size)
|
hasattr(kv_cache_spec, "head_size_v")
|
||||||
!= kv_cache_spec.head_size
|
and kv_cache_spec.head_size_v != kv_cache_spec.head_size
|
||||||
):
|
):
|
||||||
kwargs = {"head_size_v": kv_cache_spec.head_size_v}
|
kwargs = {"head_size_v": kv_cache_spec.head_size_v}
|
||||||
stride_kwargs = {"diff_kv": True}
|
stride_kwargs = {"diff_kv": True}
|
||||||
@ -269,8 +269,8 @@ class KVConnectorModelRunnerMixin:
|
|||||||
|
|
||||||
attn_backend = attn_group.backend
|
attn_backend = attn_group.backend
|
||||||
if (
|
if (
|
||||||
getattr(kv_cache_spec, "head_size_v", kv_cache_spec.head_size)
|
hasattr(kv_cache_spec, "head_size_v")
|
||||||
!= kv_cache_spec.head_size
|
and kv_cache_spec.head_size_v != kv_cache_spec.head_size
|
||||||
):
|
):
|
||||||
kwargs = {"head_size_v": kv_cache_spec.head_size_v}
|
kwargs = {"head_size_v": kv_cache_spec.head_size_v}
|
||||||
stride_kwargs = {"diff_kv": True}
|
stride_kwargs = {"diff_kv": True}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user