Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-13 21:55:32 +08:00)
[Bugfix] Fix value unpack error of simple connector for KVCache transfer. (#11058)
Signed-off-by: ShangmingCai <csmthu@gmail.com>
This commit is contained in:
parent 9f3974a319
commit db6c264a1e
@@ -118,6 +118,12 @@ class SimpleConnector(KVConnectorBase):
 start_layer = model_executable.model.start_layer
 end_layer = model_executable.model.end_layer
 
+model_config = model_executable.model.config
+num_heads = model_config.num_key_value_heads
+hidden_size = model_config.hidden_size
+num_attention_heads = model_config.num_attention_heads
+head_size = int(hidden_size / num_attention_heads)
+
 # query_lens contains new KV caches that are added to vLLM.
 # so we will send them to decode instance
 # FIXME(Kuntai): This assume that all requests are prefill.
@@ -131,8 +137,6 @@ class SimpleConnector(KVConnectorBase):
 for layer_id in range(start_layer, end_layer):
 kv_cache = kv_caches[layer_id - start_layer]
 
-_, _, num_heads, head_size = kv_cache[0].shape
-
 key_cache = kv_cache[0].reshape(-1, num_heads, head_size)
 value_cache = kv_cache[1].reshape(-1, num_heads, head_size)
 
Loading…
x
Reference in New Issue
Block a user