[BugFix] Fix shared storage connector load kv only load attention layer (#21428)
Signed-off-by: David Chen <530634352@qq.com>
parent a40a8506df
commit 97d6c30cc9
@@ -156,8 +156,16 @@ class SharedStorageConnector(KVConnectorBase_V1):
             logger.info("Inject KV cache of %d tokens to the paged memory",
                         len(request.slot_mapping))
             for layer_name in forward_context.no_compile_layers:
-                attn_layer = forward_context.no_compile_layers[layer_name]
-                kv_cache_layer = attn_layer.kv_cache[\
+                layer = forward_context.no_compile_layers[layer_name]
+
+                # Only process layers that have kv_cache
+                # attribute (attention layers) Skip non-attention
+                # layers like FusedMoE/MLP etc.
+                kv_cache_attr = getattr(layer, 'kv_cache', None)
+                if kv_cache_attr is None:
+                    continue
+
+                kv_cache_layer = kv_cache_attr[ \
                     forward_context.virtual_engine]
 
                 filename = self._generate_filename_debug(
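For context: forward_context.no_compile_layers maps layer names to modules, and in models with MoE or MLP blocks it can contain non-attention modules that carry no kv_cache attribute, so the old unconditional attn_layer.kv_cache access could raise AttributeError during KV load. Below is a minimal, self-contained sketch of the guard pattern this commit introduces; AttentionLayer and FusedMoELayer are hypothetical stand-ins for vLLM's real module types, not the actual classes.

# Minimal sketch of the getattr guard (stand-in classes, not vLLM's real types).
class AttentionLayer:
    def __init__(self):
        # One KV-cache entry per virtual engine, matching the diff's
        # kv_cache_attr[forward_context.virtual_engine] indexing.
        self.kv_cache = ["kv_tensor_for_engine_0"]

class FusedMoELayer:
    # Non-attention layer: intentionally has no kv_cache attribute.
    pass

no_compile_layers = {
    "model.layers.0.self_attn.attn": AttentionLayer(),
    "model.layers.0.mlp.experts": FusedMoELayer(),
}
virtual_engine = 0

for layer_name in no_compile_layers:
    layer = no_compile_layers[layer_name]
    # Before the fix, layer.kv_cache here raised AttributeError for
    # FusedMoELayer; the getattr guard skips such layers instead.
    kv_cache_attr = getattr(layer, 'kv_cache', None)
    if kv_cache_attr is None:
        continue
    kv_cache_layer = kv_cache_attr[virtual_engine]
    print(f"would inject KV into {layer_name}: {kv_cache_layer}")

The same getattr(layer, 'kv_cache', None) probe is what the commit uses, so the connector skips non-attention layers gracefully instead of assuming every registered layer holds a KV cache.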