diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 33be4dccfc710..92822d829a881 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -1641,7 +1641,10 @@ class GPUModelRunner( ) -> None: attn_group = self.attn_groups[kv_cache_gid][attn_gid] builder = attn_group.get_metadata_builder(ubid or 0) - cache_key = (kv_cache_groups[kv_cache_gid].kv_cache_spec, type(builder)) + kv_cache_spec = kv_cache_groups[kv_cache_gid].kv_cache_spec + if isinstance(kv_cache_spec, UniformTypeKVCacheSpecs): + kv_cache_spec = kv_cache_spec.kv_cache_specs[attn_group.layer_names[0]] + cache_key = (kv_cache_spec, type(builder)) cascade_attn_prefix_len = ( cascade_attn_prefix_lens[kv_cache_gid][attn_gid]