diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py index 03cbb958237df..8044481a9cd6a 100644 --- a/vllm/v1/core/kv_cache_manager.py +++ b/vllm/v1/core/kv_cache_manager.py @@ -164,13 +164,14 @@ class KVCacheManager: new_full_blocks = req_blocks[ num_computed_full_blocks:num_full_blocks_after_append] - self._cache_full_blocks( - request=request, - blk_start_idx=num_computed_full_blocks, - full_blocks=new_full_blocks, - prev_block=req_blocks[num_computed_full_blocks - 1] - if num_computed_full_blocks >= 1 else None, - ) + if new_full_blocks: + self._cache_full_blocks( + request=request, + blk_start_idx=num_computed_full_blocks, + full_blocks=new_full_blocks, + prev_block=req_blocks[num_computed_full_blocks - 1] + if num_computed_full_blocks >= 1 else None, + ) return new_blocks @@ -375,8 +376,13 @@ class KVCacheManager: prev_block: The previous block in the chain. """ # Update the new blocks with the block hashes through the chain. - prev_block_hash_value = (prev_block.block_hash.hash_value - if prev_block is not None else None) + prev_block_hash_value = None + if prev_block is not None: + # Previous block must have a block hash because it must be + # a full, cached block. + assert prev_block.block_hash is not None + prev_block_hash_value = prev_block.block_hash.hash_value + for i, blk in enumerate(full_blocks): blk_idx = blk_start_idx + i