diff --git a/vllm/block.py b/vllm/block.py deleted file mode 100644 index 47c381c19383b..0000000000000 --- a/vllm/block.py +++ /dev/null @@ -1,88 +0,0 @@ -"""Token blocks.""" -from typing import TYPE_CHECKING, Iterator, List, Optional - -from vllm.utils import Device - -DEFAULT_LAST_ACCESSED_TIME: float = -1 - - -class PhysicalTokenBlock: - """Represents the state of a block in the KV cache.""" - - def __init__( - self, - device: Device, - block_number: int, - block_size: int, - block_hash: int, - num_hashed_tokens: int, - ) -> None: - self.device = device - self.block_number = block_number - self.block_size = block_size - self.block_hash = block_hash - self.num_hashed_tokens = num_hashed_tokens - - self.ref_count = 0 - self.last_accessed = DEFAULT_LAST_ACCESSED_TIME - - self.computed = False - - def __repr__(self) -> str: - return (f'PhysicalTokenBlock(device={self.device}, ' - f'block_number={self.block_number}, ' - f'num_hashed_tokens={self.num_hashed_tokens}, ' - f'ref_count={self.ref_count}, ' - f'last_accessed={self.last_accessed}, ' - f'computed={self.computed})') - - -class BlockTable: - """Holds a list of blocks with caching of their associated block_ids - """ - - def __init__(self, blocks: Optional[List[PhysicalTokenBlock]] = None): - self._blocks: List[PhysicalTokenBlock] = [] - self._block_ids: List[int] = [] - - if blocks is not None: - for block in blocks: - self.append(block) - - def append(self, block: PhysicalTokenBlock): - self._blocks.append(block) - self._block_ids.append(block.block_number) - - def __len__(self) -> int: - return len(self._blocks) - - def __getitem__(self, key): - return self._blocks[key] - - if TYPE_CHECKING: - - def __iter__(self) -> Iterator[PhysicalTokenBlock]: - raise RuntimeError("Method should be automatically generated") - - def __setitem__(self, key, value): - if isinstance(key, slice): - blocks = value - self._blocks[key] = blocks - self._block_ids[key] = [b.block_number for b in blocks] - else: - block = value - self._blocks[key] = block - self._block_ids[key] = block.block_number - - def reset(self): - self._blocks = [] - self._block_ids = [] - - def copy(self) -> "BlockTable": - return BlockTable(self._blocks) - - def list(self) -> List[PhysicalTokenBlock]: - return self._blocks - - def ids(self) -> List[int]: - return self._block_ids diff --git a/vllm/core/evictor.py b/vllm/core/evictor.py index 44adc4158abec..c9306518223a3 100644 --- a/vllm/core/evictor.py +++ b/vllm/core/evictor.py @@ -13,7 +13,7 @@ class EvictionPolicy(enum.Enum): class Evictor(ABC): """The Evictor subclasses should be used by the BlockAllocator class to - handle eviction of freed PhysicalTokenBlocks. + handle eviction of freed Blocks. """ @abstractmethod @@ -70,7 +70,7 @@ class BlockMetaData: class LRUEvictor(Evictor): """Evicts in a least-recently-used order using the last_accessed timestamp - that's recorded in the PhysicalTokenBlock. If there are multiple blocks with + that's recorded in the Block. If there are multiple blocks with the same last_accessed time, then the one with the largest num_hashed_tokens will be evicted. If two blocks each have the lowest last_accessed time and highest num_hashed_tokens value, then one will be chose arbitrarily