mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-12 14:47:05 +08:00
- **Add SPDX license headers to python source files**
- **Check for SPDX headers using pre-commit**
commit 9d7ef44c3cfb72ca4c32e1c677d99259d10d4745
Author: Russell Bryant <rbryant@redhat.com>
Date: Fri Jan 31 14:18:24 2025 -0500
Add SPDX license headers to python source files
This commit adds SPDX license headers to python source files as recommended
to the project by the Linux Foundation. These headers provide a concise way,
both human and machine readable, of communicating license information for
each source file. This helps avoid any ambiguity about the license of the
code and can also be easily used by tools to help manage license compliance.
The Linux Foundation runs license scans against the codebase to help ensure
we are in compliance with the licenses of the code we use, including
dependencies. Having these headers in place helps that tool do its job.
More information can be found on the SPDX site:
- https://spdx.dev/learn/handling-license-info/
Signed-off-by: Russell Bryant <rbryant@redhat.com>
commit 5a1cf1cb3b80759131c73f6a9dddebccac039dea
Author: Russell Bryant <rbryant@redhat.com>
Date: Fri Jan 31 14:36:32 2025 -0500
Check for SPDX headers using pre-commit
Signed-off-by: Russell Bryant <rbryant@redhat.com>
---------
Signed-off-by: Russell Bryant <rbryant@redhat.com>
157 lines
5.3 KiB
Python
157 lines
5.3 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
import enum
|
|
import heapq
|
|
from abc import ABC, abstractmethod
|
|
from typing import Dict, List, Tuple
|
|
|
|
|
|
class EvictionPolicy(enum.Enum):
    """Selects which Evictor subclass make_evictor should instantiate."""

    # Least-recently-used eviction; make_evictor maps this to LRUEvictor.
    LRU = enum.auto()
|
|
|
|
|
|
class Evictor(ABC):
    """Interface for the pool of freed blocks that may be evicted.

    The Evictor subclasses should be used by the BlockAllocator class to
    handle eviction of freed Blocks.
    """

    @abstractmethod
    def __init__(self):
        """Set up whatever bookkeeping the concrete eviction policy needs."""

    @abstractmethod
    def __contains__(self, block_id: int) -> bool:
        """Return True if ``block_id`` is currently held by the evictor."""

    @abstractmethod
    def evict(self) -> Tuple[int, int]:
        """Run the eviction algorithm and release one block.

        Returns:
            A ``(block_id, content_hash)`` tuple: the evicted block's
            physical block id followed by its content hash.

        Raises:
            Implementation-defined when no block can be evicted
            (LRUEvictor raises ValueError).
        """

    @abstractmethod
    def add(self, block_id: int, content_hash: int, num_hashed_tokens: int,
            last_accessed: float):
        """Add a block to the evictor, making it a candidate for eviction.

        Args:
            block_id: Physical id of the block.
            content_hash: Content hash of the block.
            num_hashed_tokens: Token count recorded with the block;
                LRUEvictor uses it to break ties between equal timestamps.
            last_accessed: Timestamp of the block's most recent access.
        """

    @abstractmethod
    def update(self, block_id: int, last_accessed: float):
        """Update the corresponding block's access time in the metadata."""

    @abstractmethod
    def remove(self, block_id: int):
        """Remove a given block id from the evictor without evicting it."""

    @property
    @abstractmethod
    def num_blocks(self) -> int:
        """Number of blocks currently held by the evictor."""
|
|
|
|
|
|
class BlockMetaData:
    """Key data describing a cached block, which the evictor uses to decide
    which block to choose for eviction.

    Instances are stored in a dict keyed by physical block id (see
    LRUEvictor.free_table): there may be several blocks with the same
    content hash, but their physical id is unique.
    """

    def __init__(self, content_hash: int, num_hashed_tokens: int,
                 last_accessed: float):
        # Timestamp of the most recent access; the primary eviction key.
        self.last_accessed: float = last_accessed
        # Token count recorded with the block; used as a tie-breaker.
        self.num_hashed_tokens: int = num_hashed_tokens
        # Hash of the block's content; not unique across physical blocks.
        self.content_hash: int = content_hash
|
|
|
|
|
|
class LRUEvictor(Evictor):
    """Evicts in a least-recently-used order using the last_accessed timestamp
    that's recorded for each block. If there are multiple blocks with the
    same last_accessed time, then the one with the largest num_hashed_tokens
    will be evicted. If two blocks each have the lowest last_accessed time and
    highest num_hashed_tokens value, then one will be chosen arbitrarily.

    Bookkeeping is split between ``free_table`` (the authoritative
    block_id -> BlockMetaData mapping) and ``priority_queue`` (a heapq
    min-heap that orders eviction candidates). Heap entries are never deleted
    in place: entries that no longer match ``free_table`` are skipped lazily
    in ``evict`` and dropped in bulk by ``_cleanup``.
    """

    # CLEANUP_THRESHOLD determines the maximum allowable size of the priority
    # queue relative to the free table size. When this threshold is exceeded,
    # a cleanup operation is triggered to reduce memory usage.
    CLEANUP_THRESHOLD = 50

    def __init__(self):
        # Every block currently eligible for eviction, keyed by block id.
        self.free_table: Dict[int, BlockMetaData] = {}
        # Min-heap of (last_accessed, -num_hashed_tokens, block_id,
        # content_hash). num_hashed_tokens is negated so that, for equal
        # timestamps, the block with the most hashed tokens pops first.
        self.priority_queue: List[Tuple[float, int, int, int]] = []

    def __contains__(self, block_id: int) -> bool:
        """Return True if ``block_id`` is currently in the free table."""
        return block_id in self.free_table

    def evict(self) -> Tuple[int, int]:
        """Evict the least recently used block.

        Returns:
            ``(block_id, content_hash)`` of the evicted block.

        Raises:
            ValueError: If there is no block left to evict.
        """
        if not self.free_table:
            raise ValueError("No usable cache memory left")

        while self.priority_queue:
            # Outdated entries are not removed from the priority queue when a
            # block is updated or removed. They are filtered out here: an
            # entry is outdated if its block is no longer in the free table
            # or its timestamp differs from the block's current one.
            last_accessed, _, block_id, _ = heapq.heappop(self.priority_queue)
            block = self.free_table.get(block_id)
            if block is not None and block.last_accessed == last_accessed:
                del self.free_table[block_id]
                # Report the hash from the free table, not the heap entry: a
                # stale entry left by remove() + add() with the same
                # timestamp may still carry the previous content hash.
                return block_id, block.content_hash

        raise ValueError("No usable cache memory left")

    def add(self, block_id: int, content_hash: int, num_hashed_tokens: int,
            last_accessed: float):
        """Add a block to the evictor, making it a candidate for eviction.

        Re-adding a block id that is already present replaces its metadata.
        """
        self.free_table[block_id] = BlockMetaData(content_hash,
                                                  num_hashed_tokens,
                                                  last_accessed)
        heapq.heappush(
            self.priority_queue,
            (last_accessed, -num_hashed_tokens, block_id, content_hash))
        self._cleanup_if_necessary()

    def update(self, block_id: int, last_accessed: float):
        """Update the access time of a block held by the evictor.

        Raises:
            KeyError: If ``block_id`` is not in the evictor.
        """
        block = self.free_table[block_id]
        block.last_accessed = last_accessed
        # Push an entry carrying the new timestamp. Without it the block's
        # only heap entry would be outdated, evict() would skip it, and the
        # block could not be evicted until the next heap rebuild.
        heapq.heappush(self.priority_queue,
                       (last_accessed, -block.num_hashed_tokens, block_id,
                        block.content_hash))
        self._cleanup_if_necessary()

    def _cleanup_if_necessary(self):
        """Rebuild the heap once outdated entries make it too large."""
        if len(self.priority_queue) > self.CLEANUP_THRESHOLD * len(
                self.free_table):
            self._cleanup()

    def _cleanup(self):
        """Rebuild the heap from the free table, dropping outdated entries."""
        new_priority_queue: List[Tuple[float, int, int, int]] = [
            (block.last_accessed, -block.num_hashed_tokens, block_id,
             block.content_hash)
            for block_id, block in self.free_table.items()
        ]
        heapq.heapify(new_priority_queue)

        self.priority_queue = new_priority_queue

    def remove(self, block_id: int):
        """Remove a block from the evictor without evicting it.

        The block's heap entries are left in place and discarded lazily.

        Raises:
            ValueError: If ``block_id`` is not in the evictor.
        """
        if block_id not in self.free_table:
            raise ValueError(
                "Attempting to remove block that's not in the evictor")
        self.free_table.pop(block_id)

    @property
    def num_blocks(self) -> int:
        """Number of blocks currently available for eviction."""
        return len(self.free_table)
|
|
|
|
|
|
def make_evictor(eviction_policy: EvictionPolicy) -> Evictor:
    """Instantiate the Evictor subclass that implements ``eviction_policy``.

    Raises:
        ValueError: If ``eviction_policy`` is not a supported policy.
    """
    if eviction_policy == EvictionPolicy.LRU:
        return LRUEvictor()

    # Any other value falls through: no other policy is implemented.
    raise ValueError(f"Unknown cache eviction policy: {eviction_policy}")
|