mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-24 22:55:44 +08:00
[MISC] rename interval to max_recent_requests (#14285)
This commit is contained in:
parent
4d17e20310
commit
fd11a325b8
@ -310,7 +310,7 @@ def test_metrics():
|
||||
def stats(requests, queries, hits):
|
||||
return PrefixCacheStats(requests=requests, queries=queries, hits=hits)
|
||||
|
||||
metrics = PrefixCachingMetrics(interval=5)
|
||||
metrics = PrefixCachingMetrics(max_recent_requests=5)
|
||||
assert metrics.hit_rate == 0.0
|
||||
|
||||
metrics.observe(stats(1, 20, 9))
|
||||
|
||||
@ -47,15 +47,15 @@ NONE_HASH = int.from_bytes(os.urandom(32), byteorder="big") if os.getenv(
|
||||
|
||||
|
||||
class PrefixCachingMetrics:
|
||||
"""Metrics for prefix caching with a hit rate of the most recent N requests.
|
||||
"""Metrics for prefix caching with a hit rate of the max recent N requests.
|
||||
|
||||
Args:
|
||||
interval: The number of the most recent requests to aggregate.
|
||||
max_recent_requests: The maximum number of recent requests to aggregate.
|
||||
Defaults to 1000.
|
||||
"""
|
||||
|
||||
def __init__(self, interval: int = 1000):
|
||||
self.interval = interval
|
||||
def __init__(self, max_recent_requests: int = 1000):
|
||||
self.max_recent_requests = max_recent_requests
|
||||
# The current aggregated values.
|
||||
self.aggregated_requests = 0
|
||||
self.aggregated_query_total = 0
|
||||
@ -70,7 +70,7 @@ class PrefixCachingMetrics:
|
||||
are being scheduled and are looking for computed blocks.
|
||||
|
||||
When there are more than `max_recent_requests` requests, the oldest set of
|
||||
requestsare removed from the metrics.
|
||||
requests are removed from the metrics.
|
||||
|
||||
Args:
|
||||
stats: The prefix cache stats.
|
||||
@ -87,7 +87,7 @@ class PrefixCachingMetrics:
|
||||
self.aggregated_query_hit += stats.hits
|
||||
|
||||
# Remove the oldest stats if the number of requests exceeds the limit.
|
||||
if self.aggregated_requests > self.interval:
|
||||
if self.aggregated_requests > self.max_recent_requests:
|
||||
old_requests, old_queries, old_hits = self.query_queue.popleft()
|
||||
self.aggregated_requests -= old_requests
|
||||
self.aggregated_query_total -= old_queries
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user