mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-24 18:25:40 +08:00
[V0][Metrics] Deprecate some KV/prefix cache metrics (#14136)
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
This commit is contained in:
parent
c41d27156b
commit
2dfdfed8a0
@ -74,31 +74,51 @@ class Metrics:
|
||||
],
|
||||
multiprocess_mode="livemostrecent",
|
||||
)
|
||||
|
||||
# Deprecated in 0.8 - KV cache offloading is not used in V1
|
||||
# TODO: in 0.9, only enable if show_hidden_metrics=True
|
||||
self.gauge_scheduler_swapped = self._gauge_cls(
|
||||
name="vllm:num_requests_swapped",
|
||||
documentation="Number of requests swapped to CPU.",
|
||||
documentation=(
|
||||
"Number of requests swapped to CPU. "
|
||||
"DEPRECATED: KV cache offloading is not used in V1"),
|
||||
labelnames=labelnames,
|
||||
multiprocess_mode="sum")
|
||||
|
||||
# KV Cache Usage in %
|
||||
self.gauge_gpu_cache_usage = self._gauge_cls(
|
||||
name="vllm:gpu_cache_usage_perc",
|
||||
documentation="GPU KV-cache usage. 1 means 100 percent usage.",
|
||||
labelnames=labelnames,
|
||||
multiprocess_mode="sum")
|
||||
|
||||
# Deprecated in 0.8 - KV cache offloading is not used in V1
|
||||
# TODO: in 0.9, only enable if show_hidden_metrics=True
|
||||
self.gauge_cpu_cache_usage = self._gauge_cls(
|
||||
name="vllm:cpu_cache_usage_perc",
|
||||
documentation="CPU KV-cache usage. 1 means 100 percent usage.",
|
||||
documentation=(
|
||||
"CPU KV-cache usage. 1 means 100 percent usage. "
|
||||
"DEPRECATED: KV cache offloading is not used in V1"),
|
||||
labelnames=labelnames,
|
||||
multiprocess_mode="sum")
|
||||
# Prefix caching block hit rate
|
||||
|
||||
# Deprecated in 0.8 - KV cache offloading is not used in V1
|
||||
# TODO: in 0.9, only enable if show_hidden_metrics=True
|
||||
self.gauge_cpu_prefix_cache_hit_rate = self._gauge_cls(
|
||||
name="vllm:cpu_prefix_cache_hit_rate",
|
||||
documentation="CPU prefix cache block hit rate.",
|
||||
documentation=(
|
||||
"CPU prefix cache block hit rate. "
|
||||
"DEPRECATED: KV cache offloading is not used in V1"),
|
||||
labelnames=labelnames,
|
||||
multiprocess_mode="sum")
|
||||
|
||||
# Deprecated in 0.8 - replaced by queries+hits counters in V1
|
||||
# TODO: in 0.9, only enable if show_hidden_metrics=True
|
||||
self.gauge_gpu_prefix_cache_hit_rate = self._gauge_cls(
|
||||
name="vllm:gpu_prefix_cache_hit_rate",
|
||||
documentation="GPU prefix cache block hit rate.",
|
||||
documentation=("GPU prefix cache block hit rate. "
|
||||
"DEPRECATED: use vllm:gpu_prefix_cache_queries and "
|
||||
"vllm:gpu_prefix_cache_hits in V1"),
|
||||
labelnames=labelnames,
|
||||
multiprocess_mode="sum")
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user