mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 22:42:47 +08:00
[Bugfix] Fix incorrect kv cache metrics in grafana.json (#27133)
Signed-off-by: Fangping Shi <fangping_shi@apple.com> Co-authored-by: Fangping Shi <fangping_shi@apple.com>
This commit is contained in:
parent
6738e4a093
commit
7e0941055f
@ -530,7 +530,7 @@ spec:
|
||||
name: accelerators-thanos-querier-datasource
|
||||
# Multiply by 100 so we can read it as a percentage without setting a unit (avoids CUE unit conflicts)
|
||||
query: >
|
||||
100 * avg(vllm:gpu_cache_usage_perc)
|
||||
100 * avg(vllm:kv_cache_usage_perc)
|
||||
|
||||
"18":
|
||||
kind: Panel
|
||||
|
||||
@ -98,7 +98,7 @@ spec:
|
||||
kind: PrometheusTimeSeriesQuery
|
||||
spec:
|
||||
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
|
||||
query: avg(vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) or vector(0)
|
||||
query: avg(vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) or vector(0)
|
||||
minStep: "15s"
|
||||
|
||||
core_running_ts:
|
||||
@ -168,7 +168,7 @@ spec:
|
||||
spec:
|
||||
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
|
||||
# multiply by 100 to present percentage; omit format.unit to avoid schema conflicts
|
||||
query: (avg(vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
|
||||
query: (avg(vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
|
||||
minStep: "15s"
|
||||
|
||||
core_kv_usage_pct_ts:
|
||||
@ -187,7 +187,7 @@ spec:
|
||||
kind: PrometheusTimeSeriesQuery
|
||||
spec:
|
||||
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
|
||||
query: (avg by (service) (vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
|
||||
query: (avg by (service) (vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
|
||||
minStep: "15s"
|
||||
|
||||
# --- Per-Pod breakdowns (works on Simulator & Real) ---
|
||||
@ -246,7 +246,7 @@ spec:
|
||||
spec:
|
||||
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
|
||||
# if your exporter labels kv metric with pod (the sim does), this works; otherwise it will just return empty
|
||||
query: (avg by (pod) (vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
|
||||
query: (avg by (pod) (vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
|
||||
minStep: "15s"
|
||||
|
||||
# --- Real vLLM only (zeros on simulator) ---
|
||||
|
||||
@ -852,7 +852,7 @@
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "vllm:gpu_cache_usage_perc{model_name=\"$model_name\"}",
|
||||
"expr": "vllm:kv_cache_usage_perc{model_name=\"$model_name\"}",
|
||||
"instant": false,
|
||||
"legendFormat": "GPU Cache Usage",
|
||||
"range": true,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user