[Bugfix] Fix incorrect kv cache metrics in grafana.json (#27133)

Signed-off-by: Fangping Shi <fangping_shi@apple.com>
Co-authored-by: Fangping Shi <fangping_shi@apple.com>
This commit is contained in:
fangpings 2025-10-22 20:58:36 -07:00 committed by GitHub
parent 6738e4a093
commit 7e0941055f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 6 additions and 6 deletions

View File

@ -530,7 +530,7 @@ spec:
name: accelerators-thanos-querier-datasource
# Multiply by 100 so we can read it as a percentage without setting a unit (avoids CUE unit conflicts)
query: >
100 * avg(vllm:gpu_cache_usage_perc)
100 * avg(vllm:kv_cache_usage_perc)
"18":
kind: Panel

View File

@ -98,7 +98,7 @@ spec:
kind: PrometheusTimeSeriesQuery
spec:
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
query: avg(vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) or vector(0)
query: avg(vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) or vector(0)
minStep: "15s"
core_running_ts:
@ -168,7 +168,7 @@ spec:
spec:
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
# multiply by 100 to present percentage; omit format.unit to avoid schema conflicts
query: (avg(vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
query: (avg(vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
minStep: "15s"
core_kv_usage_pct_ts:
@ -187,7 +187,7 @@ spec:
kind: PrometheusTimeSeriesQuery
spec:
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
query: (avg by (service) (vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
query: (avg by (service) (vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
minStep: "15s"
# --- Per-Pod breakdowns (works on Simulator & Real) ---
@ -246,7 +246,7 @@ spec:
spec:
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
# if your exporter labels kv metric with pod (the sim does), this works; otherwise it will just return empty
query: (avg by (pod) (vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
query: (avg by (pod) (vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
minStep: "15s"
# --- Real vLLM only (zeros on simulator) ---

View File

@ -852,7 +852,7 @@
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "vllm:gpu_cache_usage_perc{model_name=\"$model_name\"}",
"expr": "vllm:kv_cache_usage_perc{model_name=\"$model_name\"}",
"instant": false,
"legendFormat": "GPU Cache Usage",
"range": true,