mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 08:56:02 +08:00
[Bugfix] Fix incorrect kv cache metrics in grafana.json (#27133)
Signed-off-by: Fangping Shi <fangping_shi@apple.com>
Co-authored-by: Fangping Shi <fangping_shi@apple.com>
This commit is contained in:
parent
6738e4a093
commit
7e0941055f
@@ -530,7 +530,7 @@ spec:
|
|||||||
name: accelerators-thanos-querier-datasource
|
name: accelerators-thanos-querier-datasource
|
||||||
# Multiply by 100 so we can read it as a percentage without setting a unit (avoids CUE unit conflicts)
|
# Multiply by 100 so we can read it as a percentage without setting a unit (avoids CUE unit conflicts)
|
||||||
query: >
|
query: >
|
||||||
100 * avg(vllm:gpu_cache_usage_perc)
|
100 * avg(vllm:kv_cache_usage_perc)
|
||||||
|
|
||||||
"18":
|
"18":
|
||||||
kind: Panel
|
kind: Panel
|
||||||
|
|||||||
@@ -98,7 +98,7 @@ spec:
|
|||||||
kind: PrometheusTimeSeriesQuery
|
kind: PrometheusTimeSeriesQuery
|
||||||
spec:
|
spec:
|
||||||
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
|
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
|
||||||
query: avg(vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) or vector(0)
|
query: avg(vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) or vector(0)
|
||||||
minStep: "15s"
|
minStep: "15s"
|
||||||
|
|
||||||
core_running_ts:
|
core_running_ts:
|
||||||
@@ -168,7 +168,7 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
|
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
|
||||||
# multiply by 100 to present percentage; omit format.unit to avoid schema conflicts
|
# multiply by 100 to present percentage; omit format.unit to avoid schema conflicts
|
||||||
query: (avg(vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
|
query: (avg(vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
|
||||||
minStep: "15s"
|
minStep: "15s"
|
||||||
|
|
||||||
core_kv_usage_pct_ts:
|
core_kv_usage_pct_ts:
|
||||||
@@ -187,7 +187,7 @@ spec:
|
|||||||
kind: PrometheusTimeSeriesQuery
|
kind: PrometheusTimeSeriesQuery
|
||||||
spec:
|
spec:
|
||||||
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
|
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
|
||||||
query: (avg by (service) (vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
|
query: (avg by (service) (vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
|
||||||
minStep: "15s"
|
minStep: "15s"
|
||||||
|
|
||||||
# --- Per-Pod breakdowns (works on Simulator & Real) ---
|
# --- Per-Pod breakdowns (works on Simulator & Real) ---
|
||||||
@@ -246,7 +246,7 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
|
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
|
||||||
# if your exporter labels kv metric with pod (the sim does), this works; otherwise it will just return empty
|
# if your exporter labels kv metric with pod (the sim does), this works; otherwise it will just return empty
|
||||||
query: (avg by (pod) (vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
|
query: (avg by (pod) (vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
|
||||||
minStep: "15s"
|
minStep: "15s"
|
||||||
|
|
||||||
# --- Real vLLM only (zeros on simulator) ---
|
# --- Real vLLM only (zeros on simulator) ---
|
||||||
|
|||||||
@@ -852,7 +852,7 @@
|
|||||||
"uid": "${DS_PROMETHEUS}"
|
"uid": "${DS_PROMETHEUS}"
|
||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"expr": "vllm:gpu_cache_usage_perc{model_name=\"$model_name\"}",
|
"expr": "vllm:kv_cache_usage_perc{model_name=\"$model_name\"}",
|
||||||
"instant": false,
|
"instant": false,
|
||||||
"legendFormat": "GPU Cache Usage",
|
"legendFormat": "GPU Cache Usage",
|
||||||
"range": true,
|
"range": true,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user