mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 17:05:37 +08:00
761 lines
24 KiB
JSON
761 lines
24 KiB
JSON
{
|
|
"annotations": {
|
|
"list": [
|
|
{
|
|
"builtIn": 1,
|
|
"datasource": {
|
|
"type": "grafana",
|
|
"uid": "-- Grafana --"
|
|
},
|
|
"enable": true,
|
|
"hide": true,
|
|
"iconColor": "rgba(0, 211, 255, 1)",
|
|
"name": "Annotations & Alerts",
|
|
"type": "dashboard"
|
|
}
|
|
]
|
|
},
|
|
"description": "High-level overview of VLLM model deployment behavior and key performance indicators. Designed for Data Scientists and Product Managers to monitor request volume, token throughput, and latency",
|
|
"editable": true,
|
|
"fiscalYearStartMonth": 0,
|
|
"graphTooltip": 0,
|
|
"id": 47,
|
|
"links": [],
|
|
"panels": [
|
|
{
|
|
"collapsed": true,
|
|
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
|
|
"id": 20,
|
|
"panels": [],
|
|
"title": "Request Over Time",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "palette-classic" },
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": { "type": "linear" },
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": { "group": "A", "mode": "none" },
|
|
"thresholdsStyle": { "mode": "off" }
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
|
|
},
|
|
"unit": "req/s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 6, "w": 10, "x": 0, "y": 1 },
|
|
"id": 1,
|
|
"options": {
|
|
"legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
|
|
"tooltip": { "mode": "single", "sort": "none" }
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"editorMode": "code",
|
|
"expr": "sum by (model_name) (\n rate(vllm:request_success_total{model_name=~\"$Deployment_id\"}[$__rate_interval])\n)",
|
|
"interval": "1",
|
|
"legendFormat": "{{model_name}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Successful Requests Over Time",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "thresholds" },
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
|
|
},
|
|
"unit": "req/s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 3, "w": 7, "x": 10, "y": 1 },
|
|
"id": 2,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": { "calcs": ["mean"], "fields": "", "values": false },
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "sum(rate(vllm:request_success_total{model_name=~\"$Deployment_id\"}[$__rate_interval]))",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Requests Avg Rate",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "thresholds" },
|
|
"mappings": [
|
|
{ "options": { "Calcultaions": { "index": 0, "text": "Last (not null)" } }, "type": "value" }
|
|
],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
|
|
},
|
|
"unit": "ms"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 3, "w": 7, "x": 17, "y": 1 },
|
|
"id": 3,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.50, sum by(le, model_name) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=~\"$Deployment_id\"}[$__rate_interval])))",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "p50 Latency",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "thresholds" },
|
|
"mappings": [
|
|
{ "options": { "Calculation": { "index": 0, "text": "Last (not null)" } }, "type": "value" }
|
|
],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
|
|
},
|
|
"unit": "ms"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 3, "w": 7, "x": 10, "y": 4 },
|
|
"id": 4,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.90, sum by(le, model_name) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=~\"$Deployment_id\"}[$__rate_interval])))",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "p90 Latency",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "thresholds" },
|
|
"mappings": [
|
|
{ "options": { "Calculation": { "index": 0, "text": "Last (not null)" } }, "type": "value" }
|
|
],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
|
|
},
|
|
"unit": "ms"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 3, "w": 7, "x": 17, "y": 4 },
|
|
"id": 5,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.99, sum by(le, model_name) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=~\"$Deployment_id\"}[$__rate_interval])))",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "p99 Latency",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 7 },
|
|
"id": 19,
|
|
"panels": [],
|
|
"title": "Size Distribution",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "palette-classic" },
|
|
"custom": {
|
|
"fillOpacity": 80,
|
|
"gradientMode": "none",
|
|
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
|
"lineWidth": 1,
|
|
"stacking": { "group": "A", "mode": "none" }
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
|
|
},
|
|
"unit": "cps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 6, "w": 10, "x": 0, "y": 8 },
|
|
"id": 6,
|
|
"options": {
|
|
"legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
|
|
"tooltip": { "mode": "single", "sort": "none" }
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "sum by (le, model_name) (rate(vllm:request_prompt_tokens_bucket{model_name=~\"$Deployment_id\"}[$__rate_interval]))",
|
|
"legendFormat": "{{model_name}} le={{le}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Input Token Size Distribution",
|
|
"type": "histogram"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "thresholds" },
|
|
"mappings": [
|
|
{ "options": { "calculation ": { "index": 0, "text": "Last (not null)" } }, "type": "value" }
|
|
],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
|
|
},
|
|
"unit": "cps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 3, "w": 7, "x": 10, "y": 8 },
|
|
"id": 9,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.90, sum by(le, model_name) (rate(vllm:request_prompt_tokens_bucket{model_name=~\"$Deployment_id\"}[$__rate_interval])))",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Input Token Size p90",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "thresholds" },
|
|
"mappings": [
|
|
{ "options": { "Calcultion": { "index": 0, "text": "Last (not null)" } }, "type": "value" }
|
|
],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
|
|
},
|
|
"unit": "cps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 3, "w": 7, "x": 17, "y": 8 },
|
|
"id": 8,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.50, sum by(le, model_name) (rate(vllm:request_prompt_tokens_bucket{model_name=~\"$Deployment_id\"}[$__rate_interval])))",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Input Token Size p50",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "thresholds" },
|
|
"mappings": [
|
|
{ "options": { "Calcultaion": { "index": 0, "text": "mean" } }, "type": "value" }
|
|
],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
|
|
},
|
|
"unit": "cps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 3, "w": 7, "x": 10, "y": 11 },
|
|
"id": 7,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "sum(rate(vllm:prompt_tokens_total{model_name=~\"$Deployment_id\"}[$__rate_interval]))\n/\nsum(rate(vllm:request_success_total{model_name=~\"$Deployment_id\"}[$__rate_interval]))",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Input Token Size Avg",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "thresholds" },
|
|
"mappings": [
|
|
{ "options": { "Calculation": { "index": 0, "text": "Last (not null)" } }, "type": "value" }
|
|
],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
|
|
},
|
|
"unit": "cps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 3, "w": 7, "x": 17, "y": 11 },
|
|
"id": 10,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.99, sum by(le, model_name) (rate(vllm:request_prompt_tokens_bucket{model_name=~\"$Deployment_id\"}[$__rate_interval])))",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Input Token Size p99",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"collapsed": true,
|
|
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 },
|
|
"id": 18,
|
|
"panels": [],
|
|
"title": "Input Token Over Time",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "palette-classic" },
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": { "type": "linear" },
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": { "group": "A", "mode": "none" },
|
|
"thresholdsStyle": { "mode": "off" }
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
|
|
},
|
|
"unit": "cps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 6, "w": 10, "x": 0, "y": 15 },
|
|
"id": 11,
|
|
"options": {
|
|
"legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
|
|
"tooltip": { "mode": "single", "sort": "none" }
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "sum by (model_name) (rate(vllm:prompt_tokens_total{model_name=~\"$Deployment_id\"}[$__rate_interval]))",
|
|
"legendFormat": "{{model_name}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Input Tokens Over Time",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "thresholds" },
|
|
"mappings": [
|
|
{ "options": { "Calculation": { "index": 0, "text": "mean" } }, "type": "value" }
|
|
],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
|
|
},
|
|
"unit": "cps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 3, "w": 7, "x": 10, "y": 15 },
|
|
"id": 12,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "sum(rate(vllm:prompt_tokens_total{model_name=~\"$Deployment_id\"}[$__rate_interval]))",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Input Tokens/Sec Avg",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 21 },
|
|
"id": 17,
|
|
"panels": [],
|
|
"title": "Output Token Over Time",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "palette-classic" },
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": { "type": "linear" },
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": { "group": "A", "mode": "none" },
|
|
"thresholdsStyle": { "mode": "off" }
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
|
|
},
|
|
"unit": "cps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 6, "w": 10, "x": 0, "y": 22 },
|
|
"id": 13,
|
|
"options": {
|
|
"legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
|
|
"tooltip": { "mode": "single", "sort": "none" }
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "sum by (model_name) (rate(vllm:generation_tokens_total{model_name=~\"$Deployment_id\"}[$__rate_interval]))",
|
|
"legendFormat": "{{model_name}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Output Tokens Over Time",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "thresholds" },
|
|
"mappings": [
|
|
{ "options": { "Calculation": { "index": 0, "text": "mean" } }, "type": "value" }
|
|
],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
|
|
},
|
|
"unit": "cps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": { "h": 3, "w": 7, "x": 10, "y": 22 },
|
|
"id": 14,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "sum(rate(vllm:generation_tokens_total{model_name=~\"$Deployment_id\"}[$__rate_interval]))",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Output Tokens/Sec Avg",
|
|
"type": "stat"
|
|
}
|
|
],
|
|
"preload": false,
|
|
"schemaVersion": 40,
|
|
"tags": [],
|
|
"templating": {
|
|
"list": [
|
|
{
|
|
"current": { "text": "Prometheus", "value": "4184fc20-68a7-483a-8d9b-7caa59c680dd" },
|
|
"label": "datasource",
|
|
"name": "DS_PROMETHEUS",
|
|
"options": [],
|
|
"query": "prometheus",
|
|
"refresh": 1,
|
|
"type": "datasource"
|
|
},
|
|
{
|
|
"current": { "text": ["All"], "value": ["$__all"] },
|
|
"definition": "label_values(vllm:request_success_total,model_name)",
|
|
"includeAll": true,
|
|
"label": "Deployment_ID",
|
|
"multi": true,
|
|
"name": "Deployment_id",
|
|
"options": [],
|
|
"query": {
|
|
"qryType": 1,
|
|
"query": "label_values(vllm:request_success_total,model_name)",
|
|
"refId": "PrometheusVariableQueryEditor-VariableQuery"
|
|
},
|
|
"refresh": 1,
|
|
"regex": "",
|
|
"sort": 1,
|
|
"type": "query"
|
|
},
|
|
{
|
|
"current": { "text": "All hours", "value": "All hours" },
|
|
"hide": 2,
|
|
"label": "Rush Hours Only",
|
|
"name": "rush_hours",
|
|
"options": [
|
|
{ "selected": true, "text": "false", "value": "All hours" },
|
|
{ "selected": false, "text": "true", "value": "Rush hours" }
|
|
],
|
|
"query": "false : All hours, true : Rush hours",
|
|
"type": "custom"
|
|
},
|
|
{
|
|
"current": { "text": "All", "value": "All" },
|
|
"hide": 2,
|
|
"label": "Rush Hours Type",
|
|
"name": "rush_hours_type",
|
|
"options": [
|
|
{ "selected": true, "text": "^All__.*$", "value": "All" },
|
|
{ "selected": false, "text": "^Static__.*$", "value": "Static" },
|
|
{ "selected": false, "text": "^Dynamic__.*$", "value": "Dynamic" }
|
|
],
|
|
"query": "^All__.*$ : All, ^Static__.*$ : Static, ^Dynamic__.*$ : Dynamic",
|
|
"type": "custom"
|
|
},
|
|
{
|
|
"current": { "text": "", "value": "" },
|
|
"hide": 2,
|
|
"name": "query0",
|
|
"options": [],
|
|
"query": "",
|
|
"refresh": 1,
|
|
"regex": "",
|
|
"type": "query"
|
|
}
|
|
]
|
|
},
|
|
"time": { "from": "now-12h", "to": "now" },
|
|
"timepicker": {},
|
|
"timezone": "browser",
|
|
"title": "Query Statistics_New4",
|
|
"uid": "query-statistics4",
|
|
"version": 2,
|
|
"weekStart": ""
|
|
}
|
|
|