mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 03:44:58 +08:00
1405 lines
35 KiB
JSON
1405 lines
35 KiB
JSON
{
|
|
"annotations": {
|
|
"list": [
|
|
{
|
|
"builtIn": 1,
|
|
"datasource": {
|
|
"type": "grafana",
|
|
"uid": "-- Grafana --"
|
|
},
|
|
"enable": true,
|
|
"hide": true,
|
|
"iconColor": "rgba(0, 211, 255, 1)",
|
|
"name": "Annotations & Alerts",
|
|
"type": "dashboard"
|
|
}
|
|
]
|
|
},
|
|
"editable": true,
|
|
"fiscalYearStartMonth": 0,
|
|
"graphTooltip": 0,
|
|
"id": 26,
|
|
"links": [],
|
|
"panels": [
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 0
|
|
},
|
|
"id": 9,
|
|
"panels": [],
|
|
"title": "Graph: E2E latency over time ",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "End-to-End latency of requests, showing average and key percentiles over time.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "Latency",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 18,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": true,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"decimals": 2,
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 1
|
|
},
|
|
"id": 1,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "rate(vllm:e2e_request_latency_seconds_sum[$__interval]) / rate(vllm:e2e_request_latency_seconds_count[$__interval])",
|
|
"format": "table",
|
|
"legendFormat": "E2E Latency",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "E2E Latency over Time",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "99th percentile of End-to-End request latency over the selected time range.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 2,
|
|
"displayName": "P99",
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 6,
|
|
"x": 12,
|
|
"y": 1
|
|
},
|
|
"id": 5,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__range])))",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "E2E Latency (P99)",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "90th percentile of End-to-End request latency over the selected time range.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 2,
|
|
"displayName": "P90",
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 6,
|
|
"x": 18,
|
|
"y": 1
|
|
},
|
|
"id": 4,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.90, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__range])))",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "E2E Latency (P90)",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "Average End-to-End request latency over the selected time range.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 2,
|
|
"displayName": "Average",
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 6,
|
|
"x": 12,
|
|
"y": 5
|
|
},
|
|
"id": 2,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "(sum(increase(vllm:e2e_request_latency_seconds_sum[$__range])) / sum(increase(vllm:e2e_request_latency_seconds_count[$__range])))",
|
|
"legendFormat": "Average E2E Latency",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "E2E Latency (Avg)",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "50th percentile (median) of End-to-End request latency over the selected time range.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 2,
|
|
"displayName": "P50",
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 6,
|
|
"x": 18,
|
|
"y": 5
|
|
},
|
|
"id": 3,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.50, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__range])))",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "E2E Latency (P50)",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 9
|
|
},
|
|
"id": 8,
|
|
"panels": [],
|
|
"title": "Graph: TTFT(Time To First Token) over time ",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "Time to first token (TTFT) latency, showing average and key percentiles over time.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "Latency",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 18,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"decimals": 2,
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 10
|
|
},
|
|
"id": 10,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "rate(vllm:time_to_first_token_seconds_sum[$__interval]) / rate(vllm:time_to_first_token_seconds_count[$__interval])",
|
|
"format": "table",
|
|
"legendFormat": "TTFT (Avg)",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "TTFT Over Time",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "99th percentile of Time To First Token latency over the selected time range.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 2,
|
|
"displayName": "P99",
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 6,
|
|
"x": 12,
|
|
"y": 10
|
|
},
|
|
"id": 14,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__range])))",
|
|
"legendFormat": "TTFT (p99)",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "TTFT (P99)",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "90th percentile of Time To First Token latency over the selected time range.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 2,
|
|
"displayName": "P90",
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 6,
|
|
"x": 18,
|
|
"y": 10
|
|
},
|
|
"id": 13,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.90, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__range])))",
|
|
"legendFormat": "TTFT (p90)",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "TTFT (P90)",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "Average Time To First Token latency over the selected time range.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 2,
|
|
"displayName": "Average",
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 6,
|
|
"x": 12,
|
|
"y": 14
|
|
},
|
|
"id": 11,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "(sum(increase(vllm:time_to_first_token_seconds_sum[$__range])) / sum(increase(vllm:time_to_first_token_seconds_count[$__range])))",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "TTFT (Avg)",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "50th percentile (median) of Time To First Token latency over the selected time range.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"displayName": "P50",
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 6,
|
|
"x": 18,
|
|
"y": 14
|
|
},
|
|
"id": 12,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orietitletChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.50, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__range])))",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "TTFT (P50)",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 18
|
|
},
|
|
"id": 7,
|
|
"panels": [],
|
|
"title": "ITL (Iteration Latency / Time Per Output Token) over time.",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "Iteration latency, or average time taken to generate a single output token, with percentiles.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "Latency",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 17,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"decimals": 2,
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 19
|
|
},
|
|
"id": 15,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "rate(vllm:time_per_output_token_seconds_sum[$__interval]) / rate(vllm:time_per_output_token_seconds_count[$__interval])",
|
|
"legendFormat": "ITL (Avg)",
|
|
"range": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.50, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__interval])))",
|
|
"hide": false,
|
|
"instant": false,
|
|
"legendFormat": "ITL (p50)",
|
|
"range": true,
|
|
"refId": "B"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.90, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__interval])))",
|
|
"hide": false,
|
|
"instant": false,
|
|
"legendFormat": "ITL (p90)",
|
|
"range": true,
|
|
"refId": "C"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__interval])))",
|
|
"hide": false,
|
|
"instant": false,
|
|
"legendFormat": "ITL (p99)",
|
|
"range": true,
|
|
"refId": "D"
|
|
}
|
|
],
|
|
"title": "ITL (Time Per Output Token) Over Time",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "90th percentile of Iteration Latency over the selected time range.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 2,
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 6,
|
|
"x": 12,
|
|
"y": 19
|
|
},
|
|
"id": 18,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.90, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__range])))",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "ITL (P90)",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "99th percentile of Iteration Latency over the selected time range.\n\n",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 2,
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 6,
|
|
"x": 18,
|
|
"y": 19
|
|
},
|
|
"id": 19,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__range])))",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "ITL (P99)",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "Average Iteration Latency (time per output token) over the selected time range.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 2,
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 6,
|
|
"x": 12,
|
|
"y": 23
|
|
},
|
|
"id": 16,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "(sum(increase(vllm:time_per_output_token_seconds_sum[$__range])) / sum(increase(vllm:time_per_output_token_seconds_count[$__range])))",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "ITL (Avg)",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "50th percentile (median) of Iteration Latency over the selected time range.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 2,
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 6,
|
|
"x": 18,
|
|
"y": 23
|
|
},
|
|
"id": 17,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "histogram_quantile(0.50, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__range])))",
|
|
"legendFormat": "__auto",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "ITL (P50)",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 27
|
|
},
|
|
"id": 6,
|
|
"panels": [],
|
|
"title": "TPS (Tokens Per Second)",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "Rate of tokens processed per second, including prompt and generation phases.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "tokens/sec (tps)"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 28
|
|
},
|
|
"id": 20,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "11.3.0",
|
|
"targets": [
|
|
{
|
|
"editorMode": "code",
|
|
"expr": "rate(vllm:generation_tokens_total[$__interval])",
|
|
"legendFormat": "Generation TPS",
|
|
"range": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "rate(vllm:prompt_tokens_total[$__interval])",
|
|
"hide": false,
|
|
"instant": false,
|
|
"legendFormat": "Prompt TPS",
|
|
"range": true,
|
|
"refId": "B"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "rate(vllm:iteration_tokens_total_count[$__interval])",
|
|
"hide": false,
|
|
"instant": false,
|
|
"legendFormat": "Overall Iteration TPS",
|
|
"range": true,
|
|
"refId": "C"
|
|
}
|
|
],
|
|
"title": "TPS (Tokens Per Second) Over Time",
|
|
"type": "timeseries"
|
|
}
|
|
],
|
|
"preload": false,
|
|
"schemaVersion": 40,
|
|
"tags": [],
|
|
"templating": {
|
|
"list": [
|
|
{
|
|
"name": "DS_PROMETHEUS",
|
|
"type": "datasource",
|
|
"label": "datasource",
|
|
"query": "prometheus",
|
|
"refresh": 1,
|
|
"current": {
|
|
"text": "Prometheus",
|
|
"value": "prometheus"
|
|
}
|
|
},
|
|
{
|
|
"current": {
|
|
"text": "avg : Average\n0.50 : P50\n0.90 : P90\n0.99 : P99\n0.999 : Max (Approx)",
|
|
"value": "avg : Average\n0.50 : P50\n0.90 : P90\n0.99 : P99\n0.999 : Max (Approx)"
|
|
},
|
|
"label": "Aggregation",
|
|
"name": "agg_method",
|
|
"options": [
|
|
{
|
|
"selected": true,
|
|
"text": "avg : Average\n0.50 : P50\n0.90 : P90\n0.99 : P99\n0.999 : Max (Approx)",
|
|
"value": "avg : Average\n0.50 : P50\n0.90 : P90\n0.99 : P99\n0.999 : Max (Approx)"
|
|
}
|
|
],
|
|
"query": "avg : Average\n0.50 : P50\n0.90 : P90\n0.99 : P99\n0.999 : Max (Approx)",
|
|
"type": "custom"
|
|
},
|
|
{
|
|
"current": {
|
|
"text": [
|
|
"granite-33-2b-instruct"
|
|
],
|
|
"value": [
|
|
"granite-33-2b-instruct"
|
|
]
|
|
},
|
|
"definition": "label_values(vllm:generation_tokens_total,model_name)",
|
|
"includeAll": true,
|
|
"label": "Deployment_ID",
|
|
"multi": true,
|
|
"name": "Deployment_id",
|
|
"options": [],
|
|
"query": {
|
|
"qryType": 1,
|
|
"query": "label_values(vllm:generation_tokens_total,model_name)",
|
|
"refId": "PrometheusVariableQueryEditor-VariableQuery"
|
|
},
|
|
"refresh": 1,
|
|
"regex": "",
|
|
"type": "query"
|
|
}
|
|
]
|
|
},
|
|
"time": {
|
|
"from": "now-12h",
|
|
"to": "now"
|
|
},
|
|
"timezone": "browser",
|
|
"uid": "performance-statistics",
|
|
"title": "Performance Statistics",
|
|
"version": 40,
|
|
"weekStart": ""
|
|
} |