{
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": { "type": "grafana", "uid": "-- Grafana --" },
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "description": "High-level overview of vLLM model deployment behavior and key performance indicators. Designed for Data Scientists and Product Managers to monitor request volume, token throughput, and latency.",
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 0,
  "id": 47,
  "links": [],
  "panels": [
    {
      "collapsed": false,
      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
      "id": 20,
      "panels": [],
      "title": "Request Over Time",
      "type": "row"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "barWidthFactor": 0.6,
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": { "type": "linear" },
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
          },
          "unit": "req/s"
        },
        "overrides": []
      },
      "gridPos": { "h": 6, "w": 10, "x": 0, "y": 1 },
      "id": 1,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "pluginVersion": "11.3.0",
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
          "editorMode": "code",
          "expr": "sum by (model_name) (\n  rate(vllm:request_success_total{model_name=~\"$Deployment_id\"}[$__rate_interval])\n)",
          "interval": "1",
          "legendFormat": "{{model_name}}",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "Successful Requests Over Time",
      "type": "timeseries"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
          },
          "unit": "req/s"
        },
        "overrides": []
      },
      "gridPos": { "h": 3, "w": 7, "x": 10, "y": 1 },
      "id": 2,
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "percentChangeColorMode": "standard",
        "reduceOptions": { "calcs": ["mean"], "fields": "", "values": false },
        "showPercentChange": false,
        "textMode": "auto",
        "wideLayout": true
      },
      "pluginVersion": "11.3.0",
      "targets": [
        {
          "editorMode": "code",
          "expr": "sum(rate(vllm:request_success_total{model_name=~\"$Deployment_id\"}[$__rate_interval]))",
          "legendFormat": "__auto",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "Requests Avg Rate",
      "type": "stat"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "mappings": [
            {
              "options": { "Calculation": { "index": 0, "text": "Last (not null)" } },
              "type": "value"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": { "h": 3, "w": 7, "x": 17, "y": 1 },
      "id": 3,
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "percentChangeColorMode": "standard",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "showPercentChange": false,
        "textMode": "auto",
        "wideLayout": true
      },
      "pluginVersion": "11.3.0",
      "targets": [
        {
          "editorMode": "code",
          "expr": "histogram_quantile(0.50, sum by(le, model_name) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=~\"$Deployment_id\"}[$__rate_interval])))",
          "legendFormat": "__auto",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "p50 Latency",
      "type": "stat"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "mappings": [
            {
              "options": { "Calculation": { "index": 0, "text": "Last (not null)" } },
              "type": "value"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": { "h": 3, "w": 7, "x": 10, "y": 4 },
      "id": 4,
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "percentChangeColorMode": "standard",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "showPercentChange": false,
        "textMode": "auto",
        "wideLayout": true
      },
      "pluginVersion": "11.3.0",
      "targets": [
        {
          "editorMode": "code",
          "expr": "histogram_quantile(0.90, sum by(le, model_name) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=~\"$Deployment_id\"}[$__rate_interval])))",
          "legendFormat": "__auto",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "p90 Latency",
      "type": "stat"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "mappings": [
            {
              "options": { "Calculation": { "index": 0, "text": "Last (not null)" } },
              "type": "value"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": { "h": 3, "w": 7, "x": 17, "y": 4 },
      "id": 5,
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "percentChangeColorMode": "standard",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "showPercentChange": false,
        "textMode": "auto",
        "wideLayout": true
      },
      "pluginVersion": "11.3.0",
      "targets": [
        {
          "editorMode": "code",
          "expr": "histogram_quantile(0.99, sum by(le, model_name) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=~\"$Deployment_id\"}[$__rate_interval])))",
          "legendFormat": "__auto",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "p99 Latency",
      "type": "stat"
    },
    {
      "collapsed": false,
      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 7 },
      "id": 19,
      "panels": [],
      "title": "Size Distribution",
      "type": "row"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "fillOpacity": 80,
            "gradientMode": "none",
            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
            "lineWidth": 1,
            "stacking": { "group": "A", "mode": "none" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
          },
          "unit": "cps"
        },
        "overrides": []
      },
      "gridPos": { "h": 6, "w": 10, "x": 0, "y": 8 },
      "id": 6,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "pluginVersion": "11.3.0",
      "targets": [
        {
          "editorMode": "code",
          "expr": "sum by (le, model_name) (rate(vllm:request_prompt_tokens_bucket{model_name=~\"$Deployment_id\"}[$__rate_interval]))",
          "legendFormat": "{{model_name}} le={{le}}",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "Input Token Size Distribution",
      "type": "histogram"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "mappings": [
            {
              "options": { "Calculation": { "index": 0, "text": "Last (not null)" } },
              "type": "value"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": { "h": 3, "w": 7, "x": 10, "y": 8 },
      "id": 9,
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "percentChangeColorMode": "standard",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "showPercentChange": false,
        "textMode": "auto",
        "wideLayout": true
      },
      "pluginVersion": "11.3.0",
      "targets": [
        {
          "editorMode": "code",
          "expr": "histogram_quantile(0.90, sum by(le, model_name) (rate(vllm:request_prompt_tokens_bucket{model_name=~\"$Deployment_id\"}[$__rate_interval])))",
          "legendFormat": "__auto",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "Input Token Size p90",
      "type": "stat"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "mappings": [
            {
              "options": { "Calculation": { "index": 0, "text": "Last (not null)" } },
              "type": "value"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": { "h": 3, "w": 7, "x": 17, "y": 8 },
      "id": 8,
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "percentChangeColorMode": "standard",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "showPercentChange": false,
        "textMode": "auto",
        "wideLayout": true
      },
      "pluginVersion": "11.3.0",
      "targets": [
        {
          "editorMode": "code",
          "expr": "histogram_quantile(0.50, sum by(le, model_name) (rate(vllm:request_prompt_tokens_bucket{model_name=~\"$Deployment_id\"}[$__rate_interval])))",
          "legendFormat": "__auto",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "Input Token Size p50",
      "type": "stat"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "mappings": [
            {
              "options": { "Calculation": { "index": 0, "text": "mean" } },
              "type": "value"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": { "h": 3, "w": 7, "x": 10, "y": 11 },
      "id": 7,
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "percentChangeColorMode": "standard",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "showPercentChange": false,
        "textMode": "auto",
        "wideLayout": true
      },
      "pluginVersion": "11.3.0",
      "targets": [
        {
          "editorMode": "code",
          "expr": "sum(rate(vllm:prompt_tokens_total{model_name=~\"$Deployment_id\"}[$__rate_interval]))\n/\nsum(rate(vllm:request_success_total{model_name=~\"$Deployment_id\"}[$__rate_interval]))",
          "legendFormat": "__auto",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "Input Token Size Avg",
      "type": "stat"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "mappings": [
            {
              "options": { "Calculation": { "index": 0, "text": "Last (not null)" } },
              "type": "value"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": { "h": 3, "w": 7, "x": 17, "y": 11 },
      "id": 10,
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "percentChangeColorMode": "standard",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "showPercentChange": false,
        "textMode": "auto",
        "wideLayout": true
      },
      "pluginVersion": "11.3.0",
      "targets": [
        {
          "editorMode": "code",
          "expr": "histogram_quantile(0.99, sum by(le, model_name) (rate(vllm:request_prompt_tokens_bucket{model_name=~\"$Deployment_id\"}[$__rate_interval])))",
          "legendFormat": "__auto",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "Input Token Size p99",
      "type": "stat"
    },
    {
      "collapsed": false,
      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 },
      "id": 18,
      "panels": [],
      "title": "Input Token Over Time",
      "type": "row"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "barWidthFactor": 0.6,
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": { "type": "linear" },
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
          },
          "unit": "cps"
        },
        "overrides": []
      },
      "gridPos": { "h": 6, "w": 10, "x": 0, "y": 15 },
      "id": 11,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "pluginVersion": "11.3.0",
      "targets": [
        {
          "editorMode": "code",
          "expr": "sum by (model_name) (rate(vllm:prompt_tokens_total{model_name=~\"$Deployment_id\"}[$__rate_interval]))",
          "legendFormat": "{{model_name}}",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "Input Tokens Over Time",
      "type": "timeseries"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "mappings": [
            {
              "options": { "Calculation": { "index": 0, "text": "mean" } },
              "type": "value"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
          },
          "unit": "cps"
        },
        "overrides": []
      },
      "gridPos": { "h": 3, "w": 7, "x": 10, "y": 15 },
      "id": 12,
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "percentChangeColorMode": "standard",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "showPercentChange": false,
        "textMode": "auto",
        "wideLayout": true
      },
      "pluginVersion": "11.3.0",
      "targets": [
        {
          "editorMode": "code",
          "expr": "sum(rate(vllm:prompt_tokens_total{model_name=~\"$Deployment_id\"}[$__rate_interval]))",
          "legendFormat": "__auto",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "Input Tokens/Sec Avg",
      "type": "stat"
    },
    {
      "collapsed": false,
      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 21 },
      "id": 17,
      "panels": [],
      "title": "Output Token Over Time",
      "type": "row"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "barWidthFactor": 0.6,
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": { "type": "linear" },
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
          },
          "unit": "cps"
        },
        "overrides": []
      },
      "gridPos": { "h": 6, "w": 10, "x": 0, "y": 22 },
      "id": 13,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "pluginVersion": "11.3.0",
      "targets": [
        {
          "editorMode": "code",
          "expr": "sum by (model_name) (rate(vllm:generation_tokens_total{model_name=~\"$Deployment_id\"}[$__rate_interval]))",
          "legendFormat": "{{model_name}}",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "Output Tokens Over Time",
      "type": "timeseries"
    },
    {
      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "mappings": [
            {
              "options": { "Calculation": { "index": 0, "text": "mean" } },
              "type": "value"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 80 }]
          },
          "unit": "cps"
        },
        "overrides": []
      },
      "gridPos": { "h": 3, "w": 7, "x": 10, "y": 22 },
      "id": 14,
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "percentChangeColorMode": "standard",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "showPercentChange": false,
        "textMode": "auto",
        "wideLayout": true
      },
      "pluginVersion": "11.3.0",
      "targets": [
        {
          "editorMode": "code",
          "expr": "sum(rate(vllm:generation_tokens_total{model_name=~\"$Deployment_id\"}[$__rate_interval]))",
          "legendFormat": "__auto",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "Output Tokens/Sec Avg",
      "type": "stat"
    }
  ],
  "preload": false,
  "schemaVersion": 40,
  "tags": [],
  "templating": {
    "list": [
      {
        "current": { "text": "Prometheus", "value": "4184fc20-68a7-483a-8d9b-7caa59c680dd" },
        "label": "datasource",
        "name": "DS_PROMETHEUS",
        "options": [],
        "query": "prometheus",
        "refresh": 1,
        "type": "datasource"
      },
      {
        "current": { "text": ["All"], "value": ["$__all"] },
        "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
        "definition": "label_values(vllm:request_success_total,model_name)",
        "includeAll": true,
        "label": "Deployment_ID",
        "multi": true,
        "name": "Deployment_id",
        "options": [],
        "query": {
          "qryType": 1,
          "query": "label_values(vllm:request_success_total,model_name)",
          "refId": "PrometheusVariableQueryEditor-VariableQuery"
        },
        "refresh": 1,
        "regex": "",
        "sort": 1,
        "type": "query"
      },
      {
        "current": { "text": "All hours", "value": "All hours" },
        "hide": 2,
        "label": "Rush Hours Only",
        "name": "rush_hours",
        "options": [
          { "selected": true, "text": "false", "value": "All hours" },
          { "selected": false, "text": "true", "value": "Rush hours" }
        ],
        "query": "false : All hours, true : Rush hours",
        "type": "custom"
      },
      {
        "current": { "text": "All", "value": "All" },
        "hide": 2,
        "label": "Rush Hours Type",
        "name": "rush_hours_type",
        "options": [
          { "selected": true, "text": "^All__.*$", "value": "All" },
          { "selected": false, "text": "^Static__.*$", "value": "Static" },
          { "selected": false, "text": "^Dynamic__.*$", "value": "Dynamic" }
        ],
        "query": "^All__.*$ : All, ^Static__.*$ : Static, ^Dynamic__.*$ : Dynamic",
        "type": "custom"
      },
      {
        "current": { "text": "", "value": "" },
        "hide": 2,
        "name": "query0",
        "options": [],
        "query": "",
        "refresh": 1,
        "regex": "",
        "type": "query"
      }
    ]
  },
  "time": { "from": "now-12h", "to": "now" },
  "timepicker": {},
  "timezone": "browser",
  "title": "Query Statistics_New4",
  "uid": "query-statistics4",
  "version": 2,
  "weekStart": ""
}