From ba196291789d783cee96b8bbaefbc8988e21efa4 Mon Sep 17 00:00:00 2001 From: func25 Date: Sat, 23 Aug 2025 13:23:23 +0700 Subject: [PATCH 1/2] finalize --- dashboards/victorialogs-cluster.json | 8494 ++++++++++++++++++++-- dashboards/victorialogs.json | 4815 +++++++++++-- dashboards/vm/victorialogs-cluster.json | 8500 +++++++++++++++++++++-- dashboards/vm/victorialogs.json | 4822 ++++++++++--- 4 files changed, 23667 insertions(+), 2964 deletions(-) diff --git a/dashboards/victorialogs-cluster.json b/dashboards/victorialogs-cluster.json index f0a2fa815a..e450293e71 100644 --- a/dashboards/victorialogs-cluster.json +++ b/dashboards/victorialogs-cluster.json @@ -50,7 +50,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 1, + "id": 6, "links": [], "panels": [ { @@ -71,7 +71,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows total amount of log entries in the storage.", + "description": "Total amount of log entries in the storage.", "fieldConfig": { "defaults": { "color": { @@ -83,7 +83,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, @@ -92,12 +92,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 4, "x": 0, "y": 1 }, - "id": 10, + "id": 79, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -117,7 +117,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -131,7 +131,7 @@ "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -143,7 +143,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the cumulative number of log entries ingested over the last 24h.", + "description": "The total number of log entries ingested over the past 24 hours.", "fieldConfig": { "defaults": { "color": { @@ -155,7 +155,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, @@ -164,12 +164,12 @@ "overrides": [] }, "gridPos": { 
- "h": 2, + "h": 4, "w": 5, "x": 4, "y": 1 }, - "id": 65, + "id": 80, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -189,7 +189,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -203,7 +203,7 @@ "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -215,7 +215,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Total amount of used disk space.\nAccounts for all compressed log entries and index size.\n\nSee how to [control disk usage](https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage).", + "description": "Average ingestion rate of log entries.", "fieldConfig": { "defaults": { "color": { @@ -227,21 +227,21 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, - "unit": "bytes" + "unit": "short" }, "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 9, "y": 1 }, - "id": 24, + "id": 82, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -261,7 +261,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -270,16 +270,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(vl_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", + "expr": "sum(rate(vl_rows_ingested_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], - "title": "Disk space usage", + "title": "Insert req/s", "type": "stat" }, { @@ -287,7 +287,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the average ingestion rate of log entries.", + "description": "Total amount of used disk space.\nAccounts for all compressed log entries and index size.\n\nSee how to [control disk 
usage](https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage).", "fieldConfig": { "defaults": { "color": { @@ -299,21 +299,21 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 14, "y": 1 }, - "id": 22, + "id": 85, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -333,7 +333,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -342,16 +342,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(vl_rows_ingested_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(vl_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], - "title": "Insert req/s", + "title": "Disk space usage", "type": "stat" }, { @@ -359,7 +359,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Total number of available CPUs for VictoriaLogs process", + "description": "Integer number of CPU cores available to the application. This value is automatically rounded down from fractional CPU quotas. For optimal performance, fractional CPU units should be avoided. 
See the [best practices](https://docs.victoriametrics.com/victoriametrics/bestpractices/#kubernetes) documentation for more details.", "fieldConfig": { "defaults": { "color": { @@ -371,7 +371,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -384,12 +384,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 19, "y": 1 }, - "id": 30, + "id": 87, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -409,7 +409,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -423,7 +423,7 @@ "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -437,10 +437,10 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 4, "x": 0, - "y": 3 + "y": 5 }, "id": 63, "options": { @@ -452,7 +452,7 @@ "content": "
$version
", "mode": "markdown" }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "title": "Version", "type": "text" }, @@ -461,7 +461,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the cumulative number of log entries ingested over the last 24h. \n\nThe size is calculated before compression.", + "description": "The cumulative number of log entries ingested over the last 24h. \n\nThe size is calculated before compression.", "fieldConfig": { "defaults": { "color": { @@ -473,7 +473,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, @@ -482,12 +482,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 4, - "y": 3 + "y": 5 }, - "id": 64, + "id": 81, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -507,7 +507,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -521,7 +521,7 @@ "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -533,7 +533,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "The ratio between original data size and compressed data stored on disk.\n\nCompression ratio doesn't account for indexdb size. 
It also may change with time, as [merges](https://docs.victoriametrics.com/victorialogs/#forced-merge) are improving compression of historical data.", + "description": "Rate of HTTP read requests.", "fieldConfig": { "defaults": { "color": { @@ -545,21 +545,21 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, - "unit": "none" + "unit": "short" }, "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 9, - "y": 3 + "y": 5 }, - "id": 25, + "id": 83, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -579,7 +579,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -588,16 +588,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": " sum(vl_uncompressed_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) / sum(vl_compressed_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", + "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance\", path=~\"/select/.*\"}[$__rate_interval]))", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], - "title": "Compression ratio", + "title": "Read req/s", "type": "stat" }, { @@ -605,7 +605,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the rate of HTTP read requests.", + "description": "The ratio between original data size and compressed data stored on disk. This metric excludes indexdb size. \n\nThe ratio can go up or down as the system performs automatic maintenance and applies retention policies. 
For example:\n- Background merges: [Merges](https://docs.victoriametrics.com/victorialogs/#forced-merge) improve compression by combining data into larger, more efficiently compressed blocks\n- Retention policies: When old data is deleted due to retention settings, the ratio changes as different time periods have varying compression characteristics\n\n", "fieldConfig": { "defaults": { "color": { @@ -617,21 +617,21 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, - "unit": "short" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 14, - "y": 3 + "y": 5 }, - "id": 36, + "id": 86, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -651,7 +651,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -660,16 +660,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance\", path=~\"/select/.*\"}[$__rate_interval]))", + "expr": " sum(vl_uncompressed_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) / sum(vl_compressed_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "__auto", + "legendFormat": "{{instance}}", "refId": "A" } ], - "title": "Read req/s", + "title": "Compression ratio", "type": "stat" }, { @@ -677,7 +677,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Total size of available memory for VictoriaLogs process", + "description": "Total system memory available to the application. 
This represents the system or container's memory capacity or limit, not the currently free memory.", "fieldConfig": { "defaults": { "color": { @@ -689,7 +689,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, @@ -698,12 +698,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 19, - "y": 3 + "y": 5 }, - "id": 34, + "id": 88, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -723,20 +723,20 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { - "type": "prometheus", "uid": "$ds" }, + "editorMode": "code", "exemplar": false, "expr": "sum(vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"})", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -767,7 +767,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -796,8 +796,8 @@ }, "properties": [ { - "id": "displayName", - "value": "Count" + "id": "custom.hidden", + "value": true } ] } @@ -807,7 +807,7 @@ "h": 6, "w": 9, "x": 0, - "y": 5 + "y": 9 }, "id": 72, "options": { @@ -823,12 +823,12 @@ "showHeader": true, "sortBy": [ { - "desc": true, - "displayName": "Count" + "desc": false, + "displayName": "tier" } ] }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -837,14 +837,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(job, short_version)", + "expr": "sum by (instance, version, tier) (\n label_set(\n vm_app_version{job=~\"$job\",instance=~\"$instance_select_all\"}\n and on(instance,version)\n vm_app_version{job=~\"$job\",instance=~\"$instance_insert_all\"} * 0 + 1,\n \"tier\",\"select,insert\"\n )\nor\n label_set(\n vm_app_version{job=~\"$job\",instance=~\"$instance_select_all\"}\n unless on(instance,version)\n 
vm_app_version{job=~\"$job\",instance=~\"$instance_insert_all\"} * 0 + 1,\n \"tier\",\"select\"\n )\nor\n label_set(\n vm_app_version{job=~\"$job\",instance=~\"$instance_insert_all\"}\n unless on(instance,version)\n vm_app_version{job=~\"$job\",instance=~\"$instance_select_all\"} * 0 + 1,\n \"tier\",\"insert\"\n )\nor\n label_set(\n vm_app_version{job=~\"$job\",instance=~\"$instance_storage_all\"} * 0 + 1,\n \"tier\",\"storage\"\n )\nor\n label_set(\n (\n vm_app_version{job=~\"$job\",instance=~\"$instance\"}\n unless on(instance,version)\n (\n vm_app_version{job=~\"$job\",instance=~\"$instance_select_all\"}\n or\n vm_app_version{job=~\"$job\",instance=~\"$instance_insert_all\"}\n or\n vm_app_version{job=~\"$job\",instance=~\"$instance_storage_all\"}\n )\n ) * 0 + 1,\n \"tier\",\"unknown\"\n )\n)\n", "format": "table", + "hide": false, "instant": true, + "legendFormat": "{{instance}}", "range": false, "refId": "A" } ], - "title": "", + "title": "Component versions ($job)", "type": "table" }, { @@ -852,6 +854,7 @@ "type": "prometheus", "uid": "$ds" }, + "description": "Uptime shows whether the services are working and what their roles are. 
A VictoriaLogs instance can serve all roles; if it is not yet serving requests from the network, it will show as unknown.", "fieldConfig": { "defaults": { "color": { @@ -894,12 +897,13 @@ "decimals": 0, "links": [], "mappings": [], + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -915,7 +919,7 @@ "h": 6, "w": 15, "x": 9, - "y": 5 + "y": 9 }, "id": 73, "options": { @@ -935,7 +939,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -943,11 +947,63 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(min_over_time(up{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job)", + "expr": "sum(min_over_time(up{job=~\"$job\", instance=~\"$instance_storage_all\"}[$__rate_interval]))", "format": "time_series", "instant": false, - "legendFormat": "{{job}}", + "legendFormat": "storage", "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum (\n min_over_time(\n up{job=~\"$job\", instance=~\"$instance_select_all\"}[$__rate_interval]\n )\n unless on(instance)\n up{job=~\"$job\", instance=~\"$instance_insert_all\"}\n)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "select", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum (\n min_over_time(\n up{job=~\"$job\", instance=~\"$instance_insert_all\"}[$__rate_interval]\n )\n unless on(instance)\n up{job=~\"$job\", instance=~\"$instance_select_all\"}\n)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "insert", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(\n min_over_time(\n up{job=~\"$job\", instance=~\"$instance_select_all\"}[$__rate_interval]\n )\n and on(instance)\n 
up{job=~\"$job\", instance=~\"$instance_insert_all\"}\n)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "insert,select", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum by (instance) (\n min_over_time(up{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n)\nunless on (instance)\nsum by (instance) (\n min_over_time(up{job=~\"$job\", instance=~\"$instance_storage_all\"}[$__rate_interval])\n or\n min_over_time(up{job=~\"$job\", instance=~\"$instance_insert_all\"}[$__rate_interval])\n or\n min_over_time(up{job=~\"$job\", instance=~\"$instance_select_all\"}[$__rate_interval])\n)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "unknown", + "refId": "E" } ], "title": "Uptime ($job)", @@ -959,7 +1015,7 @@ "h": 1, "w": 24, "x": 0, - "y": 11 + "y": 15 }, "id": 18, "panels": [], @@ -971,7 +1027,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows ingestion rate in number of log entries and bytes per second.", + "description": "Ingestion rate in number of log entries and bytes per second.", "fieldConfig": { "defaults": { "color": { @@ -1018,7 +1074,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1051,7 +1107,7 @@ "h": 8, "w": 12, "x": 0, - "y": 12 + "y": 16 }, "id": 2, "options": { @@ -1072,7 +1128,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1102,7 +1158,7 @@ "refId": "B" } ], - "title": "Logs ingestion rate ", + "title": "Logs ingestion rate ($instance)", "type": "timeseries" }, { @@ -1157,7 +1213,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1173,7 +1229,7 @@ "h": 8, "w": 12, "x": 12, - "y": 12 + "y": 16 }, "id": 14, "options": { @@ -1195,7 +1251,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { 
"datasource": { @@ -1212,7 +1268,7 @@ "refId": "A" } ], - "title": "Requests rate ", + "title": "Requests rate ($instance)", "type": "timeseries" }, { @@ -1267,7 +1323,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1283,7 +1339,7 @@ "h": 8, "w": 12, "x": 0, - "y": 20 + "y": 24 }, "id": 69, "options": { @@ -1302,7 +1358,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1336,7 +1392,7 @@ "refId": "B" } ], - "title": "Requests error rate", + "title": "Requests error rate ($instance)", "type": "timeseries" }, { @@ -1344,7 +1400,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "The less time it takes is better.\n", + "description": "99th percentile response time for VictoriaLogs HTTP endpoints, grouped by instance and path. This means 99% of requests are faster than this value. **Lower numbers are better**, as they indicate faster responses and fewer slow requests.", "fieldConfig": { "defaults": { "color": { @@ -1359,7 +1415,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1386,12 +1442,13 @@ "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1407,7 +1464,7 @@ "h": 8, "w": 12, "x": 12, - "y": 20 + "y": 24 }, "id": 66, "options": { @@ -1429,7 +1486,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1445,7 +1502,7 @@ "refId": "A" } ], - "title": "Query duration 0.99 quantile", + "title": "Request duration p99 ($instance)", "type": "timeseries" }, { @@ -1453,7 +1510,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the amount of on-disk space occupied by all the data stored in the storage.\n\nSee how to [control disk 
usage](https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage).", + "description": "The amount of on-disk space occupied by all the data stored in the storage, shown per instance.\n\nSee how to [control disk usage](https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage).", "fieldConfig": { "defaults": { "color": { @@ -1500,7 +1557,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1516,7 +1573,7 @@ "h": 8, "w": 12, "x": 0, - "y": 28 + "y": 32 }, "id": 6, "options": { @@ -1538,7 +1595,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1546,16 +1603,16 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(vl_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", + "expr": "max(vl_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by (instance)", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "disk usage", + "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "Disk space usage ", + "title": "Disk space usage ($instance)", "type": "timeseries" }, { @@ -1563,7 +1620,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the rate of logging the messages by their level. 
Unexpected spike in rate is a good reason to check logs.", + "description": "Rate of VictoriaLogs' own application log messages (debug, warnings, errors) - NOT the logs that VictoriaLogs is collecting from external sources.", "fieldConfig": { "defaults": { "color": { @@ -1608,7 +1665,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1623,7 +1680,7 @@ "h": 8, "w": 12, "x": 12, - "y": 28 + "y": 32 }, "id": 67, "options": { @@ -1645,7 +1702,7 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1653,142 +1710,6999 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (job, level, location) > 0", + "expr": "sum(rate(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (job, level, location)", "interval": "5m", "legendFormat": "{{job}} - {{level}}: {{location}}", "range": true, "refId": "A" } ], - "title": "Logging rate", + "title": "VictoriaLogs internal logging ($instance)", "type": "timeseries" }, { - "collapsed": true, + "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 36 + "y": 40 }, "id": 68, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "$ds" + "panels": [], + "title": "Troubleshooting", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Number of restarts per job. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. \n\nNormally, processes shouldn't restart unless the restart was initiated by the user. The reason for restarts should be determined by checking the logs of each specific service. ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "description": "Shows the number of restarts per job. 
The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. Normally, processes shouldn't restart unless restart was inited by user. The reason of restarts should be figured out by checking the logs of each specific service. ", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "stepAfter", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 0, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 543 - }, - "id": 62, - "options": { - "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "sortBy": "Last *", - "sortDesc": true + "insertNulls": false, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "tooltip": { - 
"hideZeros": false, - "mode": "multi", - "sort": "desc" + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "pluginVersion": "11.5.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$ds" + "decimals": 0, + "links": [], + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 }, - "editorMode": "code", - "expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) > 0) by(job)", - "format": "time_series", - "instant": false, - "legendFormat": "{{job}}", - "refId": "A" - } + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 41 + }, + "id": 62, + "options": { + "legend": { + "calcs": [ + "lastNotNull" ], - "title": "Restarts", - "type": "timeseries" + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "description": "The number of the new [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) created over the last 24h.\n\nPrefer having as low churn rate as possible. 
\nSee [How to determine which fields must be associated with log streams?](https://docs.victoriametrics.com/victorialogs/keyconcepts/#how-to-determine-which-fields-must-be-associated-with-log-streams)\n", + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job)", + "format": "time_series", + "instant": false, + "legendFormat": "{{job}}", + "refId": "A" + } + ], + "title": "Restarts", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "The number of new [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) created over the last 24 hours. A lower rate is better. See [How to determine which fields must be associated with log streams?](https://docs.victoriametrics.com/victorialogs/keyconcepts/#how-to-determine-which-fields-must-be-associated-with-log-streams) for more details.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, 
+ { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 41 + }, + "id": 26, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "increase(vl_streams_created_total{job=~\"$job\", instance=~\"$instance\"}[1d])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "new log streams over 24h", + "range": true, + "refId": "A" + } + ], + "title": "Streams churn rate 24h", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Flags explicitly set to non-default values", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "job" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 70, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + 
"show": false + }, + "showHeader": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(flag{is_set=\"true\", job=~\"$job\", instance=~\"$instance\"}) by(job, instance, name, value)", + "format": "table", + "instant": true, + "legendFormat": "{{name}}={{value}}", + "range": false, + "refId": "A" + } + ], + "title": "Non-default flags", + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Number of log entries ignored or dropped on insertion due to the following reasons:\n* Timestamp out of the retention period or in the future\n* Number of fields per entry exceeded\n* Line too long\n\nIf this occurs, check the VictoriaLogs log for details.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "fieldMinMax": false, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 71, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + 
"showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(vl_rows_dropped_total{job=~\"$job\", instance=~\"$instance\"}[1h])) by (reason)", + "hide": false, + "interval": "", + "legendFormat": "{{reason}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(vl_too_long_lines_skipped_total{job=~\"$job\", instance=~\"$instance\"}[1h]))", + "hide": false, + "interval": "", + "legendFormat": "line_too_long", + "range": true, + "refId": "B" + } + ], + "title": "Logs dropped for last 1h", + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 57 + }, + "id": 28, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Percentage of physical RAM used compared to the memory limit. 
If this percentage is high, check the `RSS` anonymous vs resident ratio panel for more details.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 0.8 + }, + { + "color": "red", + "value": 0.9 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 0, + "y": 58 + }, + "id": 134, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(\n max_over_time(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n)", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "RSS % of memory limit", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "How much memory (RAM) the VictoriaLogs process is actually using, compared to its allowed container or system limit. 
See 'Memory Usage' panel for a detailed breakdown.\n\n- Good: Below 70% most of the time, maybe spiking a bit under load.\n- Bad: Above 90% for more than 5 minutes = risk of out-of-memory (`OOM`) kill.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 17, + "x": 7, + "y": 58 + }, + "id": 136, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(\n max_over_time(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n) by (job)", + "interval": "", + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Physical 
memory % usage ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Portion of RAM that cannot be reclaimed without swapping. If both the `RSS`-to-limit percentage and this ratio are high, the process is at high risk of an `OOM` kill.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 0, + "y": 63 + }, + "id": 135, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(process_resident_memory_anon_bytes{job=~\"$job\",instance=~\"$instance\"}\n/\nprocess_resident_memory_bytes{job=~\"$job\",instance=~\"$instance\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Anonymous / Resident ratio", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "- Anonymous resident memory: Private memory allocated to the application that **cannot** be reclaimed by the kernel. Refer to the [Check/profile](https://docs.victoriametrics.com/victorialogs/#profiling) Go heap section for troubleshooting.\n- File-backed resident memory: Memory mapped from files, which can be safely reclaimed. Increases during querying. 
Correlate with `I/O` panels for further analysis.\n- Shared resident memory: Typically negligible. Large spikes may indicate unexpected shared memory consumers.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 17, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 68 + }, + "id": 137, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "anonymous resident memory", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(process_resident_memory_file_bytes{job=~\"$job\", 
instance=~\"$instance\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "file-backed resident memory", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(process_resident_memory_shared_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "shared resident memory", + "range": true, + "refId": "C" + } + ], + "title": "Memory usage ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 68 + }, + "id": 44, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + 
"pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}\n) by(job)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU % usage ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Lower is better, e.g. 20% means the process was delayed by memory pressure 20% of the time. See [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n- waiting: Time fraction where at least one thread was blocked on memory.\n- stalled: Time fraction where every thread was blocked on memory (severe pressure).\n\nIf queries slow down and both series spike, the host is likely limited by RAM or I/O throughput.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 
0.2 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 76 + }, + "id": 138, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(process_pressure_memory_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}: waiting", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(process_pressure_memory_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}: stalled", + "range": true, + "refId": "B" + } + ], + "title": "Memory pressure", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Helps troubleshoot high CPU usage or throttling:\n\n- waiting: The percentage of time at least one task in the VictoriaLogs process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: The percentage of time all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there’s a CPU burst, it's normal to see waiting or stalled > 1%. 
It only becomes a concern if it consistently climbs above 5–10% and aligns with latency spikes or GC slowdowns.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 76 + }, + "id": 144, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(process_pressure_cpu_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job) > 0.005", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}: waiting", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": 
"sum(rate(process_pressure_cpu_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job) > 0.005", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}: stalled", + "range": true, + "refId": "B" + } + ], + "title": "CPU pressure", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "The higher the better.\n\nThis computes the fraction of read bytes that came from the page cache (i.e., not from disk). It answers: \"Of all the bytes my process read via read(), how many were cache hits?\"\n\nThat's why you can see many read syscalls (read() calls), but the actual disk reads stay low; because the data was already in RAM.\n\n\n\n\n\n\n\n\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 0, + "y": 84 + }, + "id": 139, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "1 - (sum(rate(process_io_storage_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) / sum(rate(process_io_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])))", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": 
"Page-cache hit ratio", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Number of read/write calls application makes:\n\n- read call: Number of read*()-family system calls your process has issued since start. Each call can move 1 byte or megabytes, cached or uncached.\n- write call: Number of write*()-family system calls (including write, pwrite, writev, etc.) made by the process.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "read calls" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 4, + "y": 84 + }, + "id": 140, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + 
"editorMode": "code", + "expr": "sum(rate(process_io_read_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "read calls", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(process_io_write_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "write calls", + "range": true, + "refId": "B" + } + ], + "title": "Read/Write syscalls ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Percentage of open file descriptors (files, sockets, pipes, etc.,) compared to the limit set in the OS. Reaching the limit of open files can cause various issues and must be prevented.\n\nSee [how to change limits](https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a).", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { 
+ "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "max" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 84 + }, + "id": 145, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(max_over_time(process_open_fds{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n/\nprocess_max_fds{job=~\"$job\", instance=~\"$instance\"}) by (job)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Open FDs & usage ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Measure the actual bytes read from and written to disk by the process:\n\n- read: physical bytes the kernel actually pulled from the storage device on behalf of the process (after checking page-cache).\n- write: physical bytes the kernel ultimately wrote to the storage device for the process (after combining, caching, or delaying writes).", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": 
false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "read" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 92 + }, + "id": 141, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(process_io_storage_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "read", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(process_io_storage_written_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "write", + "range": true, + "refId": "C" + } + ], + "title": "Disk writes/reads ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "IO pressure based on [Pressure Stall 
Information](https://docs.kernel.org/accounting/psi.html). The lower the better.\n\n- waiting: at least one runnable thread blocked on block-`I/O` (disk, NVMe, network-storage) while others could still make progress.\n- stalled: all non-idle threads simultaneously waiting on `I/O`; no useful user code ran during these periods → true `I/O` thrashing.\n\nIf stalled > 0 while querying, it's recommended to increase queue depth on NVMe, raise blk-mq budgets, or relax cgroup I/O limits.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 92 + }, + "id": 143, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": 
"max(rate(process_pressure_io_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job) > 0.005", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}} - waiting", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(process_pressure_io_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job) > 0.005", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}} - stalled", + "range": true, + "refId": "B" + } + ], + "title": "IO pressure", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Current number of active TCP connections to VictoriaLogs. This metric helps monitor connection pool usage and identify potential connection leaks. High values may indicate clients not properly closing connections or connection pooling issues. 
Monitor for gradual increases that could lead to resource exhaustion.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 100 + }, + "id": 146, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(vm_tcplistener_conns{job=~\"$job\", instance=~\"$instance\"}) by (job)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "TCP connections ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": 
false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 100 + }, + "id": 148, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(go_goroutines{job=~\"$job\", instance=~\"$instance\"}) by (job)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Goroutines ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Rate of incoming TCP connections accepted by VictoriaLogs. This metric indicates network activity and client connection patterns. Sudden spikes may indicate increased load or potential DDoS attacks. 
Sustained high rates should be correlated with resource usage to ensure adequate capacity.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 108 + }, + "id": 147, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(vm_tcplistener_accepts_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "TCP connections rate ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 108 + }, + "id": 149, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(process_num_threads{job=~\"$job\", instance=~\"$instance\"}) by (job)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Threads ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Percent of CPU spent on garbage collection.\n\nIf % is high, then CPU usage can be decreased by changing `GOGC` to higher values. 
Increasing `GOGC` value will increase memory usage, and decrease CPU usage.\n\nTry searching for keyword `GOGC` at https://docs.victoriametrics.com/victoriametrics/troubleshooting/ ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 116 + }, + "id": 74, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(\n rate(go_gc_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) \n / rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n ) by(job)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU spent on GC ($instance)", + 
"type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Time goroutines have spent in runnable state before actually running. Lower is better.\n\nHigh values or values exceeding the threshold are usually a sign of insufficient CPU resources or CPU throttling.\n\nVerify that the service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 0.1 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 116 + }, + "id": 61, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(histogram_quantile(0.99, sum(rate(go_sched_latencies_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (job, le))) by (job)", +
"format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Go scheduling latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Rate of allocations in memory. Sudden increase in allocations would mean increased pressure on Go Garbage Collector and can saturate CPU resources of the application.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 124 + }, + "id": 75, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(go_memstats_alloc_bytes_total{job=~\"$job\"}[$__rate_interval])) by (job)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + 
"legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Memory allocations rate", + "type": "timeseries" + } + ], + "title": "Resource usage", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 58 + }, + "id": 89, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "CPU utilization per instance as a fraction of available cores. Sustained values above 80% indicate CPU saturation; correlate with CPU PSI and query latency.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 133 + }, + "id": 116, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { +
"editorMode": "code", + "expr": "max(\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance_storage\"}\n)", + "legendFormat": "max", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "min(\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance_storage\"}\n)", + "hide": false, + "legendFormat": "min", + "range": true, + "refId": "B" + }, + {
+ "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "quantile(0.5,\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance_storage\"}\n)", + "hide": false, + "legendFormat": "median", + "range": true, +
"refId": "C" + } + ], + "title": "CPU % usage ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "How much memory (RAM) the VictoriaLogs process is actually using, compared to its allowed container or system limit. See 'Memory Usage' panel for a detailed breakdown.\n\n- Good: Below 70% most of the time, maybe spiking a bit under load.\n- Bad: Above 90% for more than 5 minutes = risk of out-of-memory (`OOM`) kill.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 1, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 133 + }, + "id": 117, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": true, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": 
"max(\n max_over_time(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance_storage\"}\n)", + "interval": "", + "legendFormat": "max", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "min(\n max_over_time(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance_storage\"}\n)", + "hide": false, + "interval": "", + "legendFormat": "min", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "quantile(0.5,\n max_over_time(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance_storage\"}\n)", + "hide": false, + "interval": "", + "legendFormat": "median", + "range": true, + "refId": "C" + } + ], + "title": "Physical memory usage ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "Disk space usage and limits for VictoriaLogs storage. Tracks current data usage against the configured retention limit and total disk space.\n\nThe orange line indicates the space retention limit. When usage approaches this limit, older data will be automatically deleted. 
If the space retention limit (`-retention.maxDiskSpaceUsageBytes`) is not specified, it won't show.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "max space retention of instances" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + }, + { + "id": "custom.insertNulls", + "value": 3600000 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 141 + }, + "id": 209, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(vl_max_disk_space_usage_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) by (job)\n/\nmax(vl_total_disk_space_bytes{job=~\"$job\", 
instance=~\"$instance\"}) by (job) > 0", + "hide": false, + "interval": "", + "legendFormat": "max space retention of instances", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(vl_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) by (job)\n/\nmax(vl_total_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}) by (job)", + "hide": false, + "interval": "", + "legendFormat": "{{job}}", + "range": true, + "refId": "B" + } + ], + "title": "Disk ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Rate of incoming query requests by endpoint path. This metric tracks the query load on VictoriaLogs, including different query interfaces and internal operations. \n\nHigher rates indicate increased query activity from users or applications. Monitor for sudden spikes that might indicate new dashboards, automated queries, or potential performance issues. 
Sustained high rates may require scaling query processing capacity.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 141 + }, + "id": 129, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])) by (path) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{path}}", + "range": true, + "refId": "A" + } + ], + "title": "HTTP request rate ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "99th percentile of HTTP request duration. 
This represents the time it takes for 99% of HTTP operations to complete. High values indicate slow ingestion/querying performance that could affect overall system throughput. Spikes may suggest storage bottlenecks, resource contention, or inefficient data processing.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 149 + }, + "id": 101, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(vl_http_request_duration_seconds{job=~\"$job\", instance=~\"$instance_storage\", quantile=\"0.99\"}) by (path) > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{path}}", + "range": true, + "refId": "A" + } + ], + "title": "HTTP request 
duration p99 ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Rate of ingested log entries and ingested bytes, grouped by ingestion type. Log entries are plotted on the left axis; series with the `(bytes)` suffix are plotted on the right axis.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*(bytes)/" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 149 + }, + "id": 102, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + 
"editorMode": "code", + "expr": "sum(rate(vl_rows_ingested_total{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])) by (type) > 0", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vl_bytes_ingested_total{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])) by (type) > 0", + "hide": false, + "instant": false, + "legendFormat": "{{type}} (bytes)", + "range": true, + "refId": "B" + } + ], + "title": "Logs ingestion rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Number of concurrent insert operations has reached the configured limit: `-maxConcurrentInserts` (default: 2x CPU cores)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] 
+ }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 157 + }, + "id": 100, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(increase(vm_concurrent_insert_limit_reached_total{job=~\"$job\", instance=~\"$instance_storage\"})) by (job)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Concurrent insert limit reached ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Number of insert requests that timed out while waiting for available concurrency slots. 
This indicates sustained ingestion overload beyond configured limits.\n\nHigh values suggest:\n- Insert queue is consistently full\n- Insert requests waiting too long for execution slots\n- System under sustained heavy ingestion load\n- Need for horizontal scaling or ingestion optimization\n\nCombined with `Concurrent insert limit reached`, this provides a complete picture of ingestion rejection patterns.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 157 + }, + "id": 202, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(vm_concurrent_insert_limit_timeout_total{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])) by (job)", + "interval": "", + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Insert 
timeouts ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Number of active message processors handling log ingestion. These processors parse and process incoming log messages before storage. The count typically correlates with the ingestion load.\n\nIf this number is unusually high or inflated (which is rare), check memory usage. It may indicate a heavy, concurrent ingestion load or processing bottlenecks that could benefit from performance tuning.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 165 + }, + "id": 112, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + 
"editorMode": "code", + "expr": "max(vl_insert_processors_count{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval]) by (job)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(vm_concurrent_insert_capacity{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "soft limit", + "range": true, + "refId": "B" + } + ], + "title": "Message processors ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Number of log rows waiting to be written to storage, categorized by type. Pending rows indicate temporary queuing during ingestion. Consistently high values may suggest storage write bottlenecks or insufficient write capacity.\n\nPending rows are flushed in two ways:\n\n- After a specific time period (typically 1 second)\n- When the pending row size exceeds a threshold (typically 1.75 MB)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + 
}, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 165 + }, + "id": 98, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(vl_pending_rows{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval]) by (type)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Pending rows ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "99th percentile duration of background flush operations by type (max across instances). High values may indicate disk pressure or heavy ingestion. 
Correlate with `I/O` panels.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 173 + }, + "id": 118, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": false + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(vl_insert_flush_duration_seconds{job=~\"$job\", instance=~\"$instance_storage\", quantile=\"0.99\"}) by (type) > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Flush duration p99 ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "The number of new [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) created over 
the last 24 hours. Lower is better. See [How to determine which fields must be associated with log streams?](https://docs.victoriametrics.com/victorialogs/keyconcepts/#how-to-determine-which-fields-must-be-associated-with-log-streams)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 173 + }, + "id": 103, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(increase(vl_streams_created_total{job=~\"$job\", instance=~\"$instance_storage\"}[1d])) by (job)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Streams churn 
rate 24h ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Rate of incoming query requests by endpoint path. This metric tracks the query load on VictoriaLogs, including different query interfaces and internal operations. \n\nHigher rates indicate increased query activity from users or applications. Monitor for sudden spikes that might indicate new dashboards, automated queries, or potential performance issues. Sustained high rates may require scaling query processing capacity.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 181 + }, + "id": 104, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + 
"editorMode": "code", + "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance_storage\",path=~\"^/(internal/)?select.*\"}[$__rate_interval])) by (path) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{path}}", + "range": true, + "refId": "A" + } + ], + "title": "Query requests rate ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "The number of concurrent select operations has reached the configured limit: `-search.maxConcurrentRequests`. To check the default capacity, refer to the `vl_concurrent_select_capacity` metric.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 181 + }, + "id": 205, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": 
"bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(increase(vl_concurrent_select_limit_reached_total{job=~\"$job\", instance=~\"$instance_storage\"})) by (job)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Concurrent query limit reached ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Number of queries that timed out while waiting for available concurrency slots. This indicates sustained query overload beyond configured limits.\n\nHigh values suggest:\n- Query queue is consistently full\n- Queries waiting too long for execution slots\n- System under sustained heavy load\n- Need for horizontal scaling or query optimization\n\nCombined with `Concurrent query limit reached`, this provides a complete picture of query rejection patterns.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + 
"value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 189 + }, + "id": 200, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vl_concurrent_select_limit_timeout_total{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])) by (job) > 0", + "interval": "", + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Query timeouts ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Total number of time-based (daily) partitions in storage. The number typically grows over time as new data arrives and is partitioned by time periods. 
\n\nExcessive partition counts may indicate retention policy issues or very high data ingestion rates that could impact query performance.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 3, + "x": 0, + "y": 196 + }, + "id": 90, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(vl_partitions{job=~\"$job\", instance=~\"$instance_storage\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Partition Count ($instance_storage)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "Number of storage parts (data files) in each tier. More parts mean fragmentation; fewer parts suggest successful merging. 
High part counts may slow queries and trigger background merge operations.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 9, + "x": 3, + "y": 196 + }, + "id": 91, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(vl_storage_parts{job=~\"$job\", instance=~\"$instance_storage\"}) by(type)", + "hide": false, + "interval": "", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Part count max by type ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "Number of storage merge operations by type (sum across instances). 
Merges compact smaller parts into larger ones; bursts are normal after activity spikes.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 196 + }, + "id": 93, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(vl_merges_total{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])) by(type)", + "hide": false, + "interval": "$__rate_interval", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Merge events ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "99th percentile duration of merge 
operations by storage type. Merge operations combine smaller storage parts into larger ones for optimization. \n\nNormal merge durations vary by storage type and data volume. Consistently high durations may indicate storage performance issues, high write load, or insufficient resources for background operations. Monitor for trends that could impact overall system performance.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 204 + }, + "id": 94, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(vl_merge_duration_seconds{job=~\"$job\", instance=~\"$instance_storage\", quantile=\"0.99\"}) by (type) > 0", + "hide": false, + "interval": "", + 
"legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Merge duration p99 ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "99th percentile of data volume processed during merge operations by storage type. \n\nThis metric indicates the scale of background storage optimization activities. Larger merge sizes generally improve storage efficiency but require more resources. Consistently high values may indicate heavy write loads or large storage parts that need optimization. Monitor correlation with merge duration for performance insights.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 204 + }, + "id": 95, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + 
"targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(vl_merge_bytes{job=~\"$job\", instance=~\"$instance_storage\", quantile=\"0.99\"}) by (type) > 0", + "hide": false, + "interval": "", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Merge bytes p99 ($instance_storage)", + "type": "timeseries" + } + ], + "title": "vlstorage", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 59 + }, + "id": 109, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "CPU utilization per instance as a fraction of available cores. Sustained values above 80% indicate CPU saturation; correlate with CPU PSI and query latency.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 134 + }, + "id": 206, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": 
"bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "0": "m", + "1": "a", + "2": "x", + "3": "(", + "4": "\n", + "5": " ", + "6": " ", + "7": "r", + "8": "a", + "9": "t", + "10": "e", + "11": "(", + "12": "\n", + "13": " ", + "14": " ", + "15": " ", + "16": " ", + "17": "p", + "18": "r", + "19": "o", + "20": "c", + "21": "e", + "22": "s", + "23": "s", + "24": "_", + "25": "c", + "26": "p", + "27": "u", + "28": "_", + "29": "s", + "30": "e", + "31": "c", + "32": "o", + "33": "n", + "34": "d", + "35": "s", + "36": "_", + "37": "t", + "38": "o", + "39": "t", + "40": "a", + "41": "l", + "42": "{", + "43": "j", + "44": "o", + "45": "b", + "46": "=", + "47": "~", + "48": "\"", + "49": "$", + "50": "j", + "51": "o", + "52": "b", + "53": "\"", + "54": ",", + "55": "i", + "56": "n", + "57": "s", + "58": "t", + "59": "a", + "60": "n", + "61": "c", + "62": "e", + "63": "=", + "64": "~", + "65": "\"", + "66": "$", + "67": "i", + "68": "n", + "69": "s", + "70": "t", + "71": "a", + "72": "n", + "73": "c", + "74": "e", + "75": "_", + "76": "s", + "77": "t", + "78": "o", + "79": "r", + "80": "a", + "81": "g", + "82": "e", + "83": "\"", + "84": "}", + "85": "[", + "86": "t", + "87": "m", + "88": "p", + "89": "_", + "90": "v", + "91": "i", + "92": "c", + "93": "t", + "94": "o", + "95": "r", + "96": "i", + "97": "a", + "98": "m", + "99": "e", + "100": "t", + "101": "r", + "102": "i", + "103": "c", + "104": "s", + "105": "_", + "106": "p", + "107": "r", + "108": "e", + "109": "t", + "110": "t", + "111": "i", + "112": "f", + "113": "y", + "114": "_", + "115": "q", + "116": "u", + "117": "e", + "118": "r", + "119": "y", + "120": "_", + "121": "_", + "122": "r", + "123": "a", + "124": "t", + "125": "e", + "126": "_", + "127": "i", + "128": "n", + "129": "t", + "130": "e", + "131": "r", + "132": "v", + "133": "a", + "134": "l", + 
"135": "]", + "136": "\n", + "137": " ", + "138": " ", + "139": ")", + "140": "\n", + "141": " ", + "142": " ", + "143": " ", + "144": " ", + "145": "/", + "146": "\n", + "147": " ", + "148": " ", + "149": "p", + "150": "r", + "151": "o", + "152": "c", + "153": "e", + "154": "s", + "155": "s", + "156": "_", + "157": "c", + "158": "p", + "159": "u", + "160": "_", + "161": "c", + "162": "o", + "163": "r", + "164": "e", + "165": "s", + "166": "_", + "167": "a", + "168": "v", + "169": "a", + "170": "i", + "171": "l", + "172": "a", + "173": "b", + "174": "l", + "175": "e", + "176": "{", + "177": "j", + "178": "o", + "179": "b", + "180": "=", + "181": "~", + "182": "\"", + "183": "$", + "184": "j", + "185": "o", + "186": "b", + "187": "\"", + "188": ",", + "189": "i", + "190": "n", + "191": "s", + "192": "t", + "193": "a", + "194": "n", + "195": "c", + "196": "e", + "197": "=", + "198": "~", + "199": "\"", + "200": "$", + "201": "i", + "202": "n", + "203": "s", + "204": "t", + "205": "a", + "206": "n", + "207": "c", + "208": "e", + "209": "_", + "210": "s", + "211": "t", + "212": "o", + "213": "r", + "214": "a", + "215": "g", + "216": "e", + "217": "\"", + "218": "}", + "219": "\n", + "220": ")", + "editorMode": "code", + "expr": "max(\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance_insert\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance_insert\"}\n)", + "legendFormat": "max", + "range": true, + "refId": "A" + }, + { + "0": "m", + "1": "a", + "2": "x", + "3": "(", + "4": "\n", + "5": " ", + "6": " ", + "7": "r", + "8": "a", + "9": "t", + "10": "e", + "11": "(", + "12": "\n", + "13": " ", + "14": " ", + "15": " ", + "16": " ", + "17": "p", + "18": "r", + "19": "o", + "20": "c", + "21": "e", + "22": "s", + "23": "s", + "24": "_", + "25": "c", + "26": "p", + "27": "u", + "28": "_", + "29": "s", + "30": "e", + "31": "c", + "32": "o", + "33": "n", + "34": "d", + "35": "s", + "36": "_", + "37": "t", + "38": "o", 
+ "39": "t", + "40": "a", + "41": "l", + "42": "{", + "43": "j", + "44": "o", + "45": "b", + "46": "=", + "47": "~", + "48": "\"", + "49": "$", + "50": "j", + "51": "o", + "52": "b", + "53": "\"", + "54": ",", + "55": "i", + "56": "n", + "57": "s", + "58": "t", + "59": "a", + "60": "n", + "61": "c", + "62": "e", + "63": "=", + "64": "~", + "65": "\"", + "66": "$", + "67": "i", + "68": "n", + "69": "s", + "70": "t", + "71": "a", + "72": "n", + "73": "c", + "74": "e", + "75": "_", + "76": "s", + "77": "t", + "78": "o", + "79": "r", + "80": "a", + "81": "g", + "82": "e", + "83": "\"", + "84": "}", + "85": "[", + "86": "t", + "87": "m", + "88": "p", + "89": "_", + "90": "v", + "91": "i", + "92": "c", + "93": "t", + "94": "o", + "95": "r", + "96": "i", + "97": "a", + "98": "m", + "99": "e", + "100": "t", + "101": "r", + "102": "i", + "103": "c", + "104": "s", + "105": "_", + "106": "p", + "107": "r", + "108": "e", + "109": "t", + "110": "t", + "111": "i", + "112": "f", + "113": "y", + "114": "_", + "115": "q", + "116": "u", + "117": "e", + "118": "r", + "119": "y", + "120": "_", + "121": "_", + "122": "r", + "123": "a", + "124": "t", + "125": "e", + "126": "_", + "127": "i", + "128": "n", + "129": "t", + "130": "e", + "131": "r", + "132": "v", + "133": "a", + "134": "l", + "135": "]", + "136": "\n", + "137": " ", + "138": " ", + "139": ")", + "140": "\n", + "141": " ", + "142": " ", + "143": " ", + "144": " ", + "145": "/", + "146": "\n", + "147": " ", + "148": " ", + "149": "p", + "150": "r", + "151": "o", + "152": "c", + "153": "e", + "154": "s", + "155": "s", + "156": "_", + "157": "c", + "158": "p", + "159": "u", + "160": "_", + "161": "c", + "162": "o", + "163": "r", + "164": "e", + "165": "s", + "166": "_", + "167": "a", + "168": "v", + "169": "a", + "170": "i", + "171": "l", + "172": "a", + "173": "b", + "174": "l", + "175": "e", + "176": "{", + "177": "j", + "178": "o", + "179": "b", + "180": "=", + "181": "~", + "182": "\"", + "183": "$", + "184": "j", + "185": 
"o", + "186": "b", + "187": "\"", + "188": ",", + "189": "i", + "190": "n", + "191": "s", + "192": "t", + "193": "a", + "194": "n", + "195": "c", + "196": "e", + "197": "=", + "198": "~", + "199": "\"", + "200": "$", + "201": "i", + "202": "n", + "203": "s", + "204": "t", + "205": "a", + "206": "n", + "207": "c", + "208": "e", + "209": "_", + "210": "s", + "211": "t", + "212": "o", + "213": "r", + "214": "a", + "215": "g", + "216": "e", + "217": "\"", + "218": "}", + "219": "\n", + "220": ")", + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "min(\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance_insert\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance_insert\"}\n)", + "hide": false, + "legendFormat": "min", + "range": true, + "refId": "B" + }, + { + "0": "m", + "1": "a", + "2": "x", + "3": "(", + "4": "\n", + "5": " ", + "6": " ", + "7": "r", + "8": "a", + "9": "t", + "10": "e", + "11": "(", + "12": "\n", + "13": " ", + "14": " ", + "15": " ", + "16": " ", + "17": "p", + "18": "r", + "19": "o", + "20": "c", + "21": "e", + "22": "s", + "23": "s", + "24": "_", + "25": "c", + "26": "p", + "27": "u", + "28": "_", + "29": "s", + "30": "e", + "31": "c", + "32": "o", + "33": "n", + "34": "d", + "35": "s", + "36": "_", + "37": "t", + "38": "o", + "39": "t", + "40": "a", + "41": "l", + "42": "{", + "43": "j", + "44": "o", + "45": "b", + "46": "=", + "47": "~", + "48": "\"", + "49": "$", + "50": "j", + "51": "o", + "52": "b", + "53": "\"", + "54": ",", + "55": "i", + "56": "n", + "57": "s", + "58": "t", + "59": "a", + "60": "n", + "61": "c", + "62": "e", + "63": "=", + "64": "~", + "65": "\"", + "66": "$", + "67": "i", + "68": "n", + "69": "s", + "70": "t", + "71": "a", + "72": "n", + "73": "c", + "74": "e", + "75": "_", + "76": "s", + "77": "t", + "78": "o", + "79": "r", + "80": "a", + "81": "g", + "82": "e", + "83": "\"", + "84": "}", + "85": "[", + "86": "t", + 
"87": "m", + "88": "p", + "89": "_", + "90": "v", + "91": "i", + "92": "c", + "93": "t", + "94": "o", + "95": "r", + "96": "i", + "97": "a", + "98": "m", + "99": "e", + "100": "t", + "101": "r", + "102": "i", + "103": "c", + "104": "s", + "105": "_", + "106": "p", + "107": "r", + "108": "e", + "109": "t", + "110": "t", + "111": "i", + "112": "f", + "113": "y", + "114": "_", + "115": "q", + "116": "u", + "117": "e", + "118": "r", + "119": "y", + "120": "_", + "121": "_", + "122": "r", + "123": "a", + "124": "t", + "125": "e", + "126": "_", + "127": "i", + "128": "n", + "129": "t", + "130": "e", + "131": "r", + "132": "v", + "133": "a", + "134": "l", + "135": "]", + "136": "\n", + "137": " ", + "138": " ", + "139": ")", + "140": "\n", + "141": " ", + "142": " ", + "143": " ", + "144": " ", + "145": "/", + "146": "\n", + "147": " ", + "148": " ", + "149": "p", + "150": "r", + "151": "o", + "152": "c", + "153": "e", + "154": "s", + "155": "s", + "156": "_", + "157": "c", + "158": "p", + "159": "u", + "160": "_", + "161": "c", + "162": "o", + "163": "r", + "164": "e", + "165": "s", + "166": "_", + "167": "a", + "168": "v", + "169": "a", + "170": "i", + "171": "l", + "172": "a", + "173": "b", + "174": "l", + "175": "e", + "176": "{", + "177": "j", + "178": "o", + "179": "b", + "180": "=", + "181": "~", + "182": "\"", + "183": "$", + "184": "j", + "185": "o", + "186": "b", + "187": "\"", + "188": ",", + "189": "i", + "190": "n", + "191": "s", + "192": "t", + "193": "a", + "194": "n", + "195": "c", + "196": "e", + "197": "=", + "198": "~", + "199": "\"", + "200": "$", + "201": "i", + "202": "n", + "203": "s", + "204": "t", + "205": "a", + "206": "n", + "207": "c", + "208": "e", + "209": "_", + "210": "s", + "211": "t", + "212": "o", + "213": "r", + "214": "a", + "215": "g", + "216": "e", + "217": "\"", + "218": "}", + "219": "\n", + "220": ")", + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "quantile(0.5,\n 
rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance_insert\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance_insert\"}\n)", + "hide": false, + "legendFormat": "median", + "range": true, + "refId": "C" + } + ], + "title": "CPU % usage ($instance_insert)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "99th percentile of insert operation duration. This represents the time it takes for 99% of insert operations to complete. High values indicate slow ingestion performance that could affect overall system throughput. Spikes may suggest storage bottlenecks, resource contention, or inefficient data processing.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 134 + }, + "id": 127, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": 
"multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(vl_http_request_duration_seconds{job=~\"$job\", instance=~\"$instance_insert_all\", instance=~\"$instance\", quantile=\"0.99\"}) by (path) > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{path}}", + "range": true, + "refId": "A" + } + ], + "title": "HTTP request duration p99 ($instance_insert)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Rate of incoming query requests by endpoint path. This metric tracks the query load on VictoriaLogs, including different query interfaces and internal operations. \n\nHigher rates indicate increased query activity from users or applications. Monitor for sudden spikes that might indicate new dashboards, automated queries, or potential performance issues. Sustained high rates may require scaling query processing capacity.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": 
{ + "h": 8, + "w": 12, + "x": 0, + "y": 142 + }, + "id": 130, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance_insert\"}[$__rate_interval])) by (path) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{path}}", + "range": true, + "refId": "A" + } + ], + "title": "HTTP request rate ($instance_insert)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Rate of log entries ingested per second, grouped by the `type` label. 
Series with the `(bytes)` suffix show the volume of ingested data in bytes per second and are plotted on the right axis.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*(bytes)/" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 142 + }, + "id": 113, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vl_rows_ingested_total{job=~\"$job\", instance=~\"$instance_insert\"}[$__rate_interval])) by (type) > 0", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + }, + { + "datasource": { 
+ "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vl_bytes_ingested_total{job=~\"$job\", instance=~\"$instance_insert\"}[$__rate_interval])) by (type) > 0", + "hide": false, + "instant": false, + "legendFormat": "{{type}} (bytes)", + "range": true, + "refId": "B" + } + ], + "title": "Logs ingestion rate ($instance_insert)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Number of times the count of concurrent insert operations reached the configured limit `-maxConcurrentInserts` (default: 2x CPU cores).", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 150 + }, + "id": 207, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + 
"tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(increase(vm_concurrent_insert_limit_reached_total{job=~\"$job\", instance=~\"$instance_insert\"})) by (job)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Concurrent insert limit reached ($instance_insert)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Number of insert requests that timed out while waiting for available concurrency slots. This indicates sustained ingestion overload beyond configured limits.\n\nHigh values suggest:\n- Insert queue is consistently full\n- Insert requests waiting too long for execution slots\n- System under sustained heavy ingestion load\n- Need for horizontal scaling or ingestion optimization\n\nCombined with `Concurrent insert limit reached`, this provides a complete picture of ingestion rejection patterns.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": 
"red", + "value": 1 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 150 + }, + "id": 203, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vm_concurrent_insert_limit_timeout_total{job=~\"$job\", instance=~\"$instance_insert\"}[$__rate_interval])) by (job) > 0", + "interval": "", + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Insert timeouts ($instance_insert)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Number of active message processors handling log ingestion. These processors parse and process incoming log messages before storage. The count typically correlates with the ingestion load.\n\nIf this number is unusually high or inflated (which is rare), check memory usage. 
It may indicate a heavy ingestion load or processing bottlenecks that could benefit from performance tuning.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -1799,8 +8713,8 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", + "fillOpacity": 10, + "gradientMode": "opacity", "hideFrom": { "legend": false, "tooltip": false, @@ -1823,6 +8737,7 @@ "mode": "off" } }, + "decimals": 0, "links": [], "mappings": [], "min": 0, @@ -1830,7 +8745,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": 0 }, { "color": "red", @@ -1845,10 +8761,10 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 543 + "x": 0, + "y": 158 }, - "id": 26, + "id": 114, "options": { "legend": { "calcs": [ @@ -1868,7 +8784,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1876,16 +8792,29 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "increase(vl_streams_created_total{job=~\"$job\", instance=~\"$instance\"}[1d])", + "expr": "max(vl_insert_processors_count{job=~\"$job\", instance=~\"$instance_insert\"}[$__rate_interval]) by (job)", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "new log streams over 24h", + "legendFormat": "{{job}}", "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(vm_concurrent_insert_capacity{job=~\"$job\", instance=~\"$instance_insert\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "soft limit", + "range": true, + "refId": "B" } ], - "title": "Log stream churn rate", + "title": "Message processors ($instance_insert)", "type": "timeseries" }, { @@ -1893,92 +8822,92 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Flags explicitly set to non-default values", + 
"description": "99th percentile duration of flush operations in vlinsert by type (max across instances). Spikes imply slow disks or backpressure from remote storage.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" }, "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "inspect": false + "thresholdsStyle": { + "mode": "off" + } }, + "links": [], "mappings": [], + "min": 0, "thresholds": { "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": 0 }, { "color": "red", "value": 80 } ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Time" - }, - "properties": [ - { - "id": "custom.hidden", - "value": true - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value" - }, - "properties": [ - { - "id": "custom.hidden", - "value": true - } - ] }, - { - "matcher": { - "id": "byName", - "options": "job" - }, - "properties": [ - { - "id": "custom.hidden", - "value": true - } - ] - } - ] + "unit": "s" + }, + "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 607 + "x": 12, + "y": 158 }, - "id": 70, + "id": 119, "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" ], - "show": false + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": 
"Last *", + "sortDesc": true }, - "showHeader": true + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1986,28 +8915,27 @@ "uid": "$ds" }, "editorMode": "code", - "exemplar": false, - "expr": "sum(flag{is_set=\"true\", job=~\"$job\", instance=~\"$instance\"}) by(job, instance, name, value)", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, + "expr": "sum(vl_insert_flush_duration_seconds{job=~\"$job\", instance=~\"$instance_insert\", quantile=\"0.99\"}) by (type) > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "range": true, "refId": "A" } ], - "title": "Non-default flags", - "type": "table" + "title": "Flush duration p99 ($instance_insert)", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "$ds" }, - "description": "Shows how many log entries were ignored or dropped on insertion due to various reasons:\n* timestamp out of retention period or timestamp in future;\n* number of fields per entry exceeded.", + "description": "Number of active log streams tracked by vlinsert per instance. 
Steady growth indicates increasing stream cardinality; review stream field design.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -2042,12 +8970,15 @@ "mode": "off" } }, + "links": [], "mappings": [], + "min": 0, "thresholds": { "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": 0 }, { "color": "red", @@ -2062,10 +8993,10 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 607 + "x": 0, + "y": 166 }, - "id": 71, + "id": 120, "options": { "legend": { "calcs": [ @@ -2082,10 +9013,10 @@ "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "none" + "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2093,42 +9024,28 @@ "uid": "$ds" }, "editorMode": "code", - "exemplar": true, - "expr": "sum(increase(vl_rows_dropped_total{job=~\"$job\", instance=~\"$instance\"}[1h])) by (job, reason)", - "hide": false, - "interval": "", - "legendFormat": "{{job}} - {{reason}}", + "expr": "sum(vl_insert_active_streams{job=~\"$job\", instance=~\"$instance_insert\"}) by (job)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "Logs dropped for last 1h", + "title": "Tracked streams ($instance_insert)", "type": "timeseries" - } - ], - "title": "Troubleshooting", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 37 - }, - "id": 28, - "panels": [ + }, { "datasource": { "type": "prometheus", "uid": "$ds" }, - "description": "Percentage of used memory (resident).\nThe application's performance will significantly degrade when memory usage is close to 100%.", + "description": "Number of remote write send errors over the last 24 hours per vlinsert instance. 
Non‑zero values typically indicate connectivity issues, timeouts, or 4xx/5xx responses.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "fixedColor": "red", + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -2171,7 +9088,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -2179,17 +9096,17 @@ } ] }, - "unit": "percentunit" + "unit": "short" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 150 + "x": 12, + "y": 166 }, - "id": 38, + "id": 121, "options": { "legend": { "calcs": [ @@ -2209,7 +9126,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2217,26 +9134,41 @@ "uid": "$ds" }, "editorMode": "code", - "exemplar": false, - "expr": "max(\n max_over_time(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n) by(job)", - "interval": "", - "legendFormat": "__auto", + "expr": "sum(increase(vl_insert_remote_send_errors_total{job=~\"$job\", instance=~\"$instance_insert\"}[24h])) by (job)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "RSS memory % usage ($instance)", + "title": "Send errors over 24h ($instance_insert)", "type": "timeseries" - }, + } + ], + "title": "vlinsert", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 60 + }, + "id": 122, + "panels": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, + "description": "CPU utilization per vlselect instance as a fraction of available cores. 
Sustained values above 80% suggest CPU bottlenecks; correlate with query duration.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -2279,7 +9211,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -2287,17 +9219,33 @@ } ] }, - "unit": "percentunit" + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2495C", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 150 + "x": 0, + "y": 135 }, - "id": 44, + "id": 99, "options": { "legend": { "calcs": [ @@ -2314,10 +9262,10 @@ "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "none" + "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2326,16 +9274,31 @@ }, "editorMode": "code", "exemplar": false, - "expr": "max(\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}\n) by(job)", + "expr": "max(process_cpu_cores_available{job=~\"$job\", instance=~\"$instance_select\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Limit", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance_select\"}[$__rate_interval])) by (job)", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "__auto", + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "CPU % usage ($instance)", + "title": "CPU % usage ($instance_select)", "type": "timeseries" }, { @@ -2343,11 +9306,11 @@ "type": "prometheus", "uid": 
"$ds" }, - "description": "Share for memory allocated by the process itself. When memory usage reaches 100% it will be likely OOM-killed.\nSafe memory usage % considered to be below 80%", + "description": "How much memory (RAM) the VictoriaLogs process is actually using, compared to its allowed container or system limit. See 'Memory Usage' panel for a detailed breakdown.\n\n- Good: Below 70% most of the time, maybe spiking a bit under load.\n- Bad: Above 90% for more than 5 minutes = risk of out-of-memory (`OOM`) kill.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -2372,7 +9335,7 @@ "scaleDistribution": { "type": "linear" }, - "showPoints": "never", + "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", @@ -2390,7 +9353,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -2398,17 +9361,33 @@ } ] }, - "unit": "percentunit" + "unit": "bytes" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 158 + "x": 12, + "y": 135 }, - "id": 42, + "id": 97, "options": { "legend": { "calcs": [ @@ -2423,12 +9402,12 @@ "sortDesc": true }, "tooltip": { - "hideZeros": false, + "hideZeros": true, "mode": "multi", "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2437,14 +9416,28 @@ }, "editorMode": "code", "exemplar": false, - "expr": "max(\n max_over_time(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n) by(job)", + "expr": "sum(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance_select\"}[$__rate_interval]) by (job)", 
"interval": "", - "legendFormat": "__auto", + "legendFormat": "{{job}}", "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance_select\"})", + "hide": false, + "interval": "", + "legendFormat": "limit", + "range": true, + "refId": "B" } ], - "title": "RSS anonymous memory % usage ($instance)", + "title": "Physical memory usage ($instance_select)", "type": "timeseries" }, { @@ -2452,7 +9445,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\nThe lower the better.", + "description": "Rate of incoming query requests by endpoint path. This metric tracks the query load on VictoriaLogs, including different query interfaces and internal operations. \n\nHigher rates indicate increased query activity from users or applications. Monitor for sudden spikes that might indicate new dashboards, automated queries, or potential performance issues. 
Sustained high rates may require scaling query processing capacity.", "fieldConfig": { "defaults": { "color": { @@ -2488,10 +9481,9 @@ "mode": "none" }, "thresholdsStyle": { - "mode": "line" + "mode": "off" } }, - "decimals": 0, "links": [], "mappings": [], "min": 0, @@ -2500,21 +9492,25 @@ "steps": [ { "color": "green", - "value": null + "value": 0 + }, + { + "color": "red", + "value": 80 } ] }, - "unit": "s" + "unit": "short" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 158 + "x": 0, + "y": 143 }, - "id": 76, + "id": 125, "options": { "legend": { "calcs": [ @@ -2534,7 +9530,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2542,31 +9538,16 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "max(rate(process_pressure_cpu_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job)", + "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance_select\"}[$__rate_interval])) by (path) > 0", "format": "time_series", "interval": "", - "intervalFactor": 2, - "legendFormat": "{{job}} - waiting", + "intervalFactor": 1, + "legendFormat": "{{path}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "editorMode": "code", - "expr": "max(rate(process_pressure_cpu_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job)", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{job}} - stalled", - "range": true, - "refId": "B" } ], - "title": "CPU pressure", + "title": "HTTP request rate ($instance_select)", "type": "timeseries" }, { @@ -2574,7 +9555,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\nThe lower the better.", + "description": "99th percentile of query execution duration. 
This represents the time it takes for 99% of queries to complete:\n\n- High values indicate slow query performance that affects user experience. \n- Spikes may suggest complex queries, resource contention, or inefficient indexes. Monitor for trends that could indicate degrading performance.", "fieldConfig": { "defaults": { "color": { @@ -2589,7 +9570,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2610,10 +9591,9 @@ "mode": "none" }, "thresholdsStyle": { - "mode": "line" + "mode": "off" } }, - "decimals": 0, "links": [], "mappings": [], "min": 0, @@ -2622,7 +9602,11 @@ "steps": [ { "color": "green", - "value": null + "value": 0 + }, + { + "color": "red", + "value": 80 } ] }, @@ -2633,10 +9617,10 @@ "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 166 + "x": 12, + "y": 143 }, - "id": 77, + "id": 126, "options": { "legend": { "calcs": [ @@ -2656,7 +9640,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2664,31 +9648,15 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "max(rate(process_pressure_memory_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job)", + "expr": "max(vl_http_request_duration_seconds{job=~\"$job\", instance=~\"$instance_select\", quantile=\"0.99\"}) by (path) > 0", "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{job}} - waiting", + "intervalFactor": 1, + "legendFormat": "{{path}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "editorMode": "code", - "expr": "max(rate(process_pressure_memory_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job)", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{job}} - stalled", - "range": true, - "refId": "B" } ], - "title": "Memory pressure", + "title": 
"HTTP request duration p99 ($instance_select)", "type": "timeseries" }, { @@ -2696,7 +9664,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the number of bytes read/write from the storage layer.", + "description": "The number of concurrent select operations has reached the configured limit: `-search.maxConcurrentRequests`. To check the default capacity, refer to the `-vl_concurrent_select_capacity` metric.", "fieldConfig": { "defaults": { "color": { @@ -2737,12 +9705,13 @@ }, "links": [], "mappings": [], + "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -2750,18 +9719,21 @@ } ] }, - "unit": "bytes" + "unit": "short" }, "overrides": [ { "matcher": { - "id": "byRegexp", - "options": "/read.*/" + "id": "byName", + "options": "limit" }, "properties": [ { - "id": "custom.transform", - "value": "negative-Y" + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } } ] } @@ -2770,10 +9742,10 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 166 + "x": 0, + "y": 151 }, - "id": 52, + "id": 208, "options": { "legend": { "calcs": [ @@ -2790,10 +9762,10 @@ "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "none" + "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2801,32 +9773,16 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(process_io_storage_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job) > 0", + "expr": "max(increase(vl_concurrent_select_limit_reached_total{job=~\"$job\", instance=~\"$instance_select\"})) by (job)", "format": "time_series", - "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "read {{job}}", + "legendFormat": "{{job}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "editorMode": "code", - "expr": 
"sum(rate(process_io_storage_written_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job) > 0", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "write {{job}}", - "range": true, - "refId": "B" } ], - "title": "Disk writes/reads ($instance)", + "title": "Concurrent query limit reached ($instance_select)", "type": "timeseries" }, { @@ -2834,7 +9790,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the percentage of open file descriptors compared to the limit set in the OS.\nReaching the limit of open files can cause various issues and must be prevented.\n\nSee how to change limits here https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a", + "description": "Number of queries that timed out while waiting for available concurrency slots. This indicates sustained query overload beyond configured limits.\n\nHigh values suggest:\n- Query queue is consistently full\n- Queries waiting too long for execution slots\n- System under sustained heavy load\n- Need for horizontal scaling or query optimization\n\nCombined with `Select concurrency limit reached`, provides complete picture of query rejection patterns.", "fieldConfig": { "defaults": { "color": { @@ -2849,11 +9805,12 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, + "vis": false, "viz": false }, "insertNulls": false, @@ -2873,70 +9830,45 @@ "mode": "off" } }, - "decimals": 2, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", - "value": 80 + "value": 1 } ] }, - "unit": "percentunit" + "unit": "reqps" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "max" - }, - "properties": [ - { - "id": "color", - "value": { - 
"fixedColor": "#C4162A", - "mode": "fixed" - } - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 174 + "x": 12, + "y": 151 }, - "id": 46, + "id": 204, "options": { "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", + "calcs": [], + "displayMode": "list", "placement": "bottom", - "showLegend": true, - "sortBy": "Last *", - "sortDesc": true + "showLegend": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2944,17 +9876,14 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "max(\n max_over_time(process_open_fds{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_max_fds{job=~\"$job\", instance=~\"$instance\"}\n) by(job)", - "format": "time_series", - "hide": false, + "expr": "sum(increase(vl_concurrent_select_limit_timeout_total{job=~\"$job\", instance=~\"$instance_select\"}[$__rate_interval])) by (job)", "interval": "", - "intervalFactor": 2, - "legendFormat": "__auto", + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "Open FDs ($instance)", + "title": "Query timeouts ($instance_select)", "type": "timeseries" }, { @@ -2962,11 +9891,12 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the number of read/write syscalls such as read, pread, write, pwrite.", + "description": "Number of remote send errors reported by vlselect. 
Check the logs when sending fails to see the details.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "fixedColor": "red", + "mode": "fixed" }, "custom": { "axisBorderShow": false, @@ -3003,12 +9933,13 @@ }, "links": [], "mappings": [], + "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3018,28 +9949,15 @@ }, "unit": "short" }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 174 + "x": 0, + "y": 159 }, - "id": 56, + "id": 133, "options": { "legend": { "calcs": [ @@ -3056,10 +9974,10 @@ "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "none" + "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -3067,43 +9985,41 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(process_io_read_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job) > 0", + "expr": "sum(increase(vl_select_remote_send_errors_total{job=~\"$job\", instance=~\"$instance_select\"}[$__rate_interval])) by (job) > 0", "format": "time_series", - "hide": false, - "interval": "", "intervalFactor": 1, - "legendFormat": "read {{job}}", + "legendFormat": "{{type}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "editorMode": "code", - "expr": "sum(rate(process_io_write_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job) > 0", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "write {{job}}", - "range": true, - "refId": "B" } ], - "title": "Disk write/read calls ($instance)", + "title": "Send errors ($instance_select)", "type": "timeseries" - }, + } + ], + "title": 
"vlselect", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 61 + }, + "id": 151, + "panels": [ { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, + "description": "Number of storage blocks scanned per query (99th percentile). Each block contains logs for a specific time period and field combination. High values indicate queries scanning too many blocks, often caused by:\n\n- Wide time ranges without specific filters\n- Queries missing indexed fields (like `_stream`, `kubernetes.*`)\n- Non-selective filters that don't utilize `bloom filters`\n\nCorrelate with `Bytes/query p99` - if blocks are high but bytes are low, blocks contain little data (good). If both are high, query is reading large amounts of data.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3138,16 +10054,15 @@ "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3155,7 +10070,7 @@ } ] }, - "unit": "short" + "unit": "none" }, "overrides": [] }, @@ -3163,9 +10078,9 @@ "h": 8, "w": 12, "x": 0, - "y": 182 + "y": 136 }, - "id": 50, + "id": 152, "options": { "legend": { "calcs": [ @@ -3185,35 +10100,36 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "sum(max_over_time(go_goroutines{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "__auto", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_processed_blocks_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job, vmrange))) 
by (job)", + "hide": false, + "interval": "", + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "Goroutines ($instance)", + "title": "Blocks/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, - "description": "Shows IO pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\nThe lower the better.", + "description": "Total bytes read from disk per query (99th percentile). This represents the complete `I/O` overhead for query execution, including:\n\n- Block headers and metadata\n- Bloom filter data for candidate selection\n- Column headers and indexes\n- Actual log values and timestamps\n\nHigh values indicate expensive queries. Compare with specific breakdown panels below to identify bottlenecks. Monitor trends over time and correlate with query complexity.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3245,23 +10161,26 @@ "mode": "none" }, "thresholdsStyle": { - "mode": "line" + "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 + }, + { + "color": "red", + "value": 80 } ] }, - "unit": "s" + "unit": "bytes" }, "overrides": [] }, @@ -3269,9 +10188,9 @@ "h": 8, "w": 12, "x": 12, - "y": 182 + "y": 136 }, - "id": 78, + "id": 150, "options": { "legend": { "calcs": [ @@ -3288,53 +10207,39 @@ "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "desc" + "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "$ds" - }, - "editorMode": "code", - "expr": "max(rate(process_pressure_io_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job)", - "format": "time_series", - "interval": "", - 
"intervalFactor": 2, - "legendFormat": "{{job}} - waiting", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "max(rate(process_pressure_io_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job)", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_total_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job, vmrange))) by (job)", "hide": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{job}} - stalled", + "legendFormat": "{{job}}", "range": true, - "refId": "B" + "refId": "A" } ], - "title": "IO pressure", + "title": "Bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, + "description": "Bytes read from block headers per query (99th percentile). Block headers contain metadata about each storage block including `time ranges`, `field names`, and data location pointers.\n\nHigh values indicate:\n- Query `time range` spans many blocks (reduce time range or add time-based filters)\n- Missing stream-level filters (`_stream` field) causing full block header scans\n- High cardinality fields creating excessive blocks\n\nMonitor relative changes over time - sudden increases suggest inefficient query patterns or changes in data structure.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3369,16 +10274,15 @@ "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3386,7 +10290,7 @@ } ] }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, @@ -3394,9 +10298,9 @@ "h": 8, "w": 12, "x": 0, - "y": 190 + "y": 144 }, - 
"id": 54, + "id": 153, "options": { "legend": { "calcs": [ @@ -3416,35 +10320,36 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "sum(max_over_time(process_num_threads{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "__auto", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_block_headers_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job, vmrange))) by (job) > 0.001", + "hide": false, + "interval": "", + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "Threads ($instance)", + "title": "Block header bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, - "description": "", + "description": "Bytes read from Bloom filters per query (99th percentile). `Bloom filters` are probabilistic data structures that quickly eliminate blocks that definitely don't contain search terms.\n\nHigh values indicate:\n- Queries with low-selectivity text filters (common words like `error`, `info`)\n- Missing or ineffective field-based filters\n- Queries that force scanning many candidate blocks\n\nOptimize by adding specific field filters (`kubernetes.container_name`, `_stream`) before text searches. 
Monitor for sudden increases that indicate poor filter selectivity.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3479,15 +10384,15 @@ "mode": "off" } }, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3495,7 +10400,7 @@ } ] }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, @@ -3503,9 +10408,9 @@ "h": 8, "w": 12, "x": 12, - "y": 190 + "y": 144 }, - "id": 60, + "id": 154, "options": { "legend": { "calcs": [ @@ -3525,36 +10430,36 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "sum(max_over_time(vm_tcplistener_conns{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(job)", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_bloom_filters_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job, vmrange))) by (job) > 0.001", "hide": false, - "intervalFactor": 1, - "legendFormat": "__auto", + "interval": "", + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "TCP connections ($instance)", + "title": "Bloom filter bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, - "description": "Shows the percent of CPU spent on garbage collection.\n\nIf % is high, then CPU usage can be decreased by changing GOGC to higher values. Increasing GOGC value will increase memory usage, and decrease CPU usage.\n\nTry searching for keyword `GOGC` at https://docs.victoriametrics.com/victoriametrics/troubleshooting/ ", + "description": "Bytes read from actual log values per query (99th percentile). 
This represents the uncompressed log content being retrieved and processed for the query result.\n\nHigh values indicate:\n- Queries returning large result sets (add `LIMIT` clause)\n- Retrieving logs with large payloads (`JSON` objects, stack traces)\n- Missing filters that would reduce matching log volume\n- Functions like `uniq` or `stats` processing many log entries\n\nReduce by: adding selective filters, using field extractors instead of full log retrieval, limiting result count.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3589,16 +10494,15 @@ "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3606,7 +10510,7 @@ } ] }, - "unit": "percentunit" + "unit": "bytes" }, "overrides": [] }, @@ -3614,9 +10518,9 @@ "h": 8, "w": 12, "x": 0, - "y": 198 + "y": 152 }, - "id": 74, + "id": 155, "options": { "legend": { "calcs": [ @@ -3626,44 +10530,46 @@ ], "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true }, "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "desc" + "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "max(\n rate(go_gc_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) \n / rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n ) by(job)", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_values_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job, vmrange))) by (job)", + "hide": false, 
"interval": "", - "intervalFactor": 2, - "legendFormat": "__auto", + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "CPU spent on GC ($instance)", + "title": "Value bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, - "description": "", + "description": "Bytes read from timestamp column (`_time`) per query (99th percentile). The `_time` column is automatically indexed and used for time-range filtering during query execution.\n\nHigh values indicate:\n- Queries with very wide time ranges requiring timestamp scanning\n- Time-based aggregations over large datasets\n- Missing time-range restrictions in query filters\n\nThis is usually the smallest component of query `I/O`. Spikes correlate with time range width and data density in the queried period.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3698,15 +10604,15 @@ "mode": "off" } }, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3714,7 +10620,7 @@ } ] }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, @@ -3722,9 +10628,9 @@ "h": 8, "w": 12, "x": 12, - "y": 198 + "y": 152 }, - "id": 58, + "id": 156, "options": { "legend": { "calcs": [ @@ -3744,36 +10650,36 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "sum(rate(vm_tcplistener_accepts_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job)", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_timestamps_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by 
(job, vmrange))) by (job)", "hide": false, - "intervalFactor": 1, - "legendFormat": "__auto", + "interval": "", + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "TCP connections rate ($instance)", + "title": "_time bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, - "description": "Shows the rate of allocations in memory. Sudden increase in allocations would mean increased pressure on Go Garbage Collector and can saturate CPU resources of the application.", + "description": "Bytes read from column header indexes per query (99th percentile). `Column header indexes` contain metadata about which fields exist in each block and their data types.\n\nHigh values suggest:\n- Queries scanning many blocks due to missing field filters\n- High field cardinality creating large index structures\n- Queries accessing many different field names across blocks\n- Schema evolution causing index fragmentation\n\nOptimize by using consistent `field names` and adding field-specific filters early in query pipeline.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3805,19 +10711,22 @@ "mode": "none" }, "thresholdsStyle": { - "mode": "line" + "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 + }, + { + "color": "red", + "value": 80 } ] }, @@ -3829,9 +10738,9 @@ "h": 8, "w": 12, "x": 0, - "y": 206 + "y": 160 }, - "id": 75, + "id": 157, "options": { "legend": { "calcs": [ @@ -3841,44 +10750,46 @@ ], "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true }, "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "desc" + "sort": "none" } }, - "pluginVersion": "11.5.0", + 
"pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "max(rate(go_memstats_alloc_bytes_total{job=~\"$job\"}[$__rate_interval])) by (job)", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_columns_header_indexes_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job, vmrange))) by (job)", + "hide": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "__auto", + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "Memory allocations rate", + "title": "Column header index bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, - "description": "Shows the time goroutines have spent in runnable state before actually running. The lower is better.\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.", + "description": "Bytes read from column headers per query (99th percentile). `Column headers` store the actual field schema and compression metadata for each block's columns.\n\nThis metric helps identify:\n- Schema complexity overhead (many fields per log entry)\n- Inefficient field access patterns\n- Blocks with heterogeneous schemas requiring header reads\n\nCompare with other `I/O` breakdown panels to understand query cost distribution. 
High column header reads suggest schema optimization opportunities or need for more selective field access patterns.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3910,27 +10821,26 @@ "mode": "none" }, "thresholdsStyle": { - "mode": "line" + "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", - "value": 0.1 + "value": 80 } ] }, - "unit": "s" + "unit": "bytes" }, "overrides": [] }, @@ -3938,9 +10848,9 @@ "h": 8, "w": 12, "x": 12, - "y": 206 + "y": 160 }, - "id": 61, + "id": 158, "options": { "legend": { "calcs": [ @@ -3950,42 +10860,44 @@ ], "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true }, "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "desc" + "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "max(histogram_quantile(0.99, sum(rate(go_sched_latencies_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (job, instance, le))) by(job)", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_columns_headers_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job, vmrange))) by (job)", + "hide": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "__auto", + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "Go scheduling latency", + "title": "Column header bytes/query p99 ($instance)", "type": "timeseries" } ], - "title": "Resource usage", + "title": "Slow query troubleshooting", "type": "row" } ], "preload": false, "refresh": "", - 
"schemaVersion": 40, + "schemaVersion": 41, "tags": [ "victoriametrics", "victorialogs" @@ -3993,7 +10905,10 @@ "templating": { "list": [ { - "current": {}, + "current": { + "text": "default", + "value": "default" + }, "includeAll": false, "name": "ds", "options": [], @@ -4003,7 +10918,12 @@ "type": "datasource" }, { - "current": {}, + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, "datasource": { "type": "prometheus", "uid": "$ds" @@ -4022,7 +10942,12 @@ "type": "query" }, { - "current": {}, + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, "datasource": { "type": "prometheus", "uid": "$ds" @@ -4041,7 +10966,10 @@ "type": "query" }, { - "current": {}, + "current": { + "text": "", + "value": "" + }, "datasource": { "type": "prometheus", "uid": "${ds}" @@ -4068,6 +10996,160 @@ "filters": [], "name": "adhoc", "type": "adhoc" + }, + { + "allowCustomValue": true, + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "definition": "label_values(vl_merges_total,instance)", + "hide": 2, + "includeAll": true, + "multi": true, + "name": "instance_storage_all", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(vl_merges_total,instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "allowCustomValue": true, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "definition": "label_values(vl_merges_total{instance=~\"$instance\"},instance)", + "hide": 2, + "includeAll": true, + "multi": true, + "name": "instance_storage", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(vl_merges_total{instance=~\"$instance\"},instance)", + "refId": "VariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "allowCustomValue": true, + "current": { + "text": 
"All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "definition": "label_values(vlinsert_backend_conn_bytes_read_total,instance)", + "hide": 2, + "includeAll": true, + "multi": true, + "name": "instance_insert_all", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(vlinsert_backend_conn_bytes_read_total,instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "allowCustomValue": true, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "definition": "label_values(vlinsert_backend_conn_bytes_read_total{instance=~\"$instance\"},instance)", + "hide": 2, + "includeAll": true, + "multi": true, + "name": "instance_insert", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(vlinsert_backend_conn_bytes_read_total{instance=~\"$instance\"},instance)", + "refId": "VariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "allowCustomValue": true, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "definition": "label_values(vlselect_backend_conn_reads_total,instance)", + "description": "Instances of vlselect discovered from metrics; used to scope vlselect panels.", + "hide": 2, + "includeAll": true, + "multi": true, + "name": "instance_select_all", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(vlselect_backend_conn_reads_total,instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "allowCustomValue": true, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "definition": "label_values(vlselect_backend_conn_reads_total{instance=~\"$instance\"},instance)", + 
"description": "Instances of vlselect discovered from metrics; used to scope vlselect panels.", + "hide": 2, + "includeAll": true, + "multi": true, + "name": "instance_select", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(vlselect_backend_conn_reads_total{instance=~\"$instance\"},instance)", + "refId": "VariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" } ] }, diff --git a/dashboards/victorialogs.json b/dashboards/victorialogs.json index bf6f8198a8..da3279fe1e 100644 --- a/dashboards/victorialogs.json +++ b/dashboards/victorialogs.json @@ -42,7 +42,30 @@ "hide": true, "iconColor": "orange", "name": "restarts", - "textFormat": "{{job}} restarted" + "textFormat": "{{instance}} restarted" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "enable": false, + "filter": { + "exclude": false, + "ids": [ + 75 + ] + }, + "hide": false, + "iconColor": "#57f26d1c", + "name": "gc", + "target": { + "expr": "go_memstats_last_gc_time_seconds{job=~\"$job\", instance=~\"$instance\"} * 1000", + "interval": "", + "refId": "Anno" + }, + "textFormat": "GC event", + "useValueForTime": true } ] }, @@ -50,7 +73,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 1, + "id": 8, "links": [], "panels": [ { @@ -71,7 +94,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows total amount of log entries in the storage.", + "description": "Total amount of log entries in the storage.", "fieldConfig": { "defaults": { "color": { @@ -83,7 +106,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, @@ -92,12 +115,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 4, "x": 0, "y": 1 }, - "id": 10, + "id": 133, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -117,7 +140,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -131,7 +154,7 @@ "instant": true, 
"interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -143,7 +166,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the cumulative number of log entries ingested over the last 24h.", + "description": "The total number of log entries ingested over the past 24 hours.", "fieldConfig": { "defaults": { "color": { @@ -155,7 +178,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, @@ -164,12 +187,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 4, "y": 1 }, - "id": 65, + "id": 134, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -189,7 +212,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -203,7 +226,7 @@ "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -215,7 +238,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Total amount of used disk space.\nAccounts for all compressed log entries and index size.\n\nSee how to [control disk usage](https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage).", + "description": "Average ingestion rate of log entries.", "fieldConfig": { "defaults": { "color": { @@ -227,21 +250,21 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, - "unit": "bytes" + "unit": "short" }, "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 9, "y": 1 }, - "id": 24, + "id": 135, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -261,7 +284,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -270,16 +293,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(vl_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", + "expr": "sum(rate(vl_rows_ingested_total{job=~\"$job\", 
instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], - "title": "Disk space usage", + "title": "Insert req/s", "type": "stat" }, { @@ -287,7 +310,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the average ingestion rate of log entries.", + "description": "Total amount of used disk space.\nAccounts for all compressed log entries and index size.\n\nSee how to [control disk usage](https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage).", "fieldConfig": { "defaults": { "color": { @@ -299,21 +322,21 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 14, "y": 1 }, - "id": 22, + "id": 136, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -333,7 +356,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -342,16 +365,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(vl_rows_ingested_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(vl_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], - "title": "Insert req/s", + "title": "Disk space usage", "type": "stat" }, { @@ -359,7 +382,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Total number of available CPUs for VictoriaLogs process", + "description": "Integer number of CPU cores available to the application. This value is automatically rounded down from fractional CPU quotas. For optimal performance, fractional CPU units should be avoided. 
See the [best practices](https://docs.victoriametrics.com/victoriametrics/bestpractices/#kubernetes) documentation for more details.", "fieldConfig": { "defaults": { "color": { @@ -371,7 +394,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -384,12 +407,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 19, "y": 1 }, - "id": 30, + "id": 137, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -409,7 +432,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -423,7 +446,7 @@ "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -437,12 +460,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 4, "x": 0, - "y": 3 + "y": 5 }, - "id": 63, + "id": 138, "options": { "code": { "language": "plaintext", @@ -452,7 +475,7 @@ "content": "
$version
", "mode": "markdown" }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "title": "Version", "type": "text" }, @@ -461,7 +484,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the cumulative number of log entries ingested over the last 24h. \n\nThe size is calculated before compression.", + "description": "The cumulative number of log entries ingested over the last 24h. \n\nThe size is calculated before compression.", "fieldConfig": { "defaults": { "color": { @@ -473,7 +496,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, @@ -482,12 +505,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 4, - "y": 3 + "y": 5 }, - "id": 64, + "id": 139, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -507,7 +530,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -521,7 +544,7 @@ "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -533,7 +556,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "The ratio between original data size and compressed data stored on disk.\n\nCompression ratio doesn't account for indexdb size. 
It also may change with time, as [merges](https://docs.victoriametrics.com/victorialogs/#forced-merge) are improving compression of historical data.", + "description": "Rate of HTTP read requests.", "fieldConfig": { "defaults": { "color": { @@ -545,21 +568,21 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, - "unit": "none" + "unit": "short" }, "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 9, - "y": 3 + "y": 5 }, - "id": 25, + "id": 140, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -579,7 +602,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -588,16 +611,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": " sum(vl_uncompressed_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) / sum(vl_compressed_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", + "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance\", path=~\"/select/.*\"}[$__rate_interval]))", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], - "title": "Compression ratio", + "title": "Read req/s", "type": "stat" }, { @@ -605,7 +628,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the rate of HTTP read requests.", + "description": "The ratio between original data size and compressed data stored on disk. This metric excludes indexdb size. \n\nThe ratio can go up or down as the system performs automatic maintenance and applies retention policies. 
For example:\n- Background merges: [Merges](https://docs.victoriametrics.com/victorialogs/#forced-merge) improve compression by combining data into larger, more efficiently compressed blocks\n- Retention policies: When old data is deleted due to retention settings, the ratio changes as different time periods have varying compression characteristics\n\n", "fieldConfig": { "defaults": { "color": { @@ -617,21 +640,21 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, - "unit": "short" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 14, - "y": 3 + "y": 5 }, - "id": 36, + "id": 141, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -651,7 +674,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -660,16 +683,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance\", path=~\"/select/.*\"}[$__rate_interval]))", + "expr": " sum(vl_uncompressed_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) / sum(vl_compressed_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "__auto", + "legendFormat": "{{instance}}", "refId": "A" } ], - "title": "Read req/s", + "title": "Compression ratio", "type": "stat" }, { @@ -677,7 +700,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Total size of available memory for VictoriaLogs process", + "description": "Total system memory available to the application. 
This represents the system or container's memory capacity or limit, not the currently free memory.", "fieldConfig": { "defaults": { "color": { @@ -689,7 +712,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, @@ -698,12 +721,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 19, - "y": 3 + "y": 5 }, - "id": 34, + "id": 142, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -723,19 +746,20 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "uid": "$ds" }, + "editorMode": "code", "exemplar": false, "expr": "sum(vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"})", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -748,7 +772,7 @@ "h": 1, "w": 24, "x": 0, - "y": 5 + "y": 9 }, "id": 18, "panels": [], @@ -760,7 +784,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows ingestion rate in number of log entries and bytes per second.", + "description": "Ingestion rate in number of log entries and bytes per second.", "fieldConfig": { "defaults": { "color": { @@ -807,7 +831,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -840,7 +864,7 @@ "h": 8, "w": 12, "x": 0, - "y": 6 + "y": 10 }, "id": 2, "options": { @@ -861,7 +885,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -891,7 +915,7 @@ "refId": "B" } ], - "title": "Logs ingestion rate ", + "title": "Logs ingestion rate", "type": "timeseries" }, { @@ -941,12 +965,13 @@ "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -962,7 +987,7 @@ "h": 8, "w": 12, "x": 12, - "y": 6 + "y": 10 }, "id": 14, "options": { @@ -984,7 +1009,7 @@ "sort": "desc" 
} }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1001,7 +1026,7 @@ "refId": "A" } ], - "title": "Requests rate ", + "title": "Requests rate", "type": "timeseries" }, { @@ -1024,7 +1049,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1056,7 +1081,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1072,7 +1097,7 @@ "h": 8, "w": 12, "x": 0, - "y": 14 + "y": 18 }, "id": 69, "options": { @@ -1091,7 +1116,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1100,11 +1125,11 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(vm_http_request_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, path) > 0", + "expr": "sum(rate(vm_http_request_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (path)", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}} - {{path}}", + "legendFormat": "{{path}}", "range": true, "refId": "A" }, @@ -1115,12 +1140,12 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(vl_http_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, path) > 0", + "expr": "sum(rate(vl_http_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (path)", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}} - {{path}}", + "legendFormat": "{{path}}", "range": true, "refId": "B" } @@ -1133,7 +1158,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "The less time it takes is better.\n", + "description": "99th percentile response time for VictoriaLogs HTTP endpoints, grouped by path. This means 99% of requests are faster than this value. 
**Lower numbers are better**, as they indicate faster responses and fewer slow requests.", "fieldConfig": { "defaults": { "color": { @@ -1148,7 +1173,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1175,12 +1200,13 @@ "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1196,7 +1222,7 @@ "h": 8, "w": 12, "x": 12, - "y": 14 + "y": 18 }, "id": 66, "options": { @@ -1218,7 +1244,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1226,15 +1252,15 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "max(vl_http_request_duration_seconds{job=~\"$job\", instance=~\"$instance\", quantile=\"0.99\"}) by (instance, path) > 0", + "expr": "max(vl_http_request_duration_seconds{job=~\"$job\", instance=~\"$instance\", quantile=\"0.99\"}) by (path) > 0", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{instance}} - {{path}}", + "legendFormat": "{{path}}", "range": true, "refId": "A" } ], - "title": "Query duration 0.99 quantile", + "title": "Request duration p99", "type": "timeseries" }, { @@ -1242,7 +1268,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the amount of on-disk space occupied by all the data stored in the storage.\n\nSee how to [control disk usage](https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage).", + "description": "Amount of on-disk space occupied by all the data stored in the storage.\n\nSee how to [control disk usage](https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage).", "fieldConfig": { "defaults": { "color": { @@ -1254,6 +1280,8 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, 
"barWidthFactor": 0.6, "drawStyle": "line", @@ -1289,7 +1317,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1305,7 +1333,7 @@ "h": 8, "w": 12, "x": 0, - "y": 22 + "y": 26 }, "id": 6, "options": { @@ -1327,7 +1355,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1335,16 +1363,17 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(vl_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", + "expr": "max(vl_total_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}) by (instance)", "format": "time_series", + "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "disk usage", + "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "Disk space usage ", + "title": "Disk space usage", "type": "timeseries" }, { @@ -1352,7 +1381,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the rate of logging the messages by their level. 
Unexpected spike in rate is a good reason to check logs.", + "description": "Rate of VictoriaLogs' own application log messages (errors, warnings, debug) - NOT the logs that VictoriaLogs is collecting from external sources.", "fieldConfig": { "defaults": { "color": { @@ -1367,7 +1396,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "bars", - "fillOpacity": 100, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1397,7 +1426,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1406,13 +1435,44 @@ ] } }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*warn:.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*error:.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 22 + "y": 26 }, "id": 67, "options": { @@ -1434,7 +1494,7 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1442,147 +1502,3353 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (instance, level, location) > 0", + "expr": "sum(rate(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (instance, level, location)", "interval": "5m", "legendFormat": "{{instance}} - {{level}}: {{location}}", "range": true, "refId": "A" } ], - "title": "Logging rate", + "title": "VictoriaLogs internal logging", "type": "timeseries" }, { - "collapsed": true, + "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 30 + "y": 34 }, "id": 68, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "$ds" + "panels": [], + "title": 
"Troubleshooting", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Number of restarts per instance. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. \n\nNormally, processes shouldn't restart unless the restart was initiated by the user. The reason for restarts should be determined by checking the logs of each specific service. ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "description": "Shows the number of restarts per job. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. Normally, processes shouldn't restart unless restart was inited by user. The reason of restarts should be figured out by checking the logs of each specific service. ", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "stepAfter", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 0, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + 
"drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 315 - }, - "id": 62, - "options": { - "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "sortBy": "Last *", - "sortDesc": true + "insertNulls": false, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "desc" + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "pluginVersion": "11.5.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$ds" + "decimals": 0, + "links": [], + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 }, - "editorMode": "code", - "expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) > 0) by(job)", - "format": "time_series", - "instant": false, - "legendFormat": "{{job}}", - "refId": "A" - } + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 35 + }, + "id": 62, + "options": { + "legend": { + "calcs": [ + "lastNotNull" ], - "title": "Restarts", - "type": "timeseries" + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, - "description": "The number of the new [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) created over the 
last 24h.\n\nPrefer having as low churn rate as possible. \nSee [How to determine which fields must be associated with log streams?](https://docs.victoriametrics.com/victorialogs/keyconcepts/#how-to-determine-which-fields-must-be-associated-with-log-streams)\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", + "editorMode": "code", + "expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Restarts", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "The number of new [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) created over the last 24 hours. Lower rate is better. 
See [How to determine which fields must be associated with log streams?](https://docs.victoriametrics.com/victorialogs/keyconcepts/#how-to-determine-which-fields-must-be-associated-with-log-streams)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 35 + }, + "id": 26, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(increase(vl_streams_created_total{job=~\"$job\", instance=~\"$instance\"}[1d])) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Streams churn rate 24h ($instance)", + "type": 
"timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Flags explicitly set to non-default values", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "job" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 43 + }, + "id": 70, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(flag{is_set=\"true\", job=~\"$job\", instance=~\"$instance\"}) by(job, instance, name, value)", + "format": "table", + "instant": true, + "legendFormat": "{{name}}={{value}}", + "range": false, + "refId": "A" + } + ], + "title": "Non-default flags", + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Number of log entries ignored or dropped on insertion due to the following reasons:\n* Timestamp out of the retention period or in the future\n* Number of fields per entry exceeded\n* Line too long\n\nIf this occurs, check the VictoriaLogs log for details.", + "fieldConfig": { + "defaults": { + "color": { + 
"mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 43 + }, + "id": 71, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(vl_rows_dropped_total{job=~\"$job\", instance=~\"$instance\"}[1h])) by (reason) ", + "hide": false, + "interval": "", + "legendFormat": "{{reason}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(increase(vl_too_long_lines_skipped_total{job=~\"$job\", instance=~\"$instance\"}[1h]))", + "hide": false, + "instant": false, + "legendFormat": "line_too_long", + "range": true, + "refId": "B" + } + ], + "title": "Logs dropped for last 1h", + "type": "timeseries" + }, + { + "collapsed": 
true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 51 + }, + "id": 28, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Percentage of physical RAM used compared to the memory limit. If this percentage is high, check the `RSS` anonymous vs resident ratio panel for more details.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 0.8 + }, + { + "color": "red", + "value": 0.9 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 0, + "y": 52 + }, + "id": 79, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(\n max_over_time(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n)", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "RSS % of memory limit", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "How much memory (RAM) the VictoriaLogs process is actually using, compared to its allowed container or system limit. 
See 'Memory Usage' panel for a detailed breakdown.\n\n- Good: Below 70% most of the time, maybe spiking a bit under load.\n- Bad: Above 90% for more than 5 minutes = risk of out-of-memory (OOM) kill.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 17, + "x": 7, + "y": 52 + }, + "id": 38, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(\n max_over_time(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n) by (instance)", + "interval": "", + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": 
"Physical memory usage ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Portion of RAM that CANNOT be reclaimed without swapping. If both the `RSS`-to-limit percentage and this ratio are high, the process is at high risk of an `OOM` kill.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 0, + "y": 57 + }, + "id": 78, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(process_resident_memory_anon_bytes{job=~\"$job\",instance=~\"$instance\"}\n/\nprocess_resident_memory_bytes{job=~\"$job\",instance=~\"$instance\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Anonymous / Resident ratio", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "- Anonymous resident memory: Private memory allocated to the application that **cannot** be reclaimed by the kernel. Refer to the [Check/profile](https://docs.victoriametrics.com/victorialogs/#profiling) Go heap section for troubleshooting.\n- File-backed resident memory: Memory mapped from files, which can be safely reclaimed. Increases during querying. 
Correlate with `I/O` panels for further analysis.\n- Shared resident memory: Typically negligible. Large spikes may indicate unexpected shared memory consumers.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 17, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 62 + }, + "id": 40, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "anonymous resident memory", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(process_resident_memory_file_bytes{job=~\"$job\", 
instance=~\"$instance\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "file-backed resident memory", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(process_resident_memory_shared_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "shared resident memory", + "range": true, + "refId": "C" + } + ], + "title": "Memory usage ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 62 + }, + "id": 118, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + 
"targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}\n) by(instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU % usage ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Lower is better, e.g. 20% means the process was delayed by memory pressure 20% of the time. See [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n- waiting: Time fraction where at least one thread was blocked on memory.\n- stalled: Time fraction where every thread was blocked on memory (severe pressure).\n\nIf queries slow down and both series spike, the host is likely limited by RAM or I/O throughput.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 0.2 + } + ] + }, + 
"unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 70 + }, + "id": 73, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(process_pressure_memory_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} - waiting", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(process_pressure_memory_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} - stalled", + "range": true, + "refId": "B" + } + ], + "title": "Memory pressure", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Helps troubleshoot high CPU usage or throttling:\n\n- waiting: The percentage of time at least one task in the VictoriaLogs process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: The percentage of time all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there's a CPU burst, it's normal to see waiting or stalled > 1%. 
It only becomes a concern if it consistently climbs above 5–10% and aligns with latency spikes or GC slowdowns.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 70 + }, + "id": 72, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(process_pressure_cpu_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}: waiting", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": 
"sum(rate(process_pressure_cpu_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}: stalled", + "range": true, + "refId": "B" + } + ], + "title": "CPU pressure", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "The higher the better.\n\nThis computes the fraction of read bytes that came from the page cache (i.e., not from disk). It answers: \"Of all the bytes my process read via read(), how many were cache hits?\"\n\nThat's why you can see many read syscalls (read() calls), but the actual disk reads stay low; because the data was already in RAM.\n\n\n\n\n\n\n\n\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 0, + "y": 78 + }, + "id": 80, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "1 - (sum(rate(process_io_storage_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) / sum(rate(process_io_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])))", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": 
"Page-cache hit ratio", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Number of read/write calls application makes:\n\n- read call: Number of read*()-family system calls your process has issued since start. Each call can move 1 byte or megabytes, cached or uncached.\n- write call: Number of write*()-family system calls (including write, pwrite, writev, etc.) made by the process.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "read calls" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 4, + "y": 78 + }, + "id": 56, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + 
"editorMode": "code", + "expr": "max(rate(process_io_read_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "read calls", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(process_io_write_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "write calls", + "range": true, + "refId": "B" + } + ], + "title": "Read/Write syscalls ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Percentage of open file descriptors (files, sockets, pipes, etc.,) compared to the limit set in the OS. Reaching the limit of open files can cause various issues and must be prevented.\n\nSee [how to change limits](https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a).", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { 
+ "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "max" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 78 + }, + "id": 46, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max_over_time(process_open_fds{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n/\nprocess_max_fds{job=~\"$job\", instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Open FDs % usage ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Measure the actual bytes read from and written to disk by the process:\n\n- read: physical bytes the kernel actually pulled from the storage device on behalf of the process (after checking page-cache).\n- write: physical bytes the kernel ultimately wrote to the storage device for the process (after combining, caching, or delaying writes).", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, 
+ "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "read" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 86 + }, + "id": 52, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(process_io_storage_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "read", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(process_io_storage_written_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "write", + "range": true, + "refId": "C" + } + ], + "title": "Disk writes/reads ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "IO pressure based on [Pressure Stall 
Information](https://docs.kernel.org/accounting/psi.html). The lower the better.\n\n- waiting: at least one runnable thread blocked on block-`I/O` (disk, NVMe, network-storage) while others could still make progress.\n- stalled: all non-idle threads simultaneously waiting on `I/O`; no useful user code ran during these periods → true `I/O` thrashing.\n\nIf stalled > 0 while querying, it's recommended to increase queue depth on NVMe, raise blk-mq budgets, or relax cgroup I/O limits.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 86 + }, + "id": 74, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": 
"max(rate(process_pressure_io_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}: waiting", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(process_pressure_io_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}: stalled", + "range": true, + "refId": "B" + } + ], + "title": "IO pressure", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Current number of active TCP connections to VictoriaLogs. This metric helps monitor connection pool usage and identify potential connection leaks. High values may indicate clients not properly closing connections or connection pooling issues. 
Monitor for gradual increases that could lead to resource exhaustion.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 94 + }, + "id": 60, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(vm_tcplistener_conns{job=~\"$job\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "TCP connections ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + 
"axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 94 + }, + "id": 50, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(go_goroutines{job=~\"$job\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Goroutines ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Rate of incoming TCP connections accepted by VictoriaLogs. This metric indicates network activity and client connection patterns. Sudden spikes may indicate increased load or potential DDoS attacks. 
Sustained high rates should be correlated with resource usage to ensure adequate capacity.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 102 + }, + "id": 58, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(vm_tcplistener_accepts_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "TCP connections rate ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 102 + }, + "id": 54, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(process_num_threads{job=~\"$job\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Threads ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Percent of CPU spent on garbage collection.\n\nIf % is high, then CPU usage can be decreased by changing `GOGC` to higher values. 
Increasing `GOGC` value will increase memory usage, and decrease CPU usage.\n\nTry searching for keyword `GOGC` at https://docs.victoriametrics.com/victoriametrics/troubleshooting/ ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 110 + }, + "id": 119, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(\n rate(go_gc_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) \n / rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n ) by(instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU spent on GC 
($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Time goroutines have spent in runnable state before actually running. The lower the better.\n\nHigh values or values exceeding the threshold are usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that the service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 0.1 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 110 + }, + "id": 61, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(histogram_quantile(0.99, sum(rate(go_sched_latencies_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (instance, le))) 
by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Go scheduling latency", + "type": "timeseries" + } + ], + "title": "Resource usage", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 52 + }, + "id": 82, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Total number of time-based (daily) partitions in storage. The number typically grows over time as new data arrives and is partitioned by time periods. \n\nExcessive partition counts may indicate retention policy issues or very high data ingestion rates that could impact query performance.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 3, + "x": 0, + "y": 53 + }, + "id": 88, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(vl_partitions{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Partition Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": 
"Number of storage parts (data files) in each tier. More parts mean fragmentation; fewer parts suggest successful merging. High part counts may slow queries and trigger background merge operations.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 9, + "x": 3, + "y": 53 + }, + "id": 84, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(vl_storage_parts{job=~\"$job\", instance=~\"$instance\"}) by(type)", + "hide": false, + "interval": "", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Part count max by type ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "Disk space 
usage and limits for VictoriaLogs storage. Tracks current data usage against the configured retention limit or available disk space.\n\nThe red line indicates the space retention limit. When usage approaches this limit, older data will be automatically deleted. If the space retention limit (`-retention.maxDiskSpaceUsageBytes`) is not specified, the red line represents the maximum disk space. In that case, the storage will switch to read-only mode when the limit is reached.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "max space retention of instances" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + }, + { + "id": "custom.insertNulls", + "value": 3600000 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 53 + }, + "id": 110, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": 
{ + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(last_over_time(vl_max_disk_space_usage_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job)\n/\nmax(vl_total_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}) by (job) > 0", + "hide": false, + "interval": "", + "legendFormat": "max space retention of instances", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(last_over_time(vl_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job)\n/\nmax(vl_total_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}) by (job)", + "hide": false, + "interval": "", + "legendFormat": "{{job}}", + "range": true, + "refId": "B" + } + ], + "title": "Disk ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "Number of storage merge operations by type (sum across instances). 
Merges compact smaller parts into larger ones; bursts are normal after activity spikes.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 61 + }, + "id": 112, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(vl_merges_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(type)", + "hide": false, + "interval": "$__rate_interval", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Merge events", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "99th percentile duration of merge operations by storage type. 
Merge operations combine smaller storage parts into larger ones for optimization. \n\nNormal merge durations vary by storage type and data volume. Consistently high durations may indicate storage performance issues, high write load, or insufficient resources for background operations. Monitor for trends that could impact overall system performance.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 61 + }, + "id": 86, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(vl_merge_duration_seconds{job=~\"$job\", instance=~\"$instance\", quantile=\"0.99\"}) by (type) > 0", + "hide": false, + "interval": "", + "legendFormat": "{{type}}", + "range": true, 
+ "refId": "A" + } + ], + "title": "Merge duration p99 ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "description": "99th percentile of data volume processed during merge operations by storage type. \n\nThis metric indicates the scale of background storage optimization activities. Larger merge sizes generally improve storage efficiency but require more resources. Consistently high values may indicate heavy write loads or large storage parts that need optimization. Monitor correlation with merge duration for performance insights.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 69 + }, + "id": 109, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": 
"prometheus", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(vl_merge_bytes{job=~\"$job\", instance=~\"$instance\", quantile=\"0.99\"}) by (type) > 0", + "hide": false, + "interval": "", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Merge bytes p99 ($instance)", + "type": "timeseries" + } + ], + "title": "Storage", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 53 + }, + "id": 83, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Rate of incoming log insertion requests by endpoint path. This metric tracks the ingestion load on VictoriaLogs, including different insertion methods and protocols.\n\nHigher rates indicate increased log ingestion activity. Monitor for sudden spikes that might indicate new log sources, application deployments, or potential issues requiring capacity planning. Sustained high rates may require scaling ingestion capacity.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": 
"byRegexp", + "options": "/.*(bytes)/" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 54 + }, + "id": 113, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vl_rows_ingested_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (type) > 0", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vl_bytes_ingested_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (type) > 0", + "hide": false, + "instant": false, + "legendFormat": "{{type}} (bytes)", + "range": true, + "refId": "B" + } + ], + "title": "Logs ingestion rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Rate of incoming query requests by endpoint path. This metric tracks the query load on VictoriaLogs, including different query interfaces and internal operations. \n\nHigher rates indicate increased query activity from users or applications. Monitor for sudden spikes that might indicate new dashboards, automated queries, or potential performance issues. 
Sustained high rates may require scaling query processing capacity.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -1615,11 +4881,13 @@ "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": 0 }, { "color": "red", @@ -1635,9 +4903,9 @@ "h": 8, "w": 12, "x": 12, - "y": 315 + "y": 54 }, - "id": 26, + "id": 117, "options": { "legend": { "calcs": [ @@ -1657,7 +4925,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1665,16 +4933,16 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "increase(vl_streams_created_total{job=~\"$job\", instance=~\"$instance\"}[1d])", + "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance\",path=~\"^/(internal/)?insert.*\"}[$__rate_interval])) by (path) > 0", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "new log streams over 24h", + "legendFormat": "{{path}}", "range": true, "refId": "A" } ], - "title": "Log stream churn rate", + "title": "Request rate", "type": "timeseries" }, { @@ -1682,92 +4950,202 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Flags explicitly set to non-default values", + "description": "99th percentile of insert operation duration. This represents the time it takes for 99% of insert operations to complete. High values indicate slow ingestion performance that could affect overall system throughput. 
Spikes may suggest storage bottlenecks, resource contention, or inefficient data processing.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" }, "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "inspect": false + "thresholdsStyle": { + "mode": "off" + } }, + "links": [], "mappings": [], + "min": 0, "thresholds": { "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": 0 }, { "color": "red", "value": 80 } ] - } + }, + "unit": "s" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Time" - }, - "properties": [ - { - "id": "custom.hidden", - "value": true - } - ] + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 62 + }, + "id": 103, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" }, - { - "matcher": { - "id": "byName", - "options": "Value" - }, - "properties": [ - { - "id": "custom.hidden", - "value": true - } - ] + "editorMode": "code", + "expr": "max(vl_http_request_duration_seconds{job=~\"$job\", instance=~\"$instance\", quantile=\"0.99\", 
path=~\"^/(internal/)?insert.*\"}) by (path) > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{path}}", + "range": true, + "refId": "A" + } + ], + "title": "Insert duration p99", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "99th percentile response time for VictoriaLogs HTTP endpoints, grouped by instance and path. This means 99% of requests are faster than this value. **Lower numbers are better**, as they indicate faster responses and fewer slow requests.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - { - "matcher": { - "id": "byName", - "options": "job" + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "properties": [ + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ { - "id": "custom.hidden", - "value": true + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 } ] - } - ] + }, + "unit": "s" + }, + "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 379 + "x": 12, + "y": 62 }, - "id": 70, + "id": 116, "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" ], - "show": false + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": 
"Last *", + "sortDesc": true }, - "showHeader": true + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1775,28 +5153,27 @@ "uid": "$ds" }, "editorMode": "code", - "exemplar": false, - "expr": "sum(flag{is_set=\"true\", job=~\"$job\", instance=~\"$instance\"}) by(job, instance, name, value)", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, + "expr": "max(vl_http_request_duration_seconds{job=~\"$job\", instance=~\"$instance\", quantile=\"0.99\", path=~\"^/(internal/)?insert.*\"}) by (path) > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{path}}", + "range": true, "refId": "A" } ], - "title": "Non-default flags", - "type": "table" + "title": "Request duration p99 ($instance)", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "$ds" }, - "description": "Shows how many log entries were ignored or dropped on insertion due to various reasons:\n* timestamp out of retention period or timestamp in future;\n* number of fields per entry exceeded.", + "description": "Number of active message processors handling log ingestion. These processors parse and process incoming log messages before storage. The count typically correlates with the ingestion load.\n\nIf this number is unusually high or inflated (which is rare), check memory usage. 
It may indicate a heavy, concurrent ingestion load or processing bottlenecks that could benefit from performance tuning.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -1808,7 +5185,7 @@ "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, - "gradientMode": "none", + "gradientMode": "opacity", "hideFrom": { "legend": false, "tooltip": false, @@ -1831,12 +5208,17 @@ "mode": "off" } }, + "decimals": 0, + "links": [], "mappings": [], + "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": 0 }, { "color": "red", @@ -1851,10 +5233,10 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 379 + "x": 0, + "y": 70 }, - "id": 71, + "id": 97, "options": { "legend": { "calcs": [ @@ -1871,10 +5253,10 @@ "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "none" + "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1882,42 +5264,41 @@ "uid": "$ds" }, "editorMode": "code", - "exemplar": true, - "expr": "sum(increase(vl_rows_dropped_total{job=~\"$job\", instance=~\"$instance\"}[1h])) by (instance, reason)", - "hide": false, + "expr": "max(vl_insert_processors_count{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) by (instance)", + "format": "time_series", "interval": "", - "legendFormat": "{{instance}} - {{reason}}", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(vm_concurrent_insert_capacity{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "soft limit", "range": true, - "refId": "A" + "refId": "B" } ], - "title": "Logs dropped for last 1h", + "title": "Message processors ($instance)", "type": "timeseries"
- } - ], - "title": "Troubleshooting", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 31 - }, - "id": 28, - "panels": [ + }, { "datasource": { "type": "prometheus", "uid": "$ds" }, - "description": "Percentage of used memory (resident).\nThe application's performance will significantly degrade when memory usage is close to 100%.", + "description": "Number of log rows waiting to be written to storage, categorized by type. Pending rows indicate temporary queuing during ingestion. Consistently high values may suggest storage write bottlenecks or insufficient write capacity.\n\nPending rows are flushed in two ways:\n\n- After a specific time period (typically 1 second)\n- When the pending row size exceeds a threshold (typically 1.75 MB)", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -1960,7 +5341,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1968,17 +5349,17 @@ } ] }, - "unit": "percentunit" + "unit": "short" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 32 + "x": 12, + "y": 70 }, - "id": 38, + "id": 104, "options": { "legend": { "calcs": [ @@ -1998,7 +5379,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2006,15 +5387,16 @@ "uid": "$ds" }, "editorMode": "code", - "exemplar": false, - "expr": "max(\n max_over_time(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n) by(instance)", + "expr": "sum(vl_pending_rows{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) by (type)", + "format": "time_series", "interval": "", - "legendFormat": "__auto", + "intervalFactor": 1, + "legendFormat": "{{type}}", "range": true, "refId": "A" } ], - "title": "RSS memory % usage 
($instance)", + "title": "Pending rows ($instance)", "type": "timeseries" }, { @@ -2022,6 +5404,7 @@ "type": "prometheus", "uid": "$ds" }, + "description": "How often the number of concurrent insert operations has reached the configured limit: -maxConcurrentInserts (default: 2x CPU cores).", "fieldConfig": { "defaults": { "color": { @@ -2037,7 +5420,7 @@ "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, - "gradientMode": "none", + "gradientMode": "opacity", "hideFrom": { "legend": false, "tooltip": false, @@ -2068,7 +5451,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -2076,17 +5459,33 @@ } ] }, - "unit": "percentunit" + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 32 + "x": 0, + "y": 78 }, - "id": 44, + "id": 143, "options": { "legend": { "calcs": [ @@ -2103,10 +5502,10 @@ "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "none" + "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2114,17 +5513,16 @@ "uid": "$ds" }, "editorMode": "code", - "exemplar": false, - "expr": "max(\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}\n) by(instance)", + "expr": "max(rate(vm_concurrent_insert_limit_reached_total)) by (instance)", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "__auto", + "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "CPU % usage ($instance)", + "title": "Concurrent insert limit reached ($instance)", "type": "timeseries" }, { @@ -2132,7 +5530,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Share for memory allocated by the process
itself. When memory usage reaches 100% it will be likely OOM-killed.\nSafe memory usage % considered to be below 80%", + "description": "Number of insert requests that timed out while waiting for available concurrency slots. This indicates sustained ingestion overload beyond configured limits.\n\nHigh values suggest:\n- Insert queue is consistently full\n- Insert requests waiting too long for execution slots\n- System under sustained heavy ingestion load\n- Need for horizontal scaling or ingestion optimization\n\nCombined with `Insert concurrency limit reached`, provides complete picture of ingestion rejection patterns.", "fieldConfig": { "defaults": { "color": { @@ -2152,6 +5550,7 @@ "hideFrom": { "legend": false, "tooltip": false, + "vis": false, "viz": false }, "insertNulls": false, @@ -2171,33 +5570,31 @@ "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", - "value": 80 + "value": 1 } ] }, - "unit": "percentunit" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 40 + "x": 12, + "y": 78 }, - "id": 42, + "id": 131, "options": { "legend": { "calcs": [ @@ -2207,17 +5604,15 @@ ], "displayMode": "table", "placement": "bottom", - "showLegend": true, - "sortBy": "Last *", - "sortDesc": true + "showLegend": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "desc" + "mode": "single", + "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2225,15 +5620,14 @@ "uid": "$ds" }, "editorMode": "code", - "exemplar": false, - "expr": "max(\n max_over_time(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n) by(instance)", + "expr": "sum(rate(vm_concurrent_insert_limit_timeout_total{job=~\"$job\", 
instance=~\"$instance\"}[$__rate_interval])) by (instance)", "interval": "", - "legendFormat": "__auto", + "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "RSS anonymous memory % usage ($instance)", + "title": "Insert timeouts ($instance)", "type": "timeseries" }, { @@ -2241,7 +5635,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "", + "description": "99th percentile duration of background flush operations by type. High values may indicate disk pressure or heavy ingestion. Correlate with `I/O` panels.", "fieldConfig": { "defaults": { "color": { @@ -2288,7 +5682,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -2296,33 +5690,17 @@ } ] }, - "unit": "short" + "unit": "s" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F2495C", - "mode": "fixed" - } - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 40 + "x": 0, + "y": 86 }, - "id": 48, + "id": 115, "options": { "legend": { "calcs": [ @@ -2342,7 +5720,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2350,38 +5728,37 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])", + "expr": "sum(vl_insert_flush_duration_seconds{job=~\"$job\", instance=~\"$instance\", quantile=\"0.99\"}) by (type) > 0", "format": "time_series", - "interval": "", "intervalFactor": 1, - "legendFormat": "CPU cores used", + "legendFormat": "{{type}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "exemplar": false, - "expr": "process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "Limit", - "refId": "B" } ], -
"title": "CPU ($instance)", + "title": "Flush duration p99", "type": "timeseries" - }, + } + ], + "title": "Ingestion", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 54 + }, + "id": 81, + "panels": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, - "description": "", + "description": "Rate of incoming query requests by endpoint path. This metric tracks the query load on VictoriaLogs, including different query interfaces and internal operations. \n\nHigher rates indicate increased query activity from users or applications. Monitor for sudden spikes that might indicate new dashboards, automated queries, or potential performance issues. Sustained high rates may require scaling query processing capacity.", "fieldConfig": { "defaults": { "color": { @@ -2423,12 +5800,13 @@ "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -2436,7 +5814,7 @@ } ] }, - "unit": "bytes" + "unit": "short" }, "overrides": [] }, @@ -2444,9 +5822,9 @@ "h": 8, "w": 12, "x": 0, - "y": 48 + "y": 55 }, - "id": 40, + "id": 102, "options": { "legend": { "calcs": [ @@ -2466,73 +5844,24 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, - "expr": "sum(go_memstats_sys_bytes{job=~\"$job\", instance=~\"$instance\"}) + sum(vm_cache_size_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "requested from system", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "expr": "sum(go_memstats_heap_inuse_bytes{job=~\"$job\", instance=~\"$instance\"}) + sum(vm_cache_size_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "heap inuse", 
- "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "expr": "sum(go_memstats_stack_inuse_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "stack inuse", - "refId": "C" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "expr": "sum(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "resident", - "refId": "D" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "exemplar": false, - "expr": "sum(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance\"})", + "editorMode": "code", + "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance\",path=~\"^/(internal/)?select.*\"}[$__rate_interval])) by (path) > 0", "format": "time_series", - "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "resident anonymous", - "refId": "E" + "legendFormat": "{{path}}", + "range": true, + "refId": "A" } ], - "title": "Memory usage ($instance)", + "title": "Query rate", "type": "timeseries" }, { @@ -2540,7 +5869,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\nThe lower the better.", + "description": "99th percentile of query execution duration. This represents the time it takes for 99% of queries to complete:\n\n- High values indicate slow query performance that affects user experience. \n- Spikes may suggest complex queries, resource contention, or inefficient indexes. 
Monitor for trends that could indicate degrading performance.", "fieldConfig": { "defaults": { "color": { @@ -2576,19 +5905,23 @@ "mode": "none" }, "thresholdsStyle": { - "mode": "line" + "mode": "off" } }, - "decimals": 0, "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 + }, + { + "color": "red", + "value": 80 } ] }, @@ -2600,9 +5933,9 @@ "h": 8, "w": 12, "x": 12, - "y": 48 + "y": 55 }, - "id": 72, + "id": 108, "options": { "legend": { "calcs": [ @@ -2622,7 +5955,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2630,31 +5963,15 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(process_pressure_cpu_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job, instance)", + "expr": "max(vl_http_request_duration_seconds{job=~\"$job\", instance=~\"$instance\", quantile=\"0.99\", path=~\"^/(internal/)?select.*\"}) by (path) > 0", "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - waiting", + "intervalFactor": 1, + "legendFormat": "{{path}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "editorMode": "code", - "expr": "sum(rate(process_pressure_cpu_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job, instance)", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - stalled", - "range": true, - "refId": "B" } ], - "title": "CPU pressure", + "title": "Query duration p99", "type": "timeseries" }, { @@ -2662,7 +5979,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\nThe lower the better.", + "description": "Number of concurrent select (query) operations compared to the configured 
limit.\n\nHigh utilization near the limit may indicate query bottlenecks or insufficient query processing capacity.\n\nIf it's consistently high while CPU usage remains low, consider increasing the concurrency limit (`-search.maxConcurrentRequests`) or optimizing query performance to support more concurrent users.", "fieldConfig": { "defaults": { "color": { @@ -2698,10 +6015,9 @@ "mode": "none" }, "thresholdsStyle": { - "mode": "line" + "mode": "off" } }, - "decimals": 0, "links": [], "mappings": [], "min": 0, @@ -2710,21 +6026,41 @@ "steps": [ { "color": "green", - "value": null + "value": 0 + }, + { + "color": "red", + "value": 80 } ] }, - "unit": "s" + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 56 + "y": 63 }, - "id": 73, + "id": 107, "options": { "legend": { "calcs": [ @@ -2744,7 +6080,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2752,11 +6088,11 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(process_pressure_memory_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job, instance)", + "expr": "max(max_over_time(vl_concurrent_select_current{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - waiting", + "intervalFactor": 1, + "legendFormat": "current", "range": true, "refId": "A" }, @@ -2766,17 +6102,17 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(process_pressure_memory_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job, instance)", + "expr": "max(vl_concurrent_select_capacity{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "hide": false, "interval": "", - 
"intervalFactor": 2, - "legendFormat": "{{instance}} - stalled", + "intervalFactor": 1, + "legendFormat": "limit", "range": true, "refId": "B" } ], - "title": "Memory pressure", + "title": "Concurrent queries ($instance)", "type": "timeseries" }, { @@ -2784,7 +6120,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the number of bytes read/write from the storage layer.", + "description": "Number of queries that timed out while waiting for available concurrency slots. This indicates sustained query overload beyond configured limits.\n\nHigh values suggest:\n- Query queue is consistently full\n- Queries waiting too long for execution slots\n- System under sustained heavy load\n- Need for horizontal scaling or query optimization\n\nCombined with `Select concurrency limit reached`, provides complete picture of query rejection patterns.", "fieldConfig": { "defaults": { "color": { @@ -2799,11 +6135,12 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, + "vis": false, "viz": false }, "insertNulls": false, @@ -2823,105 +6160,86 @@ "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", - "value": 80 + "value": 1 } ] }, - "unit": "bytes" + "unit": "reqps" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "read" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 56 - }, - "id": 52, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", + "y": 63 + }, + "id": 132, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", "placement": "bottom", - "showLegend": true, - "sortBy": "Last *", - "sortDesc": true + "showLegend": true }, 
"tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, - "expr": "sum(rate(process_io_storage_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "time_series", - "hide": false, + "editorMode": "code", + "expr": "sum(rate(vl_concurrent_select_limit_timeout_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance) > 0", "interval": "", - "intervalFactor": 1, - "legendFormat": "read", + "legendFormat": "{{instance}}", + "range": true, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "expr": "sum(rate(process_io_storage_written_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "write", - "refId": "B" } ], - "title": "Disk writes/reads ($instance)", + "title": "Query timeouts ($instance)", "type": "timeseries" - }, + } + ], + "title": "Querying", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 55 + }, + "id": 126, + "panels": [ { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, + "description": "Number of storage blocks scanned per query (99th percentile). Each block contains logs for a specific time period and field combination. High values indicate queries scanning too many blocks, often caused by:\n\n- Wide time ranges without specific filters\n- Queries missing indexed fields (like `_stream`, `kubernetes.*`)\n- Non-selective filters that don't utilize `bloom filters`\n\nCorrelate with `Bytes/query p99` - if blocks are high but bytes are low, blocks contain little data (good). 
If both are high, query is reading large amounts of data.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -2932,7 +6250,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2956,16 +6274,15 @@ "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -2973,7 +6290,7 @@ } ] }, - "unit": "short" + "unit": "none" }, "overrides": [] }, @@ -2981,9 +6298,9 @@ "h": 8, "w": 12, "x": 0, - "y": 64 + "y": 56 }, - "id": 50, + "id": 130, "options": { "legend": { "calcs": [ @@ -3003,35 +6320,36 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "sum(go_goroutines{job=~\"$job\", instance=~\"$instance\"}) by(instance)", - "format": "time_series", - "intervalFactor": 2, + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_processed_blocks_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange))) by (instance)", + "hide": false, + "interval": "", "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "Goroutines ($instance)", + "title": "Blocks/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, - "description": "Shows the number of read/write syscalls such as read, pread, write, pwrite.", + "description": "Total bytes read from disk per query (99th percentile). 
This represents the complete `I/O` overhead for query execution, including:\n\n- Block headers and metadata\n- Bloom filter data for candidate selection\n- Column headers and indexes\n- Actual log values and timestamps\n\nHigh values indicate expensive queries. Compare with specific breakdown panels below to identify bottlenecks. Monitor trends over time and correlate with query complexity.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3042,7 +6360,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3066,14 +6384,15 @@ "mode": "off" } }, - "links": [], "mappings": [], + "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3081,30 +6400,17 @@ } ] }, - "unit": "short" + "unit": "bytes" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "read calls" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 64 + "y": 56 }, - "id": 56, + "id": 121, "options": { "legend": { "calcs": [ @@ -3124,51 +6430,36 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "sum(rate(process_io_read_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_total_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange))) by (instance)", "hide": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "read calls", + 
"legendFormat": "{{instance}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "editorMode": "code", - "expr": "sum(rate(process_io_write_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "write calls", - "range": true, - "refId": "B" } ], - "title": "Disk write/read calls ($instance)", + "title": "Bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, + "description": "Bytes read from block headers per query (99th percentile). Block headers contain metadata about each storage block including `time ranges`, `field names`, and data location pointers.\n\nHigh values indicate:\n- Query `time range` spans many blocks (reduce time range or add time-based filters)\n- Missing stream-level filters (`_stream` field) causing full block header scans\n- High cardinality fields creating excessive blocks\n\nMonitor relative changes over time - sudden increases suggest inefficient query patterns or changes in data structure.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3179,7 +6470,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3203,16 +6494,15 @@ "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3220,7 +6510,7 @@ } ] }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, @@ -3228,9 +6518,9 @@ "h": 8, "w": 12, "x": 0, - "y": 72 + "y": 64 }, - "id": 54, + "id": 124, "options": { "legend": { "calcs": [ @@ -3250,35 +6540,36 @@ "sort": 
"none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "sum(process_num_threads{job=~\"$job\", instance=~\"$instance\"}) by(instance)", - "format": "time_series", - "intervalFactor": 2, + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_block_headers_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange))) by (instance) > 0.001", + "hide": false, + "interval": "", "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "Threads ($instance)", + "title": "Block header bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, - "description": "Shows IO pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\nThe lower the better.", + "description": "Bytes read from Bloom filters per query (99th percentile). `Bloom filters` are probabilistic data structures that quickly eliminate blocks that definitely don't contain search terms.\n\nHigh values indicate:\n- Queries with low-selectivity text filters (common words like `error`, `info`)\n- Missing or ineffective field-based filters\n- Queries that force scanning many candidate blocks\n\nOptimize by adding specific field filters (`kubernetes.container_name`, `_stream`) before text searches. 
Monitor for sudden increases that indicate poor filter selectivity.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3310,23 +6601,26 @@ "mode": "none" }, "thresholdsStyle": { - "mode": "line" + "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 + }, + { + "color": "red", + "value": 80 } ] }, - "unit": "s" + "unit": "bytes" }, "overrides": [] }, @@ -3334,9 +6628,9 @@ "h": 8, "w": 12, "x": 12, - "y": 72 + "y": 64 }, - "id": 74, + "id": 129, "options": { "legend": { "calcs": [ @@ -3346,61 +6640,44 @@ ], "displayMode": "table", "placement": "bottom", - "showLegend": true, - "sortBy": "Last *", - "sortDesc": true + "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "desc" + "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "$ds" - }, - "editorMode": "code", - "expr": "sum(rate(process_pressure_io_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job, instance)", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - waiting", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "sum(rate(process_pressure_io_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job, instance)", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_bloom_filters_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange))) by (instance) > 0.001", "hide": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - stalled", + "legendFormat": 
"{{instance}}", "range": true, - "refId": "B" + "refId": "A" } ], - "title": "IO pressure", + "title": "Bloom filter bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, - "description": "", + "description": "Bytes read from actual log values per query (99th percentile). This represents the uncompressed log content being retrieved and processed for the query result.\n\nHigh values indicate:\n- Queries returning large result sets (add `LIMIT` clause)\n- Retrieving logs with large payloads (`JSON` objects, stack traces)\n- Missing filters that would reduce matching log volume\n- Functions like `uniq` or `stats` processing many log entries\n\nReduce by: adding selective filters, using field extractors instead of full log retrieval, limiting result count.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3411,7 +6688,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3435,15 +6712,15 @@ "mode": "off" } }, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3451,7 +6728,7 @@ } ] }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, @@ -3459,9 +6736,9 @@ "h": 8, "w": 12, "x": 0, - "y": 80 + "y": 72 }, - "id": 58, + "id": 122, "options": { "legend": { "calcs": [ @@ -3481,36 +6758,36 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "sum(rate(vm_tcplistener_accepts_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(instance)", - "format": "time_series", + "exemplar": true, + "expr": 
"max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_values_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange))) by (instance)", "hide": false, - "intervalFactor": 1, + "interval": "", "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "TCP connections rate ($instance)", + "title": "Value bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, - "description": "", + "description": "Bytes read from timestamp column (`_time`) per query (99th percentile). The `_time` column is automatically indexed and used for time-range filtering during query execution.\n\nHigh values indicate:\n- Queries with very wide time ranges requiring timestamp scanning\n- Time-based aggregations over large datasets\n- Missing time-range restrictions in query filters\n\nThis is usually the smallest component of query `I/O`. Spikes correlate with time range width and data density in the queried period.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3521,7 +6798,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3545,15 +6822,15 @@ "mode": "off" } }, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3561,7 +6838,7 @@ } ] }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, @@ -3569,9 +6846,9 @@ "h": 8, "w": 12, "x": 12, - "y": 80 + "y": 72 }, - "id": 60, + "id": 127, "options": { "legend": { "calcs": [ @@ -3591,36 +6868,36 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" 
}, "editorMode": "code", - "expr": "sum(vm_tcplistener_conns{job=~\"$job\", instance=~\"$instance\"}) by(instance)", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_timestamps_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange))) by (instance)", "hide": false, - "intervalFactor": 1, + "interval": "", "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "TCP connections ($instance)", + "title": "_time bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, - "description": "Shows the time goroutines have spent in runnable state before actually running. The lower is better.\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.", + "description": "Bytes read from column header indexes per query (99th percentile). 
`Column header indexes` contain metadata about which fields exist in each block and their data types.\n\nHigh values suggest:\n- Queries scanning many blocks due to missing field filters\n- High field cardinality creating large index structures\n- Queries accessing many different field names across blocks\n- Schema evolution causing index fragmentation\n\nOptimize by using consistent `field names` and adding field-specific filters early in query pipeline.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3652,27 +6929,26 @@ "mode": "none" }, "thresholdsStyle": { - "mode": "line" + "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", - "value": 0.1 + "value": 80 } ] }, - "unit": "s" + "unit": "bytes" }, "overrides": [] }, @@ -3680,9 +6956,9 @@ "h": 8, "w": 12, "x": 0, - "y": 88 + "y": 80 }, - "id": 61, + "id": 128, "options": { "legend": { "calcs": [ @@ -3692,44 +6968,46 @@ ], "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true }, "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "desc" + "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "max(histogram_quantile(0.99, sum(rate(go_sched_latencies_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (job, instance, le))) by(job)", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_columns_header_indexes_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange))) by (instance)", + "hide": false, "interval": "", - 
"intervalFactor": 2, - "legendFormat": "__auto", + "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "Go scheduling latency", + "title": "Column header index bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, - "description": "Shows the percentage of open file descriptors compared to the limit set in the OS.\nReaching the limit of open files can cause various issues and must be prevented.\n\nSee how to change limits here https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a", + "description": "Bytes read from column headers per query (99th percentile). `Column headers` store the actual field schema and compression metadata for each block's columns.\n\nThis metric helps identify:\n- Schema complexity overhead (many fields per log entry)\n- Inefficient field access patterns\n- Blocks with heterogeneous schemas requiring header reads\n\nCompare with other `I/O` breakdown panels to understand query cost distribution. 
High column header reads suggest schema optimization opportunities or need for more selective field access patterns.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3764,16 +7042,15 @@ "mode": "off" } }, - "decimals": 2, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3781,33 +7058,17 @@ } ] }, - "unit": "percentunit" + "unit": "bytes" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "max" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C4162A", - "mode": "fixed" - } - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 88 + "y": 80 }, - "id": 46, + "id": 123, "options": { "legend": { "calcs": [ @@ -3827,35 +7088,34 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "max_over_time(process_open_fds{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n/\nprocess_max_fds{job=~\"$job\", instance=~\"$instance\"}", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_columns_headers_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange))) by (instance)", "hide": false, "interval": "", - "intervalFactor": 2, "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "Open FDs ($instance)", + "title": "Column header bytes/query p99 ($instance)", "type": "timeseries" } ], - "title": "Resource usage", + "title": "Slow Query Troubleshooting", "type": "row" } ], "preload": false, "refresh": "", - "schemaVersion": 40, + "schemaVersion": 41, "tags": [ "victoriametrics", "victorialogs" @@ 
-3864,8 +7124,8 @@ "list": [ { "current": { - "text": "VictoriaMetrics", - "value": "P4169E866C3094E38" + "text": "default", + "value": "default" }, "includeAll": false, "name": "ds", @@ -3876,7 +7136,10 @@ "type": "datasource" }, { - "current": {}, + "current": { + "text": "kubernetes-pods", + "value": "kubernetes-pods" + }, "datasource": { "type": "prometheus", "uid": "$ds" @@ -3894,7 +7157,12 @@ "type": "query" }, { - "current": {}, + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, "datasource": { "type": "prometheus", "uid": "$ds" @@ -3913,7 +7181,10 @@ "type": "query" }, { - "current": {}, + "current": { + "text": "", + "value": "" + }, "datasource": { "type": "prometheus", "uid": "${ds}" diff --git a/dashboards/vm/victorialogs-cluster.json b/dashboards/vm/victorialogs-cluster.json index a5307afdb4..6fb5821d40 100644 --- a/dashboards/vm/victorialogs-cluster.json +++ b/dashboards/vm/victorialogs-cluster.json @@ -1,4 +1,3 @@ - { "annotations": { "list": [ @@ -51,7 +50,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 1, + "id": 6, "links": [], "panels": [ { @@ -72,7 +71,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows total amount of log entries in the storage.", + "description": "Total amount of log entries in the storage.", "fieldConfig": { "defaults": { "color": { @@ -84,7 +83,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, @@ -93,12 +92,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 4, "x": 0, "y": 1 }, - "id": 10, + "id": 79, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -118,7 +117,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -132,7 +131,7 @@ "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -144,7 +143,7 @@ "type": "victoriametrics-metrics-datasource", 
"uid": "$ds" }, - "description": "Shows the cumulative number of log entries ingested over the last 24h.", + "description": "The total number of log entries ingested over the past 24 hours.", "fieldConfig": { "defaults": { "color": { @@ -156,7 +155,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, @@ -165,12 +164,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 4, "y": 1 }, - "id": 65, + "id": 80, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -190,7 +189,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -204,7 +203,7 @@ "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -216,7 +215,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Total amount of used disk space.\nAccounts for all compressed log entries and index size.\n\nSee how to [control disk usage](https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage).", + "description": "Average ingestion rate of log entries.", "fieldConfig": { "defaults": { "color": { @@ -228,21 +227,21 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, - "unit": "bytes" + "unit": "short" }, "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 9, "y": 1 }, - "id": 24, + "id": 82, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -262,7 +261,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -271,16 +270,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(vl_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", + "expr": "sum(rate(vl_rows_ingested_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + 
"legendFormat": "total", "refId": "A" } ], - "title": "Disk space usage", + "title": "Insert req/s", "type": "stat" }, { @@ -288,7 +287,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows the average ingestion rate of log entries.", + "description": "Total amount of used disk space.\nAccounts for all compressed log entries and index size.\n\nSee how to [control disk usage](https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage).", "fieldConfig": { "defaults": { "color": { @@ -300,21 +299,21 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 14, "y": 1 }, - "id": 22, + "id": 85, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -334,7 +333,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -343,16 +342,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(vl_rows_ingested_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(vl_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], - "title": "Insert req/s", + "title": "Disk space usage", "type": "stat" }, { @@ -360,7 +359,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Total number of available CPUs for VictoriaLogs process", + "description": "Integer number of CPU cores available to the application. This value is automatically rounded down from fractional CPU quotas. For optimal performance, fractional CPU units should be avoided. 
See the [best practices](https://docs.victoriametrics.com/victoriametrics/bestpractices/#kubernetes) documentation for more details.", "fieldConfig": { "defaults": { "color": { @@ -372,7 +371,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -385,12 +384,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 19, "y": 1 }, - "id": 30, + "id": 87, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -410,7 +409,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -424,7 +423,7 @@ "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -438,10 +437,10 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 4, "x": 0, - "y": 3 + "y": 5 }, "id": 63, "options": { @@ -453,7 +452,7 @@ "content": "
$version
", "mode": "markdown" }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "title": "Version", "type": "text" }, @@ -462,7 +461,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows the cumulative number of log entries ingested over the last 24h. \n\nThe size is calculated before compression.", + "description": "The cumulative number of log entries ingested over the last 24h. \n\nThe size is calculated before compression.", "fieldConfig": { "defaults": { "color": { @@ -474,7 +473,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, @@ -483,12 +482,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 4, - "y": 3 + "y": 5 }, - "id": 64, + "id": 81, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -508,7 +507,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -522,7 +521,7 @@ "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -534,7 +533,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "The ratio between original data size and compressed data stored on disk.\n\nCompression ratio doesn't account for indexdb size. 
It also may change with time, as [merges](https://docs.victoriametrics.com/victorialogs/#forced-merge) are improving compression of historical data.", + "description": "Rate of HTTP read requests.", "fieldConfig": { "defaults": { "color": { @@ -546,21 +545,21 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, - "unit": "none" + "unit": "short" }, "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 9, - "y": 3 + "y": 5 }, - "id": 25, + "id": 83, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -580,7 +579,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -589,16 +588,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": " sum(vl_uncompressed_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) / sum(vl_compressed_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", + "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance\", path=~\"/select/.*\"}[$__rate_interval]))", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], - "title": "Compression ratio", + "title": "Read req/s", "type": "stat" }, { @@ -606,7 +605,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows the rate of HTTP read requests.", + "description": "The ratio between original data size and compressed data stored on disk. This metric excludes indexdb size. \n\nThe ratio can go up or down as the system performs automatic maintenance and applies retention policies. 
For example:\n- Background merges: [Merges](https://docs.victoriametrics.com/victorialogs/#forced-merge) improve compression by combining data into larger, more efficiently compressed blocks\n- Retention policies: When old data is deleted due to retention settings, the ratio changes as different time periods have varying compression characteristics\n\n", "fieldConfig": { "defaults": { "color": { @@ -618,21 +617,21 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, - "unit": "short" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 14, - "y": 3 + "y": 5 }, - "id": 36, + "id": 86, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -652,7 +651,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -661,16 +660,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance\", path=~\"/select/.*\"}[$__rate_interval]))", + "expr": " sum(vl_uncompressed_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) / sum(vl_compressed_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "__auto", + "legendFormat": "{{instance}}", "refId": "A" } ], - "title": "Read req/s", + "title": "Compression ratio", "type": "stat" }, { @@ -678,7 +677,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Total size of available memory for VictoriaLogs process", + "description": "Total system memory available to the application. 
This represents the system or container's memory capacity or limit, not the currently free memory.", "fieldConfig": { "defaults": { "color": { @@ -690,7 +689,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, @@ -699,12 +698,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 19, - "y": 3 + "y": 5 }, - "id": 34, + "id": 88, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -724,20 +723,20 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { - "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, + "editorMode": "code", "exemplar": false, "expr": "sum(vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"})", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -768,7 +767,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -797,8 +796,8 @@ }, "properties": [ { - "id": "displayName", - "value": "Count" + "id": "custom.hidden", + "value": true } ] } @@ -808,7 +807,7 @@ "h": 6, "w": 9, "x": 0, - "y": 5 + "y": 9 }, "id": 72, "options": { @@ -824,12 +823,12 @@ "showHeader": true, "sortBy": [ { - "desc": true, - "displayName": "Count" + "desc": false, + "displayName": "tier" } ] }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -838,14 +837,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(job, short_version)", + "expr": "sum by (instance, version, tier) (\n label_set(\n vm_app_version{job=~\"$job\",instance=~\"$instance_select_all\"}\n and on(instance,version)\n vm_app_version{job=~\"$job\",instance=~\"$instance_insert_all\"} * 0 + 1,\n \"tier\",\"select,insert\"\n )\nor\n label_set(\n vm_app_version{job=~\"$job\",instance=~\"$instance_select_all\"}\n unless 
on(instance,version)\n vm_app_version{job=~\"$job\",instance=~\"$instance_insert_all\"} * 0 + 1,\n \"tier\",\"select\"\n )\nor\n label_set(\n vm_app_version{job=~\"$job\",instance=~\"$instance_insert_all\"}\n unless on(instance,version)\n vm_app_version{job=~\"$job\",instance=~\"$instance_select_all\"} * 0 + 1,\n \"tier\",\"insert\"\n )\nor\n label_set(\n vm_app_version{job=~\"$job\",instance=~\"$instance_storage_all\"} * 0 + 1,\n \"tier\",\"storage\"\n )\nor\n label_set(\n (\n vm_app_version{job=~\"$job\",instance=~\"$instance\"}\n unless on(instance,version)\n (\n vm_app_version{job=~\"$job\",instance=~\"$instance_select_all\"}\n or\n vm_app_version{job=~\"$job\",instance=~\"$instance_insert_all\"}\n or\n vm_app_version{job=~\"$job\",instance=~\"$instance_storage_all\"}\n )\n ) * 0 + 1,\n \"tier\",\"unknown\"\n )\n)\n", "format": "table", + "hide": false, "instant": true, + "legendFormat": "{{instance}}", "range": false, "refId": "A" } ], - "title": "", + "title": "Component versions ($job)", "type": "table" }, { @@ -853,6 +854,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, + "description": "Uptime shows whether the services are working and what their roles are. 
A VictoriaLogs instance can serve all roles; if it is not yet serving requests from the network, it will show as unknown.", "fieldConfig": { "defaults": { "color": { @@ -895,12 +897,13 @@ "decimals": 0, "links": [], "mappings": [], + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -916,7 +919,7 @@ "h": 6, "w": 15, "x": 9, - "y": 5 + "y": 9 }, "id": 73, "options": { @@ -936,7 +939,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -944,11 +947,63 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(min_over_time(up{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job)", + "expr": "sum(min_over_time(up{job=~\"$job\", instance=~\"$instance_storage_all\"}[$__rate_interval]))", "format": "time_series", "instant": false, - "legendFormat": "{{job}}", + "legendFormat": "storage", "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum (\n min_over_time(\n up{job=~\"$job\", instance=~\"$instance_select_all\"}[$__rate_interval]\n )\n unless on(instance)\n up{job=~\"$job\", instance=~\"$instance_insert_all\"}\n)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "select", + "refId": "B" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum (\n min_over_time(\n up{job=~\"$job\", instance=~\"$instance_insert_all\"}[$__rate_interval]\n )\n unless on(instance)\n up{job=~\"$job\", instance=~\"$instance_select_all\"}\n)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "insert", + "refId": "C" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(\n min_over_time(\n up{job=~\"$job\", 
instance=~\"$instance_select_all\"}[$__rate_interval]\n )\n and on(instance)\n up{job=~\"$job\", instance=~\"$instance_insert_all\"}\n)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "insert,select", + "refId": "D" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum by (instance) (\n min_over_time(up{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n)\nunless on (instance)\nsum by (instance) (\n min_over_time(up{job=~\"$job\", instance=~\"$instance_storage_all\"}[$__rate_interval])\n or\n min_over_time(up{job=~\"$job\", instance=~\"$instance_insert_all\"}[$__rate_interval])\n or\n min_over_time(up{job=~\"$job\", instance=~\"$instance_select_all\"}[$__rate_interval])\n)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "unknown", + "refId": "E" } ], "title": "Uptime ($job)", @@ -960,7 +1015,7 @@ "h": 1, "w": 24, "x": 0, - "y": 11 + "y": 15 }, "id": 18, "panels": [], @@ -972,7 +1027,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows ingestion rate in number of log entries and bytes per second.", + "description": "Ingestion rate in number of log entries and bytes per second.", "fieldConfig": { "defaults": { "color": { @@ -1019,7 +1074,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1052,7 +1107,7 @@ "h": 8, "w": 12, "x": 0, - "y": 12 + "y": 16 }, "id": 2, "options": { @@ -1073,7 +1128,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1103,7 +1158,7 @@ "refId": "B" } ], - "title": "Logs ingestion rate ", + "title": "Logs ingestion rate ($instance)", "type": "timeseries" }, { @@ -1158,7 +1213,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1174,7 +1229,7 @@ "h": 8, "w": 12, "x": 12, - "y": 12 + "y": 16 }, "id": 14, 
"options": { @@ -1196,7 +1251,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1213,7 +1268,7 @@ "refId": "A" } ], - "title": "Requests rate ", + "title": "Requests rate ($instance)", "type": "timeseries" }, { @@ -1268,7 +1323,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1284,7 +1339,7 @@ "h": 8, "w": 12, "x": 0, - "y": 20 + "y": 24 }, "id": 69, "options": { @@ -1303,7 +1358,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1337,7 +1392,7 @@ "refId": "B" } ], - "title": "Requests error rate", + "title": "Requests error rate ($instance)", "type": "timeseries" }, { @@ -1345,7 +1400,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "The less time it takes is better.\n", + "description": "99th percentile response time for VictoriaLogs HTTP endpoints, grouped by instance and path. This means 99% of requests are faster than this value. 
**Lower numbers are better**, as they indicate faster responses and fewer slow requests.", "fieldConfig": { "defaults": { "color": { @@ -1360,7 +1415,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1387,12 +1442,13 @@ "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1408,7 +1464,7 @@ "h": 8, "w": 12, "x": 12, - "y": 20 + "y": 24 }, "id": 66, "options": { @@ -1430,7 +1486,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1446,7 +1502,7 @@ "refId": "A" } ], - "title": "Query duration 0.99 quantile", + "title": "Request duration p99 ($instance)", "type": "timeseries" }, { @@ -1454,7 +1510,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows the amount of on-disk space occupied by all the data stored in the storage.\n\nSee how to [control disk usage](https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage).", + "description": "Amount of on-disk space occupied by the data stored on each instance. 
See how to [control disk usage](https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage).", "fieldConfig": { "defaults": { "color": { @@ -1501,7 +1557,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1517,7 +1573,7 @@ "h": 8, "w": 12, "x": 0, - "y": 28 + "y": 32 }, "id": 6, "options": { @@ -1539,7 +1595,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1547,16 +1603,16 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(vl_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", + "expr": "max(vl_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by (instance)", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "disk usage", + "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "Disk space usage ", + "title": "Disk space usage ($instance)", "type": "timeseries" }, { @@ -1564,7 +1620,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows the rate of logging the messages by their level. 
Unexpected spike in rate is a good reason to check logs.", + "description": "Rate of VictoriaLogs' own application log messages (debug, warnings, errors) - NOT the logs that VictoriaLogs is collecting from external sources.", "fieldConfig": { "defaults": { "color": { @@ -1609,7 +1665,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1624,7 +1680,7 @@ "h": 8, "w": 12, "x": 12, - "y": 28 + "y": 32 }, "id": 67, "options": { @@ -1646,7 +1702,7 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1654,142 +1710,6999 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (job, level, location) > 0", + "expr": "sum(rate(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (job, level, location)", "interval": "5m", "legendFormat": "{{job}} - {{level}}: {{location}}", "range": true, "refId": "A" } ], - "title": "Logging rate", + "title": "VictoriaLogs internal logging ($instance)", "type": "timeseries" }, { - "collapsed": true, + "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 36 + "y": 40 }, "id": 68, - "panels": [ - { - "datasource": { - "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "panels": [], + "title": "Troubleshooting", + "type": "row" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Number of restarts per job. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. \n\nNormally, processes shouldn't restart unless the restart was initiated by the user. The reason for restarts should be determined by checking the logs of each specific service. 
", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "description": "Shows the number of restarts per job. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. Normally, processes shouldn't restart unless restart was inited by user. The reason of restarts should be figured out by checking the logs of each specific service. ", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "stepAfter", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 0, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 543 - }, - "id": 62, - "options": { - "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "sortBy": "Last *", - "sortDesc": true + "insertNulls": false, + 
"lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "desc" + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "pluginVersion": "11.5.0", - "targets": [ - { - "datasource": { - "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "decimals": 0, + "links": [], + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 }, - "editorMode": "code", - "expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) > 0) by(job)", - "format": "time_series", - "instant": false, - "legendFormat": "{{job}}", - "refId": "A" - } + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 41 + }, + "id": 62, + "options": { + "legend": { + "calcs": [ + "lastNotNull" ], - "title": "Restarts", - "type": "timeseries" + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true }, - { - "datasource": { - "type": "victoriametrics-metrics-datasource", - "uid": "$ds" - }, - "description": "The number of the new [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) created over the last 24h.\n\nPrefer having as low churn rate as possible. 
\nSee [How to determine which fields must be associated with log streams?](https://docs.victoriametrics.com/victorialogs/keyconcepts/#how-to-determine-which-fields-must-be-associated-with-log-streams)\n", + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job)", + "format": "time_series", + "instant": false, + "legendFormat": "{{job}}", + "refId": "A" + } + ], + "title": "Restarts", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "The number of new [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) created over the last 24 hours. A lower rate is better. See [How to determine which fields must be associated with log streams?](https://docs.victoriametrics.com/victorialogs/keyconcepts/#how-to-determine-which-fields-must-be-associated-with-log-streams) for more details.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + 
"steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 41 + }, + "id": 26, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "increase(vl_streams_created_total{job=~\"$job\", instance=~\"$instance\"}[1d])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "new log streams over 24h", + "range": true, + "refId": "A" + } + ], + "title": "Streams churn rate 24h", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Flags explicitly set to non-default values", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "job" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 70, + "options": { + 
"cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(flag{is_set=\"true\", job=~\"$job\", instance=~\"$instance\"}) by(job, instance, name, value)", + "format": "table", + "instant": true, + "legendFormat": "{{name}}={{value}}", + "range": false, + "refId": "A" + } + ], + "title": "Non-default flags", + "type": "table" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Number of log entries ignored or dropped on insertion due to the following reasons:\n* Timestamp out of the retention period or in the future\n* Number of fields per entry exceeded\n* Line too long\n\nIf this occurs, check the VictoriaLogs log for details.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "fieldMinMax": false, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 49 + }, + 
"id": 71, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(vl_rows_dropped_total{job=~\"$job\", instance=~\"$instance\"}[1h])) by (reason)", + "hide": false, + "interval": "", + "legendFormat": "{{reason}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(vl_too_long_lines_skipped_total{job=~\"$job\", instance=~\"$instance\"}[1h]))", + "hide": false, + "interval": "", + "legendFormat": "line_too_long", + "range": true, + "refId": "B" + } + ], + "title": "Logs dropped for last 1h", + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 57 + }, + "id": 28, + "panels": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Percentage of physical RAM used compared to the memory limit. 
If this percentage is high, check the `RSS` anonymous vs resident ratio panel for more details.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 0.8 + }, + { + "color": "red", + "value": 0.9 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 0, + "y": 58 + }, + "id": 134, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(\n max_over_time(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n)", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "RSS % of memory limit", + "type": "stat" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "How much memory (RAM) the VictoriaLogs process is actually using, compared to its allowed container or system limit. 
See 'Memory Usage' panel for a detailed breakdown.\n\n- Good: Below 70% most of the time, maybe spiking a bit under load.\n- Bad: Above 90% for more than 5 minutes = risk of out-of-memory (`OOM`) kill.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 17, + "x": 7, + "y": 58 + }, + "id": 136, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(\n max_over_time(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n) by (job)", + "interval": "", + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + 
], + "title": "Physical memory % usage ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Portion of RAM that cannot be reclaimed without swapping. If both the `RSS`-to-limit percentage and this ratio are high, the process is at high risk of an `OOM` kill.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 0, + "y": 63 + }, + "id": 135, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(process_resident_memory_anon_bytes{job=~\"$job\",instance=~\"$instance\"}\n/\nprocess_resident_memory_bytes{job=~\"$job\",instance=~\"$instance\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Anonymous / Resident ratio", + "type": "stat" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "- Anonymous resident memory: Private memory allocated to the application that **cannot** be reclaimed by the kernel. 
Refer to the [Check/profile](https://docs.victoriametrics.com/victorialogs/#profiling) Go heap section for troubleshooting.\n- File-backed resident memory: Memory mapped from files, which can be safely reclaimed. Increases during querying. Correlate with `I/O` panels for further analysis.\n- Shared resident memory: Typically negligible. Large spikes may indicate unexpected shared memory consumers.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 17, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 68 + }, + "id": 137, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "hide": false, + 
"intervalFactor": 1, + "legendFormat": "anonymous resident memory", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(process_resident_memory_file_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "file-backed resident memory", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(process_resident_memory_shared_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "shared resident memory", + "range": true, + "refId": "C" + } + ], + "title": "Memory usage ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + 
"w": 12, + "x": 12, + "y": 68 + }, + "id": 44, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}\n) by(job)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU % usage ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Lower is better, e.g. 20% means the process was delayed by memory pressure 20% of the time. 
See [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n- waiting: Time fraction where at least one thread was blocked on memory.\n- stalled: Time fraction where every thread was blocked on memory (severe pressure).\n\nIf queries slow down and both series spike, the host is likely limited by RAM or I/O throughput.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 0.2 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 76 + }, + "id": 138, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(process_pressure_memory_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job)", + "format": "time_series", + 
"hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}: waiting", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(process_pressure_memory_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}: stalled", + "range": true, + "refId": "B" + } + ], + "title": "Memory pressure", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Helps troubleshoot high CPU usage or throttling:\n\n- waiting: The percentage of time at least one task in the VictoriaLogs process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: The percentage of time all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there’s a CPU burst, it's normal to see waiting or stalled > 1%. 
It only becomes a concern if it consistently climbs above 5–10% and aligns with latency spikes or GC slowdowns.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 76 + }, + "id": 144, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(process_pressure_cpu_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job) > 0.005", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}: waiting", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": 
"sum(rate(process_pressure_cpu_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job) > 0.005", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}: stalled", + "range": true, + "refId": "B" + } + ], + "title": "CPU pressure", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "The higher the better.\n\nThis computes the fraction of read bytes that came from the page cache (i.e., not from disk). It answers: \"Of all the bytes my process read via read(), how many were cache hits?\"\n\nThat's why you can see many read syscalls (read() calls), but the actual disk reads stay low; because the data was already in RAM.\n\n\n\n\n\n\n\n\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 0, + "y": 84 + }, + "id": 139, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "1 - (sum(rate(process_io_storage_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) / sum(rate(process_io_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])))", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": 
"{{instance}}", + "refId": "A" + } + ], + "title": "Page-cache hit ratio", + "type": "stat" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Number of read/write calls application makes:\n\n- read call: Number of read*()-family system calls your process has issued since start. Each call can move 1 byte or megabytes, cached or uncached.\n- write call: Number of write*()-family system calls (including write, pwrite, writev, etc.) made by the process.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "read calls" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 4, + "y": 84 + }, + "id": 140, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + 
"targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(process_io_read_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "read calls", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(process_io_write_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "write calls", + "range": true, + "refId": "B" + } + ], + "title": "Read/Write syscalls ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Percentage of open file descriptors (files, sockets, pipes, etc.) compared to the limit set in the OS. 
Reaching the limit of open files can cause various issues and must be prevented.\n\nSee [how to change limits](https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a).", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "max" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 84 + }, + "id": 145, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(max_over_time(process_open_fds{job=~\"$job\", 
instance=~\"$instance\"}[$__rate_interval])\n/\nprocess_max_fds{job=~\"$job\", instance=~\"$instance\"}) by (job)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Open FDs & usage ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Measure the actual bytes read from and written to disk by the process:\n\n- read: physical bytes the kernel actually pulled from the storage device on behalf of the process (after checking page-cache).\n- write: physical bytes the kernel ultimately wrote to the storage device for the process (after combining, caching, or delaying writes).", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "read" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 92 + }, + "id": 141, + "options": { + "legend": { + "calcs": [ + "mean", 
+ "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(process_io_storage_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "read", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(process_io_storage_written_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "write", + "range": true, + "refId": "C" + } + ], + "title": "Disk writes/reads ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "IO pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html). 
The lower the better.\n\n- waiting: at least one runnable thread blocked on block-`I/O` (disk, NVMe, network-storage) while others could still make progress.\n- stalled: all non-idle threads simultaneously waiting on `I/O`; no useful user code ran during these periods → true `I/O` thrashing.\n\nIf stalled > 0 while querying, it's recommended to increase queue depth on NVMe, raise blk-mq budgets, or relax cgroup I/O limits.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 92 + }, + "id": 143, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(process_pressure_io_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job) 
> 0.005", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}} - waiting", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(process_pressure_io_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job) > 0.005", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}} - stalled", + "range": true, + "refId": "B" + } + ], + "title": "IO pressure", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Current number of active TCP connections to VictoriaLogs. This metric helps monitor connection pool usage and identify potential connection leaks. High values may indicate clients not properly closing connections or connection pooling issues. Monitor for gradual increases that could lead to resource exhaustion.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, 
+ "w": 12, + "x": 0, + "y": 100 + }, + "id": 146, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(vm_tcplistener_conns{job=~\"$job\", instance=~\"$instance\"}) by (job)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "TCP connections ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 100 + }, + "id": 148, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], 
+ "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(go_goroutines{job=~\"$job\", instance=~\"$instance\"}) by (job)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Goroutines ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Rate of incoming TCP connections accepted by VictoriaLogs. This metric indicates network activity and client connection patterns. Sudden spikes may indicate increased load or potential DDoS attacks. Sustained high rates should be correlated with resource usage to ensure adequate capacity.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + 
"w": 12, + "x": 0, + "y": 108 + }, + "id": 147, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(vm_tcplistener_accepts_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "TCP connections rate ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 108 + }, + "id": 149, + "options": { + "legend": { + "calcs": [ + 
"mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(process_num_threads{job=~\"$job\", instance=~\"$instance\"}) by (job)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Threads ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Percent of CPU spent on garbage collection.\n\nIf % is high, then CPU usage can be decreased by changing `GOGC` to higher values. Increasing `GOGC` value will increase memory usage, and decrease CPU usage.\n\nTry searching for keyword `GOGC` at https://docs.victoriametrics.com/victoriametrics/troubleshooting/ ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, 
+ "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 116 + }, + "id": 74, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(\n rate(go_gc_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) \n / rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n ) by(job)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU spent on GC ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Time goroutines have spent in runnable state before actually running. The lower the better.\n\nHigh values or values exceeding the threshold are usually a sign of insufficient CPU resources or CPU throttling.\n\nVerify that the service has enough CPU resources. 
Otherwise, the service could work unreliably with delays in processing.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 0.1 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 116 + }, + "id": 61, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(histogram_quantile(0.99, sum(rate(go_sched_latencies_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (job, le))) by (job)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Go scheduling latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Rate of allocations in memory. 
A sudden increase in allocations puts more pressure on the Go garbage collector and can saturate the CPU resources of the application.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 124 + }, + "id": 75, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(go_memstats_alloc_bytes_total{job=~\"$job\"}[$__rate_interval])) by (job)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Memory allocations rate", + "type": "timeseries" + } + ], + "title": "Resource usage", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 58 + }, + "id": 89, + "panels": [ + { + 
"datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "CPU utilization per instance as a fraction of available cores. Sustained values above 80% indicate CPU saturation; correlate with CPU PSI and query latency.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 133 + }, + "id": 116, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "0": "m", + "1": "a", + "2": "x", + "3": "(", + "4": "\n", + "5": " ", + "6": " ", + "7": "r", + "8": "a", + "9": "t", + "10": "e", + "11": "(", + "12": "\n", + "13": " ", + "14": " ", + "15": " ", + "16": " ", + "17": "p", + "18": "r", + "19": "o", + "20": "c", + "21": "e", + "22": "s", + "23": "s", + "24": "_", + "25": "c", + "26": "p", + "27": "u", + "28": 
"_", + "29": "s", + "30": "e", + "31": "c", + "32": "o", + "33": "n", + "34": "d", + "35": "s", + "36": "_", + "37": "t", + "38": "o", + "39": "t", + "40": "a", + "41": "l", + "42": "{", + "43": "j", + "44": "o", + "45": "b", + "46": "=", + "47": "~", + "48": "\"", + "49": "$", + "50": "j", + "51": "o", + "52": "b", + "53": "\"", + "54": ",", + "55": "i", + "56": "n", + "57": "s", + "58": "t", + "59": "a", + "60": "n", + "61": "c", + "62": "e", + "63": "=", + "64": "~", + "65": "\"", + "66": "$", + "67": "i", + "68": "n", + "69": "s", + "70": "t", + "71": "a", + "72": "n", + "73": "c", + "74": "e", + "75": "_", + "76": "s", + "77": "t", + "78": "o", + "79": "r", + "80": "a", + "81": "g", + "82": "e", + "83": "\"", + "84": "}", + "85": "[", + "86": "t", + "87": "m", + "88": "p", + "89": "_", + "90": "v", + "91": "i", + "92": "c", + "93": "t", + "94": "o", + "95": "r", + "96": "i", + "97": "a", + "98": "m", + "99": "e", + "100": "t", + "101": "r", + "102": "i", + "103": "c", + "104": "s", + "105": "_", + "106": "p", + "107": "r", + "108": "e", + "109": "t", + "110": "t", + "111": "i", + "112": "f", + "113": "y", + "114": "_", + "115": "q", + "116": "u", + "117": "e", + "118": "r", + "119": "y", + "120": "_", + "121": "_", + "122": "r", + "123": "a", + "124": "t", + "125": "e", + "126": "_", + "127": "i", + "128": "n", + "129": "t", + "130": "e", + "131": "r", + "132": "v", + "133": "a", + "134": "l", + "135": "]", + "136": "\n", + "137": " ", + "138": " ", + "139": ")", + "140": "\n", + "141": " ", + "142": " ", + "143": " ", + "144": " ", + "145": "/", + "146": "\n", + "147": " ", + "148": " ", + "149": "p", + "150": "r", + "151": "o", + "152": "c", + "153": "e", + "154": "s", + "155": "s", + "156": "_", + "157": "c", + "158": "p", + "159": "u", + "160": "_", + "161": "c", + "162": "o", + "163": "r", + "164": "e", + "165": "s", + "166": "_", + "167": "a", + "168": "v", + "169": "a", + "170": "i", + "171": "l", + "172": "a", + "173": "b", + "174": "l", + "175": "e", 
+ "176": "{", + "177": "j", + "178": "o", + "179": "b", + "180": "=", + "181": "~", + "182": "\"", + "183": "$", + "184": "j", + "185": "o", + "186": "b", + "187": "\"", + "188": ",", + "189": "i", + "190": "n", + "191": "s", + "192": "t", + "193": "a", + "194": "n", + "195": "c", + "196": "e", + "197": "=", + "198": "~", + "199": "\"", + "200": "$", + "201": "i", + "202": "n", + "203": "s", + "204": "t", + "205": "a", + "206": "n", + "207": "c", + "208": "e", + "209": "_", + "210": "s", + "211": "t", + "212": "o", + "213": "r", + "214": "a", + "215": "g", + "216": "e", + "217": "\"", + "218": "}", + "219": "\n", + "220": ")", + "editorMode": "code", + "expr": "max(\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance_storage\"}\n)", + "legendFormat": "max", + "range": true, + "refId": "A" + }, + { + "0": "m", + "1": "a", + "2": "x", + "3": "(", + "4": "\n", + "5": " ", + "6": " ", + "7": "r", + "8": "a", + "9": "t", + "10": "e", + "11": "(", + "12": "\n", + "13": " ", + "14": " ", + "15": " ", + "16": " ", + "17": "p", + "18": "r", + "19": "o", + "20": "c", + "21": "e", + "22": "s", + "23": "s", + "24": "_", + "25": "c", + "26": "p", + "27": "u", + "28": "_", + "29": "s", + "30": "e", + "31": "c", + "32": "o", + "33": "n", + "34": "d", + "35": "s", + "36": "_", + "37": "t", + "38": "o", + "39": "t", + "40": "a", + "41": "l", + "42": "{", + "43": "j", + "44": "o", + "45": "b", + "46": "=", + "47": "~", + "48": "\"", + "49": "$", + "50": "j", + "51": "o", + "52": "b", + "53": "\"", + "54": ",", + "55": "i", + "56": "n", + "57": "s", + "58": "t", + "59": "a", + "60": "n", + "61": "c", + "62": "e", + "63": "=", + "64": "~", + "65": "\"", + "66": "$", + "67": "i", + "68": "n", + "69": "s", + "70": "t", + "71": "a", + "72": "n", + "73": "c", + "74": "e", + "75": "_", + "76": "s", + "77": "t", + "78": "o", + "79": "r", + "80": "a", + "81": "g", + "82": 
"e", + "83": "\"", + "84": "}", + "85": "[", + "86": "t", + "87": "m", + "88": "p", + "89": "_", + "90": "v", + "91": "i", + "92": "c", + "93": "t", + "94": "o", + "95": "r", + "96": "i", + "97": "a", + "98": "m", + "99": "e", + "100": "t", + "101": "r", + "102": "i", + "103": "c", + "104": "s", + "105": "_", + "106": "p", + "107": "r", + "108": "e", + "109": "t", + "110": "t", + "111": "i", + "112": "f", + "113": "y", + "114": "_", + "115": "q", + "116": "u", + "117": "e", + "118": "r", + "119": "y", + "120": "_", + "121": "_", + "122": "r", + "123": "a", + "124": "t", + "125": "e", + "126": "_", + "127": "i", + "128": "n", + "129": "t", + "130": "e", + "131": "r", + "132": "v", + "133": "a", + "134": "l", + "135": "]", + "136": "\n", + "137": " ", + "138": " ", + "139": ")", + "140": "\n", + "141": " ", + "142": " ", + "143": " ", + "144": " ", + "145": "/", + "146": "\n", + "147": " ", + "148": " ", + "149": "p", + "150": "r", + "151": "o", + "152": "c", + "153": "e", + "154": "s", + "155": "s", + "156": "_", + "157": "c", + "158": "p", + "159": "u", + "160": "_", + "161": "c", + "162": "o", + "163": "r", + "164": "e", + "165": "s", + "166": "_", + "167": "a", + "168": "v", + "169": "a", + "170": "i", + "171": "l", + "172": "a", + "173": "b", + "174": "l", + "175": "e", + "176": "{", + "177": "j", + "178": "o", + "179": "b", + "180": "=", + "181": "~", + "182": "\"", + "183": "$", + "184": "j", + "185": "o", + "186": "b", + "187": "\"", + "188": ",", + "189": "i", + "190": "n", + "191": "s", + "192": "t", + "193": "a", + "194": "n", + "195": "c", + "196": "e", + "197": "=", + "198": "~", + "199": "\"", + "200": "$", + "201": "i", + "202": "n", + "203": "s", + "204": "t", + "205": "a", + "206": "n", + "207": "c", + "208": "e", + "209": "_", + "210": "s", + "211": "t", + "212": "o", + "213": "r", + "214": "a", + "215": "g", + "216": "e", + "217": "\"", + "218": "}", + "219": "\n", + "220": ")", + "datasource": { + "type": "victoriametrics-metrics-datasource", + 
"uid": "$ds" + }, + "editorMode": "code", + "expr": "min(\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance_storage\"}\n)", + "hide": false, + "legendFormat": "min", + "range": true, + "refId": "B" + }, + { + "0": "m", + "1": "a", + "2": "x", + "3": "(", + "4": "\n", + "5": " ", + "6": " ", + "7": "r", + "8": "a", + "9": "t", + "10": "e", + "11": "(", + "12": "\n", + "13": " ", + "14": " ", + "15": " ", + "16": " ", + "17": "p", + "18": "r", + "19": "o", + "20": "c", + "21": "e", + "22": "s", + "23": "s", + "24": "_", + "25": "c", + "26": "p", + "27": "u", + "28": "_", + "29": "s", + "30": "e", + "31": "c", + "32": "o", + "33": "n", + "34": "d", + "35": "s", + "36": "_", + "37": "t", + "38": "o", + "39": "t", + "40": "a", + "41": "l", + "42": "{", + "43": "j", + "44": "o", + "45": "b", + "46": "=", + "47": "~", + "48": "\"", + "49": "$", + "50": "j", + "51": "o", + "52": "b", + "53": "\"", + "54": ",", + "55": "i", + "56": "n", + "57": "s", + "58": "t", + "59": "a", + "60": "n", + "61": "c", + "62": "e", + "63": "=", + "64": "~", + "65": "\"", + "66": "$", + "67": "i", + "68": "n", + "69": "s", + "70": "t", + "71": "a", + "72": "n", + "73": "c", + "74": "e", + "75": "_", + "76": "s", + "77": "t", + "78": "o", + "79": "r", + "80": "a", + "81": "g", + "82": "e", + "83": "\"", + "84": "}", + "85": "[", + "86": "t", + "87": "m", + "88": "p", + "89": "_", + "90": "v", + "91": "i", + "92": "c", + "93": "t", + "94": "o", + "95": "r", + "96": "i", + "97": "a", + "98": "m", + "99": "e", + "100": "t", + "101": "r", + "102": "i", + "103": "c", + "104": "s", + "105": "_", + "106": "p", + "107": "r", + "108": "e", + "109": "t", + "110": "t", + "111": "i", + "112": "f", + "113": "y", + "114": "_", + "115": "q", + "116": "u", + "117": "e", + "118": "r", + "119": "y", + "120": "_", + "121": "_", + "122": "r", + "123": "a", + "124": "t", + "125": "e", + "126": 
"_", + "127": "i", + "128": "n", + "129": "t", + "130": "e", + "131": "r", + "132": "v", + "133": "a", + "134": "l", + "135": "]", + "136": "\n", + "137": " ", + "138": " ", + "139": ")", + "140": "\n", + "141": " ", + "142": " ", + "143": " ", + "144": " ", + "145": "/", + "146": "\n", + "147": " ", + "148": " ", + "149": "p", + "150": "r", + "151": "o", + "152": "c", + "153": "e", + "154": "s", + "155": "s", + "156": "_", + "157": "c", + "158": "p", + "159": "u", + "160": "_", + "161": "c", + "162": "o", + "163": "r", + "164": "e", + "165": "s", + "166": "_", + "167": "a", + "168": "v", + "169": "a", + "170": "i", + "171": "l", + "172": "a", + "173": "b", + "174": "l", + "175": "e", + "176": "{", + "177": "j", + "178": "o", + "179": "b", + "180": "=", + "181": "~", + "182": "\"", + "183": "$", + "184": "j", + "185": "o", + "186": "b", + "187": "\"", + "188": ",", + "189": "i", + "190": "n", + "191": "s", + "192": "t", + "193": "a", + "194": "n", + "195": "c", + "196": "e", + "197": "=", + "198": "~", + "199": "\"", + "200": "$", + "201": "i", + "202": "n", + "203": "s", + "204": "t", + "205": "a", + "206": "n", + "207": "c", + "208": "e", + "209": "_", + "210": "s", + "211": "t", + "212": "o", + "213": "r", + "214": "a", + "215": "g", + "216": "e", + "217": "\"", + "218": "}", + "219": "\n", + "220": ")", + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "quantile(0.5,\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance_storage\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance_storage\", instance=~\"$instance\"}\n)", + "hide": false, + "legendFormat": "median", + "range": true, + "refId": "C" + } + ], + "title": "CPU % usage ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "How much memory (RAM) the 
VictoriaLogs process is actually using, compared to its allowed container or system limit. See 'Memory Usage' panel for a detailed breakdown.\n\n- Good: Below 70% most of the time, maybe spiking a bit under load.\n- Bad: Above 90% for more than 5 minutes = risk of out-of-memory (`OOM`) kill.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 1, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 133 + }, + "id": 117, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": true, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(\n max_over_time(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", 
instance=~\"$instance_storage\"}\n)", + "interval": "", + "legendFormat": "max", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "min(\n max_over_time(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance_storage\"}\n)", + "hide": false, + "interval": "", + "legendFormat": "min", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "quantile(0.5,\n max_over_time(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance_storage\"}\n)", + "hide": false, + "interval": "", + "legendFormat": "median", + "range": true, + "refId": "C" + } + ], + "title": "Physical memory usage ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "description": "Disk space usage and limits for VictoriaLogs storage. Tracks current data usage against the configured retention limit and total disk space.\n\nThe orange line indicates the space retention limit. When usage approaches this limit, older data will be automatically deleted. 
If the space retention limit (`-retention.maxDiskSpaceUsageBytes`) is not specified, it won't show.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "max space retention of instances" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + }, + { + "id": "custom.insertNulls", + "value": 3600000 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 141 + }, + "id": 209, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(vl_max_disk_space_usage_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) by 
(job)\n/\nmax(vl_total_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}) by (job) > 0", + "hide": false, + "interval": "", + "legendFormat": "max space retention of instances", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(vl_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) by (job)\n/\nmax(vl_total_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}) by (job)", + "hide": false, + "interval": "", + "legendFormat": "{{job}}", + "range": true, + "refId": "B" + } + ], + "title": "Disk ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Rate of incoming query requests by endpoint path. This metric tracks the query load on VictoriaLogs, including different query interfaces and internal operations. \n\nHigher rates indicate increased query activity from users or applications. Monitor for sudden spikes that might indicate new dashboards, automated queries, or potential performance issues. 
Sustained high rates may require scaling query processing capacity.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 141 + }, + "id": 129, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])) by (path) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{path}}", + "range": true, + "refId": "A" + } + ], + "title": "HTTP request rate ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": 
"99th percentile of HTTP request duration. This represents the time it takes for 99% of HTTP operations to complete. High values indicate slow ingestion/querying performance that could affect overall system throughput. Spikes may suggest storage bottlenecks, resource contention, or inefficient data processing.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 149 + }, + "id": 101, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(vl_http_request_duration_seconds{job=~\"$job\", instance=~\"$instance_storage\", quantile=\"0.99\"}) by (path) > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{path}}", + 
"range": true, + "refId": "A" + } + ], + "title": "HTTP request duration p99 ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "99th percentile response time for VictoriaLogs HTTP endpoints, grouped by instance and path. This means 99% of requests are faster than this value. **Lower numbers are better**, as they indicate faster responses and fewer slow requests.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*(bytes)/" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 149 + }, + "id": 102, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + 
"targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vl_rows_ingested_total{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])) by (type) > 0", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vl_bytes_ingested_total{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])) by (type) > 0", + "hide": false, + "instant": false, + "legendFormat": "{{type}} (bytes)", + "range": true, + "refId": "B" + } + ], + "title": "Logs ingestion rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Number of concurrent insert operations has reached the configured limit: `-maxConcurrentInserts` (default: 2x CPU cores)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + 
"id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 157 + }, + "id": 100, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(increase(vm_concurrent_insert_limit_reached_total{job=~\"$job\", instance=~\"$instance_storage\"})) by (job)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Concurrent insert limit reached ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Number of insert requests that timed out while waiting for available concurrency slots. 
This indicates sustained ingestion overload beyond configured limits.\n\nHigh values suggest:\n- Insert queue is consistently full\n- Insert requests waiting too long for execution slots\n- System under sustained heavy ingestion load\n- Need for horizontal scaling or ingestion optimization\n\nCombined with the `Concurrent insert limit reached` panel, this provides a complete picture of ingestion rejection patterns.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 157 + }, + "id": 202, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(vm_concurrent_insert_limit_timeout_total{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])) by (job)", + "interval": "", + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], 
+ "title": "Insert timeouts ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Number of active message processors handling log ingestion. These processors parse and process incoming log messages before storage. The count typically correlates with the ingestion load.\n\nIf this number is unusually high or inflated (which is rare), check memory usage. It may indicate a heavy, concurrently ingestion load or processing bottlenecks that could benefit from performance tuning.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 165 + }, + "id": 112, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": 
"victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(vl_insert_processors_count{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval]) by (job)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(vm_concurrent_insert_capacity{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "soft limit", + "range": true, + "refId": "B" + } + ], + "title": "Message processors ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Number of log rows waiting to be written to storage, categorized by type. Pending rows indicate temporary queuing during ingestion. Consistently high values may suggest storage write bottlenecks or insufficient write capacity.\n\nPending rows are flushed in two ways:\n\n- After a specific time period (typically 1 second)\n- When the pending row size exceeds a threshold (typically 1.75 MB)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": 
[], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 165 + }, + "id": 98, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(vl_pending_rows{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval]) by (type)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Pending rows ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "99th percentile duration of background flush operations by type (max across instances). High values may indicate disk pressure or heavy ingestion. 
Correlate with `I/O` panels.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 173 + }, + "id": 118, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": false + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(vl_insert_flush_duration_seconds{job=~\"$job\", instance=~\"$instance_storage\", quantile=\"0.99\"}) by (type) > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Flush duration p99 ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "The number of new [log 
streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) created over the last 24 hours. Lower is better. See [How to determine which fields must be associated with log streams?](https://docs.victoriametrics.com/victorialogs/keyconcepts/#how-to-determine-which-fields-must-be-associated-with-log-streams)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 173 + }, + "id": 103, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(increase(vl_streams_created_total{job=~\"$job\", instance=~\"$instance_storage\"}[1d])) by (job)", + "format": "time_series", + "interval": "", + 
"intervalFactor": 1, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Streams churn rate 24h ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Rate of incoming query requests by endpoint path. This metric tracks the query load on VictoriaLogs, including different query interfaces and internal operations. \n\nHigher rates indicate increased query activity from users or applications. Monitor for sudden spikes that might indicate new dashboards, automated queries, or potential performance issues. Sustained high rates may require scaling query processing capacity.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 181 + }, + "id": 104, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", 
+ "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance_storage\",path=~\"^/(internal/)?select.*\"}[$__rate_interval])) by (path) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{path}}", + "range": true, + "refId": "A" + } + ], + "title": "Query requests rate ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "The number of concurrent select operations has reached the configured limit: `-search.maxConcurrentRequests`. To check the default capacity, refer to the `-vl_concurrent_select_capacity` metric.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { 
+ "h": 8, + "w": 12, + "x": 12, + "y": 181 + }, + "id": 205, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(increase(vl_concurrent_select_limit_reached_total{job=~\"$job\", instance=~\"$instance_storage\"})) by (job)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Concurrent query limit reached ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Number of queries that timed out while waiting for available concurrency slots. 
This indicates sustained query overload beyond configured limits.\n\nHigh values suggest:\n- Query queue is consistently full\n- Queries waiting too long for execution slots\n- System under sustained heavy load\n- Need for horizontal scaling or query optimization\n\nCombined with the `Concurrent query limit reached` panel, this provides a complete picture of query rejection patterns.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 189 + }, + "id": 200, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vl_concurrent_select_limit_timeout_total{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])) by (job) > 0", + "interval": "", + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Query timeouts 
($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Total number of time-based (daily) partitions in storage. The number typically grows over time as new data arrives and is partitioned by time periods. \n\nExcessive partition counts may indicate retention policy issues or very high data ingestion rates that could impact query performance.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 3, + "x": 0, + "y": 196 + }, + "id": 90, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(vl_partitions{job=~\"$job\", instance=~\"$instance_storage\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Partition Count ($instance_storage)", + "type": "stat" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "description": "Number of storage parts (data files) in each tier. More parts mean fragmentation; fewer parts suggest successful merging. 
High part counts may slow queries and trigger background merge operations.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 9, + "x": 3, + "y": 196 + }, + "id": 91, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(vl_storage_parts{job=~\"$job\", instance=~\"$instance_storage\"}) by(type)", + "hide": false, + "interval": "", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Part count max by type ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "description": "Number of storage merge operations by type (sum across instances). 
Merges compact smaller parts into larger ones; bursts are normal after activity spikes.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 196 + }, + "id": 93, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(vl_merges_total{job=~\"$job\", instance=~\"$instance_storage\"}[$__rate_interval])) by(type)", + "hide": false, + "interval": "$__rate_interval", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Merge events ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + 
"description": "99th percentile duration of merge operations by storage type. Merge operations combine smaller storage parts into larger ones for optimization. \n\nNormal merge durations vary by storage type and data volume. Consistently high durations may indicate storage performance issues, high write load, or insufficient resources for background operations. Monitor for trends that could impact overall system performance.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 204 + }, + "id": 94, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(vl_merge_duration_seconds{job=~\"$job\", instance=~\"$instance_storage\", 
quantile=\"0.99\"}) by (type) > 0", + "hide": false, + "interval": "", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Merge duration p99 ($instance_storage)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "description": "99th percentile of data volume processed during merge operations by storage type. \n\nThis metric indicates the scale of background storage optimization activities. Larger merge sizes generally improve storage efficiency but require more resources. Consistently high values may indicate heavy write loads or large storage parts that need optimization. Monitor correlation with merge duration for performance insights.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 204 + }, + "id": 95, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + 
"hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(vl_merge_bytes{job=~\"$job\", instance=~\"$instance_storage\", quantile=\"0.99\"}) by (type) > 0", + "hide": false, + "interval": "", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Merge bytes p99 ($instance_storage)", + "type": "timeseries" + } + ], + "title": "vlstorage", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 59 + }, + "id": 109, + "panels": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "CPU utilization per instance as a fraction of available cores. Sustained values above 80% indicate CPU saturation; correlate with CPU PSI and query latency.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 134 + }, 
+ "id": 206, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "0": "m", + "1": "a", + "2": "x", + "3": "(", + "4": "\n", + "5": " ", + "6": " ", + "7": "r", + "8": "a", + "9": "t", + "10": "e", + "11": "(", + "12": "\n", + "13": " ", + "14": " ", + "15": " ", + "16": " ", + "17": "p", + "18": "r", + "19": "o", + "20": "c", + "21": "e", + "22": "s", + "23": "s", + "24": "_", + "25": "c", + "26": "p", + "27": "u", + "28": "_", + "29": "s", + "30": "e", + "31": "c", + "32": "o", + "33": "n", + "34": "d", + "35": "s", + "36": "_", + "37": "t", + "38": "o", + "39": "t", + "40": "a", + "41": "l", + "42": "{", + "43": "j", + "44": "o", + "45": "b", + "46": "=", + "47": "~", + "48": "\"", + "49": "$", + "50": "j", + "51": "o", + "52": "b", + "53": "\"", + "54": ",", + "55": "i", + "56": "n", + "57": "s", + "58": "t", + "59": "a", + "60": "n", + "61": "c", + "62": "e", + "63": "=", + "64": "~", + "65": "\"", + "66": "$", + "67": "i", + "68": "n", + "69": "s", + "70": "t", + "71": "a", + "72": "n", + "73": "c", + "74": "e", + "75": "_", + "76": "s", + "77": "t", + "78": "o", + "79": "r", + "80": "a", + "81": "g", + "82": "e", + "83": "\"", + "84": "}", + "85": "[", + "86": "t", + "87": "m", + "88": "p", + "89": "_", + "90": "v", + "91": "i", + "92": "c", + "93": "t", + "94": "o", + "95": "r", + "96": "i", + "97": "a", + "98": "m", + "99": "e", + "100": "t", + "101": "r", + "102": "i", + "103": "c", + "104": "s", + "105": "_", + "106": "p", + "107": "r", + "108": "e", + "109": "t", + "110": "t", + "111": "i", + "112": "f", + "113": "y", + "114": "_", + "115": "q", + "116": "u", + "117": "e", + "118": "r", + "119": "y", + "120": "_", + "121": "_", + "122": "r", + "123": "a", + "124": "t", + "125": 
"e", + "126": "_", + "127": "i", + "128": "n", + "129": "t", + "130": "e", + "131": "r", + "132": "v", + "133": "a", + "134": "l", + "135": "]", + "136": "\n", + "137": " ", + "138": " ", + "139": ")", + "140": "\n", + "141": " ", + "142": " ", + "143": " ", + "144": " ", + "145": "/", + "146": "\n", + "147": " ", + "148": " ", + "149": "p", + "150": "r", + "151": "o", + "152": "c", + "153": "e", + "154": "s", + "155": "s", + "156": "_", + "157": "c", + "158": "p", + "159": "u", + "160": "_", + "161": "c", + "162": "o", + "163": "r", + "164": "e", + "165": "s", + "166": "_", + "167": "a", + "168": "v", + "169": "a", + "170": "i", + "171": "l", + "172": "a", + "173": "b", + "174": "l", + "175": "e", + "176": "{", + "177": "j", + "178": "o", + "179": "b", + "180": "=", + "181": "~", + "182": "\"", + "183": "$", + "184": "j", + "185": "o", + "186": "b", + "187": "\"", + "188": ",", + "189": "i", + "190": "n", + "191": "s", + "192": "t", + "193": "a", + "194": "n", + "195": "c", + "196": "e", + "197": "=", + "198": "~", + "199": "\"", + "200": "$", + "201": "i", + "202": "n", + "203": "s", + "204": "t", + "205": "a", + "206": "n", + "207": "c", + "208": "e", + "209": "_", + "210": "s", + "211": "t", + "212": "o", + "213": "r", + "214": "a", + "215": "g", + "216": "e", + "217": "\"", + "218": "}", + "219": "\n", + "220": ")", + "editorMode": "code", + "expr": "max(\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance_insert\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance_insert\"}\n)", + "legendFormat": "max", + "range": true, + "refId": "A" + }, + { + "0": "m", + "1": "a", + "2": "x", + "3": "(", + "4": "\n", + "5": " ", + "6": " ", + "7": "r", + "8": "a", + "9": "t", + "10": "e", + "11": "(", + "12": "\n", + "13": " ", + "14": " ", + "15": " ", + "16": " ", + "17": "p", + "18": "r", + "19": "o", + "20": "c", + "21": "e", + "22": "s", + "23": "s", + "24": "_", + "25": "c", + "26": "p", + "27": "u", + "28": 
"_", + "29": "s", + "30": "e", + "31": "c", + "32": "o", + "33": "n", + "34": "d", + "35": "s", + "36": "_", + "37": "t", + "38": "o", + "39": "t", + "40": "a", + "41": "l", + "42": "{", + "43": "j", + "44": "o", + "45": "b", + "46": "=", + "47": "~", + "48": "\"", + "49": "$", + "50": "j", + "51": "o", + "52": "b", + "53": "\"", + "54": ",", + "55": "i", + "56": "n", + "57": "s", + "58": "t", + "59": "a", + "60": "n", + "61": "c", + "62": "e", + "63": "=", + "64": "~", + "65": "\"", + "66": "$", + "67": "i", + "68": "n", + "69": "s", + "70": "t", + "71": "a", + "72": "n", + "73": "c", + "74": "e", + "75": "_", + "76": "s", + "77": "t", + "78": "o", + "79": "r", + "80": "a", + "81": "g", + "82": "e", + "83": "\"", + "84": "}", + "85": "[", + "86": "t", + "87": "m", + "88": "p", + "89": "_", + "90": "v", + "91": "i", + "92": "c", + "93": "t", + "94": "o", + "95": "r", + "96": "i", + "97": "a", + "98": "m", + "99": "e", + "100": "t", + "101": "r", + "102": "i", + "103": "c", + "104": "s", + "105": "_", + "106": "p", + "107": "r", + "108": "e", + "109": "t", + "110": "t", + "111": "i", + "112": "f", + "113": "y", + "114": "_", + "115": "q", + "116": "u", + "117": "e", + "118": "r", + "119": "y", + "120": "_", + "121": "_", + "122": "r", + "123": "a", + "124": "t", + "125": "e", + "126": "_", + "127": "i", + "128": "n", + "129": "t", + "130": "e", + "131": "r", + "132": "v", + "133": "a", + "134": "l", + "135": "]", + "136": "\n", + "137": " ", + "138": " ", + "139": ")", + "140": "\n", + "141": " ", + "142": " ", + "143": " ", + "144": " ", + "145": "/", + "146": "\n", + "147": " ", + "148": " ", + "149": "p", + "150": "r", + "151": "o", + "152": "c", + "153": "e", + "154": "s", + "155": "s", + "156": "_", + "157": "c", + "158": "p", + "159": "u", + "160": "_", + "161": "c", + "162": "o", + "163": "r", + "164": "e", + "165": "s", + "166": "_", + "167": "a", + "168": "v", + "169": "a", + "170": "i", + "171": "l", + "172": "a", + "173": "b", + "174": "l", + "175": "e", 
+ "176": "{", + "177": "j", + "178": "o", + "179": "b", + "180": "=", + "181": "~", + "182": "\"", + "183": "$", + "184": "j", + "185": "o", + "186": "b", + "187": "\"", + "188": ",", + "189": "i", + "190": "n", + "191": "s", + "192": "t", + "193": "a", + "194": "n", + "195": "c", + "196": "e", + "197": "=", + "198": "~", + "199": "\"", + "200": "$", + "201": "i", + "202": "n", + "203": "s", + "204": "t", + "205": "a", + "206": "n", + "207": "c", + "208": "e", + "209": "_", + "210": "s", + "211": "t", + "212": "o", + "213": "r", + "214": "a", + "215": "g", + "216": "e", + "217": "\"", + "218": "}", + "219": "\n", + "220": ")", + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "min(\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance_insert\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance_insert\"}\n)", + "hide": false, + "legendFormat": "min", + "range": true, + "refId": "B" + }, + { + "0": "m", + "1": "a", + "2": "x", + "3": "(", + "4": "\n", + "5": " ", + "6": " ", + "7": "r", + "8": "a", + "9": "t", + "10": "e", + "11": "(", + "12": "\n", + "13": " ", + "14": " ", + "15": " ", + "16": " ", + "17": "p", + "18": "r", + "19": "o", + "20": "c", + "21": "e", + "22": "s", + "23": "s", + "24": "_", + "25": "c", + "26": "p", + "27": "u", + "28": "_", + "29": "s", + "30": "e", + "31": "c", + "32": "o", + "33": "n", + "34": "d", + "35": "s", + "36": "_", + "37": "t", + "38": "o", + "39": "t", + "40": "a", + "41": "l", + "42": "{", + "43": "j", + "44": "o", + "45": "b", + "46": "=", + "47": "~", + "48": "\"", + "49": "$", + "50": "j", + "51": "o", + "52": "b", + "53": "\"", + "54": ",", + "55": "i", + "56": "n", + "57": "s", + "58": "t", + "59": "a", + "60": "n", + "61": "c", + "62": "e", + "63": "=", + "64": "~", + "65": "\"", + "66": "$", + "67": "i", + "68": "n", + "69": "s", + "70": "t", + "71": "a", + "72": "n", + "73": "c", + "74": "e", 
+ "75": "_", + "76": "s", + "77": "t", + "78": "o", + "79": "r", + "80": "a", + "81": "g", + "82": "e", + "83": "\"", + "84": "}", + "85": "[", + "86": "t", + "87": "m", + "88": "p", + "89": "_", + "90": "v", + "91": "i", + "92": "c", + "93": "t", + "94": "o", + "95": "r", + "96": "i", + "97": "a", + "98": "m", + "99": "e", + "100": "t", + "101": "r", + "102": "i", + "103": "c", + "104": "s", + "105": "_", + "106": "p", + "107": "r", + "108": "e", + "109": "t", + "110": "t", + "111": "i", + "112": "f", + "113": "y", + "114": "_", + "115": "q", + "116": "u", + "117": "e", + "118": "r", + "119": "y", + "120": "_", + "121": "_", + "122": "r", + "123": "a", + "124": "t", + "125": "e", + "126": "_", + "127": "i", + "128": "n", + "129": "t", + "130": "e", + "131": "r", + "132": "v", + "133": "a", + "134": "l", + "135": "]", + "136": "\n", + "137": " ", + "138": " ", + "139": ")", + "140": "\n", + "141": " ", + "142": " ", + "143": " ", + "144": " ", + "145": "/", + "146": "\n", + "147": " ", + "148": " ", + "149": "p", + "150": "r", + "151": "o", + "152": "c", + "153": "e", + "154": "s", + "155": "s", + "156": "_", + "157": "c", + "158": "p", + "159": "u", + "160": "_", + "161": "c", + "162": "o", + "163": "r", + "164": "e", + "165": "s", + "166": "_", + "167": "a", + "168": "v", + "169": "a", + "170": "i", + "171": "l", + "172": "a", + "173": "b", + "174": "l", + "175": "e", + "176": "{", + "177": "j", + "178": "o", + "179": "b", + "180": "=", + "181": "~", + "182": "\"", + "183": "$", + "184": "j", + "185": "o", + "186": "b", + "187": "\"", + "188": ",", + "189": "i", + "190": "n", + "191": "s", + "192": "t", + "193": "a", + "194": "n", + "195": "c", + "196": "e", + "197": "=", + "198": "~", + "199": "\"", + "200": "$", + "201": "i", + "202": "n", + "203": "s", + "204": "t", + "205": "a", + "206": "n", + "207": "c", + "208": "e", + "209": "_", + "210": "s", + "211": "t", + "212": "o", + "213": "r", + "214": "a", + "215": "g", + "216": "e", + "217": "\"", + "218": "}", 
+ "219": "\n", + "220": ")", + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "quantile(0.5,\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance_insert\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance_insert\"}\n)", + "hide": false, + "legendFormat": "median", + "range": true, + "refId": "C" + } + ], + "title": "CPU % usage ($instance_insert)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "99th percentile of insert operation duration. This represents the time it takes for 99% of insert operations to complete. High values indicate slow ingestion performance that could affect overall system throughput. Spikes may suggest storage bottlenecks, resource contention, or inefficient data processing.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 134 + }, + "id": 127, + "options": { + "legend": { + "calcs": [ + "mean", + 
"lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(vl_http_request_duration_seconds{job=~\"$job\", instance=~\"$instance_insert_all\", instance=~\"$instance\", quantile=\"0.99\"}) by (path) > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{path}}", + "range": true, + "refId": "A" + } + ], + "title": "HTTP request duration p99 ($instance_insert)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Rate of incoming query requests by endpoint path. This metric tracks the query load on VictoriaLogs, including different query interfaces and internal operations. \n\nHigher rates indicate increased query activity from users or applications. Monitor for sudden spikes that might indicate new dashboards, automated queries, or potential performance issues. 
Sustained high rates may require scaling query processing capacity.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 142 + }, + "id": 130, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance_insert\"}[$__rate_interval])) by (path) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{path}}", + "range": true, + "refId": "A" + } + ], + "title": "HTTP request rate ($instance_insert)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "99th percentile 
response time for VictoriaLogs HTTP endpoints, grouped by instance and path. This means 99% of requests are faster than this value. **Lower numbers are better**, as they indicate faster responses and fewer slow requests.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*(bytes)/" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 142 + }, + "id": 113, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vl_rows_ingested_total{job=~\"$job\", instance=~\"$instance_insert\"}[$__rate_interval])) by (type) > 0", 
+ "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vl_bytes_ingested_total{job=~\"$job\", instance=~\"$instance_insert\"}[$__rate_interval])) by (type) > 0", + "hide": false, + "instant": false, + "legendFormat": "{{type}} (bytes)", + "range": true, + "refId": "B" + } + ], + "title": "Logs ingestion rate ($instance_insert)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Number of concurrent insert operations has reached the configured limit: `-maxConcurrentInserts` (default: 2x CPU cores)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 150 + }, + "id": 207, + 
"options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(increase(vm_concurrent_insert_limit_reached_total{job=~\"$job\", instance=~\"$instance_insert\"})) by (job)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Concurrent insert limit reached ($instance_insert)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Number of insert requests that timed out while waiting for available concurrency slots. This indicates sustained ingestion overload beyond configured limits.\n\nHigh values suggest:\n- Insert queue is consistently full\n- Insert requests waiting too long for execution slots\n- System under sustained heavy ingestion load\n- Need for horizontal scaling or ingestion optimization\n\nCombined with `Insert concurrency limit reached`, provides complete picture of ingestion rejection patterns.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 150 + }, + "id": 203, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vm_concurrent_insert_limit_timeout_total{job=~\"$job\", instance=~\"$instance_insert\"}[$__rate_interval])) by (job) > 0", + "interval": "", + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Insert timeouts ($instance_insert)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Number of active message processors handling log ingestion. These processors parse and process incoming log messages before storage. The count typically correlates with the ingestion load.\n\nIf this number is unusually high or inflated (which is rare), check memory usage. 
It may indicate a heavy ingestion load or processing bottlenecks that could benefit from performance tuning.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -1800,8 +8713,8 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", + "fillOpacity": 10, + "gradientMode": "opacity", "hideFrom": { "legend": false, "tooltip": false, @@ -1824,6 +8737,7 @@ "mode": "off" } }, + "decimals": 0, "links": [], "mappings": [], "min": 0, @@ -1831,7 +8745,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": 0 }, { "color": "red", @@ -1846,10 +8761,10 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 543 + "x": 0, + "y": 158 }, - "id": 26, + "id": 114, "options": { "legend": { "calcs": [ @@ -1869,7 +8784,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1877,16 +8792,29 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "increase(vl_streams_created_total{job=~\"$job\", instance=~\"$instance\"}[1d])", + "expr": "max(vl_insert_processors_count{job=~\"$job\", instance=~\"$instance_insert\"}[$__rate_interval]) by (job)", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "new log streams over 24h", + "legendFormat": "{{job}}", "range": true, "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(vm_concurrent_insert_capacity{job=~\"$job\", instance=~\"$instance_insert\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "soft limit", + "range": true, + "refId": "B" } ], - "title": "Log stream churn rate", + "title": "Message processors ($instance_insert)", "type": "timeseries" }, { @@ -1894,92 +8822,92 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": 
"Flags explicitly set to non-default values", + "description": "99th percentile duration of flush operations in vlinsert by type (max across instances). Spikes imply slow disks or backpressure from remote storage.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" }, "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "inspect": false + "thresholdsStyle": { + "mode": "off" + } }, + "links": [], "mappings": [], + "min": 0, "thresholds": { "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": 0 }, { "color": "red", "value": 80 } ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Time" - }, - "properties": [ - { - "id": "custom.hidden", - "value": true - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value" - }, - "properties": [ - { - "id": "custom.hidden", - "value": true - } - ] }, - { - "matcher": { - "id": "byName", - "options": "job" - }, - "properties": [ - { - "id": "custom.hidden", - "value": true - } - ] - } - ] + "unit": "s" + }, + "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 607 + "x": 12, + "y": 158 }, - "id": 70, + "id": 119, "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" ], - "show": false + "displayMode": "table", + "placement": 
"bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true }, - "showHeader": true + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1987,28 +8915,27 @@ "uid": "$ds" }, "editorMode": "code", - "exemplar": false, - "expr": "sum(flag{is_set=\"true\", job=~\"$job\", instance=~\"$instance\"}) by(job, instance, name, value)", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, + "expr": "sum(vl_insert_flush_duration_seconds{job=~\"$job\", instance=~\"$instance_insert\", quantile=\"0.99\"}) by (type) > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "range": true, "refId": "A" } ], - "title": "Non-default flags", - "type": "table" + "title": "Flush duration p99 ($instance_insert)", + "type": "timeseries" }, { "datasource": { "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows how many log entries were ignored or dropped on insertion due to various reasons:\n* timestamp out of retention period or timestamp in future;\n* number of fields per entry exceeded.", + "description": "Number of active log streams tracked by vlinsert per instance. 
Steady growth indicates increasing stream cardinality; review stream field design.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -2043,12 +8970,15 @@ "mode": "off" } }, + "links": [], "mappings": [], + "min": 0, "thresholds": { "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": 0 }, { "color": "red", @@ -2063,10 +8993,10 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 607 + "x": 0, + "y": 166 }, - "id": 71, + "id": 120, "options": { "legend": { "calcs": [ @@ -2083,10 +9013,10 @@ "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "none" + "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2094,42 +9024,28 @@ "uid": "$ds" }, "editorMode": "code", - "exemplar": true, - "expr": "sum(increase(vl_rows_dropped_total{job=~\"$job\", instance=~\"$instance\"}[1h])) by (job, reason)", - "hide": false, - "interval": "", - "legendFormat": "{{job}} - {{reason}}", + "expr": "sum(vl_insert_active_streams{job=~\"$job\", instance=~\"$instance_insert\"}) by (job)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "Logs dropped for last 1h", + "title": "Tracked streams ($instance_insert)", "type": "timeseries" - } - ], - "title": "Troubleshooting", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 37 - }, - "id": 28, - "panels": [ + }, { "datasource": { "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Percentage of used memory (resident).\nThe application's performance will significantly degrade when memory usage is close to 100%.", + "description": "Number of remote write send errors over the last 24 hours per vlinsert instance. 
Non‑zero values typically indicate connectivity issues, timeouts, or 4xx/5xx responses.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "fixedColor": "red", + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -2172,7 +9088,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -2180,17 +9096,17 @@ } ] }, - "unit": "percentunit" + "unit": "short" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 150 + "x": 12, + "y": 166 }, - "id": 38, + "id": 121, "options": { "legend": { "calcs": [ @@ -2210,7 +9126,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2218,26 +9134,41 @@ "uid": "$ds" }, "editorMode": "code", - "exemplar": false, - "expr": "max(\n max_over_time(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n) by(job)", - "interval": "", - "legendFormat": "__auto", + "expr": "sum(increase(vl_insert_remote_send_errors_total{job=~\"$job\", instance=~\"$instance_insert\"}[24h])) by (job)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "RSS memory % usage ($instance)", + "title": "Send errors over 24h ($instance_insert)", "type": "timeseries" - }, + } + ], + "title": "vlinsert", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 60 + }, + "id": 122, + "panels": [ { "datasource": { "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, + "description": "CPU utilization per vlselect instance as a fraction of available cores. 
Sustained values above 80% suggest CPU bottlenecks; correlate with query duration.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -2280,7 +9211,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -2288,17 +9219,33 @@ } ] }, - "unit": "percentunit" + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2495C", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 150 + "x": 0, + "y": 135 }, - "id": 44, + "id": 99, "options": { "legend": { "calcs": [ @@ -2315,10 +9262,10 @@ "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "none" + "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2327,16 +9274,31 @@ }, "editorMode": "code", "exemplar": false, - "expr": "max(\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}\n) by(job)", + "expr": "max(process_cpu_cores_available{job=~\"$job\", instance=~\"$instance_select\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Limit", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance_select\"}[$__rate_interval])) by (job)", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "__auto", + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "CPU % usage ($instance)", + "title": "CPU % usage ($instance_select)", "type": "timeseries" }, { @@ -2344,11 +9306,11 @@ 
"type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Share for memory allocated by the process itself. When memory usage reaches 100% it will be likely OOM-killed.\nSafe memory usage % considered to be below 80%", + "description": "How much memory (RAM) the VictoriaLogs process is actually using, compared to its allowed container or system limit. See 'Memory Usage' panel for a detailed breakdown.\n\n- Good: Below 70% most of the time, maybe spiking a bit under load.\n- Bad: Above 90% for more than 5 minutes = risk of out-of-memory (`OOM`) kill.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -2373,7 +9335,7 @@ "scaleDistribution": { "type": "linear" }, - "showPoints": "never", + "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", @@ -2391,7 +9353,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -2399,17 +9361,33 @@ } ] }, - "unit": "percentunit" + "unit": "bytes" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 158 + "x": 12, + "y": 135 }, - "id": 42, + "id": 97, "options": { "legend": { "calcs": [ @@ -2424,12 +9402,12 @@ "sortDesc": true }, "tooltip": { - "hideZeros": false, + "hideZeros": true, "mode": "multi", "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2438,14 +9416,28 @@ }, "editorMode": "code", "exemplar": false, - "expr": "max(\n max_over_time(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n) by(job)", + "expr": "sum(process_resident_memory_bytes{job=~\"$job\", 
instance=~\"$instance_select\"}[$__rate_interval]) by (job)", "interval": "", - "legendFormat": "__auto", + "legendFormat": "{{job}}", "range": true, "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance_select\"})", + "hide": false, + "interval": "", + "legendFormat": "limit", + "range": true, + "refId": "B" } ], - "title": "RSS anonymous memory % usage ($instance)", + "title": "Physical memory usage ($instance_select)", "type": "timeseries" }, { @@ -2453,7 +9445,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\nThe lower the better.", + "description": "Rate of incoming query requests by endpoint path. This metric tracks the query load on VictoriaLogs, including different query interfaces and internal operations. \n\nHigher rates indicate increased query activity from users or applications. Monitor for sudden spikes that might indicate new dashboards, automated queries, or potential performance issues. 
Sustained high rates may require scaling query processing capacity.", "fieldConfig": { "defaults": { "color": { @@ -2489,10 +9481,9 @@ "mode": "none" }, "thresholdsStyle": { - "mode": "line" + "mode": "off" } }, - "decimals": 0, "links": [], "mappings": [], "min": 0, @@ -2501,21 +9492,25 @@ "steps": [ { "color": "green", - "value": null + "value": 0 + }, + { + "color": "red", + "value": 80 } ] }, - "unit": "s" + "unit": "short" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 158 + "x": 0, + "y": 143 }, - "id": 76, + "id": 125, "options": { "legend": { "calcs": [ @@ -2535,7 +9530,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2543,31 +9538,16 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "max(rate(process_pressure_cpu_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job)", + "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance_select\"}[$__rate_interval])) by (path) > 0", "format": "time_series", "interval": "", - "intervalFactor": 2, - "legendFormat": "{{job}} - waiting", + "intervalFactor": 1, + "legendFormat": "{{path}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "victoriametrics-metrics-datasource", - "uid": "$ds" - }, - "editorMode": "code", - "expr": "max(rate(process_pressure_cpu_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job)", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{job}} - stalled", - "range": true, - "refId": "B" } ], - "title": "CPU pressure", + "title": "HTTP request rate ($instance_select)", "type": "timeseries" }, { @@ -2575,7 +9555,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\nThe lower the better.", + "description": "99th percentile of query execution duration. 
This represents the time it takes for 99% of queries to complete:\n\n- High values indicate slow query performance that affects user experience. \n- Spikes may suggest complex queries, resource contention, or inefficient indexes. Monitor for trends that could indicate degrading performance.", "fieldConfig": { "defaults": { "color": { @@ -2590,7 +9570,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2611,10 +9591,9 @@ "mode": "none" }, "thresholdsStyle": { - "mode": "line" + "mode": "off" } }, - "decimals": 0, "links": [], "mappings": [], "min": 0, @@ -2623,7 +9602,11 @@ "steps": [ { "color": "green", - "value": null + "value": 0 + }, + { + "color": "red", + "value": 80 } ] }, @@ -2634,10 +9617,10 @@ "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 166 + "x": 12, + "y": 143 }, - "id": 77, + "id": 126, "options": { "legend": { "calcs": [ @@ -2657,7 +9640,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2665,31 +9648,15 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "max(rate(process_pressure_memory_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job)", + "expr": "max(vl_http_request_duration_seconds{job=~\"$job\", instance=~\"$instance_select\", quantile=\"0.99\"}) by (path) > 0", "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{job}} - waiting", + "intervalFactor": 1, + "legendFormat": "{{path}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "victoriametrics-metrics-datasource", - "uid": "$ds" - }, - "editorMode": "code", - "expr": "max(rate(process_pressure_memory_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job)", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{job}} - stalled", - "range": true, - "refId": "B" } ], - "title": "Memory 
pressure", + "title": "HTTP request duration p99 ($instance_select)", "type": "timeseries" }, { @@ -2697,7 +9664,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows the number of bytes read/write from the storage layer.", + "description": "Number of times the number of concurrent select operations reached the limit configured via `-search.maxConcurrentRequests`. To check the default capacity, refer to the `vl_concurrent_select_capacity` metric.", "fieldConfig": { "defaults": { "color": { @@ -2738,12 +9705,13 @@ }, "links": [], "mappings": [], + "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -2751,18 +9719,21 @@ } ] }, - "unit": "bytes" + "unit": "short" }, "overrides": [ { "matcher": { - "id": "byRegexp", - "options": "/read.*/" + "id": "byName", + "options": "limit" }, "properties": [ { - "id": "custom.transform", - "value": "negative-Y" + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } } ] } @@ -2771,10 +9742,10 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 166 + "x": 0, + "y": 151 }, - "id": 52, + "id": 208, "options": { "legend": { "calcs": [ @@ -2791,10 +9762,10 @@ "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "none" + "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2802,32 +9773,16 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(process_io_storage_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job) > 0", + "expr": "max(increase(vl_concurrent_select_limit_reached_total{job=~\"$job\", instance=~\"$instance_select\"})) by (job)", "format": "time_series", - "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "read {{job}}", + "legendFormat": "{{job}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "victoriametrics-metrics-datasource", - "uid": "$ds" - }, -
"editorMode": "code", - "expr": "sum(rate(process_io_storage_written_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job) > 0", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "write {{job}}", - "range": true, - "refId": "B" } ], - "title": "Disk writes/reads ($instance)", + "title": "Concurrent query limit reached ($instance_select)", "type": "timeseries" }, { @@ -2835,7 +9790,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows the percentage of open file descriptors compared to the limit set in the OS.\nReaching the limit of open files can cause various issues and must be prevented.\n\nSee how to change limits here https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a", + "description": "Number of queries that timed out while waiting for available concurrency slots. This indicates sustained query overload beyond configured limits.\n\nHigh values suggest:\n- Query queue is consistently full\n- Queries waiting too long for execution slots\n- System under sustained heavy load\n- Need for horizontal scaling or query optimization\n\nCombined with `Select concurrency limit reached`, provides complete picture of query rejection patterns.", "fieldConfig": { "defaults": { "color": { @@ -2850,11 +9805,12 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, + "vis": false, "viz": false }, "insertNulls": false, @@ -2874,70 +9830,45 @@ "mode": "off" } }, - "decimals": 2, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", - "value": 80 + "value": 1 } ] }, - "unit": "percentunit" + "unit": "reqps" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "max" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C4162A", - "mode": "fixed" - } - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 174 + "x": 12, + "y": 151 }, - "id": 46, + "id": 204, "options": { "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", + "calcs": [], + "displayMode": "list", "placement": "bottom", - "showLegend": true, - "sortBy": "Last *", - "sortDesc": true + "showLegend": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2945,17 +9876,14 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "max(\n max_over_time(process_open_fds{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_max_fds{job=~\"$job\", instance=~\"$instance\"}\n) by(job)", - "format": "time_series", - "hide": false, + "expr": "sum(increase(vl_concurrent_select_limit_timeout_total{job=~\"$job\", instance=~\"$instance_select\"}[$__rate_interval])) by (job)", "interval": "", - "intervalFactor": 2, - "legendFormat": "__auto", + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "Open FDs ($instance)", + "title": "Query timeouts ($instance_select)", "type": "timeseries" }, { @@ -2963,11 +9891,12 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows the number of read/write syscalls such as read, pread, write, pwrite.", + "description": "Number of remote send errors reported by vlselect. 
Check the logs when sending fails to see the details.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "fixedColor": "red", + "mode": "fixed" }, "custom": { "axisBorderShow": false, @@ -3004,12 +9933,13 @@ }, "links": [], "mappings": [], + "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3019,28 +9949,15 @@ }, "unit": "short" }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 174 + "x": 0, + "y": 159 }, - "id": 56, + "id": 133, "options": { "legend": { "calcs": [ @@ -3057,10 +9974,10 @@ "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "none" + "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -3068,43 +9985,41 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(process_io_read_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job) > 0", + "expr": "sum(increase(vl_select_remote_send_errors_total{job=~\"$job\", instance=~\"$instance_select\"}[$__rate_interval])) by (job) > 0", "format": "time_series", - "hide": false, - "interval": "", "intervalFactor": 1, - "legendFormat": "read {{job}}", + "legendFormat": "{{job}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "victoriametrics-metrics-datasource", - "uid": "$ds" - }, - "editorMode": "code", - "expr": "sum(rate(process_io_write_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job) > 0", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "write {{job}}", - "range": true, - "refId": "B" } ], - "title": "Disk write/read calls ($instance)", + "title": "Send errors ($instance_select)", "type": "timeseries" - }, + }
+ ], + "title": "vlselect", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 61 + }, + "id": 151, + "panels": [ { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, + "description": "Number of storage blocks scanned per query (99th percentile). Each block contains logs for a specific time period and field combination. High values indicate queries scanning too many blocks, often caused by:\n\n- Wide time ranges without specific filters\n- Queries missing indexed fields (like `_stream`, `kubernetes.*`)\n- Non-selective filters that don't utilize `bloom filters`\n\nCorrelate with `Bytes/query p99` - if blocks are high but bytes are low, blocks contain little data (good). If both are high, query is reading large amounts of data.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3139,16 +10054,15 @@ "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3156,7 +10070,7 @@ } ] }, - "unit": "short" + "unit": "none" }, "overrides": [] }, @@ -3164,9 +10078,9 @@ "h": 8, "w": 12, "x": 0, - "y": 182 + "y": 136 }, - "id": 50, + "id": 152, "options": { "legend": { "calcs": [ @@ -3186,35 +10100,36 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "sum(max_over_time(go_goroutines{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "__auto", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_processed_blocks_bucket{job=~\"$job\", 
instance=~\"$instance\"}[$__rate_interval])) by (job, vmrange))) by (job)", + "hide": false, + "interval": "", + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "Goroutines ($instance)", + "title": "Blocks/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, - "description": "Shows IO pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\nThe lower the better.", + "description": "Total bytes read from disk per query (99th percentile). This represents the complete `I/O` overhead for query execution, including:\n\n- Block headers and metadata\n- Bloom filter data for candidate selection\n- Column headers and indexes\n- Actual log values and timestamps\n\nHigh values indicate expensive queries. Compare with specific breakdown panels below to identify bottlenecks. Monitor trends over time and correlate with query complexity.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3246,23 +10161,26 @@ "mode": "none" }, "thresholdsStyle": { - "mode": "line" + "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 + }, + { + "color": "red", + "value": 80 } ] }, - "unit": "s" + "unit": "bytes" }, "overrides": [] }, @@ -3270,9 +10188,9 @@ "h": 8, "w": 12, "x": 12, - "y": 182 + "y": 136 }, - "id": 78, + "id": 150, "options": { "legend": { "calcs": [ @@ -3289,53 +10207,39 @@ "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "desc" + "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" - }, - "editorMode": "code", - "expr": 
"max(rate(process_pressure_io_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job)", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{job}} - waiting", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "max(rate(process_pressure_io_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job)", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_total_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job, vmrange))) by (job)", "hide": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{job}} - stalled", + "legendFormat": "{{job}}", "range": true, - "refId": "B" + "refId": "A" } ], - "title": "IO pressure", + "title": "Bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, + "description": "Bytes read from block headers per query (99th percentile). 
Block headers contain metadata about each storage block including `time ranges`, `field names`, and data location pointers.\n\nHigh values indicate:\n- Query `time range` spans many blocks (reduce time range or add time-based filters)\n- Missing stream-level filters (`_stream` field) causing full block header scans\n- High cardinality fields creating excessive blocks\n\nMonitor relative changes over time - sudden increases suggest inefficient query patterns or changes in data structure.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3370,16 +10274,15 @@ "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3387,7 +10290,7 @@ } ] }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, @@ -3395,9 +10298,9 @@ "h": 8, "w": 12, "x": 0, - "y": 190 + "y": 144 }, - "id": 54, + "id": 153, "options": { "legend": { "calcs": [ @@ -3417,35 +10320,36 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "sum(max_over_time(process_num_threads{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "__auto", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_block_headers_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job, vmrange))) by (job) > 0.001", + "hide": false, + "interval": "", + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "Threads ($instance)", + "title": "Block header bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": 
"victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, - "description": "", + "description": "Bytes read from Bloom filters per query (99th percentile). `Bloom filters` are probabilistic data structures that quickly eliminate blocks that definitely don't contain search terms.\n\nHigh values indicate:\n- Queries with low-selectivity text filters (common words like `error`, `info`)\n- Missing or ineffective field-based filters\n- Queries that force scanning many candidate blocks\n\nOptimize by adding specific field filters (`kubernetes.container_name`, `_stream`) before text searches. Monitor for sudden increases that indicate poor filter selectivity.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3480,15 +10384,15 @@ "mode": "off" } }, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3496,7 +10400,7 @@ } ] }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, @@ -3504,9 +10408,9 @@ "h": 8, "w": 12, "x": 12, - "y": 190 + "y": 144 }, - "id": 60, + "id": 154, "options": { "legend": { "calcs": [ @@ -3526,36 +10430,36 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "sum(max_over_time(vm_tcplistener_conns{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(job)", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_bloom_filters_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job, vmrange))) by (job) > 0.001", "hide": false, - "intervalFactor": 1, - "legendFormat": "__auto", + "interval": "", + "legendFormat": "{{job}}", "range": true, 
"refId": "A" } ], - "title": "TCP connections ($instance)", + "title": "Bloom filter bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, - "description": "Shows the percent of CPU spent on garbage collection.\n\nIf % is high, then CPU usage can be decreased by changing GOGC to higher values. Increasing GOGC value will increase memory usage, and decrease CPU usage.\n\nTry searching for keyword `GOGC` at https://docs.victoriametrics.com/victoriametrics/troubleshooting/ ", + "description": "Bytes read from actual log values per query (99th percentile). This represents the uncompressed log content being retrieved and processed for the query result.\n\nHigh values indicate:\n- Queries returning large result sets (add `LIMIT` clause)\n- Retrieving logs with large payloads (`JSON` objects, stack traces)\n- Missing filters that would reduce matching log volume\n- Functions like `uniq` or `stats` processing many log entries\n\nReduce by: adding selective filters, using field extractors instead of full log retrieval, limiting result count.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3590,16 +10494,15 @@ "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3607,7 +10510,7 @@ } ] }, - "unit": "percentunit" + "unit": "bytes" }, "overrides": [] }, @@ -3615,9 +10518,9 @@ "h": 8, "w": 12, "x": 0, - "y": 198 + "y": 152 }, - "id": 74, + "id": 155, "options": { "legend": { "calcs": [ @@ -3627,44 +10530,46 @@ ], "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true }, "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "desc" + "sort": 
"none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "max(\n rate(go_gc_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) \n / rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n ) by(job)", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_values_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job, vmrange))) by (job)", + "hide": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "__auto", + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "CPU spent on GC ($instance)", + "title": "Value bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, - "description": "", + "description": "Bytes read from timestamp column (`_time`) per query (99th percentile). The `_time` column is automatically indexed and used for time-range filtering during query execution.\n\nHigh values indicate:\n- Queries with very wide time ranges requiring timestamp scanning\n- Time-based aggregations over large datasets\n- Missing time-range restrictions in query filters\n\nThis is usually the smallest component of query `I/O`. 
Spikes correlate with time range width and data density in the queried period.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3699,15 +10604,15 @@ "mode": "off" } }, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3715,7 +10620,7 @@ } ] }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, @@ -3723,9 +10628,9 @@ "h": 8, "w": 12, "x": 12, - "y": 198 + "y": 152 }, - "id": 58, + "id": 156, "options": { "legend": { "calcs": [ @@ -3745,36 +10650,36 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "sum(rate(vm_tcplistener_accepts_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job)", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_timestamps_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job, vmrange))) by (job)", "hide": false, - "intervalFactor": 1, - "legendFormat": "__auto", + "interval": "", + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "TCP connections rate ($instance)", + "title": "_time bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, - "description": "Shows the rate of allocations in memory. Sudden increase in allocations would mean increased pressure on Go Garbage Collector and can saturate CPU resources of the application.", + "description": "Bytes read from column header indexes per query (99th percentile). 
`Column header indexes` contain metadata about which fields exist in each block and their data types.\n\nHigh values suggest:\n- Queries scanning many blocks due to missing field filters\n- High field cardinality creating large index structures\n- Queries accessing many different field names across blocks\n- Schema evolution causing index fragmentation\n\nOptimize by using consistent `field names` and adding field-specific filters early in query pipeline.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3806,19 +10711,22 @@ "mode": "none" }, "thresholdsStyle": { - "mode": "line" + "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 + }, + { + "color": "red", + "value": 80 } ] }, @@ -3830,9 +10738,9 @@ "h": 8, "w": 12, "x": 0, - "y": 206 + "y": 160 }, - "id": 75, + "id": 157, "options": { "legend": { "calcs": [ @@ -3842,44 +10750,46 @@ ], "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true }, "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "desc" + "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "max(rate(go_memstats_alloc_bytes_total{job=~\"$job\"}[$__rate_interval])) by (job)", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_columns_header_indexes_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job, vmrange))) by (job)", + "hide": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "__auto", + "legendFormat": "{{job}}", "range": true, "refId": "A" } 
], - "title": "Memory allocations rate", + "title": "Column header index bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, - "description": "Shows the time goroutines have spent in runnable state before actually running. The lower is better.\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.", + "description": "Bytes read from column headers per query (99th percentile). `Column headers` store the actual field schema and compression metadata for each block's columns.\n\nThis metric helps identify:\n- Schema complexity overhead (many fields per log entry)\n- Inefficient field access patterns\n- Blocks with heterogeneous schemas requiring header reads\n\nCompare with other `I/O` breakdown panels to understand query cost distribution. 
High column header reads suggest schema optimization opportunities or need for more selective field access patterns.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3911,27 +10821,26 @@ "mode": "none" }, "thresholdsStyle": { - "mode": "line" + "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", - "value": 0.1 + "value": 80 } ] }, - "unit": "s" + "unit": "bytes" }, "overrides": [] }, @@ -3939,9 +10848,9 @@ "h": 8, "w": 12, "x": 12, - "y": 206 + "y": 160 }, - "id": 61, + "id": 158, "options": { "legend": { "calcs": [ @@ -3951,42 +10860,44 @@ ], "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true }, "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "desc" + "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "max(histogram_quantile(0.99, sum(rate(go_sched_latencies_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (job, instance, le))) by(job)", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_columns_headers_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (job, vmrange))) by (job)", + "hide": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "__auto", + "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "Go scheduling latency", + "title": "Column header bytes/query p99 ($instance)", "type": "timeseries" } ], - "title": "Resource usage", + "title": "Slow query troubleshooting", "type": "row" } ], "preload": false, 
"refresh": "", - "schemaVersion": 40, + "schemaVersion": 41, "tags": [ "victoriametrics", "victorialogs" @@ -3994,7 +10905,10 @@ "templating": { "list": [ { - "current": {}, + "current": { + "text": "default", + "value": "default" + }, "includeAll": false, "name": "ds", "options": [], @@ -4004,7 +10918,12 @@ "type": "datasource" }, { - "current": {}, + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, "datasource": { "type": "victoriametrics-metrics-datasource", "uid": "$ds" @@ -4023,7 +10942,12 @@ "type": "query" }, { - "current": {}, + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, "datasource": { "type": "victoriametrics-metrics-datasource", "uid": "$ds" @@ -4042,7 +10966,10 @@ "type": "query" }, { - "current": {}, + "current": { + "text": "", + "value": "" + }, "datasource": { "type": "victoriametrics-metrics-datasource", "uid": "${ds}" @@ -4054,7 +10981,7 @@ "query": { "qryType": 1, "query": "label_values(vm_app_version{job=~\"$job\", instance=~\"$instance\"},short_version)", - "refId": "VictoriaMetricsVariableQueryEditor-VariableQuery" + "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, "regex": "", @@ -4069,6 +10996,160 @@ "filters": [], "name": "adhoc", "type": "adhoc" + }, + { + "allowCustomValue": true, + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "definition": "label_values(vl_merges_total,instance)", + "hide": 2, + "includeAll": true, + "multi": true, + "name": "instance_storage_all", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(vl_merges_total,instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "allowCustomValue": true, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "definition": 
"label_values(vl_merges_total{instance=~\"$instance\"},instance)", + "hide": 2, + "includeAll": true, + "multi": true, + "name": "instance_storage", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(vl_merges_total{instance=~\"$instance\"},instance)", + "refId": "VariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "allowCustomValue": true, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "definition": "label_values(vlinsert_backend_conn_bytes_read_total,instance)", + "hide": 2, + "includeAll": true, + "multi": true, + "name": "instance_insert_all", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(vlinsert_backend_conn_bytes_read_total,instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "allowCustomValue": true, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "definition": "label_values(vlinsert_backend_conn_bytes_read_total{instance=~\"$instance\"},instance)", + "hide": 2, + "includeAll": true, + "multi": true, + "name": "instance_insert", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(vlinsert_backend_conn_bytes_read_total{instance=~\"$instance\"},instance)", + "refId": "VariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "allowCustomValue": true, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "definition": "label_values(vlselect_backend_conn_reads_total,instance)", + "description": "Instances of vlselect discovered from metrics; used to scope vlselect panels.", + "hide": 2, + "includeAll": true, + "multi": true, + "name": 
"instance_select_all", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(vlselect_backend_conn_reads_total,instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "allowCustomValue": true, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "definition": "label_values(vlselect_backend_conn_reads_total{instance=~\"$instance\"},instance)", + "description": "Instances of vlselect discovered from metrics; used to scope vlselect panels.", + "hide": 2, + "includeAll": true, + "multi": true, + "name": "instance_select", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(vlselect_backend_conn_reads_total{instance=~\"$instance\"},instance)", + "refId": "VariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" } ] }, @@ -4080,6 +11161,5 @@ "timezone": "", "title": "VictoriaLogs - cluster (VM)", "uid": "XqCOFEX4z_vm", - "version": 1, - "weekStart": "" + "version": 1 } \ No newline at end of file diff --git a/dashboards/vm/victorialogs.json b/dashboards/vm/victorialogs.json index ea1b8da5c5..e973aa5209 100644 --- a/dashboards/vm/victorialogs.json +++ b/dashboards/vm/victorialogs.json @@ -1,4 +1,3 @@ - { "annotations": { "list": [ @@ -43,7 +42,30 @@ "hide": true, "iconColor": "orange", "name": "restarts", - "textFormat": "{{job}} restarted" + "textFormat": "{{instance}} restarted" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "enable": false, + "filter": { + "exclude": false, + "ids": [ + 75 + ] + }, + "hide": false, + "iconColor": "#57f26d1c", + "name": "gc", + "target": { + "expr": "go_memstats_last_gc_time_seconds{job=~\"$job\", instance=~\"$instance\"} * 1000", + "interval": "", + "refId": "Anno" + }, + "textFormat": "GC event", + "useValueForTime": true } ] }, @@ -51,7 +73,7 @@ 
"editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 1, + "id": 8, "links": [], "panels": [ { @@ -72,7 +94,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows total amount of log entries in the storage.", + "description": "Total amount of log entries in the storage.", "fieldConfig": { "defaults": { "color": { @@ -84,7 +106,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, @@ -93,12 +115,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 4, "x": 0, "y": 1 }, - "id": 10, + "id": 133, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -118,7 +140,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -132,7 +154,7 @@ "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -144,7 +166,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows the cumulative number of log entries ingested over the last 24h.", + "description": "The total number of log entries ingested over the past 24 hours.", "fieldConfig": { "defaults": { "color": { @@ -156,7 +178,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, @@ -165,12 +187,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 4, "y": 1 }, - "id": 65, + "id": 134, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -190,7 +212,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -204,7 +226,7 @@ "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -216,7 +238,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Total amount of used disk space.\nAccounts for all compressed log entries and index size.\n\nSee how to 
[control disk usage](https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage).", + "description": "Average ingestion rate of log entries.", "fieldConfig": { "defaults": { "color": { @@ -228,21 +250,21 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, - "unit": "bytes" + "unit": "short" }, "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 9, "y": 1 }, - "id": 24, + "id": 135, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -262,7 +284,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -271,16 +293,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(vl_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", + "expr": "sum(rate(vl_rows_ingested_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], - "title": "Disk space usage", + "title": "Insert req/s", "type": "stat" }, { @@ -288,7 +310,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows the average ingestion rate of log entries.", + "description": "Total amount of used disk space.\nAccounts for all compressed log entries and index size.\n\nSee how to [control disk usage](https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage).", "fieldConfig": { "defaults": { "color": { @@ -300,21 +322,21 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 14, "y": 1 }, - "id": 22, + "id": 136, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -334,7 +356,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -343,16 +365,16 @@ }, 
"editorMode": "code", "exemplar": false, - "expr": "sum(rate(vl_rows_ingested_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(vl_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], - "title": "Insert req/s", + "title": "Disk space usage", "type": "stat" }, { @@ -360,7 +382,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Total number of available CPUs for VictoriaLogs process", + "description": "Integer number of CPU cores available to the application. This value is automatically rounded down from fractional CPU quotas. For optimal performance, fractional CPU units should be avoided. See the [best practices](https://docs.victoriametrics.com/victoriametrics/bestpractices/#kubernetes) documentation for more details.", "fieldConfig": { "defaults": { "color": { @@ -372,7 +394,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -385,12 +407,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 19, "y": 1 }, - "id": 30, + "id": 137, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -410,7 +432,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -424,7 +446,7 @@ "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -438,12 +460,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 4, "x": 0, - "y": 3 + "y": 5 }, - "id": 63, + "id": 138, "options": { "code": { "language": "plaintext", @@ -453,7 +475,7 @@ "content": "
$version
", "mode": "markdown" }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "title": "Version", "type": "text" }, @@ -462,7 +484,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows the cumulative number of log entries ingested over the last 24h. \n\nThe size is calculated before compression.", + "description": "The cumulative number of log entries ingested over the last 24h. \n\nThe size is calculated before compression.", "fieldConfig": { "defaults": { "color": { @@ -474,7 +496,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, @@ -483,12 +505,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 4, - "y": 3 + "y": 5 }, - "id": 64, + "id": 139, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -508,7 +530,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -522,7 +544,7 @@ "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -534,7 +556,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "The ratio between original data size and compressed data stored on disk.\n\nCompression ratio doesn't account for indexdb size. 
It also may change with time, as [merges](https://docs.victoriametrics.com/victorialogs/#forced-merge) are improving compression of historical data.", + "description": "Rate of HTTP read requests.", "fieldConfig": { "defaults": { "color": { @@ -546,21 +568,21 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, - "unit": "none" + "unit": "short" }, "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 9, - "y": 3 + "y": 5 }, - "id": 25, + "id": 140, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -580,7 +602,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -589,16 +611,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": " sum(vl_uncompressed_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) / sum(vl_compressed_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", + "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance\", path=~\"/select/.*\"}[$__rate_interval]))", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], - "title": "Compression ratio", + "title": "Read req/s", "type": "stat" }, { @@ -606,7 +628,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows the rate of HTTP read requests.", + "description": "The ratio between original data size and compressed data stored on disk. This metric excludes indexdb size. \n\nThe ratio can go up or down as the system performs automatic maintenance and applies retention policies. 
For example:\n- Background merges: [Merges](https://docs.victoriametrics.com/victorialogs/#forced-merge) improve compression by combining data into larger, more efficiently compressed blocks\n- Retention policies: When old data is deleted due to retention settings, the ratio changes as different time periods have varying compression characteristics\n\n", "fieldConfig": { "defaults": { "color": { @@ -618,21 +640,21 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, - "unit": "short" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 14, - "y": 3 + "y": 5 }, - "id": 36, + "id": 141, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -652,7 +674,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -661,16 +683,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance\", path=~\"/select/.*\"}[$__rate_interval]))", + "expr": " sum(vl_uncompressed_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) / sum(vl_compressed_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "__auto", + "legendFormat": "{{instance}}", "refId": "A" } ], - "title": "Read req/s", + "title": "Compression ratio", "type": "stat" }, { @@ -678,7 +700,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Total size of available memory for VictoriaLogs process", + "description": "Total system memory available to the application. 
This represents the system or container's memory capacity or limit, not the currently free memory.", "fieldConfig": { "defaults": { "color": { @@ -690,7 +712,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 } ] }, @@ -699,12 +721,12 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 5, "x": 19, - "y": 3 + "y": 5 }, - "id": 34, + "id": 142, "maxDataPoints": 100, "options": { "colorMode": "value", @@ -724,19 +746,20 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "uid": "$ds" }, + "editorMode": "code", "exemplar": false, "expr": "sum(vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"})", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, - "legendFormat": "", + "legendFormat": "total", "refId": "A" } ], @@ -749,7 +772,7 @@ "h": 1, "w": 24, "x": 0, - "y": 5 + "y": 9 }, "id": 18, "panels": [], @@ -761,7 +784,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows ingestion rate in number of log entries and bytes per second.", + "description": "Ingestion rate in number of log entries and bytes per second.", "fieldConfig": { "defaults": { "color": { @@ -808,7 +831,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -841,7 +864,7 @@ "h": 8, "w": 12, "x": 0, - "y": 6 + "y": 10 }, "id": 2, "options": { @@ -862,7 +885,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -892,7 +915,7 @@ "refId": "B" } ], - "title": "Logs ingestion rate ", + "title": "Logs ingestion rate", "type": "timeseries" }, { @@ -942,12 +965,13 @@ "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -963,7 +987,7 @@ "h": 8, "w": 12, "x": 12, - "y": 6 + "y": 10 }, "id": 14, "options": { @@ -985,7 
+1009,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1002,7 +1026,7 @@ "refId": "A" } ], - "title": "Requests rate ", + "title": "Requests rate", "type": "timeseries" }, { @@ -1025,7 +1049,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1057,7 +1081,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1073,7 +1097,7 @@ "h": 8, "w": 12, "x": 0, - "y": 14 + "y": 18 }, "id": 69, "options": { @@ -1092,7 +1116,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1101,11 +1125,11 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(vm_http_request_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, path) > 0", + "expr": "sum(rate(vm_http_request_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (path)", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}} - {{path}}", + "legendFormat": "{{path}}", "range": true, "refId": "A" }, @@ -1116,12 +1140,12 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(vl_http_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, path) > 0", + "expr": "sum(rate(vl_http_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (path)", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{instance}} - {{path}}", + "legendFormat": "{{path}}", "range": true, "refId": "B" } @@ -1134,7 +1158,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "The less time it takes is better.\n", + "description": "99th percentile response time for VictoriaLogs HTTP endpoints, grouped by path. 
This means 99% of requests are faster than this value. **Lower numbers are better**, as they indicate faster responses and fewer slow requests.", "fieldConfig": { "defaults": { "color": { @@ -1149,7 +1173,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1176,12 +1200,13 @@ "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1197,7 +1222,7 @@ "h": 8, "w": 12, "x": 12, - "y": 14 + "y": 18 }, "id": 66, "options": { @@ -1219,7 +1244,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1227,15 +1252,15 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "max(vl_http_request_duration_seconds{job=~\"$job\", instance=~\"$instance\", quantile=\"0.99\"}) by (instance, path) > 0", + "expr": "max(vl_http_request_duration_seconds{job=~\"$job\", instance=~\"$instance\", quantile=\"0.99\"}) by (path) > 0", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{instance}} - {{path}}", + "legendFormat": "{{path}}", "range": true, "refId": "A" } ], - "title": "Query duration 0.99 quantile", + "title": "Request duration p99", "type": "timeseries" }, { @@ -1243,7 +1268,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows the amount of on-disk space occupied by all the data stored in the storage.\n\nSee how to [control disk usage](https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage).", + "description": "Amount of on-disk space occupied by all the data stored in the storage.\n\nSee how to [control disk usage](https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage).", "fieldConfig": { "defaults": { "color": { @@ -1255,6 +1280,8 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": 
"auto", + "axisSoftMax": 1, + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1290,7 +1317,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1306,7 +1333,7 @@ "h": 8, "w": 12, "x": 0, - "y": 22 + "y": 26 }, "id": 6, "options": { @@ -1328,7 +1355,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1336,16 +1363,17 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(vl_data_size_bytes{job=~\"$job\", instance=~\"$instance\"})", + "expr": "max(vl_total_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}) by (instance)", "format": "time_series", + "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "disk usage", + "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "Disk space usage ", + "title": "Disk space usage", "type": "timeseries" }, { @@ -1353,7 +1381,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows the rate of logging the messages by their level. 
Unexpected spike in rate is a good reason to check logs.", + "description": "Rate of VictoriaLogs' own application log messages (errors, warnings, debug) - NOT the logs that VictoriaLogs is collecting from external sources.", "fieldConfig": { "defaults": { "color": { @@ -1368,7 +1396,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "bars", - "fillOpacity": 100, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1398,7 +1426,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1407,13 +1435,44 @@ ] } }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*warn:.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*error:.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 22 + "y": 26 }, "id": 67, "options": { @@ -1435,7 +1494,7 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1443,147 +1502,3353 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (instance, level, location) > 0", + "expr": "sum(rate(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (instance, level, location)", "interval": "5m", "legendFormat": "{{instance}} - {{level}}: {{location}}", "range": true, "refId": "A" } ], - "title": "Logging rate", + "title": "VictoriaLogs internal logging", "type": "timeseries" }, { - "collapsed": true, + "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 30 + "y": 34 }, "id": 68, - "panels": [ - { - "datasource": { - "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + 
"panels": [], + "title": "Troubleshooting", + "type": "row" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Number of restarts per job. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. \n\nNormally, processes shouldn't restart unless restart was inited by user. The reason of restarts should be figured out by checking the logs of each specific service. ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "description": "Shows the number of restarts per job. The chart can be useful to identify periodic process restarts and correlate them with potential issues or anomalies. Normally, processes shouldn't restart unless restart was inited by user. The reason of restarts should be figured out by checking the logs of each specific service. ", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "stepAfter", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 0, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + 
"barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 315 - }, - "id": 62, - "options": { - "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "sortBy": "Last *", - "sortDesc": true + "insertNulls": false, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "desc" + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "pluginVersion": "11.5.0", - "targets": [ - { - "datasource": { - "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "decimals": 0, + "links": [], + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 }, - "editorMode": "code", - "expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) > 0) by(job)", - "format": "time_series", - "instant": false, - "legendFormat": "{{job}}", - "refId": "A" - } + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 35 + }, + "id": 62, + "options": { + "legend": { + "calcs": [ + "lastNotNull" ], - "title": "Restarts", - "type": "timeseries" + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ { "datasource": { "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "The number of the new [log 
streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) created over the last 24h.\n\nPrefer having as low churn rate as possible. \nSee [How to determine which fields must be associated with log streams?](https://docs.victoriametrics.com/victorialogs/keyconcepts/#how-to-determine-which-fields-must-be-associated-with-log-streams)\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", + "editorMode": "code", + "expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Restarts", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "The number of new [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) created over the last 24 hours. Lower rate is better. 
See [How to determine which fields must be associated with log streams?](https://docs.victoriametrics.com/victorialogs/keyconcepts/#how-to-determine-which-fields-must-be-associated-with-log-streams)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 35 + }, + "id": 26, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(increase(vl_streams_created_total{job=~\"$job\", instance=~\"$instance\"}[1d])) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Streams churn rate 24h 
($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Flags explicitly set to non-default values", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "job" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 43 + }, + "id": 70, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(flag{is_set=\"true\", job=~\"$job\", instance=~\"$instance\"}) by(job, instance, name, value)", + "format": "table", + "instant": true, + "legendFormat": "{{name}}={{value}}", + "range": false, + "refId": "A" + } + ], + "title": "Non-default flags", + "type": "table" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Number of log entries ignored or dropped on insertion due to the following reasons:\n* Timestamp out of the retention period or in the future\n* Number of fields per entry exceeded\n* Line too long\n\nIf this 
occurs, check the VictoriaLogs log for details.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 43 + }, + "id": 71, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(vl_rows_dropped_total{job=~\"$job\", instance=~\"$instance\"}[1h])) by (reason) ", + "hide": false, + "interval": "", + "legendFormat": "{{reason}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(increase(vl_too_long_lines_skipped_total{job=~\"$job\", instance=~\"$instance\"}[1h]))", + "hide": false, + "instant": false, + "legendFormat": 
"line_too_long", + "range": true, + "refId": "B" + } + ], + "title": "Logs dropped for last 1h", + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 51 + }, + "id": 28, + "panels": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Percentage of physical RAM used compared to the memory limit. If this percentage is high, check the `RSS` anonymous vs resident ratio panel for more details.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 0.8 + }, + { + "color": "red", + "value": 0.9 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 0, + "y": 52 + }, + "id": 79, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(\n max_over_time(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n)", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "RSS % of memory limit", + "type": "stat" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "How much memory (RAM) the 
VictoriaLogs process is actually using, compared to its allowed container or system limit. See 'Memory Usage' panel for a detailed breakdown.\n\n- Good: Below 70% most of the time, maybe spiking a bit under load.\n- Bad: Above 90% for more than 5 minutes = risk of out-of-memory (OOM) kill.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 17, + "x": 7, + "y": 52 + }, + "id": 38, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(\n max_over_time(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n) by 
(instance)", + "interval": "", + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Physical memory usage ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Portion of RAM that CANNOT be reclaimed without swapping. If both the `RSS`-to-limit percentage and this ratio are high, the process is at high risk of an `OOM` kill.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 0, + "y": 57 + }, + "id": 78, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(process_resident_memory_anon_bytes{job=~\"$job\",instance=~\"$instance\"}\n/\nprocess_resident_memory_bytes{job=~\"$job\",instance=~\"$instance\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Anonymous / Resident ratio", + "type": "stat" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "- Anonymous resident memory: Private memory allocated to the application that **cannot** be reclaimed by the kernel. 
Refer to the [Check/profile](https://docs.victoriametrics.com/victorialogs/#profiling) Go heap section for troubleshooting.\n- File-backed resident memory: Memory mapped from files, which can be safely reclaimed. Increases during querying. Correlate with `I/O` panels for further analysis.\n- Shared resident memory: Typically negligible. Large spikes may indicate unexpected shared memory consumers.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 17, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 62 + }, + "id": 40, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "hide": false, + 
"intervalFactor": 1, + "legendFormat": "anonymous resident memory", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(process_resident_memory_file_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "file-backed resident memory", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(process_resident_memory_shared_bytes{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "shared resident memory", + "range": true, + "refId": "C" + } + ], + "title": "Memory usage ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 62 + 
}, + "id": 118, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}\n) by(instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU % usage ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Lower is better, e.g. 20% means the process was delayed by memory pressure 20% of the time. 
See [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n- waiting: Time fraction where at least one thread was blocked on memory.\n- stalled: Time fraction where every thread was blocked on memory (severe pressure).\n\nIf queries slow down and both series spike, the host is likely limited by RAM or I/O throughput.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 0.2 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 70 + }, + "id": 73, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(process_pressure_memory_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (instance)", + "format": "time_series", + 
"hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} - waiting", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(process_pressure_memory_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} - stalled", + "range": true, + "refId": "B" + } + ], + "title": "Memory pressure", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Helps troubleshoot high CPU usage or throttling:\n\n- waiting: The percentage of time at least one task in the VictoriaLogs process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: The percentage of time all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there's a CPU burst, it's normal to see waiting or stalled > 1%. 
It only becomes a concern if it consistently climbs above 5–10% and aligns with latency spikes or GC slowdowns.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 70 + }, + "id": 72, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(process_pressure_cpu_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}: waiting", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr":
"sum(rate(process_pressure_cpu_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}: stalled", + "range": true, + "refId": "B" + } + ], + "title": "CPU pressure", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "The higher the better.\n\nThis computes the fraction of read bytes that came from the page cache (i.e., not from disk). It answers: \"Of all the bytes my process read via read(), how many were cache hits?\"\n\nThat's why you can see many read syscalls (read() calls), but the actual disk reads stay low, because the data was already in RAM.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 0, + "y": 78 + }, + "id": 80, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "1 - (sum(rate(process_io_storage_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) / sum(rate(process_io_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])))", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat":
"{{instance}}", + "refId": "A" + } + ], + "title": "Page-cache hit ratio", + "type": "stat" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Number of read/write calls application makes:\n\n- read call: Number of read*()-family system calls your process has issued since start. Each call can move 1 byte or megabytes, cached or uncached.\n- write call: Number of write*()-family system calls (including write, pwrite, writev, etc.) made by the process.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "read calls" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 4, + "y": 78 + }, + "id": 56, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + 
"targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(process_io_read_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "read calls", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(process_io_write_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "write calls", + "range": true, + "refId": "B" + } + ], + "title": "Read/Write syscalls ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Percentage of open file descriptors (files, sockets, pipes, etc.,) compared to the limit set in the OS. 
Reaching the limit of open files can cause various issues and must be prevented.\n\nSee [how to change limits](https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a).", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "max" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 78 + }, + "id": 46, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max_over_time(process_open_fds{job=~\"$job\", 
instance=~\"$instance\"}[$__rate_interval])\n/\nprocess_max_fds{job=~\"$job\", instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Open FDs % usage ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Measure the actual bytes read from and written to disk by the process:\n\n- read: physical bytes the kernel actually pulled from the storage device on behalf of the process (after checking page-cache).\n- write: physical bytes the kernel ultimately wrote to the storage device for the process (after combining, caching, or delaying writes).", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "read" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 86 + }, + "id": 52, + "options": { + "legend": { + "calcs": [ + "mean", + 
"lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(process_io_storage_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "read", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(process_io_storage_written_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "write", + "range": true, + "refId": "C" + } + ], + "title": "Disk writes/reads ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "IO pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html). 
The lower the better.\n\n- waiting: Time fraction where at least one thread was blocked on block I/O (disk, NVMe, network storage) while others could still make progress.\n- stalled: Time fraction where all non-idle threads were waiting on I/O simultaneously; no useful user code ran during these periods (true I/O thrashing).\n\nIf stalled > 0 while querying, consider increasing the NVMe queue depth, raising blk-mq budgets, or relaxing cgroup I/O limits.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 86 + }, + "id": 74, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(process_pressure_io_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by
(instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}: waiting", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(process_pressure_io_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}: stalled", + "range": true, + "refId": "B" + } + ], + "title": "IO pressure", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Current number of active TCP connections to VictoriaLogs. This metric helps monitor connection pool usage and identify potential connection leaks. High values may indicate clients not properly closing connections or connection pooling issues. Monitor for gradual increases that could lead to resource exhaustion.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + 
"h": 8, + "w": 12, + "x": 0, + "y": 94 + }, + "id": 60, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(vm_tcplistener_conns{job=~\"$job\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "TCP connections ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 94 + }, + "id": 50, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", 
+ "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(go_goroutines{job=~\"$job\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Goroutines ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Rate of incoming TCP connections accepted by VictoriaLogs. This metric indicates network activity and client connection patterns. Sudden spikes may indicate increased load or potential DDoS attacks. Sustained high rates should be correlated with resource usage to ensure adequate capacity.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + 
"gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 102 + }, + "id": 58, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(rate(vm_tcplistener_accepts_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "TCP connections rate ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 102 + }, + "id": 54, + "options": { 
+ "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(process_num_threads{job=~\"$job\", instance=~\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Threads ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Percentage of CPU time spent on garbage collection.\n\nIf the percentage is high, CPU usage can be decreased by setting `GOGC` to a higher value. Increasing `GOGC` increases memory usage and decreases CPU usage.\n\nSearch for the `GOGC` keyword at https://docs.victoriametrics.com/victoriametrics/troubleshooting/", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { +
"color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 110 + }, + "id": 119, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(\n rate(go_gc_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) \n / rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n ) by(instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU spent on GC ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Time goroutines have spent in runnable state before actually running. Lower is better.\n\nHigh values, or values exceeding the threshold, are usually a sign of insufficient CPU resources or CPU throttling.\n\nVerify that the service has enough CPU resources.
Otherwise, the service could work unreliably with delays in processing.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 0.1 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 110 + }, + "id": 61, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(histogram_quantile(0.99, sum(rate(go_sched_latencies_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (instance, le))) by (instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Go scheduling latency", + "type": "timeseries" + } + ], + "title": "Resource usage", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 52 
+ }, + "id": 82, + "panels": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Total number of time-based (daily) partitions in storage. The number typically grows over time as new data arrives and is partitioned by time periods. \n\nExcessive partition counts may indicate retention policy issues or very high data ingestion rates that could impact query performance.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 3, + "x": 0, + "y": 53 + }, + "id": 88, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(vl_partitions{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Partition Count", + "type": "stat" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "description": "Number of storage parts (data files) in each tier. More parts mean fragmentation; fewer parts suggest successful merging. 
High part counts may slow queries and trigger background merge operations.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 9, + "x": 3, + "y": 53 + }, + "id": 84, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(vl_storage_parts{job=~\"$job\", instance=~\"$instance\"}) by(type)", + "hide": false, + "interval": "", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Part count max by type ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "description": "Disk space usage and limits for VictoriaLogs storage. 
Tracks current data usage against the configured retention limit or available disk space.\n\nThe red line indicates the space retention limit. When usage approaches this limit, older data will be automatically deleted. If the space retention limit (`-retention.maxDiskSpaceUsageBytes`) is not specified, the red line represents the maximum disk space. In that case, the storage will switch to read-only mode when the limit is reached.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 1, + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "max space retention of instances" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + }, + { + "id": "custom.insertNulls", + "value": 3600000 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 53 + }, + "id": 110, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", 
+ "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(vl_max_disk_space_usage_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) by (job)\n/\nmax(vl_total_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}) by (job) > 0", + "hide": false, + "interval": "", + "legendFormat": "max space retention of instances", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(vl_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) by (job)\n/\nmax(vl_total_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}) by (job)", + "hide": false, + "interval": "", + "legendFormat": "{{job}}", + "range": true, + "refId": "B" + } + ], + "title": "Disk ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "description": "Number of storage merge operations by type (sum across instances). 
Merges compact smaller parts into larger ones; bursts are normal after activity spikes.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 61 + }, + "id": 112, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(vl_merges_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(type)", + "hide": false, + "interval": "$__rate_interval", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Merge events", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "description": "99th percentile 
duration of merge operations by storage type. Merge operations combine smaller storage parts into larger ones for optimization. \n\nNormal merge durations vary by storage type and data volume. Consistently high durations may indicate storage performance issues, high write load, or insufficient resources for background operations. Monitor for trends that could impact overall system performance.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 61 + }, + "id": 86, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(vl_merge_duration_seconds{job=~\"$job\", instance=~\"$instance\", quantile=\"0.99\"}) by (type) > 0", + "hide":
false, + "interval": "", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Merge duration p99 ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "description": "99th percentile of data volume processed during merge operations by storage type. \n\nThis metric indicates the scale of background storage optimization activities. Larger merge sizes generally improve storage efficiency but require more resources. Consistently high values may indicate heavy write loads or large storage parts that need optimization. Monitor correlation with merge duration for performance insights.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 69 + }, + "id": 109, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + 
} + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(vl_merge_bytes{job=~\"$job\", instance=~\"$instance\", quantile=\"0.99\"}) by (type) > 0", + "hide": false, + "interval": "", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Merge bytes p99 ($instance)", + "type": "timeseries" + } + ], + "title": "Storage", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 53 + }, + "id": 83, + "panels": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Rate of incoming log insertion requests by endpoint path. This metric tracks the ingestion load on VictoriaLogs, including different insertion methods and protocols.\n\nHigher rates indicate increased log ingestion activity. Monitor for sudden spikes that might indicate new log sources, application deployments, or potential issues requiring capacity planning. 
Sustained high rates may require scaling ingestion capacity.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*(bytes)/" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 54 + }, + "id": 113, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vl_rows_ingested_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (type) > 0", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type":
"victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vl_bytes_ingested_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (type) > 0", + "hide": false, + "instant": false, + "legendFormat": "{{type}} (bytes)", + "range": true, + "refId": "B" + } + ], + "title": "Logs ingestion rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "Rate of incoming insert requests by endpoint path. This metric tracks the ingestion request load on VictoriaLogs, including different insertion endpoints and internal operations. \n\nHigher rates indicate increased ingestion activity from log shippers or applications. Monitor for sudden spikes that might indicate new log sources, application deployments, or potential performance issues. Sustained high rates may require scaling ingestion capacity.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -1616,11 +4881,13 @@ "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": 0 }, { "color": "red", @@ -1636,9 +4903,9 @@ "h": 8, "w": 12, "x": 12, - "y": 315 + "y": 54 }, - "id": 26, + "id": 117, "options": { "legend": { "calcs": [ @@ -1658,7 +4925,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1666,16 +4933,16 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "increase(vl_streams_created_total{job=~\"$job\", instance=~\"$instance\"}[1d])", + "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance\",path=~\"^/(internal/)?insert.*\"}[$__rate_interval])) by (path) > 0", "format": "time_series", "interval": "", "intervalFactor": 1, -
"legendFormat": "new log streams over 24h", + "legendFormat": "{{path}}", "range": true, "refId": "A" } ], - "title": "Log stream churn rate", + "title": "Request rate", "type": "timeseries" }, { @@ -1683,92 +4950,202 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Flags explicitly set to non-default values", + "description": "99th percentile of insert operation duration. This represents the time it takes for 99% of insert operations to complete. High values indicate slow ingestion performance that could affect overall system throughput. Spikes may suggest storage bottlenecks, resource contention, or inefficient data processing.", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" }, "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "inspect": false + "thresholdsStyle": { + "mode": "off" + } }, + "links": [], "mappings": [], + "min": 0, "thresholds": { "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": 0 }, { "color": "red", "value": 80 } ] - } + }, + "unit": "s" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Time" - }, - "properties": [ - { - "id": "custom.hidden", - "value": true - } - ] + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 62 + }, + "id": 103, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + 
"displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.1.0", + "targets": [ + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" }, - { - "matcher": { - "id": "byName", - "options": "Value" - }, - "properties": [ - { - "id": "custom.hidden", - "value": true - } - ] + "editorMode": "code", + "expr": "max(vl_http_request_duration_seconds{job=~\"$job\", instance=~\"$instance\", quantile=\"0.99\", path=~\"^/(internal/)?insert.*\"}) by (path) > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{path}}", + "range": true, + "refId": "A" + } + ], + "title": "Insert duration p99", + "type": "timeseries" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "description": "99th percentile response time for VictoriaLogs HTTP endpoints, grouped by instance and path. This means 99% of requests are faster than this value. 
**Lower numbers are better**, as they indicate faster responses and fewer slow requests.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - { - "matcher": { - "id": "byName", - "options": "job" + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "properties": [ + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ { - "id": "custom.hidden", - "value": true + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 } ] - } - ] + }, + "unit": "s" + }, + "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 379 + "x": 12, + "y": 62 }, - "id": 70, + "id": 116, "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" ], - "show": false + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true }, - "showHeader": true + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1776,28 +5153,27 @@ "uid": "$ds" }, "editorMode": "code", - "exemplar": false, - "expr": "sum(flag{is_set=\"true\", job=~\"$job\", instance=~\"$instance\"}) by(job, instance, name, value)", - "format": "table", - "instant": true, - 
"legendFormat": "__auto", - "range": false, + "expr": "max(vl_http_request_duration_seconds{job=~\"$job\", instance=~\"$instance\", quantile=\"0.99\", path=~\"^/(internal/)?insert.*\"}) by (path) > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{path}}", + "range": true, "refId": "A" } ], - "title": "Non-default flags", - "type": "table" + "title": "Request duration p99 ($instance)", + "type": "timeseries" }, { "datasource": { "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows how many log entries were ignored or dropped on insertion due to various reasons:\n* timestamp out of retention period or timestamp in future;\n* number of fields per entry exceeded.", + "description": "Number of active message processors handling log ingestion. These processors parse and process incoming log messages before storage. The count typically correlates with the ingestion load.\n\nIf this number is unusually high or inflated (which is rare), check memory usage. 
It may indicate a heavy concurrent ingestion load or processing bottlenecks that could benefit from performance tuning.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -1809,7 +5185,7 @@ "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, - "gradientMode": "none", + "gradientMode": "opacity", "hideFrom": { "legend": false, "tooltip": false, @@ -1832,12 +5208,17 @@ "mode": "off" } }, + "decimals": 0, + "links": [], "mappings": [], + "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": 0 }, { "color": "red", @@ -1852,10 +5233,10 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 379 + "x": 0, + "y": 70 }, - "id": 71, + "id": 97, "options": { "legend": { "calcs": [ @@ -1872,10 +5253,10 @@ "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "none" + "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -1883,42 +5264,41 @@ "uid": "$ds" }, "editorMode": "code", - "exemplar": true, - "expr": "sum(increase(vl_rows_dropped_total{job=~\"$job\", instance=~\"$instance\"}[1h])) by (instance, reason)", - "hide": false, + "expr": "max(vl_insert_processors_count{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) by (instance)", + "format": "time_series", "interval": "", - "legendFormat": "{{instance}} - {{reason}}", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "victoriametrics-metrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(vm_concurrent_insert_capacity{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "soft limit", "range": true, - "refId": "A" + "refId": "B" } ], - "title": "Logs dropped for last 1h", + "title": "Message processors
($instance)", "type": "timeseries" - } - ], - "title": "Troubleshooting", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 31 - }, - "id": 28, - "panels": [ + }, { "datasource": { "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Percentage of used memory (resident).\nThe application's performance will significantly degrade when memory usage is close to 100%.", + "description": "Number of log rows waiting to be written to storage, categorized by type. Pending rows indicate temporary queuing during ingestion. Consistently high values may suggest storage write bottlenecks or insufficient write capacity.\n\nPending rows are flushed in two ways:\n\n- After a specific time period (typically 1 second)\n- When the pending row size exceeds a threshold (typically 1.75 MB)", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -1961,7 +5341,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -1969,17 +5349,17 @@ } ] }, - "unit": "percentunit" + "unit": "short" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 32 + "x": 12, + "y": 70 }, - "id": 38, + "id": 104, "options": { "legend": { "calcs": [ @@ -1999,7 +5379,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2007,15 +5387,16 @@ "uid": "$ds" }, "editorMode": "code", - "exemplar": false, - "expr": "max(\n max_over_time(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n) by(instance)", + "expr": "sum(vl_pending_rows{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) by (type)", + "format": "time_series", "interval": "", - "legendFormat": "__auto", + "intervalFactor": 1, + "legendFormat": "{{type}}", "range": 
true, "refId": "A" } ], - "title": "RSS memory % usage ($instance)", + "title": "Pending rows ($instance)", "type": "timeseries" }, { @@ -2023,6 +5404,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, + "description": "Rate at which the number of concurrent insert operations reaches the configured limit `-maxConcurrentInserts` (default: 2x CPU cores).\n", "fieldConfig": { "defaults": { "color": { @@ -2038,7 +5420,7 @@ "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, - "gradientMode": "none", + "gradientMode": "opacity", "hideFrom": { "legend": false, "tooltip": false, @@ -2069,7 +5451,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -2077,17 +5459,33 @@ } ] }, - "unit": "percentunit" + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 32 + "x": 0, + "y": 78 }, - "id": 44, + "id": 143, "options": { "legend": { "calcs": [ @@ -2104,10 +5502,10 @@ "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "none" + "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2115,17 +5513,16 @@ "uid": "$ds" }, "editorMode": "code", - "exemplar": false, - "expr": "max(\n rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}\n) by(instance)", + "expr": "max(rate(vm_concurrent_insert_limit_reached_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance)", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "__auto", + "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "CPU % usage ($instance)", + "title": "Concurrent insert limit reached ($instance)", "type": "timeseries" }, { @@ -2133,7 +5530,7 @@ "type":
"victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Share for memory allocated by the process itself. When memory usage reaches 100% it will be likely OOM-killed.\nSafe memory usage % considered to be below 80%", + "description": "Number of insert requests that timed out while waiting for available concurrency slots. This indicates sustained ingestion overload beyond configured limits.\n\nHigh values suggest:\n- Insert queue is consistently full\n- Insert requests waiting too long for execution slots\n- System under sustained heavy ingestion load\n- Need for horizontal scaling or ingestion optimization\n\nCombined with `Concurrent insert limit reached`, provides complete picture of ingestion rejection patterns.", "fieldConfig": { "defaults": { "color": { @@ -2153,6 +5550,7 @@ "hideFrom": { "legend": false, "tooltip": false, + "vis": false, "viz": false }, "insertNulls": false, @@ -2172,33 +5570,31 @@ "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", - "value": 80 + "value": 1 } ] }, - "unit": "percentunit" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 40 + "x": 12, + "y": 78 }, - "id": 42, + "id": 131, "options": { "legend": { "calcs": [ @@ -2208,17 +5604,15 @@ ], "displayMode": "table", "placement": "bottom", - "showLegend": true, - "sortBy": "Last *", - "sortDesc": true + "showLegend": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "desc" + "mode": "single", + "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2226,15 +5620,14 @@ "uid": "$ds" }, "editorMode": "code", - "exemplar": false, - "expr": "max(\n max_over_time(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n)
by(instance)", + "expr": "sum(rate(vm_concurrent_insert_limit_timeout_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance)", "interval": "", - "legendFormat": "__auto", + "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "RSS anonymous memory % usage ($instance)", + "title": "Insert timeouts ($instance)", "type": "timeseries" }, { @@ -2242,7 +5635,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "", + "description": "99th percentile duration of background flush operations by type. High values may indicate disk pressure or heavy ingestion. Correlate with `I/O` panels.", "fieldConfig": { "defaults": { "color": { @@ -2289,7 +5682,7 @@ "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -2297,33 +5690,17 @@ } ] }, - "unit": "short" + "unit": "s" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F2495C", - "mode": "fixed" - } - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 40 + "x": 0, + "y": 86 }, - "id": 48, + "id": 115, "options": { "legend": { "calcs": [ @@ -2343,7 +5720,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2351,38 +5728,37 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])", + "expr": "sum(vl_insert_flush_duration_seconds{job=~\"$job\", instance=~\"$instance\", quantile=\"0.99\"}) by (type) > 0", "format": "time_series", - "interval": "", "intervalFactor": 1, - "legendFormat": "CPU cores used", + "legendFormat": "{{type}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "victoriametrics-metrics-datasource", - "uid": "$ds" - }, - "exemplar": false, - "expr": "process_cpu_cores_available{job=~\"$job\",
instance=~\"$instance\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "Limit", - "refId": "B" } ], - "title": "CPU ($instance)", + "title": "Flush duration p99", "type": "timeseries" - }, + } + ], + "title": "Ingestion", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 54 + }, + "id": 81, + "panels": [ { "datasource": { "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "", + "description": "Rate of incoming query requests by endpoint path. This metric tracks the query load on VictoriaLogs, including different query interfaces and internal operations. \n\nHigher rates indicate increased query activity from users or applications. Monitor for sudden spikes that might indicate new dashboards, automated queries, or potential performance issues. Sustained high rates may require scaling query processing capacity.", "fieldConfig": { "defaults": { "color": { @@ -2424,12 +5800,13 @@ "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -2437,7 +5814,7 @@ } ] }, - "unit": "bytes" + "unit": "short" }, "overrides": [] }, @@ -2445,9 +5822,9 @@ "h": 8, "w": 12, "x": 0, - "y": 48 + "y": 55 }, - "id": 40, + "id": 102, "options": { "legend": { "calcs": [ @@ -2467,73 +5844,24 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "expr": "sum(go_memstats_sys_bytes{job=~\"$job\", instance=~\"$instance\"}) + sum(vm_cache_size_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "requested from system", - "refId": "A" - }, - { - "datasource": { - "type": "victoriametrics-metrics-datasource", - "uid": "$ds" - }, - "expr": 
"sum(go_memstats_heap_inuse_bytes{job=~\"$job\", instance=~\"$instance\"}) + sum(vm_cache_size_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "heap inuse", - "refId": "B" - }, - { - "datasource": { - "type": "victoriametrics-metrics-datasource", - "uid": "$ds" - }, - "expr": "sum(go_memstats_stack_inuse_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "stack inuse", - "refId": "C" - }, - { - "datasource": { - "type": "victoriametrics-metrics-datasource", - "uid": "$ds" - }, - "expr": "sum(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "resident", - "refId": "D" - }, - { - "datasource": { - "type": "victoriametrics-metrics-datasource", - "uid": "$ds" - }, - "exemplar": false, - "expr": "sum(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance\"})", + "editorMode": "code", + "expr": "sum(rate(vl_http_requests_total{job=~\"$job\", instance=~\"$instance\",path=~\"^/(internal/)?select.*\"}[$__rate_interval])) by (path) > 0", "format": "time_series", - "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "resident anonymous", - "refId": "E" + "legendFormat": "{{path}}", + "range": true, + "refId": "A" } ], - "title": "Memory usage ($instance)", + "title": "Query rate", "type": "timeseries" }, { @@ -2541,7 +5869,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\nThe lower the better.", + "description": "99th percentile of query execution duration. This represents the time it takes for 99% of queries to complete:\n\n- High values indicate slow query performance that affects user experience. 
\n- Spikes may suggest complex queries, resource contention, or inefficient indexes. Monitor for trends that could indicate degrading performance.", "fieldConfig": { "defaults": { "color": { @@ -2577,19 +5905,23 @@ "mode": "none" }, "thresholdsStyle": { - "mode": "line" + "mode": "off" } }, - "decimals": 0, "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 + }, + { + "color": "red", + "value": 80 } ] }, @@ -2601,9 +5933,9 @@ "h": 8, "w": 12, "x": 12, - "y": 48 + "y": 55 }, - "id": 72, + "id": 108, "options": { "legend": { "calcs": [ @@ -2623,7 +5955,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2631,31 +5963,15 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(process_pressure_cpu_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job, instance)", + "expr": "max(vl_http_request_duration_seconds{job=~\"$job\", instance=~\"$instance\", quantile=\"0.99\", path=~\"^/(internal/)?select.*\"}) by (path) > 0", "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - waiting", + "intervalFactor": 1, + "legendFormat": "{{path}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "victoriametrics-metrics-datasource", - "uid": "$ds" - }, - "editorMode": "code", - "expr": "sum(rate(process_pressure_cpu_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job, instance)", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - stalled", - "range": true, - "refId": "B" } ], - "title": "CPU pressure", + "title": "Query duration p99", "type": "timeseries" }, { @@ -2663,7 +5979,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows memory pressure based on [Pressure Stall 
Information](https://docs.kernel.org/accounting/psi.html).\n\nThe lower the better.", + "description": "Number of concurrent select (query) operations compared to the configured limit.\n\nHigh utilization near the limit may indicate query bottlenecks or insufficient query processing capacity.\n\nIf it's consistently high while CPU usage remains low, consider increasing the concurrency limit (`-search.maxConcurrentRequests`) or optimizing query performance to support more concurrent users.", "fieldConfig": { "defaults": { "color": { @@ -2699,10 +6015,9 @@ "mode": "none" }, "thresholdsStyle": { - "mode": "line" + "mode": "off" } }, - "decimals": 0, "links": [], "mappings": [], "min": 0, @@ -2711,21 +6026,41 @@ "steps": [ { "color": "green", - "value": null + "value": 0 + }, + { + "color": "red", + "value": 80 } ] }, - "unit": "s" + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 56 + "y": 63 }, - "id": 73, + "id": 107, "options": { "legend": { "calcs": [ @@ -2745,7 +6080,7 @@ "sort": "desc" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { @@ -2753,11 +6088,11 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(process_pressure_memory_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job, instance)", + "expr": "max(max_over_time(vl_concurrent_select_current{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - waiting", + "intervalFactor": 1, + "legendFormat": "current", "range": true, "refId": "A" }, @@ -2767,17 +6102,17 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(process_pressure_memory_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job, 
instance)", + "expr": "max(vl_concurrent_select_capacity{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "hide": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - stalled", + "intervalFactor": 1, + "legendFormat": "limit", "range": true, "refId": "B" } ], - "title": "Memory pressure", + "title": "Concurrent queries ($instance)", "type": "timeseries" }, { @@ -2785,7 +6120,7 @@ "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "description": "Shows the number of bytes read/write from the storage layer.", + "description": "Number of queries that timed out while waiting for available concurrency slots. This indicates sustained query overload beyond configured limits.\n\nHigh values suggest:\n- Query queue is consistently full\n- Queries waiting too long for execution slots\n- System under sustained heavy load\n- Need for horizontal scaling or query optimization\n\nCombined with `Select concurrency limit reached`, provides complete picture of query rejection patterns.", "fieldConfig": { "defaults": { "color": { @@ -2800,11 +6135,12 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, + "vis": false, "viz": false }, "insertNulls": false, @@ -2824,105 +6160,86 @@ "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", - "value": 80 + "value": 1 } ] }, - "unit": "bytes" + "unit": "reqps" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "read" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 56 - }, - "id": 52, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", + "y": 63 + 
}, + "id": 132, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", "placement": "bottom", - "showLegend": true, - "sortBy": "Last *", - "sortDesc": true + "showLegend": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "victoriametrics-metrics-datasource", "uid": "$ds" }, - "expr": "sum(rate(process_io_storage_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "time_series", - "hide": false, + "editorMode": "code", + "expr": "sum(rate(vl_concurrent_select_limit_timeout_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance) > 0", "interval": "", - "intervalFactor": 1, - "legendFormat": "read", + "legendFormat": "{{instance}}", + "range": true, "refId": "A" - }, - { - "datasource": { - "type": "victoriametrics-metrics-datasource", - "uid": "$ds" - }, - "expr": "sum(rate(process_io_storage_written_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "write", - "refId": "B" } ], - "title": "Disk writes/reads ($instance)", + "title": "Query timeouts ($instance)", "type": "timeseries" - }, + } + ], + "title": "Querying", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 55 + }, + "id": 126, + "panels": [ { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, + "description": "Number of storage blocks scanned per query (99th percentile). Each block contains logs for a specific time period and field combination. 
High values indicate queries scanning too many blocks, often caused by:\n\n- Wide time ranges without specific filters\n- Queries missing indexed fields (like `_stream`, `kubernetes.*`)\n- Non-selective filters that don't utilize `bloom filters`\n\nCorrelate with `Bytes/query p99` - if blocks are high but bytes are low, blocks contain little data (good). If both are high, query is reading large amounts of data.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -2933,7 +6250,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2957,16 +6274,15 @@ "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -2974,7 +6290,7 @@ } ] }, - "unit": "short" + "unit": "none" }, "overrides": [] }, @@ -2982,9 +6298,9 @@ "h": 8, "w": 12, "x": 0, - "y": 64 + "y": 56 }, - "id": 50, + "id": 130, "options": { "legend": { "calcs": [ @@ -3004,35 +6320,36 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "sum(go_goroutines{job=~\"$job\", instance=~\"$instance\"}) by(instance)", - "format": "time_series", - "intervalFactor": 2, + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_processed_blocks_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange))) by (instance)", + "hide": false, + "interval": "", "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "Goroutines ($instance)", + "title": "Blocks/query p99 ($instance)", "type": "timeseries" }, { 
"datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, - "description": "Shows the number of read/write syscalls such as read, pread, write, pwrite.", + "description": "Total bytes read from disk per query (99th percentile). This represents the complete `I/O` overhead for query execution, including:\n\n- Block headers and metadata\n- Bloom filter data for candidate selection\n- Column headers and indexes\n- Actual log values and timestamps\n\nHigh values indicate expensive queries. Compare with specific breakdown panels below to identify bottlenecks. Monitor trends over time and correlate with query complexity.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3043,7 +6360,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3067,14 +6384,15 @@ "mode": "off" } }, - "links": [], "mappings": [], + "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3082,30 +6400,17 @@ } ] }, - "unit": "short" + "unit": "bytes" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "read calls" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 64 + "y": 56 }, - "id": 56, + "id": 121, "options": { "legend": { "calcs": [ @@ -3125,51 +6430,36 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "sum(rate(process_io_read_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "time_series", + "exemplar": true, + 
"expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_total_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange))) by (instance)", "hide": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "read calls", + "legendFormat": "{{instance}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "victoriametrics-metrics-datasource", - "uid": "$ds" - }, - "editorMode": "code", - "expr": "sum(rate(process_io_write_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "write calls", - "range": true, - "refId": "B" } ], - "title": "Disk write/read calls ($instance)", + "title": "Bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, + "description": "Bytes read from block headers per query (99th percentile). 
Block headers contain metadata about each storage block including `time ranges`, `field names`, and data location pointers.\n\nHigh values indicate:\n- Query `time range` spans many blocks (reduce time range or add time-based filters)\n- Missing stream-level filters (`_stream` field) causing full block header scans\n- High cardinality fields creating excessive blocks\n\nMonitor relative changes over time - sudden increases suggest inefficient query patterns or changes in data structure.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3180,7 +6470,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3204,16 +6494,15 @@ "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3221,7 +6510,7 @@ } ] }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, @@ -3229,9 +6518,9 @@ "h": 8, "w": 12, "x": 0, - "y": 72 + "y": 64 }, - "id": 54, + "id": 124, "options": { "legend": { "calcs": [ @@ -3251,35 +6540,36 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "sum(process_num_threads{job=~\"$job\", instance=~\"$instance\"}) by(instance)", - "format": "time_series", - "intervalFactor": 2, + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_block_headers_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange))) by (instance) > 0.001", + "hide": false, + "interval": "", "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": 
"Threads ($instance)", + "title": "Block header bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, - "description": "Shows IO pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\nThe lower the better.", + "description": "Bytes read from Bloom filters per query (99th percentile). `Bloom filters` are probabilistic data structures that quickly eliminate blocks that definitely don't contain search terms.\n\nHigh values indicate:\n- Queries with low-selectivity text filters (common words like `error`, `info`)\n- Missing or ineffective field-based filters\n- Queries that force scanning many candidate blocks\n\nOptimize by adding specific field filters (`kubernetes.container_name`, `_stream`) before text searches. Monitor for sudden increases that indicate poor filter selectivity.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3311,23 +6601,26 @@ "mode": "none" }, "thresholdsStyle": { - "mode": "line" + "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 + }, + { + "color": "red", + "value": 80 } ] }, - "unit": "s" + "unit": "bytes" }, "overrides": [] }, @@ -3335,9 +6628,9 @@ "h": 8, "w": 12, "x": 12, - "y": 72 + "y": 64 }, - "id": 74, + "id": 129, "options": { "legend": { "calcs": [ @@ -3347,61 +6640,44 @@ ], "displayMode": "table", "placement": "bottom", - "showLegend": true, - "sortBy": "Last *", - "sortDesc": true + "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "desc" + "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" - }, - 
"editorMode": "code", - "expr": "sum(rate(process_pressure_io_waiting_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job, instance)", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - waiting", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "sum(rate(process_pressure_io_stalled_seconds_total{job=~\"$job\"}[$__rate_interval])) by (job, instance)", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_bloom_filters_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange))) by (instance) > 0.001", "hide": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} - stalled", + "legendFormat": "{{instance}}", "range": true, - "refId": "B" + "refId": "A" } ], - "title": "IO pressure", + "title": "Bloom filter bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, - "description": "", + "description": "Bytes read from actual log values per query (99th percentile). 
This represents the uncompressed log content being retrieved and processed for the query result.\n\nHigh values indicate:\n- Queries returning large result sets (add `LIMIT` clause)\n- Retrieving logs with large payloads (`JSON` objects, stack traces)\n- Missing filters that would reduce matching log volume\n- Functions like `uniq` or `stats` processing many log entries\n\nReduce by: adding selective filters, using field extractors instead of full log retrieval, limiting result count.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3412,7 +6688,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3436,15 +6712,15 @@ "mode": "off" } }, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3452,7 +6728,7 @@ } ] }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, @@ -3460,9 +6736,9 @@ "h": 8, "w": 12, "x": 0, - "y": 80 + "y": 72 }, - "id": 58, + "id": 122, "options": { "legend": { "calcs": [ @@ -3482,36 +6758,36 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "sum(rate(vm_tcplistener_accepts_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(instance)", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_values_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange))) by (instance)", "hide": false, - "intervalFactor": 1, + "interval": "", "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "TCP 
connections rate ($instance)", + "title": "Value bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, - "description": "", + "description": "Bytes read from timestamp column (`_time`) per query (99th percentile). The `_time` column is automatically indexed and used for time-range filtering during query execution.\n\nHigh values indicate:\n- Queries with very wide time ranges requiring timestamp scanning\n- Time-based aggregations over large datasets\n- Missing time-range restrictions in query filters\n\nThis is usually the smallest component of query `I/O`. Spikes correlate with time range width and data density in the queried period.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3522,7 +6798,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3546,15 +6822,15 @@ "mode": "off" } }, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3562,7 +6838,7 @@ } ] }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, @@ -3570,9 +6846,9 @@ "h": 8, "w": 12, "x": 12, - "y": 80 + "y": 72 }, - "id": 60, + "id": 127, "options": { "legend": { "calcs": [ @@ -3592,36 +6868,36 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "sum(vm_tcplistener_conns{job=~\"$job\", instance=~\"$instance\"}) by(instance)", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, 
sum(increase(vl_storage_per_query_timestamps_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange))) by (instance)", "hide": false, - "intervalFactor": 1, + "interval": "", "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "TCP connections ($instance)", + "title": "_time bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, - "description": "Shows the time goroutines have spent in runnable state before actually running. The lower is better.\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.", + "description": "Bytes read from column header indexes per query (99th percentile). `Column header indexes` contain metadata about which fields exist in each block and their data types.\n\nHigh values suggest:\n- Queries scanning many blocks due to missing field filters\n- High field cardinality creating large index structures\n- Queries accessing many different field names across blocks\n- Schema evolution causing index fragmentation\n\nOptimize by using consistent `field names` and adding field-specific filters early in query pipeline.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3653,27 +6929,26 @@ "mode": "none" }, "thresholdsStyle": { - "mode": "line" + "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", - "value": 0.1 + "value": 80 } ] }, - "unit": "s" + "unit": "bytes" }, "overrides": [] }, @@ -3681,9 +6956,9 @@ "h": 8, "w": 12, "x": 0, - "y": 88 + "y": 
80 }, - "id": 61, + "id": 128, "options": { "legend": { "calcs": [ @@ -3693,44 +6968,46 @@ ], "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true }, "tooltip": { "hideZeros": false, "mode": "multi", - "sort": "desc" + "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "max(histogram_quantile(0.99, sum(rate(go_sched_latencies_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (job, instance, le))) by(job)", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_columns_header_indexes_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange))) by (instance)", + "hide": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "__auto", + "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "Go scheduling latency", + "title": "Column header index bytes/query p99 ($instance)", "type": "timeseries" }, { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, - "description": "Shows the percentage of open file descriptors compared to the limit set in the OS.\nReaching the limit of open files can cause various issues and must be prevented.\n\nSee how to change limits here https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a", + "description": "Bytes read from column headers per query (99th percentile). 
`Column headers` store the actual field schema and compression metadata for each block's columns.\n\nThis metric helps identify:\n- Schema complexity overhead (many fields per log entry)\n- Inefficient field access patterns\n- Blocks with heterogeneous schemas requiring header reads\n\nCompare with other `I/O` breakdown panels to understand query cost distribution. High column header reads suggest schema optimization opportunities or need for more selective field access patterns.", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "palette-classic-by-name" }, "custom": { "axisBorderShow": false, @@ -3765,16 +7042,15 @@ "mode": "off" } }, - "decimals": 2, - "links": [], "mappings": [], "min": 0, + "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "value": 0 }, { "color": "red", @@ -3782,33 +7058,17 @@ } ] }, - "unit": "percentunit" + "unit": "bytes" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "max" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C4162A", - "mode": "fixed" - } - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 88 + "y": 80 }, - "id": 46, + "id": 123, "options": { "legend": { "calcs": [ @@ -3828,35 +7088,34 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "12.1.0", "targets": [ { "datasource": { "type": "victoriametrics-metrics-datasource", - "uid": "$ds" + "uid": "${ds}" }, "editorMode": "code", - "expr": "max_over_time(process_open_fds{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n/\nprocess_max_fds{job=~\"$job\", instance=~\"$instance\"}", - "format": "time_series", + "exemplar": true, + "expr": "max(histogram_quantile(0.99, sum(increase(vl_storage_per_query_columns_headers_read_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange))) by (instance)", "hide": false, "interval": "", - "intervalFactor": 2, 
"legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "Open FDs ($instance)", + "title": "Column header bytes/query p99 ($instance)", "type": "timeseries" } ], - "title": "Resource usage", + "title": "Slow Query Troubleshooting", "type": "row" } ], "preload": false, "refresh": "", - "schemaVersion": 40, + "schemaVersion": 41, "tags": [ "victoriametrics", "victorialogs" @@ -3865,8 +7124,8 @@ "list": [ { "current": { - "text": "VictoriaMetrics", - "value": "P4169E866C3094E38" + "text": "default", + "value": "default" }, "includeAll": false, "name": "ds", @@ -3877,7 +7136,10 @@ "type": "datasource" }, { - "current": {}, + "current": { + "text": "kubernetes-pods", + "value": "kubernetes-pods" + }, "datasource": { "type": "victoriametrics-metrics-datasource", "uid": "$ds" @@ -3895,7 +7157,12 @@ "type": "query" }, { - "current": {}, + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, "datasource": { "type": "victoriametrics-metrics-datasource", "uid": "$ds" @@ -3914,7 +7181,10 @@ "type": "query" }, { - "current": {}, + "current": { + "text": "", + "value": "" + }, "datasource": { "type": "victoriametrics-metrics-datasource", "uid": "${ds}" @@ -3926,7 +7196,7 @@ "query": { "qryType": 1, "query": "label_values(vm_app_version{job=~\"$job\", instance=~\"$instance\"},short_version)", - "refId": "VictoriaMetricsVariableQueryEditor-VariableQuery" + "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, "regex": "", @@ -3950,8 +7220,8 @@ }, "timepicker": {}, "timezone": "", - "title": "VictoriaLogs - single-node", - "uid": "OqPIZTX4z_vm", + "title": "VictoriaLogs - single-node (VM)", + "uid": "XqCOFEX4z_vm", "version": 1, "weekStart": "" } \ No newline at end of file From f28972c5bf230509480b86ec24e801572a2e19af Mon Sep 17 00:00:00 2001 From: func25 Date: Sat, 23 Aug 2025 13:26:53 +0700 Subject: [PATCH 2/2] opacity --- dashboards/victorialogs-cluster.json | 26 ++++++++++++------------- dashboards/victorialogs.json | 2 +- 
dashboards/vm/victorialogs-cluster.json | 26 ++++++++++++------------- dashboards/vm/victorialogs.json | 2 +- 4 files changed, 28 insertions(+), 28 deletions(-) diff --git a/dashboards/victorialogs-cluster.json b/dashboards/victorialogs-cluster.json index e450293e71..776c41a30a 100644 --- a/dashboards/victorialogs-cluster.json +++ b/dashboards/victorialogs-cluster.json @@ -1291,7 +1291,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1635,7 +1635,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "bars", - "fillOpacity": 100, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2478,7 +2478,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 17, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3054,7 +3054,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3320,7 +3320,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5788,7 +5788,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "opacity", "hideFrom": { "legend": false, @@ -5914,7 +5914,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -6709,7 +6709,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -8486,7 +8486,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "opacity", "hideFrom": { 
"legend": false, @@ -8612,7 +8612,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -8713,7 +8713,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "opacity", "hideFrom": { "legend": false, @@ -9570,7 +9570,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -9805,7 +9805,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, diff --git a/dashboards/victorialogs.json b/dashboards/victorialogs.json index da3279fe1e..90ca49e58e 100644 --- a/dashboards/victorialogs.json +++ b/dashboards/victorialogs.json @@ -2272,7 +2272,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 17, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, diff --git a/dashboards/vm/victorialogs-cluster.json b/dashboards/vm/victorialogs-cluster.json index 6fb5821d40..ac30990653 100644 --- a/dashboards/vm/victorialogs-cluster.json +++ b/dashboards/vm/victorialogs-cluster.json @@ -1291,7 +1291,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1635,7 +1635,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "bars", - "fillOpacity": 100, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2478,7 +2478,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 17, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3054,7 +3054,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, 
"gradientMode": "none", "hideFrom": { "legend": false, @@ -3320,7 +3320,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5788,7 +5788,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "opacity", "hideFrom": { "legend": false, @@ -5914,7 +5914,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -6709,7 +6709,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -8486,7 +8486,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "opacity", "hideFrom": { "legend": false, @@ -8612,7 +8612,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -8713,7 +8713,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "opacity", "hideFrom": { "legend": false, @@ -9570,7 +9570,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -9805,7 +9805,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, diff --git a/dashboards/vm/victorialogs.json b/dashboards/vm/victorialogs.json index e973aa5209..f86dd61506 100644 --- a/dashboards/vm/victorialogs.json +++ b/dashboards/vm/victorialogs.json @@ -2272,7 +2272,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 17, + 
"fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false,