diff --git a/assets/monitoring/grafana/v1alpha1/dashboards-platform.cm.yaml b/assets/monitoring/grafana/v1alpha1/dashboards-platform.cm.yaml
deleted file mode 100644
index d407df049a7..00000000000
--- a/assets/monitoring/grafana/v1alpha1/dashboards-platform.cm.yaml
+++ /dev/null
@@ -1,6849 +0,0 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
- name: '{{ .scyllaDBMonitoringName }}-grafana-scylladb-dashboards'
-data:
- alternator.json: |-
- {{`{
- "dashboard": {
- "class": "dashboard",
- "originalTitle": "Scylla Cluster Metrics",
- "overwrite": true,
- "rows": [
- {
- "class": "alternator_logo_row"
- },
- {
- "class": "row",
- "height": "200px",
- "panels": [
- {
- "class": "single_stat_panel",
- "targets": [
- {
- "expr": "count(scylla_scylladb_current_version{job=\"scylla\", cluster=~\"$cluster|$^\"})",
- "intervalFactor": 1,
- "legendFormat": "Total Nodes",
- "refId": "A",
- "step": 40
- }
- ],
- "title": "Total Nodes"
- },
- {
- "class": "single_stat_panel_fail",
- "targets": [
- {
- "expr": "count(scrape_samples_scraped{job=\"scylla\", cluster=~\"$cluster|$^\"}==0) OR vector(0)",
- "intervalFactor": 1,
- "legendFormat": "Unreachable",
- "refId": "A",
- "step": 20
- }
- ],
- "thresholds": "1,2",
- "title": "Unreachable"
- },
- {
- "class": "single_stat_panel_fail",
- "description": "Number of nodes that reported their status as Starting or Joining",
- "targets": [
- {
- "expr": "count(scylla_node_operation_mode<=2)OR vector(0)",
- "intervalFactor": 1,
- "legendFormat": "Joining",
- "refId": "A",
- "step": 20
- }
- ],
- "thresholds": "1,2",
- "title": "Joining"
- },
- {
- "class": "single_stat_panel_fail",
- "description": "Number of nodes that reported their status as Leaving, Decommissioned, Draining or Drained",
- "targets": [
- {
- "expr": "count(scylla_node_operation_mode>3)OR vector(0)",
- "intervalFactor": 1,
- "legendFormat": "Leaving",
- "refId": "A",
- "step": 20
- }
- ],
- "thresholds": "1,2",
- "title": "Leaving"
- },
- {
- "class": "percent_panel",
- "description": "The percentage of the time during which Scylla utilized the CPU. Note that because Scylla does busy polling for some time before going idle, CPU utilization as seen by the operating system may be much higher. Your system is not yet CPU-bottlenecked until this metric is high.",
- "span": 4,
- "targets": [
- {
- "expr": "avg(scylla_reactor_utilization{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"} ) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 4
- }
- ],
- "title": "Load"
- },
- {
- "class": "small_nodes_table",
- "span": 4,
- "transformations":[
- {
- "id":"filterFieldsByName",
- "options":{
- "include":{
- "names":[
- "instance",
- "svr",
- "Value #A",
- "Value #B",
- "Value #C",
- "Value #D"
- ]
- }
- }
- },
- {
- "id":"seriesToColumns",
- "options":{
- "byField":"instance"
- }
- },
- {
- "id":"organize",
- "options":{
- "excludeByName":{
- },
- "indexByName":{
- "instance":0,
- "Value #D":1,
- "Value #C":2,
- "svr":3,
- "Value #A":4,
- "Value #B":5
- },
- "renameByName":{
- }
- }
- }
- ]
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "bytes_panel",
- "span": 4,
- "targets": [
- {
- "expr": "sum(node_filesystem_size_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])-sum(node_filesystem_avail_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "sum(node_filesystem_size{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])-sum(node_filesystem_avail{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "B",
- "step": 1
- }
- ],
- "title": "Disk Size by $by"
- },
- {
- "class": "ops_panel",
- "description": "Number of Alternator Actions",
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_total_operations{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 4
- }
- ],
- "title": "Total Actions"
- },
- {
- "class": "alert_table",
- "span": 4,
- "styles": [
- {
- "alias": "Time",
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "link": true,
- "linkTooltip": "Jump to the see the node",
- "linkUrl": "/d/detailed-[[dash_version]]/detailed?refresh=30s&orgId=1&var-by=instance&var-node=${__cell_4}&from=${__cell_0}",
- "pattern": "Time",
- "type": "date"
- },
- {
- "class": "hidden_column",
- "pattern": "severity"
- },
- {
- "class": "hidden_column",
- "pattern": "alertname"
- },
- {
- "class": "hidden_column",
- "pattern": "cluster"
- },
- {
- "class": "hidden_column",
- "pattern": "monitor"
- },
- {
- "class": "hidden_column",
- "pattern": "summary"
- },
- {
- "alias": "Instance",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": true,
- "linkTooltip": "Jump to the see the node",
- "linkUrl": "/d/detailed-[[dash_version]]/detailed?refresh=30s&orgId=1&var-by=instance&var-node=${__cell}",
- "mappingType": 1,
- "pattern": "instance",
- "thresholds": [],
- "type": "string",
- "unit": "short"
- },
- {
- "alias": "",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "decimals": 2,
- "pattern": "/.*/",
- "thresholds": [],
- "type": "number",
- "unit": "short"
- }
- ],
- "title": "Active Alerts"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": "Data Plane Actions"
- }
- ]
- },
- {
- "class": "header_row",
- "panels": [
- {
- "class": "plain_text",
- "content": "
Data Plane Actions
"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"GetItem\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "GetItem by [[by]]"
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"PutItem\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "PutItem by [[by]]"
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"UpdateItem\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "UpdateItem by [[by]]"
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"DeleteItem\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "DeleteItem by [[by]]"
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"BatchWriteItem\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "BatchWriteItem by [[by]]"
- },
- {
- "class": "ops_panel",
- "span": 4,
- "dashversion":[">4.4", ">2021.1"],
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"BatchGetItem\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "BatchGetItem by [[by]]"
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"Query\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Query by [[by]]"
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"Scan\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Scan by [[by]]"
- }
- ]
- },
- {
- "class": "header_row",
- "panels": [
- {
- "class": "plain_text",
- "content": "Data Plane Latencies
"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "collapsed": false,
- "datasource": null,
- "id": "auto",
- "gridPos": {
- "h": 1,
- "w": 24
- },
- "panels": [],
- "repeat": "alternator_latency_ops",
- "title": "$alternator_latency_ops",
- "type": "row"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_op_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_latency_ops\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Completed $alternator_latency_ops"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_op_latency_sum{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_latency_ops\"}[60s])) by ([[by]])/($func(rate(scylla_alternator_op_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_latency_ops\"}[60s])) by ([[by]]) + 1)",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Average $alternator_latency_ops latency by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "histogram_quantile(0.95, sum(rate(scylla_alternator_op_latency_bucket{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_latency_ops\"}[60s])) by ([[by]], le))",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "95th percentile $alternator_latency_ops latency by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(scylla_alternator_op_latency_bucket{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_latency_ops\"}[60s])) by ([[by]], le))",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "99th percentile $alternator_latency_ops latency by [[by]]"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": "Streams"
- }
- ]
- },
- {
- "class": "header_row",
- "panels": [
- {
- "class": "plain_text",
- "content": "Streams Actions
"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"ListStreams\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "ListStreams by [[by]]"
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"DescribeStream\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "DescribeStream by [[by]]"
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"GetShardIterator\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "GetShardIterator by [[by]]"
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"GetRecords\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "GetRecords by [[by]]"
- }
- ]
- },
- {
- "class": "header_row",
- "panels": [
- {
- "class": "plain_text",
- "content": "Streams Latencies
"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "collapsed": false,
- "datasource": null,
- "id": "auto",
- "gridPos": {
- "h": 1,
- "w": 24
- },
- "panels": [],
- "repeat": "alternator_streams_latency_ops",
- "title": "$alternator_streams_latency_ops",
- "type": "row"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_op_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_streams_latency_ops\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Completed $alternator_streams_latency_ops"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_op_latency_sum{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_streams_latency_ops\"}[60s])) by ([[by]])/($func(rate(scylla_alternator_op_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_streams_latency_ops\"}[60s])) by ([[by]]) + 1)",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Average $alternator_streams_latency_ops latency by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "histogram_quantile(0.95, sum(rate(scylla_alternator_op_latency_bucket{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_streams_latency_ops\"}[60s])) by ([[by]], le))",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "95th percentile $alternator_streams_latency_ops latency by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(scylla_alternator_op_latency_bucket{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_streams_latency_ops\"}[60s])) by ([[by]], le))",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "99th percentile $alternator_streams_latency_ops latency by [[by]]"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": "Control plane"
- }
- ]
- },
- {
- "class": "header_row",
- "panels": [
- {
- "class": "plain_text",
- "content": "Control Plane Actions
"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"CreateTable\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "CreateTable by [[by]]"
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"DeleteTable\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "DeleteTable by [[by]]"
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"DescribeTable\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "DescribeTable by [[by]]"
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"ListTables\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "ListTables by [[by]]"
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"DescribeEndpoints\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "DescribeEndpoints by [[by]]"
- },
- {
- "class": "text_panel",
- "dashversion":["<5.1", "<2022.2"],
- "content": "## ",
- "mode": "markdown",
- "span": 4,
- "style": {}
- },
- {
- "class": "ops_panel",
- "description": "The number of items deleted by their TTL",
- "dashversion":[">5.1", ">2022.2"],
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_expiration_items_deleted{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Expired Item Deleted by [[by]]"
- }
- ]
- },
- {
- "class": "row",
- "gridPos": {
- "h": 2
- },
- "height": "25px",
- "panels": [
- {
- "class": "plain_text",
- "content": "Cache
",
- "span": 6
- },
- {
- "content": "Timeouts
",
- "class": "plain_text",
- "span": 6
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "ops_panel",
- "description": "Number of rows that were read from the cache, without needing to be fetched from storage.",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_row_hits{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Cache Hits"
- },
- {
- "class": "ops_panel",
- "description": "Number of rows that were not present in the cache, and had to be fetched from storage.",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_row_misses{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Cache Misses"
- },
- {
- "class": "wpm_panel",
- "description": "Requests that Scylla tried to write but timed out. Timeouts are counted in the node that received the request (the coordinator), not at the replicas.",
- "span": 3,
- "targets": [
- {
- "expr": "$func(delta(scylla_storage_proxy_coordinator_write_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Write Timeouts/Minutes by [[by]]"
- },
- {
- "class": "rpm_panel",
- "description": "Requests that Scylla tried to read but timed out. Timeouts are counted in the node that received the request (the coordinator), not at the replicas.",
- "span": 3,
- "targets": [
- {
- "expr": "$func(delta(scylla_storage_proxy_coordinator_read_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Read Timeouts/Minutes by [[by]]"
- }
- ],
- "title": "New row"
- },
- {
- "class": "user_panel_row_header"
- },
- {
- "class": "user_panels_row"
- },
- {
- "class": "monitoring_version_row"
- }
- ],
- "tags": [
- "__SCYLLA_VERSION_DOT__"
- ],
- "templating": {
- "list": [
- {
- "class": "by_template_var"
- },
- {
- "class": "template_variable_single",
- "label": "cluster",
- "name": "cluster",
- "query": "label_values(scylla_reactor_utilization, cluster)"
- },
- {
- "class": "template_variable_all",
- "label": "dc",
- "name": "dc",
- "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster\"}, dc)"
- },
- {
- "class": "template_variable_all",
- "label": "node",
- "name": "node",
- "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster|$^\", dc=~\"$dc\"}, instance)"
- },
- {
- "class": "template_variable_all",
- "label": "shard",
- "name": "shard",
- "query": "label_values(scylla_reactor_utilization,shard)",
- "sort": 3
- },
- {
- "class": "template_variable_single",
- "current": {
- "text": "/var/lib/scylla",
- "value": "/var/lib/scylla"
- },
- "label": "Mount path",
- "name": "mount_point",
- "query": "node_filesystem_avail_bytes",
- "regex": "/mountpoint=\"([^\"]*)\".*/",
- "sort": 0
- },
- {
- "class": "aggregation_function"
- },
- {
- "class": "template_variable_custom",
- "current": {
- "text": "__SCYLLA_VERSION_DASHED__",
- "value": "__SCYLLA_VERSION_DASHED__"
- },
- "name": "dash_version",
- "options": [
- {
- "selected": true,
- "text": "__SCYLLA_VERSION_DASHED__",
- "value": "__SCYLLA_VERSION_DASHED__"
- }
- ],
- "query": "__SCYLLA_VERSION_DASHED__"
- },
- {
- "class": "template_variable_custom",
- "current": {
- "text": "__SCYLLA_VERSION_DOT__",
- "value": "__SCYLLA_VERSION_DOT__"
- },
- "name": "scylla_version",
- "options": [
- {
- "selected": true,
- "text": "__SCYLLA_VERSION_DOT__",
- "value": "__SCYLLA_VERSION_DOT__"
- }
- ],
- "query": "__SCYLLA_VERSION_DOT__"
- },
- {
- "class": "monitor_version_var"
- },
- {
- "class": "template_variable_custom",
- "name": "alternator_latency_ops",
- "multi": true,
- "includeAll": true,
- "current": {
- "text": "All",
- "value": "$__all"
- },
- "options": [
- {
- "selected": true,
- "text": "All",
- "value": "$__all"
- },
- {
- "selected": false,
- "text": "GetItem",
- "value": "GetItem"
- },
- {
- "selected": false,
- "text": "PutItem",
- "value": "PutItem"
- },
- {
- "selected": false,
- "text": "UpdateItem",
- "value": "UpdateItem"
- },
- {
- "selected": false,
- "text": "DeleteItem",
- "value": "DeleteItem"
- }
- ],
- "query": "GetItem,PutItem,UpdateItem,DeleteItem"
- },
- {
- "class": "template_variable_custom",
- "name": "alternator_streams_latency_ops",
- "multi": true,
- "includeAll": true,
- "current": {
- "text": "All",
- "value": "$__all"
- },
- "options": [
- {
- "selected": true,
- "text": "All",
- "value": "$__all"
- },
- {
- "selected": false,
- "text": "GetRecords",
- "value": "GetRecords"
- }
- ],
- "query": "GetRecords"
- },
- {
- "allValue": null,
- "datasource": "prometheus",
- "definition": "scylla_alternator_total_operations{cluster=~\"$cluster|$^\"}",
- "description": null,
- "error": null,
- "hide": 2,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "no_alternator",
- "options": [],
- "query": {
- "query": "scylla_alternator_total_operations{cluster=~\"$cluster|$^\"}",
- "refId": "StandardVariableQuery"
- },
- "refresh": 1,
- "regex": "/^(scylla_alternator_total_operations)/",
- "skipUrlSync": false,
- "sort": 0,
- "type": "query"
- }
- ]
- },
- "time": {
- "from": "now-30m",
- "to": "now"
- },
- "annotations" :{
- "list": [
- {
- "builtIn": 1,
- "datasource": "-- Grafana --",
- "enable": true,
- "hide": true,
- "iconColor": "rgba(0, 211, 255, 1)",
- "name": "Annotations & Alerts",
- "type": "dashboard"
- },
- {
- "class" : "annotation_restart"
- },
- {
- "class" : "annotation_stall"
- },
- {
- "class" : "annotation_schema_changed"
- }
- ]
- },
- "title": "Alternator",
- "uid": "alternator-__SCYLLA_VERSION_DASHED__",
- "version": 1
- }
- }`}}
- advanced.json: |-
- {{`{
- "dashboard": {
- "class": "dashboard",
- "uid": "advanced-__SCYLLA_VERSION_DASHED__",
- "rows": [
- {
- "class": "small_stat_rows"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": ""
- }
- ]
- },
- {
- "class": "row",
- "height": "25px",
- "gridPos": {"h": 2},
- "panels": [
- {
- "class": "plain_text",
- "content": "IO Queue Information
"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "collapsed": false,
- "datasource": null,
- "id": "auto",
- "gridPos": {
- "h": 1,
- "w": 24
- },
- "panels": [],
- "repeat": "classes",
- "title": "$classes",
- "type": "row"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "seconds_panel",
- "span": 3,
- "targets": [
- {
- "expr": "max(rate(scylla_io_queue_total_delay_sec{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])/rate(scylla_io_queue_total_operations{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) or on() max(scylla_io_queue_delay{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"} ) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "seastar_io_queue_delay",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "description": "seastar_io_queue_delay",
- "title": "$classes I/O Queue delay by [[by]]"
- },
- {
- "class": "graph_panel",
- "span": 3,
- "targets": [
- {
- "expr": "max(scylla_io_queue_queue_length{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "seastar_io_queue_delay",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "description": "scylla_io_queue_queue_length",
- "title": "$classes Queue length by [[by]]"
- },
- {
- "class": "bps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(scylla_io_queue_total_bytes{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "seastar_io_queue_delay",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "description": "seastar_io_queue_delay",
- "title": "$classes I/O Queue bandwidth by [[by]]"
- },
- {
- "class": "iops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(scylla_io_queue_total_operations{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "seastar_io_queue_delay",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "description": "scylla_io_queue_total_operations",
- "title": "$classes I/O Queue IOPS by [[by]]"
- },
- {
- "class": "seconds_panel",
- "span": 3,
- "targets": [
- {
- "expr": "max(rate(scylla_io_queue_total_exec_sec{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])/rate(scylla_io_queue_total_operations{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) or on() max(scylla_io_queue_delay{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"} ) by ([[by]])",
- "intervalFactor": 1,
- "metric": "scylla_io_queue_total_exec_sec",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "description": "scylla_io_queue_total_exec_sec",
- "title": "Disk $classes I/O Queue delay by [[by]]"
- },
- {
- "class": "graph_panel",
- "span": 3,
- "targets": [
- {
- "expr": "max(scylla_io_queue_disk_queue_length{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "metric": "scylla_io_queue_disk_queue_length",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "description": "scylla_io_queue_disk_queue_length",
- "title": "DISK $classes Queue length by [[by]]"
- },
- {
- "class": "seconds_panel",
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(scylla_io_queue_starvation_time_sec{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "metric": "scylla_io_queue_starvation_time_sec",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "description": "The time the class waited for being dispatched with non-empty software queue.\n\nLarge IO delays coupled with small starvation time denotes that scheduler is doing its job properly, and it's upper layer that overflows disk capacity.\n\nLarge IO delays coupled with large starvation time denotes that there might be some problem on the scheduler level that it cannot deliver IO requests from that class into disk in timely manner or the disk is slow and cannot afford timely dispatching.",
- "title": "DISK $classes starvation time by [[by]]"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": ""
- }
- ]
- },
- {
- "class": "row",
- "height": "25px",
- "gridPos": {"h": 2},
- "panels": [
- {
- "class": "plain_text",
- "content": "Information by Task Group
"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "collapsed": false,
- "datasource": null,
- "id": "auto",
- "gridPos": {
- "h": 1,
- "w": 24
- },
- "panels": [],
- "repeat": "group",
- "title": "$group",
- "type": "row"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "percentunit_panel",
- "span":3,
- "pointradius": 1,
- "targets": [
- {
- "expr": "$func(rate(scylla_scheduler_runtime_ms{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\",group=~\"$group\", shard=~\"[[shard]]\"}[1m])/1000) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Time used by [[by]] - $group",
- "description": "Scylla employs an event-loop like reactor that alternates between the execution of different groups of tasks periodically. This graph shows how much time was spent in $group group"
- },
- {
- "class": "percentunit_panel",
- "span":3,
- "pointradius": 1,
- "targets": [
- {
- "expr": "$func(rate(scylla_scheduler_time_spent_on_task_quota_violations_ms{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\",group=~\"$group\", shard=~\"[[shard]]\"}[1m])/1000) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Time spent in task quota violations by [[by]] - $group",
- "description": "Scylla employs an event-loop like reactor that alternates between the execution of different groups of tasks periodically. The maximum amount of time during which a task group can run is called the \"task quota\". Some task groups may disrespect that and run for longer. This may cause latency issues"
- },
- {
- "class": "percentunit_panel",
- "span":3,
- "pointradius": 1,
- "targets": [
- {
- "expr": "$func(rate(scylla_scheduler_starvetime_ms{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\",group=~\"$group\", shard=~\"[[shard]]\"}[1m])/1000) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Starvation time by [[by]] - $group",
- "description": "Scylla employs an event-loop like reactor that alternates between the execution of different groups of tasks periodically. The maximum amount of time during which a task group can run is called the \"task quota\". Some task groups may disrespect that and run for longer.\n\n This graph shows the amount of time the group was waiting to get CPU time."
- },
- {
- "class": "graph_panel",
- "span":3,
- "pointradius": 1,
- "targets": [
- {
- "expr": "$func(scylla_scheduler_shares{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\",group=~\"$group\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Scheduler shares [[by]] - $group",
- "description": "Shares assigned to the $group. Shares determine how Scylla reactor distributes the task quotas between groups (Higher share gets more quotas)"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": "Internal node errors"
- }
- ]
- },
- {
- "class": "row",
- "height": "25px",
- "gridPos": {"h": 2},
- "panels": [
- {
- "class": "plain_text",
- "content": "Internal node Errors - $cluster
"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "rps_panel",
- "span": 4,
- "targets": [
- {
- "expr": "avg(rate(scylla_storage_proxy_coordinator_read_errors_local_node{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Local Reads Error by [[by]]",
- "description": "Number of Read requests that failed due to an 'unavailable' error"
- },
- {
- "class": "wps_panel",
- "span": 4,
- "targets": [
- {
- "expr": "avg(rate(scylla_storage_proxy_coordinator_write_errors_local_node{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Local Write Error by [[by]]",
- "description": "Number of write requests that failed due to an 'unavailable' error"
- },
- {
- "class": "text_panel",
- "content": "## ",
- "mode": "markdown",
- "span": 4,
- "style": {}
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "rps_panel",
- "span": 4,
- "targets": [
- {
- "expr": "avg(rate(scylla_storage_proxy_coordinator_read_unavailable{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Reads Unavailable Error by [[by]]",
- "description": "Number of Read requests that failed due to an 'unavailable' error"
- },
- {
- "class": "wps_panel",
- "span": 4,
- "targets": [
- {
- "expr": "avg(rate(scylla_storage_proxy_coordinator_write_unavailable{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Write Unavailable Error by [[by]]",
- "description": "Number of write requests that failed on a local Node"
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "avg(rate(scylla_storage_proxy_coordinator_range_unavailable{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Range Unavailable Error by [[by]]",
- "description": "Number of write requests that failed on a local Node"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "avg(rate(scylla_reactor_aio_errors{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "AIO Error by [[by]]",
- "description": "Number of AIO Errors"
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "sum(rate(scylla_reactor_abandoned_failed_futures{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Ignored Future By [[by]]",
- "description": "Total number of abandoned failed futures, futures destroyed while still containing an exception."
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "sum(rate(scylla_reactor_cpp_exceptions{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "C++ Exceptions [[by]]",
- "description": "Number of C++ exceptions thrown.\n\n An exception by itself does not indicate a problem"
- }
-
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": "Commit Log"
- }
- ]
- },
- {
- "class": "row",
- "height": "25px",
- "gridPos": {"h": 2},
- "panels": [
- {
- "class": "plain_text",
- "content": "Commit log Information
"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "bytes_panel",
- "span": 3,
- "targets": [
- {
- "expr": "avg(scylla_commitlog_disk_total_bytes{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Avg reserved disk space by [[by]]",
- "description": "Holds the size of disk space in bytes reserved for data so far. A too high value indicates that we have some bottleneck in the writing to sstables path"
- },
- {
- "class": "bytes_panel",
- "span": 3,
- "targets": [
- {
- "expr": "avg(scylla_commitlog_disk_active_bytes{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Avg used disk space by [[by]]",
- "description": "Holds the size of disk space in bytes used for data so far. A too high value indicates that we have some bottleneck in the writing to sstables path"
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "avg(rate(scylla_commitlog_flush{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Avg flush by [[by]]",
- "description": "Counts a number of times the flush() method was called for a file"
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "avg(scylla_commitlog_segments{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Segments by [[by]]",
- "description": "Holds the current number of segments"
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "avg(rate(scylla_commitlog_flush_limit_exceeded{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Avg flush limit exceeded by [[by]]",
- "description": "Counts a number of times a flush limit was exceeded. A non-zero value indicates that there are too many pending flush operations (see pending_flushes) and some of them will be blocked till the total amount of pending flush operations drops below 5."
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "avg(scylla_commitlog_pending_allocations{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Pending allocations by [[by]]",
- "description": "Holds the number of currently pending allocations. A non-zero value indicates that we have a bottleneck in the disk write flow."
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "avg(scylla_commitlog_pending_flushes{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Pending flush by [[by]]",
- "description": "Counts a number of requests blocked due to memory pressure. A non-zero value indicates that the commitlog memory quota is not enough to serve the required amount of requests."
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "avg(scylla_commitlog_unused_segments{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Unused segments by [[by]]",
- "description": "Holds the current number of unused segments. A non-zero value indicates that the disk write path became temporary slow."
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "avg(scylla_commitlog_allocating_segments{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Allocating segments by [[by]]",
- "description": "Holds the number of not closed segments that still have some free space. This value should not get too high."
- }
- ]
- },
- {
- "class": "user_panels_collapse"
- },
- {
- "class": "user_panel_row_header"
- },
- {
- "class": "user_panels_row"
- },
- {
- "class": "monitoring_version_row"
- }
- ],
- "templating": {
- "list": [
- {
- "allValue": null,
- "current": {
- "tags": [],
- "text": "Instance",
- "value": "instance"
- },
- "hide": 0,
- "includeAll": false,
- "label": "by",
- "multi": false,
- "name": "by",
- "options": [
- {
- "selected": false,
- "text": "Cluster",
- "value": "cluster"
- },
- {
- "selected": false,
- "text": "DC",
- "value": "dc"
- },
- {
- "selected": true,
- "text": "Instance",
- "value": "instance"
- },
- {
- "selected": false,
- "text": "Shard",
- "value": "instance,shard"
- }
- ],
- "query": "Cluster,DC,Instance,Shard",
- "type": "custom"
- },
- {
- "class": "template_variable_single",
- "label": "cluster",
- "name": "cluster",
- "query": "label_values(scylla_reactor_utilization, cluster)"
- },
- {
- "class": "template_variable_all",
- "label": "dc",
- "name": "dc",
- "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster\"}, dc)"
- },
- {
- "class": "template_variable_all",
- "label": "node",
- "name": "node",
- "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster|$^\", dc=~\"$dc\"}, instance)"
- },
- {
- "class": "template_variable_all",
- "label": "shard",
- "name": "shard",
- "query": "label_values(scylla_reactor_utilization,shard)",
- "sort": 3
- },
- {
- "class": "template_variable_all",
- "label": "classes",
- "name": "classes",
- "hide": 0,
- "query": "label_values(scylla_io_queue_delay,class)",
- "sort": 1
- },
- {
- "class": "template_variable_all",
- "label": "group",
- "name": "group",
- "hide": 0,
- "query": "label_values(scylla_scheduler_time_spent_on_task_quota_violations_ms,group)",
- "sort": 1
- },
- {
- "class": "aggregation_function",
- "current": {
- "tags": [],
- "text": "avg",
- "value": "avg"
- }
- },
- {
- "class": "template_variable_custom",
- "current": {
- "text": "__SCYLLA_VERSION_DOT__",
- "value": "__SCYLLA_VERSION_DOT__"
- },
- "name": "scylla_version",
- "options": [
- {
- "selected": true,
- "text": "__SCYLLA_VERSION_DOT__",
- "value": "__SCYLLA_VERSION_DOT__"
- }
- ],
- "query": "__SCYLLA_VERSION_DOT__"
- },
- {
- "class": "monitor_version_var"
- }
- ]
- },
- "tags": [
- "__SCYLLA_VERSION_DOT__"
- ],
- "time": {
- "from": "now-30m",
- "to": "now"
- },
- "annotations" :{
- "list": [
- {
- "builtIn": 1,
- "datasource": "-- Grafana --",
- "enable": true,
- "hide": true,
- "iconColor": "rgba(0, 211, 255, 1)",
- "name": "Annotations & Alerts",
- "type": "dashboard"
- },
- {
- "class" : "annotation_restart"
- },
- {
- "class" : "annotation_stall"
- },
- {
- "class" : "annotation_schema_changed"
- }
- ]
- },
- "title": "Advanced",
- "overwrite": true,
- "version": 5
- }
- }`}}
- cql.json: |-
- {{`{
- "dashboard": {
- "class": "dashboard",
- "uid": "advanced-__SCYLLA_VERSION_DASHED__",
- "rows": [
- {
- "class": "small_stat_rows"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": ""
- }
- ]
- },
- {
- "class": "row",
- "height": "25px",
- "gridPos": {"h": 2},
- "panels": [
- {
- "class": "plain_text",
- "content": "IO Queue Information
"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "collapsed": false,
- "datasource": null,
- "id": "auto",
- "gridPos": {
- "h": 1,
- "w": 24
- },
- "panels": [],
- "repeat": "classes",
- "title": "$classes",
- "type": "row"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "seconds_panel",
- "span": 3,
- "targets": [
- {
- "expr": "max(rate(scylla_io_queue_total_delay_sec{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])/rate(scylla_io_queue_total_operations{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) or on() max(scylla_io_queue_delay{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"} ) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "seastar_io_queue_delay",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "description": "seastar_io_queue_delay",
- "title": "$classes I/O Queue delay by [[by]]"
- },
- {
- "class": "graph_panel",
- "span": 3,
- "targets": [
- {
- "expr": "max(scylla_io_queue_queue_length{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "seastar_io_queue_delay",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "description": "scylla_io_queue_queue_length",
- "title": "$classes Queue length by [[by]]"
- },
- {
- "class": "bps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(scylla_io_queue_total_bytes{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "seastar_io_queue_delay",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "description": "seastar_io_queue_delay",
- "title": "$classes I/O Queue bandwidth by [[by]]"
- },
- {
- "class": "iops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(scylla_io_queue_total_operations{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "seastar_io_queue_delay",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "description": "scylla_io_queue_total_operations",
- "title": "$classes I/O Queue IOPS by [[by]]"
- },
- {
- "class": "seconds_panel",
- "span": 3,
- "targets": [
- {
- "expr": "max(rate(scylla_io_queue_total_exec_sec{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])/rate(scylla_io_queue_total_operations{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) or on() max(scylla_io_queue_delay{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"} ) by ([[by]])",
- "intervalFactor": 1,
- "metric": "scylla_io_queue_total_exec_sec",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "description": "scylla_io_queue_total_exec_sec",
- "title": "Disk $classes I/O Queue delay by [[by]]"
- },
- {
- "class": "graph_panel",
- "span": 3,
- "targets": [
- {
- "expr": "max(scylla_io_queue_disk_queue_length{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "metric": "scylla_io_queue_disk_queue_length",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "description": "scylla_io_queue_disk_queue_length",
- "title": "DISK $classes Queue length by [[by]]"
- },
- {
- "class": "seconds_panel",
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(scylla_io_queue_starvation_time_sec{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "metric": "scylla_io_queue_starvation_time_sec",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "description": "The time the class waited for being dispatched with non-empty software queue.\n\nLarge IO delays coupled with small starvation time denotes that scheduler is doing its job properly, and it's upper layer that overflows disk capacity.\n\nLarge IO delays coupled with large starvation time denotes that there might be some problem on the scheduler level that it cannot deliver IO requests from that class into disk in timely manner or the disk is slow and cannot afford timely dispatching.",
- "title": "DISK $classes starvation time by [[by]]"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": ""
- }
- ]
- },
- {
- "class": "row",
- "height": "25px",
- "gridPos": {"h": 2},
- "panels": [
- {
- "class": "plain_text",
- "content": "Information by Task Group
"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "collapsed": false,
- "datasource": null,
- "id": "auto",
- "gridPos": {
- "h": 1,
- "w": 24
- },
- "panels": [],
- "repeat": "group",
- "title": "$group",
- "type": "row"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "percentunit_panel",
- "span":3,
- "pointradius": 1,
- "targets": [
- {
- "expr": "$func(rate(scylla_scheduler_runtime_ms{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\",group=~\"$group\", shard=~\"[[shard]]\"}[1m])/1000) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Time used by [[by]] - $group",
- "description": "Scylla employs an event-loop like reactor that alternates between the execution of different groups of tasks periodically. This graph shows how much time was spent in $group group"
- },
- {
- "class": "percentunit_panel",
- "span":3,
- "pointradius": 1,
- "targets": [
- {
- "expr": "$func(rate(scylla_scheduler_time_spent_on_task_quota_violations_ms{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\",group=~\"$group\", shard=~\"[[shard]]\"}[1m])/1000) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Time spent in task quota violations by [[by]] - $group",
- "description": "Scylla employs an event-loop like reactor that alternates between the execution of different groups of tasks periodically. The maximum amount of time during which a task group can run is called the \"task quota\". Some task groups may disrespect that and run for longer. This may cause latency issues"
- },
- {
- "class": "percentunit_panel",
- "span":3,
- "pointradius": 1,
- "targets": [
- {
- "expr": "$func(rate(scylla_scheduler_starvetime_ms{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\",group=~\"$group\", shard=~\"[[shard]]\"}[1m])/1000) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Starvation time by [[by]] - $group",
- "description": "Scylla employs an event-loop like reactor that alternates between the execution of different groups of tasks periodically. The maximum amount of time during which a task group can run is called the \"task quota\". Some task groups may disrespect that and run for longer.\n\n This graph shows the amount of time the group was waiting to get CPU time."
- },
- {
- "class": "graph_panel",
- "span":3,
- "pointradius": 1,
- "targets": [
- {
- "expr": "$func(scylla_scheduler_shares{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\",group=~\"$group\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Scheduler shares [[by]] - $group",
- "description": "Shares assigned to the $group. Shares determine how Scylla reactor distributes the task quotas between groups (Higher share gets more quotas)"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": "Internal node errors"
- }
- ]
- },
- {
- "class": "row",
- "height": "25px",
- "gridPos": {"h": 2},
- "panels": [
- {
- "class": "plain_text",
- "content": "Internal node Errors - $cluster
"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "rps_panel",
- "span": 4,
- "targets": [
- {
- "expr": "avg(rate(scylla_storage_proxy_coordinator_read_errors_local_node{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Local Reads Error by [[by]]",
- "description": "Number of Read requests that failed due to an 'unavailable' error"
- },
- {
- "class": "wps_panel",
- "span": 4,
- "targets": [
- {
- "expr": "avg(rate(scylla_storage_proxy_coordinator_write_errors_local_node{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Local Write Error by [[by]]",
- "description": "Number of write requests that failed due to an 'unavailable' error"
- },
- {
- "class": "text_panel",
- "content": "## ",
- "mode": "markdown",
- "span": 4,
- "style": {}
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "rps_panel",
- "span": 4,
- "targets": [
- {
- "expr": "avg(rate(scylla_storage_proxy_coordinator_read_unavailable{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Reads Unavailable Error by [[by]]",
- "description": "Number of Read requests that failed due to an 'unavailable' error"
- },
- {
- "class": "wps_panel",
- "span": 4,
- "targets": [
- {
- "expr": "avg(rate(scylla_storage_proxy_coordinator_write_unavailable{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Write Unavailable Error by [[by]]",
- "description": "Number of write requests that failed on a local Node"
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "avg(rate(scylla_storage_proxy_coordinator_range_unavailable{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Range Unavailable Error by [[by]]",
- "description": "Number of write requests that failed on a local Node"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "avg(rate(scylla_reactor_aio_errors{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "AIO Error by [[by]]",
- "description": "Number of AIO Errors"
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "sum(rate(scylla_reactor_abandoned_failed_futures{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Ignored Future By [[by]]",
- "description": "Total number of abandoned failed futures, futures destroyed while still containing an exception."
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "sum(rate(scylla_reactor_cpp_exceptions{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "C++ Exceptions [[by]]",
- "description": "Number of C++ exceptions thrown.\n\n An exception by itself does not indicate a problem"
- }
-
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": "Commit Log"
- }
- ]
- },
- {
- "class": "row",
- "height": "25px",
- "gridPos": {"h": 2},
- "panels": [
- {
- "class": "plain_text",
- "content": "Commit log Information
"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "bytes_panel",
- "span": 3,
- "targets": [
- {
- "expr": "avg(scylla_commitlog_disk_total_bytes{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Avg reserved disk space by [[by]]",
- "description": "Holds the size of disk space in bytes reserved for data so far. A too high value indicates that we have some bottleneck in the writing to sstables path"
- },
- {
- "class": "bytes_panel",
- "span": 3,
- "targets": [
- {
- "expr": "avg(scylla_commitlog_disk_active_bytes{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Avg used disk space by [[by]]",
- "description": "Holds the size of disk space in bytes used for data so far. A too high value indicates that we have some bottleneck in the writing to sstables path"
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "avg(rate(scylla_commitlog_flush{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Avg flush by [[by]]",
- "description": "Counts a number of times the flush() method was called for a file"
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "avg(scylla_commitlog_segments{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Segments by [[by]]",
- "description": "Holds the current number of segments"
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "avg(rate(scylla_commitlog_flush_limit_exceeded{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Avg flush limit exceeded by [[by]]",
- "description": "Counts a number of times a flush limit was exceeded. A non-zero value indicates that there are too many pending flush operations (see pending_flushes) and some of them will be blocked till the total amount of pending flush operations drops below 5."
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "avg(scylla_commitlog_pending_allocations{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Pending allocations by [[by]]",
- "description": "Holds the number of currently pending allocations. A non-zero value indicates that we have a bottleneck in the disk write flow."
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "avg(scylla_commitlog_pending_flushes{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Pending flush by [[by]]",
- "description": "Counts a number of requests blocked due to memory pressure. A non-zero value indicates that the commitlog memory quota is not enough to serve the required amount of requests."
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "avg(scylla_commitlog_unused_segments{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Unused segments by [[by]]",
- "description": "Holds the current number of unused segments. A non-zero value indicates that the disk write path became temporarily slow."
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "avg(scylla_commitlog_allocating_segments{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "options": {
- "class":"desc_tooltip_options"
- },
- "title": "Allocating segments by [[by]]",
- "description": "Holds the number of not closed segments that still have some free space. This value should not get too high."
- }
- ]
- },
- {
- "class": "user_panels_collapse"
- },
- {
- "class": "user_panel_row_header"
- },
- {
- "class": "user_panels_row"
- },
- {
- "class": "monitoring_version_row"
- }
- ],
- "templating": {
- "list": [
- {
- "allValue": null,
- "current": {
- "tags": [],
- "text": "Instance",
- "value": "instance"
- },
- "hide": 0,
- "includeAll": false,
- "label": "by",
- "multi": false,
- "name": "by",
- "options": [
- {
- "selected": false,
- "text": "Cluster",
- "value": "cluster"
- },
- {
- "selected": false,
- "text": "DC",
- "value": "dc"
- },
- {
- "selected": true,
- "text": "Instance",
- "value": "instance"
- },
- {
- "selected": false,
- "text": "Shard",
- "value": "instance,shard"
- }
- ],
- "query": "Cluster,DC,Instance,Shard",
- "type": "custom"
- },
- {
- "class": "template_variable_single",
- "label": "cluster",
- "name": "cluster",
- "query": "label_values(scylla_reactor_utilization, cluster)"
- },
- {
- "class": "template_variable_all",
- "label": "dc",
- "name": "dc",
- "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster\"}, dc)"
- },
- {
- "class": "template_variable_all",
- "label": "node",
- "name": "node",
- "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster|$^\", dc=~\"$dc\"}, instance)"
- },
- {
- "class": "template_variable_all",
- "label": "shard",
- "name": "shard",
- "query": "label_values(scylla_reactor_utilization,shard)",
- "sort": 3
- },
- {
- "class": "template_variable_all",
- "label": "classes",
- "name": "classes",
- "hide": 0,
- "query": "label_values(scylla_io_queue_delay,class)",
- "sort": 1
- },
- {
- "class": "template_variable_all",
- "label": "group",
- "name": "group",
- "hide": 0,
- "query": "label_values(scylla_scheduler_time_spent_on_task_quota_violations_ms,group)",
- "sort": 1
- },
- {
- "class": "aggregation_function",
- "current": {
- "tags": [],
- "text": "avg",
- "value": "avg"
- }
- },
- {
- "class": "template_variable_custom",
- "current": {
- "text": "__SCYLLA_VERSION_DOT__",
- "value": "__SCYLLA_VERSION_DOT__"
- },
- "name": "scylla_version",
- "options": [
- {
- "selected": true,
- "text": "__SCYLLA_VERSION_DOT__",
- "value": "__SCYLLA_VERSION_DOT__"
- }
- ],
- "query": "__SCYLLA_VERSION_DOT__"
- },
- {
- "class": "monitor_version_var"
- }
- ]
- },
- "tags": [
- "__SCYLLA_VERSION_DOT__"
- ],
- "time": {
- "from": "now-30m",
- "to": "now"
- },
- "annotations" :{
- "list": [
- {
- "builtIn": 1,
- "datasource": "-- Grafana --",
- "enable": true,
- "hide": true,
- "iconColor": "rgba(0, 211, 255, 1)",
- "name": "Annotations & Alerts",
- "type": "dashboard"
- },
- {
- "class" : "annotation_restart"
- },
- {
- "class" : "annotation_stall"
- },
- {
- "class" : "annotation_schema_changed"
- }
- ]
- },
- "title": "Advanced",
- "overwrite": true,
- "version": 5
- }
- }`}}
- detailed.json: |-
- {{`{
- "dashboard": {
- "class": "dashboard",
- "uid": "detailed-__SCYLLA_VERSION_DASHED__",
- "rows": [
- {
- "class": "logo_row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "percent_panel",
- "description": "The percentage of the time during which Scylla utilized the CPU. Note that because Scylla does busy polling for some time before going idle, CPU utilization as seen by the operating system may be much higher. Your system is not yet CPU-bottlenecked until this metric is high.",
- "span": 3,
- "targets": [
- {
- "expr": "avg(scylla_reactor_utilization{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"} ) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 4
- }
- ],
- "title": "Load"
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_transport_requests_served{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) + ($func(rate(scylla_thrift_served{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) or max(scylla_transport_requests_served{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by([[by]])*0)",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "Amount of requests served as the coordinator. Imbalances here represent dispersion at the connection level, not your data model.",
- "title": "Requests Served per [[by]] - Coordinator"
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_database_total_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_database_total_reads",
- "title": "Reads per [[by]] - Replica"
- },
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_database_total_writes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_database_total_writes",
- "title": "Writes per [[by]] - Replica"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": "reads and writes"
- }
- ]
- },
- {
- "class": "row",
- "height": "25px",
- "gridPos": {"h": 2},
- "panels": [
- {
- "class": "plain_text",
- "content": "Reads and Writes - Coordinator
"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "height": "200px",
- "panels": [
- {
- "class": "writes_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(scylla_storage_proxy_coordinator_foreground_writes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "Foreground writes are writes that weren't acknowledged yet to the application. For instance, if a single replica responded and two are needed due to the consistency level. This metric represents a queue size, not a rate. High values here correlate with increased write latencies.",
- "title": "Foreground Writes per [[by]]"
- },
- {
- "class": "writes_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(scylla_storage_proxy_coordinator_background_writes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "Background writes are writes that are already acknowledged to the application but have additional work to be done. For instance, if a replica responded and only one is needed, this request is still listed as a background request until all replicas respond. This metric represents a queue size, not a rate. High values here correlate with increased write latencies.",
- "title": "Background Writes per [[by]]"
- },
- {
- "class": "reads_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(scylla_storage_proxy_coordinator_foreground_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "Foreground reads are reads that weren't acknowledged yet to the application. For instance, if a single replica responded and two are needed due to the consistency level. This metric represents a queue size, not a rate. High values here correlate with increased read latencies.",
- "title": "Foreground Reads per [[by]]"
- },
- {
- "class": "reads_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(scylla_storage_proxy_coordinator_background_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 4
- }
- ],
- "description": "Background reads are reads that are already acknowledged to the application but have additional work to be done. For instance, if a replica responded and only one is needed, this request is still listed as a background request until all replicas respond. This metric represents a queue size, not a rate. High values here correlate with increased read latencies.",
- "title": "Background Reads per [[by]]"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "height": "200px",
- "panels": [
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_hints_manager_written{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_hints_manager_written",
- "title": "Hints Written per [[by]]"
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_hints_manager_sent{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 4
- }
- ],
- "description": "scylla_hints_manager_sent",
- "title": "Hints sent per [[by]]"
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_speculative_digest_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 4
- }
- ],
- "description" : "Number of times a digest read was done on behalf of a speculative retry.\n\nSpeculative retry is a mechanism that causes the client or server to speculate that a request may fail, and send a new request.\n\nspeculative retry may reduce latency in exchange for system load, but only if there is little activity.\n\nA lot of speculative retries increases load and can harm latency more than helping.",
- "title": "Speculative Digest Reads By [[by]]"
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_speculative_data_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 4
- }
- ],
- "description" : "Number of times a read was done on behalf of a speculative retry.\n\nSpeculative retry is a mechanism that causes the client or server to speculate that a request may fail, and send a new request.\n\nspeculative retry may reduce latency in exchange for system load, but only if there is little activity.\n\nA lot of speculative retries increases load and can harm latency more than helping.",
- "title": "Speculative Data Reads By [[by]]"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": "Timeouts and Errors"
- }
- ]
- },
- {
- "class": "row",
- "height": "25px",
- "gridPos": {"h": 2},
- "panels": [
- {
- "class": "plain_text",
- "content": "Timeouts and Errors - Coordinator
"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_write_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "Requests that Scylla tried to write but timed out. Timeouts are counted in the node that received the request (the coordinator), not at the replicas.",
- "title": "Write Timeouts/Seconds per [[by]]"
- },
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_write_unavailable{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "Requests that Scylla did not even try to write because replicas that were needed to execute this write were unavailable. Unavailable writes are counted in the node that received the request (the coordinator), not at the replicas.",
- "title": "Write Unavailable/Seconds per [[by]]"
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_read_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]) + rate(scylla_storage_proxy_coordinator_cas_read_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])+rate(scylla_storage_proxy_coordinator_range_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "Requests that Scylla tried to read but timed out. Timeouts are counted in the node that received the request (the coordinator), not at the replicas.",
- "title": "Read Timeouts/Seconds per [[by]]"
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_read_unavailable{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 4
- }
- ],
- "description": "Requests that Scylla did not even try to read because replicas that were needed to execute this read were unavailable. Unavailable reads are counted in the node that received the request (the coordinator), not at the replicas.",
- "title": "Read Unavailable/Seconds per [[by]]"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": "Replica"
- }
- ]
- },
- {
- "class": "row",
- "gridPos": {"h": 2},
- "panels": [
- {
- "class": "plain_text",
- "content": "Replica
"
- }
- ]
- },
- {
- "class": "row",
- "height": "auto",
- "panels": [
- {
- "class": "reads_panel",
- "description" : "The number of currently active read operations",
- "span": 3,
- "targets": [
- {
- "expr": "$func(scylla_database_active_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Active reads"
- },
- {
- "class": "reads_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(scylla_database_queued_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "number of currently queued read operations",
- "title": "Queued reads"
- },
- {
- "class": "writes_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(scylla_database_requests_blocked_memory_current{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description" :"The current number of requests blocked due to reaching the memory quota. Non-zero value indicates that our bottleneck is memory",
- "title": "Writes currently blocked on dirty"
- },
- {
- "class": "writes_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(scylla_commitlog_pending_allocations{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description" :"number of currently pending allocations. A non-zero value indicates that we have a bottleneck in the disk write flow.",
- "title": "Writes currently blocked on commitlog"
- },
- {
- "class": "graph_panel",
- "span": 3,
- "targets": [
- {
- "expr": "clamp_max(1 + sum((rate(scylla_cache_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]) - rate(scylla_cache_reads_with_misses{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]))) by ([[by]])/(sum(rate(scylla_cache_reads_with_misses{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) + 0.00001),100)",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description" :"Reciprocal Miss Rate is a score in the range of 1 to 100 that is used to decide the fraction of read requests to send to each replica - a replica with twice the RMR value of another replica will serve twice the number of read requests.\n\nRMR is calculated on a table level, this is an aggregate estimation of that score.",
- "title": "Reciprocal Miss Rate (HWLB)"
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_database_total_reads_failed{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_database_total_reads_failed",
- "title": "Reads failed"
- },
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_database_requests_blocked_memory{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_database_requests_blocked_memory",
- "title": "Writes blocked on dirty"
- },
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_commitlog_requests_blocked_memory{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_commitlog_requests_blocked_memory",
- "title": "Writes blocked on commitlog"
- },
- {
- "class": "requestsps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_transport_requests_shed{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_transport_requests_shed",
- "title": "Requests Shed"
- },
- {
- "class": "text_panel",
- "content": "",
- "mode": "markdown",
- "span": 3
- },
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_database_total_writes_failed{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_database_total_writes_failed",
- "title": "Writes failed"
- },
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_database_total_writes_timedout{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_database_total_writes_timedout",
- "title": "Writes timed out"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": "Cache"
- }
- ]
- },
- {
- "class": "row",
- "height": "25px",
- "gridPos": {"h": 2},
- "panels": [
- {
- "class": "text_panel",
- "content": "Cache - Replica
",
- "style": {}
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "rps_panel",
- "span": 6,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]) - rate(scylla_cache_reads_with_misses{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "number of reads that were served from the cache",
- "title": "Reads with no misses"
- },
- {
- "class": "rps_panel",
- "span": 6,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_reads_with_misses{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_cache_reads_with_misses",
- "title": "Reads with misses"
- }
- ]
- },
- {
- "class" : "row",
- "panels": [
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_row_hits{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_cache_row_hits",
- "title": "Row Hits"
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_partition_hits{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_cache_partition_hits",
- "title": "Partition Hits"
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_row_misses{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_cache_row_misses",
- "title": "Row Misses"
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_partition_misses{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_cache_partition_misses",
- "title": "Partition Misses"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_row_insertions{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_cache_row_insertions",
- "title": "Row Insertions"
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_partition_insertions{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_cache_partition_insertions",
- "title": "Partition Insertions"
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_row_evictions{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_cache_row_evictions",
- "title": "Row Evictions"
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_partition_evictions{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_cache_partition_evictions",
- "title": "Partition Evictions"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_rows_merged_from_memtable{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_cache_rows_merged_from_memtable",
- "title": "Row Merges"
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_partition_merges{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_cache_partition_merges",
- "title": "Partition Merges"
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_row_removals{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_cache_row_removals",
- "title": "Row Removals"
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_partition_removals{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_cache_partition_removals",
- "title": "Partition Removals"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "graph_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(scylla_cache_rows{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_cache_rows",
- "title": "Rows"
- },
- {
- "class": "graph_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(scylla_cache_partitions{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_cache_partitions",
- "title": "Partitions"
- },
- {
- "class": "bytes_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(scylla_cache_bytes_used{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_cache_bytes_used",
- "title": "Used Bytes"
- },
- {
- "class": "bytes_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(scylla_cache_bytes_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_cache_bytes_total",
- "title": "Total Bytes"
- },
- {
- "class": "graph_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cql_prepared_cache_evictions{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_cql_prepared_cache_evictions",
- "title": "Prepared Statements Cache Eviction"
- },
- {
- "class": "graph_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cql_authorized_prepared_statements_cache_evictions{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_cql_authorized_prepared_statements_cache_evictions",
- "title": "Authorized Prepared Statements Cache Eviction"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "collapsed": true,
- "title": "Materialized Views"
- }
- ]
- },
- {
- "class": "row",
- "height": "25px",
- "gridPos": {"h": 2},
- "panels": [
- {
- "class": "text_panel",
- "content": "Materialized Views - Replica
",
- "style": {}
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_database_total_view_updates_pushed_local{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "View Local Update",
- "description" : "Number of view updates pushed locally"
- },
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_database_total_view_updates_pushed_remote{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "View Remote Update",
- "description" : "Number of view updates pushed remotely"
- },
- {
- "class": "bytes_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(scylla_database_view_update_backlog{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "View Update Backlog",
- "description" : "Size in bytes of the view update backlog at each base replica."
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_database_dropped_view_updates{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Dropped View Updates",
- "description" : "Number of dropped view updates due to an excessive view update backlog."
- } ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_hints_for_views_manager_sent{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Hints for view",
- "description" : "Number of hints sent for view."
- },
- {
- "class": "writes_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(scylla_storage_proxy_coordinator_current_throttled_base_writes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Throttled Base Writes",
- "description" : "Currently throttled base writes, as a consequence of the respective view update backlog."
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "collapsed": true,
- "dashversion":[">4.5", ">2021.1"],
- "title": "Tombstones"
- }
- ]
- },
- {
- "class": "row",
- "dashversion":[">4.5", ">2021.1"],
- "panels": [
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_sstables_range_tombstone_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Range Tombstones reads",
- "dashversion":[">4.6", ">2021.1"],
- "description" : "Amount of range tombstones processed during read."
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_range_tombstone_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Cache Range Tombstones Read",
- "dashversion":[">4.6", ">2021.1"],
- "description" : "Amount of range tombstones processed during read."
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_sstables_row_tombstone_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Row Tombstones reads",
- "dashversion":[">4.6", ">2021.1"],
- "description" : "Amount of row tombstones read"
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_row_tombstone_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Cache Row Tombstones reads",
- "dashversion":[">4.6", ">2021.1"],
- "description" : "Amount of cache row tombstones read"
- },
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_sstables_tombstone_writes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Tombstones Writes",
- "description" : "Amount of tombstones writes."
- },
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_sstables_range_tombstone_writes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Range Tombstones Writes",
- "description" : "Amount of range tombstones writes."
- },
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_sstables_cell_tombstone_writes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Cell Tombstones Writes",
- "description" : "Amount of Cell Tombstones Writes."
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "collapsed": true,
- "title": "LWT"
- }
- ]
- },
- {
- "class": "row",
- "height": "25px",
- "gridPos": {"h": 2},
- "panels": [
- {
- "class": "text_panel",
- "content": "LWT - Coordinator
",
- "style": {}
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_read_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) or on([[by]]) $func(rate(scylla_storage_proxy_coordinator_cas_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "LWT Reads",
- "description" : "LWT read rate."
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "sum(casrlatencya{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", by=\"[[by]]\"} or on([[by]]) $func(rate(scylla_storage_proxy_coordinator_cas_read_latency_sum{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])/($func(rate(scylla_storage_proxy_coordinator_cas_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]]) + 1)",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "LWT Avrage Read latency",
- "description" : "LWT Avrage Read latency."
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "casrlatencyp95{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "95% latency",
- "description" : "LWT 95% Read latency."
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_read_timeouts{instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"statement|$\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "LWT Read Timeouts",
- "description" : "LWT Read Timeouts"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_write_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) or on ([[by]]) ($func(rate(scylla_storage_proxy_coordinator_cas_write_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]))",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "LWT Writes",
- "description" : "LWT write rate."
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "sum(caswlatencya{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", by=\"[[by]]\"} or on([[by]]) ($func(rate(scylla_storage_proxy_coordinator_cas_write_latency_sum{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])/($func(rate(scylla_storage_proxy_coordinator_cas_write_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) + 1))",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "LWT Avrage Write latency",
- "description" : "LWT Avrage Write latency."
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "caswlatencyp95{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "95% latency",
- "description" : "LWT 95% write latency."
- },
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_write_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "LWT Write Timeouts",
- "description" : "LWT Write Timeouts"
- }
- ]
- },
- {
- "class": "row",
- "dashversion":[">4.2", ">2021.1"],
- "panels": [
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_total_operations{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Paxos operations",
- "description" : "A single Read/Write LWT will result in multiple paxos operations"
- },
- {
- "class": "graph_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(scylla_storage_proxy_coordinator_cas_foreground{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Paxos Foreground operations",
- "description" : "How many paxos operations that did not yet produce a result are running"
- },
- {
- "class": "graph_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(scylla_storage_proxy_coordinator_cas_background{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Paxos Background operations",
- "description" : "How many paxos operations are still running after a result was alredy returned"
- },
- {
- "class": "text_panel",
- "span": 3,
- "options": {
- "content": "# "
- }
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_write_condition_not_met{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "LWT Condition-Not-Met",
- "description" : "An LWT INSERT, UPDATE or DELETE command that involves a condition will be rejected if the condition is not met.\n\nWhile it is ok, a high value may indicate that there is a potential problem with data distribution"
- },
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_write_contention_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "LWT Write Contention",
- "description" : "Number of times some INSERT, UPDATE or DELETE request with conditions had to retry because there was a concurrent conditional statement against the same key. Each retry is performed after a randomized sleep interval, so it can lead to statement timing out completely.\n\nIt can indicates contention over a hot row or key"
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_read_contention_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]]) - $func(rate(scylla_storage_proxy_coordinator_cas_read_contention_bucket{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", le=\"1.000000\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "LWT Read Contention",
- "description" : "Number of times some SELECT with SERIAL consistency had to retry because there was a concurrent conditional statement against the same key. Each retry is performed after a randomized sleep interval, so it can lead to statement timing out completely.\n\nIt can indicates contention over a hot row or key"
- },
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_write_timeout_due_to_uncertainty{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "LWT Write Timeout Due to Uncertainty",
- "description" : "Number of partially succeeded conditional statements. These statements were not committed by the coordinator, due to some replicas responding with errors or timing out. The coordinator had to propagate the error to the client. However, the statement succeeded on a minority of replicas, so may later be propagated to the rest during repair."
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_write_unavailable{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "LWT Write Unavailable",
- "description" : "Number of times a INSERT, UPDATE, or DELETE with conditions failed after being unable to contact enough replicas to match the consistency level"
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_read_unavailable{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "LWT Read Unavailable",
- "description" : "Number of times a SELECT with SERIAL consistency failed after being unable to contact enough replicas to match the consistency level"
- },
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_write_unfinished_commit{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "LWT Write Unfinished - Repair Attempts",
- "description" : "Number of Paxos-repairs of INSERT, UPDATE, or DELETE with conditions.\n\nA repair is necessary when a previous Paxos statement was partialy successful. A subsequent statement then may not proceed before completing the work of its predecessor. A repair is not guaranteed to succeed, the metric indicates the number of repair attempts made"
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_read_unfinished_commit{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "LWT Read Unfinished - Repair Attempts",
- "description" : "Number of Paxos-repairs of SELECT statement with SERIAL consistency.\n\nA repair is necessary when a previous Paxos statement was partialy successful. A subsequent statement then may not proceed before completing the work of its predecessor. A repair is not guaranteed to succeed, the metric indicates the number of repair attempts made"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_failed_read_round_optimization{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Failed Read-Round Optimization",
- "description" : "Normally, a PREPARE Paxos-round piggy-backs the previous value along with the PREPARE response. When the coordinator is unable to obtain the previous value (or its digest) from some of the participants, or when the digests did not match, a separate repair round has to be performed.\n\nThis indicates that some Paxos queries did not run successfully to completion, e.g. because some node is overloaded, down, or there was contention around a key."
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_prune{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "LWT Prune",
- "description" : "Number of pruning requests.\n\nA successful conditional statement deletes the intermediate state from system.paxos table using PRUNE command."
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_dropped_prune{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "LWT Dropped Prune",
- "description" : "Number of Dropped pruning requests.\n\nA successful conditional statement deletes the intermediate state from system.paxos table using PRUNE command. If the system is busy it may not keep up with the PRUNE requests, so such requests are dropped.\n\nHigh value suggests the system is overloaded and also that system.paxos table is taking up space. If a prune is dropped, system.paxos table key and value for respective LWT transaction will stay around until next transaction against the same key or until the gc_grace_period, when it's removed by compaction."
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "collapsed": true,
- "title": "CDC"
- }
- ]
- },
- {
- "class": "row",
- "height": "25px",
- "gridPos": {"h": 2},
- "panels": [
- {
- "class": "text_panel",
- "content": "CDC - Replica
",
- "style": {}
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_cdc_operations_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "CDC Operations",
- "description" : "The rate of CDC operations."
- },
- {
- "class": "ops_panel",
- "span": 4,
- "targets": [
- {
- "expr": "$func(rate(scylla_cdc_operations_failed{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])/($func(rate(scylla_storage_proxy_coordinator_cas_read_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_cas_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) + 1)",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Failed CDC operations",
- "description" : "The rate of failed CDC operations."
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": "Memory"
- }
- ]
- },
- {
- "class": "row",
- "height": "25px",
- "gridPos": {"h": 2},
- "panels": [
- {
- "class": "text_panel",
- "content": "Memory - Replica
",
- "style": {}
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "bytes_panel",
- "span": 6,
- "targets": [
- {
- "expr": "$func(scylla_lsa_total_space_bytes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_lsa_total_space_bytes",
- "title": "LSA total memory"
- },
- {
- "class": "bytes_panel",
- "span": 6,
- "targets": [
- {
- "expr": "$func(scylla_lsa_non_lsa_used_space_bytes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_lsa_non_lsa_used_space_bytes",
- "title": "Non-LSA used memory"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": "Compaction"
- }
- ]
- },
- {
- "class": "row",
- "height": "25px",
- "gridPos": {"h": 2},
- "panels": [
- {
- "class": "text_panel",
- "content": "Compaction - Replica
",
- "style": {}
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "graph_panel_int",
- "span": 4,
- "targets": [
- {
- "expr": "$func(scylla_compaction_manager_compactions{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_compaction_manager_compactions",
- "title": "Running Compactions"
- },
- {
- "class": "percent_panel",
- "span": 4,
- "targets": [
- {
- "expr": "($func(rate(scylla_scheduler_runtime_ms{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", group=\"compaction\"}[1m])) by ([[by]]))/10",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "Percentage of CPU time used by compaction",
- "title": "Compactions CPU Runtime"
- },
- {
- "class": "graph_panel",
- "span": 4,
- "targets": [
- {
- "refId": "A",
- "expr": "avg(scylla_scheduler_shares{group=\"compaction\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "format": "time_series"
- }
- ],
- "description": "Shares assigned to the compaction",
- "title": "Compactions Shares"
- }
-
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "repeat":"scheduling_group",
- "collapsed": false,
- "title": "Latencies - $scheduling_group"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[60s])) by ([[by]]) or on([[by]]) $func(rate(scylla_storage_proxy_coordinator_write_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$scheduling_group\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_storage_proxy_coordinator_write_latency",
- "title": "Writes by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "wlatencya{by=\"[[by]]\", instance=~\"[[node]]|$^\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Average write latency by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "wlatencyp95{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group|$\"}",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "95th percentile write latency by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "wlatencyp99{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "99th percentile write latency by [[by]]"
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[60s])) by ([[by]]) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_storage_proxy_coordinator_read_latency",
- "title": "Reads by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "rlatencya{by=\"[[by]]\", instance=~\"[[node]]|$^\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Average read latency by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "rlatencyp95{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "95th percentile read latency by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "rlatencyp99{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "99th percentile read latency by [[by]]"
- },
- {
- "class": "bytes_panel",
- "span": 3,
- "dashversion":[">5.3", ">2022.1"],
- "description": "Bytes received in CQL messages",
- "targets": [
- {
- "expr": "sum(rate(scylla_transport_cql_request_bytes{kind=~\"$kind\",instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Received payload by [[by]]"
- },
- {
- "class": "bytes_panel",
- "span": 3,
- "dashversion":[">5.3", ">2022.1"],
- "description": "Average CQL message size (received)",
- "targets": [
- {
- "expr": "sum(rate(scylla_transport_cql_request_bytes{kind=~\"$kind\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]])/sum(rate(scylla_transport_cql_requests_count{kind=~\"$kind\",instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Average received payload size by [[by]]"
- },
- {
- "class": "bytes_panel",
- "span": 3,
- "dashversion":[">5.3", ">2022.1"],
- "description": "Bytes sent in CQL messages",
- "targets": [
- {
- "expr": "sum(rate(scylla_transport_cql_response_bytes{kind=~\"$kind\",instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Response payload by [[by]]"
- },
- {
- "class": "bytes_panel",
- "span": 3,
- "dashversion":[">5.3", ">2022.1"],
- "targets": [
- {
- "expr": "sum(rate(scylla_transport_cql_response_bytes{kind=~\"$kind\",instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]])/sum(rate(scylla_transport_cql_requests_count{kind=~\"$kind\",instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "Average CQL message size (sent)",
- "title": "Average response payload size by [[by]]"
- },
- {
- "class": "bytes_panel",
- "span": 3,
- "dashversion":[">5.3", ">2022.1"],
- "targets": [
- {
- "expr": "sum(rate(scylla_transport_cql_request_bytes{kind=~\"QUERY|EXECUTE\",instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]])/(sum(rate(scylla_storage_proxy_coordinator_write_latency_summary_count{instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]]) or on([[by]]) sum(rate(scylla_storage_proxy_coordinator_write_latency_count{instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]]))",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "This is a ballpark estimation of the write-messages size (like insert and update).\n\nIt is based on the assumption that write-messages are responsible for most inwards traffic.",
- "title": "Estimated write message size by [[by]]"
- },
- {
- "class": "bytes_panel",
- "span": 3,
- "dashversion":[">5.3", ">2022.1"],
- "targets": [
- {
- "expr": "sum(rate(scylla_transport_cql_response_bytes{kind=~\"QUERY|EXECUTE\",instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]])/(sum(rate(scylla_storage_proxy_coordinator_read_latency_summary_count{instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]]) or on ([[by]]) sum(rate(scylla_storage_proxy_coordinator_read_latency_count{instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]]))",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "This is a ballpark estimation of the read-messages size (like select).\n\nIt is based on the assumption that read-messages are responsible for most outbound traffic.",
- "title": "Estimated read message size by [[by]]"
- }
- ]
- },
- {
- "class": "user_panels_collapse"
- },
- {
- "class": "user_panel_row_header"
- },
- {
- "class": "user_panels_row"
- },
- {
- "class": "monitoring_version_row"
- }
- ],
- "templating": {
- "list": [
- {
- "class":"by_template_var"
- },
- {
- "class": "template_variable_single",
- "label": "cluster",
- "name": "cluster",
- "query": "label_values(scylla_reactor_utilization, cluster)"
- },
- {
- "class": "template_variable_all",
- "label": "dc",
- "name": "dc",
- "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster\"}, dc)"
- },
- {
- "class": "template_variable_all",
- "label": "node",
- "name": "node",
- "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster|$^\", dc=~\"$dc\"}, instance)"
- },
- {
- "class": "template_variable_all",
- "label": "shard",
- "name": "shard",
- "allValue":".+",
- "query": "label_values(scylla_reactor_utilization,shard)",
- "sort": 3
- },
- {
- "class": "template_variable_all",
- "label": "SG",
- "dashversion":[">2021.1"],
- "current": {
- "selected": true,
- "tags": [],
- "text": [
- "sl:default"
- ],
- "value": [
- "sl:default"
- ]
- },
- "name": "scheduling_group",
- "query": "label_values(all_scheduling_group{cluster=~\"$cluster|$^\"}, scheduling_group_name)",
- "sort": 3
- },
- {
- "class": "template_variable_all",
- "label": "SG",
- "dashversion":[">4.3"],
- "current": {
- "selected": true,
- "tags": [],
- "text": [
- "statement"
- ],
- "value": [
- "statement"
- ]
- },
- "name": "scheduling_group",
- "query": "label_values(all_scheduling_group{cluster=~\"$cluster|$^\"}, scheduling_group_name)",
- "sort": 3
- },
- {
- "class": "template_variable_all",
- "label": "cql_kind",
- "dashversion":[">5.3", ">2022.1"],
- "name": "kind",
- "query": "label_values(scylla_transport_cql_requests_count{cluster=~\"$cluster|$^\"}, kind)",
- "sort": 3
- },
- {
- "class": "aggregation_function"
- },
- {
- "class": "template_variable_custom",
- "current": {
- "text": "__SCYLLA_VERSION_DOT__",
- "value": "__SCYLLA_VERSION_DOT__"
- },
- "name": "scylla_version",
- "options": [
- {
- "selected": true,
- "text": "__SCYLLA_VERSION_DOT__",
- "value": "__SCYLLA_VERSION_DOT__"
- }
- ],
- "query": "__SCYLLA_VERSION_DOT__"
- },
- {
- "class": "monitor_version_var"
- }
- ]
- },
- "tags": [
- "__SCYLLA_VERSION_DOT__"
- ],
- "time": {
- "from": "now-30m",
- "to": "now"
- },
- "title": "Detailed",
- "overwrite": true,
- "version": 5
- }
- }`}}
- scylla-ks.json: |-
- {{`{
- "dashboard": {
- "class": "dashboard",
- "uid": "ks-__SCYLLA_VERSION_DASHED__",
- "rows": [
- {
- "class": "row",
- "panels": [
- {
- "class":"text_panel",
- "options": {
- "mode": "html",
- "content": "
[[cluster]]Keyspace and table level metrics are not available. To enable, set enable_keyspace_column_family_metrics to true in scylla.yaml. Note this has a significant effect on the monitoring stack sizing.
"
- },
- "gridPos":{
- "x":0,
- "y":1,
- "w":24,
- "h":3
- }
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "repeat":"table",
- "collapsed": true,
- "title": "Latencies - $ks:$table"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_column_family_write_latency_count{ks=\"$ks\", cf=\"$table\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Writes by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "wlatencyaks{ks=\"$ks\", cf=\"$table\", by=\"[[by]]\", instance=~\"[[node]]|$^\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Average write latency by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "wlatencyp95ks{ks=\"$ks\", cf=\"$table\", by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"} ",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "95th percentile write latency by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "wlatencyp99ks{ks=\"$ks\", cf=\"$table\", by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"} ",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "99th percentile write latency by [[by]]"
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_column_family_read_latency_count{ks=\"$ks\", cf=\"$table\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Reads by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "rlatencyaks{ks=\"$ks\", cf=\"$table\", by=\"[[by]]\", instance=~\"[[node]]|$^\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Average read latency by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "rlatencyp95ks{ks=\"$ks\", cf=\"$table\", by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"} ",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "95th percentile read latency by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "rlatencyp99ks{ks=\"$ks\", cf=\"$table\", by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"} ",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "99th percentile read latency by [[by]]"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": ""
- }
- ]
- },
- {
- "class": "user_panel_row_header"
- },
- {
- "class": "user_panels_row"
- },
- {
- "class": "monitoring_version_row"
- }
- ],
- "templating": {
- "list": [
- {
- "class":"by_template_var"
- },
- {
- "class": "template_variable_single",
- "label": "cluster",
- "name": "cluster",
- "query": "label_values(scylla_reactor_utilization, cluster)"
- },
- {
- "class": "template_variable_all",
- "label": "dc",
- "name": "dc",
- "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster\"}, dc)"
- },
- {
- "class": "template_variable_all",
- "label": "node",
- "name": "node",
- "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster|$^\", dc=~\"$dc\"}, instance)"
- },
- {
- "class": "template_variable_all",
- "label": "shard",
- "name": "shard",
- "query": "label_values(scylla_reactor_utilization,shard)",
- "sort": 3
- },
- {
- "class": "template_variable_single",
- "label": "ks",
- "name": "ks",
- "query": "label_values(scylla_column_family_write_latency_count{cluster=~\"$cluster|$^\"},ks)",
- "sort": 3
- },
- {
- "class": "template_variable_all",
- "label": "table",
- "name": "table",
- "query": "label_values(scylla_column_family_write_latency_count{cluster=~\"$cluster|$^\", ks=\"$ks\"},cf)",
- "sort": 3
- },
- {
- "class": "aggregation_function"
- },
- {
- "class": "template_variable_custom",
- "current": {
- "text": "__SCYLLA_VERSION_DOT__",
- "value": "__SCYLLA_VERSION_DOT__"
- },
- "name": "scylla_version",
- "options": [
- {
- "selected": true,
- "text": "__SCYLLA_VERSION_DOT__",
- "value": "__SCYLLA_VERSION_DOT__"
- }
- ],
- "query": "__SCYLLA_VERSION_DOT__"
- },
- {
- "class": "monitor_version_var"
- },
- {
- "allValue": null,
- "datasource": "prometheus",
- "definition": "scylla_column_family_write_latency_count{cluster=~\"$cluster|$^\"}",
- "description": null,
- "error": null,
- "hide": 2,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "no_ks",
- "options": [],
- "query": {
- "query": "scylla_column_family_write_latency_count{cluster=~\"$cluster|$^\"}",
- "refId": "StandardVariableQuery"
- },
- "refresh": 1,
- "regex": "/^(scylla_column_family_write_latency_count)/",
- "skipUrlSync": false,
- "sort": 0,
- "type": "query"
- }
- ]
- },
- "tags": [
- "__SCYLLA_VERSION_DOT__"
- ],
- "time": {
- "from": "now-30m",
- "to": "now"
- },
- "title": "Keyspace",
- "overwrite": true,
- "version": 5
- }
- }`}}
- scylla-manager.json: |-
- {{`{
- "dashboard": {
- "class": "dashboard",
- "uid": "ks-__SCYLLA_VERSION_DASHED__",
- "rows": [
- {
- "class": "row",
- "panels": [
- {
- "class":"text_panel",
- "options": {
- "mode": "html",
- "content": "
[[cluster]]Keyspace and table level metrics are not available. To enable, set enable_keyspace_column_family_metrics to true in scylla.yaml. Note this has a significant effect on the monitoring stack sizing.
"
- },
- "gridPos":{
- "x":0,
- "y":1,
- "w":24,
- "h":3
- }
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "repeat":"table",
- "collapsed": true,
- "title": "Latencies - $ks:$table"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_column_family_write_latency_count{ks=\"$ks\", cf=\"$table\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Writes by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "wlatencyaks{ks=\"$ks\", cf=\"$table\", by=\"[[by]]\", instance=~\"[[node]]|$^\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Average write latency by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "wlatencyp95ks{ks=\"$ks\", cf=\"$table\", by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"} ",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "95th percentile write latency by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "wlatencyp99ks{ks=\"$ks\", cf=\"$table\", by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"} ",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "99th percentile write latency by [[by]]"
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_column_family_read_latency_count{ks=\"$ks\", cf=\"$table\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[60s])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Reads by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "rlatencyaks{ks=\"$ks\", cf=\"$table\", by=\"[[by]]\", instance=~\"[[node]]|$^\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Average read latency by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "rlatencyp95ks{ks=\"$ks\", cf=\"$table\", by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"} ",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "95th percentile read latency by [[by]]"
- },
- {
- "class": "us_panel",
- "span": 3,
- "targets": [
- {
- "expr": "rlatencyp99ks{ks=\"$ks\", cf=\"$table\", by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"} ",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "99th percentile read latency by [[by]]"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": ""
- }
- ]
- },
- {
- "class": "user_panel_row_header"
- },
- {
- "class": "user_panels_row"
- },
- {
- "class": "monitoring_version_row"
- }
- ],
- "templating": {
- "list": [
- {
- "class":"by_template_var"
- },
- {
- "class": "template_variable_single",
- "label": "cluster",
- "name": "cluster",
- "query": "label_values(scylla_reactor_utilization, cluster)"
- },
- {
- "class": "template_variable_all",
- "label": "dc",
- "name": "dc",
- "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster\"}, dc)"
- },
- {
- "class": "template_variable_all",
- "label": "node",
- "name": "node",
- "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster|$^\", dc=~\"$dc\"}, instance)"
- },
- {
- "class": "template_variable_all",
- "label": "shard",
- "name": "shard",
- "query": "label_values(scylla_reactor_utilization,shard)",
- "sort": 3
- },
- {
- "class": "template_variable_single",
- "label": "ks",
- "name": "ks",
- "query": "label_values(scylla_column_family_write_latency_count{cluster=~\"$cluster|$^\"},ks)",
- "sort": 3
- },
- {
- "class": "template_variable_all",
- "label": "table",
- "name": "table",
- "query": "label_values(scylla_column_family_write_latency_count{cluster=~\"$cluster|$^\", ks=\"$ks\"},cf)",
- "sort": 3
- },
- {
- "class": "aggregation_function"
- },
- {
- "class": "template_variable_custom",
- "current": {
- "text": "__SCYLLA_VERSION_DOT__",
- "value": "__SCYLLA_VERSION_DOT__"
- },
- "name": "scylla_version",
- "options": [
- {
- "selected": true,
- "text": "__SCYLLA_VERSION_DOT__",
- "value": "__SCYLLA_VERSION_DOT__"
- }
- ],
- "query": "__SCYLLA_VERSION_DOT__"
- },
- {
- "class": "monitor_version_var"
- },
- {
- "allValue": null,
- "datasource": "prometheus",
- "definition": "scylla_column_family_write_latency_count{cluster=~\"$cluster|$^\"}",
- "description": null,
- "error": null,
- "hide": 2,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "no_ks",
- "options": [],
- "query": {
- "query": "scylla_column_family_write_latency_count{cluster=~\"$cluster|$^\"}",
- "refId": "StandardVariableQuery"
- },
- "refresh": 1,
- "regex": "/^(scylla_column_family_write_latency_count)/",
- "skipUrlSync": false,
- "sort": 0,
- "type": "query"
- }
- ]
- },
- "tags": [
- "__SCYLLA_VERSION_DOT__"
- ],
- "time": {
- "from": "now-30m",
- "to": "now"
- },
- "title": "Keyspace",
- "overwrite": true,
- "version": 5
- }
- }`}}
- os.json: |-
- {{`{
- "dashboard": {
- "class": "dashboard",
- "overwrite": true,
- "rows": [
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": ""
- }
- ]
- },
- {
- "class": "logo_row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "collapsible_row_panel",
- "title": ""
- }
- ]
- },
- {
- "class": "row",
- "height": "200px",
- "panels": [
- {
- "class": "piechart_panel_percent",
- "height": "250px",
- "repeat": "node",
- "targets": [
- {
- "expr": "sum(node_filesystem_avail_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"})",
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "Free",
- "metric": "",
- "refId": "A",
- "instant": true,
- "step": 7200
- },
- {
- "expr": "(sum(node_filesystem_size_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"})-sum(node_filesystem_avail_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"}))",
- "intervalFactor": 1,
- "legendFormat": "Used",
- "refId": "B",
- "instant": true,
- "step": 7200
- }
- ],
- "title": "Total Storage $node"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "collapsed": false,
- "datasource": null,
- "id": "auto",
- "gridPos": {
- "h": 1,
- "w": 24
- },
- "panels": [],
- "repeat": "mount_point",
- "title": "Partition $mount_point",
- "type": "row"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "percentunit_panel",
- "span": 3,
- "targets": [
- {
- "expr": "1-sum(node_filesystem_avail_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])/sum(node_filesystem_size_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Used disk by $by"
- },
- {
- "class": "bytes_panel",
- "span": 3,
- "targets": [
- {
- "expr": "sum(node_filesystem_size_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])-sum(node_filesystem_avail_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "sum(node_filesystem_size{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])-sum(node_filesystem_avail{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "B",
- "step": 1
- }
- ],
- "title": "Used Bytes by $by"
- },
- {
- "class": "bytes_panel",
- "span": 3,
- "targets": [
- {
- "expr": "sum(node_filesystem_free_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Free Bytes by $by"
- },
- {
- "class": "graph_panel",
- "span": 3,
- "targets": [
- {
- "expr": "sum(node_filesystem_files{mountpoint=\"$mount_point\", instance=~\"$node\"}- node_filesystem_files_free{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Number of files by $by"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "collapsed": false,
- "datasource": null,
- "id": "auto",
- "gridPos": {
- "h": 1,
- "w": 24
- },
- "panels": [],
- "repeat": "monitor_disk",
- "title": "Disk $monitor_disk",
- "type": "row"
- }
- ]
- },
- {
- "class": "row",
- "gridPos": {
- "h": 2
- },
- "height": "25px",
- "panels": [
- {
- "class": "text_panel",
- "content": "Disk $monitor_disk
",
- "style": {}
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "wps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(node_disk_writes_completed_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "sum(rate(node_disk_writes_completed{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "B",
- "step": 1
- }
- ],
- "title": "Disk Writes per $by"
- },
- {
- "class": "rps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(node_disk_reads_completed_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "sum(rate(node_disk_reads_completed{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "B",
- "step": 1
- }
- ],
- "title": "Disk Reads per $by"
- },
- {
- "class": "bps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(node_disk_written_bytes_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "sum(rate(node_disk_bytes_written{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "B",
- "step": 1
- }
- ],
- "title": "Disk Writes Bps per $by"
- },
- {
- "class": "bps_panel",
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(node_disk_read_bytes_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "sum(rate(node_disk_bytes_read{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "B",
- "step": 1
- }
- ],
- "title": "Disk Read Bps per $by"
- },
- {
- "class": "seconds_panel",
- "span": 3,
- "targets": [
- {
- "expr": "rate(node_disk_read_time_seconds_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])/rate(node_disk_reads_completed_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "The average read time from disk",
- "title": "Read AWait per instance"
- },
- {
- "class": "seconds_panel",
- "span": 3,
- "targets": [
- {
- "expr": "rate(node_disk_write_time_seconds_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])/rate(node_disk_writes_completed_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "The average write time to disk",
- "title": "Write AWait per instance"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "collapsed": false,
- "datasource": null,
- "id": "auto",
- "gridPos": {
- "h": 1,
- "w": 24
- },
- "panels": [],
- "repeat": "monitor_network_interface",
- "title": "Network Interface $monitor_network_interface",
- "type": "row"
- }
- ]
- },
- {
- "class": "row",
- "gridPos": {
- "h": 2
- },
- "height": "25px",
- "panels": [
- {
- "class": "text_panel",
- "content": "Network $monitor_network_interface
",
- "style": {}
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "pps_panel",
- "span": 6,
- "targets": [
- {
- "expr": "sum(rate(node_network_receive_packets_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_network_interface\"}[4m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "sum(rate(node_network_receive_packets{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_network_interface\"}[4m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "B",
- "step": 1
- }
- ],
- "title": "Interface Rx Packets"
- },
- {
- "class": "pps_panel",
- "span": 6,
- "targets": [
- {
- "expr": "sum(rate(node_network_transmit_packets_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_network_interface\"}[4m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "sum(rate(node_network_transmit_packets{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_network_interface\"}[4m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "B",
- "step": 1
- }
- ],
- "title": "Interface Tx Packets"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "bps_panel",
- "span": 6,
- "targets": [
- {
- "expr": "sum(rate(node_network_receive_bytes_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_network_interface\"}[4m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "sum(rate(node_network_receive_bytes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_network_interface\"}[4m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "B",
- "step": 1
- }
- ],
- "title": "Interface Rx Bps"
- },
- {
- "class": "bps_panel",
- "span": 6,
- "targets": [
- {
- "expr": "sum(rate(node_network_transmit_bytes_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_network_interface\"}[4m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "sum(rate(node_network_transmit_bytes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_network_interface\"}[4m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "B",
- "step": 1
- }
- ],
- "title": "Interface Tx Bps"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "collapsed": false,
- "datasource": null,
- "id": "auto",
- "gridPos": {
- "h": 1,
- "w": 24
- },
- "panels": [],
- "title": "CPU and Memory",
- "type": "row"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "bps_panel",
- "span": 3,
- "description": "The available memory, note that in a production environment we expect this to be low, Scylla would use most of the available memory when possible",
- "targets": [
- {
- "expr": "sum(node_memory_MemAvailable_bytes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Available memory"
- },
- {
- "class": "percentunit_panel",
- "span": 3,
- "description": "Percent of available memory, note that in a production environment we expect this to be low, Scylla would use most of the available memory when possible",
- "targets": [
- {
- "expr": "sum(node_memory_MemAvailable_bytes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\"}) by ([[by]])/sum(node_memory_MemTotal_bytes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Available memory"
- },
- {
- "class": "percentunit_panel",
- "span": 3,
- "description": "Percent of CPU used, note that in production Scylla would try to use most of the CPU and this is not a problem",
- "targets": [
- {
- "expr": "1-sum(rate(node_cpu_seconds_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", mode=\"idle\"}[3m])) by ([[by]])/count(node_cpu_seconds_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", mode=\"idle\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "CPU used"
- },
- {
- "class": "graph_panel",
- "span": 3,
- "description": "CPU frequency should be set for performance.\n\n The current frequency should match the max frequency. If that is not the case, check your host configuration.",
- "targets": [
- {
- "expr": "max(node_cpu_scaling_frequency_max_hertz{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\"}) or on() max(node_cpu_frequency_max_hertz{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\"})",
- "intervalFactor": 1,
- "legendFormat": "Max",
- "metric": "",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "min(node_cpu_scaling_frequency_hertz{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\"}) by ([[by]]) or on() min(node_cpu_frequency_hertz{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "B",
- "step": 1
- }
- ],
- "seriesOverrides": [
- {
- "$$hashKey": "object:211",
- "alias": "Max",
- "color": "#F2495C"
- }
- ],
- "fieldConfig": {
- "defaults": {
- "links": [],
- "unit": "hertz"
- },
- "overrides": []
- },
- "title": "CPU Frequency"
- }
- ],
- "title": "New row"
- },
- {
- "class": "monitoring_version_row"
- }
- ],
- "tags": [
- "__SCYLLA_VERSION_DOT__"
- ],
- "templating": {
- "list": [
- {
- "class": "template_variable_single",
- "label": "cluster",
- "name": "cluster",
- "query": "label_values(node_filesystem_avail_bytes, cluster)"
- },
- {
- "class": "template_variable_all",
- "label": "dc",
- "name": "dc",
- "query": "label_values(node_filesystem_avail_bytes{cluster=~\"$cluster\"}, dc)"
- },
- {
- "class": "template_variable_all",
- "label": "node",
- "name": "node",
- "query": "label_values(node_filesystem_avail_bytes{cluster=~\"$cluster|$^\", dc=~\"$dc\"}, instance)"
- },
- {
- "allValue": null,
- "current": {
- "tags": [],
- "text": "Instance",
- "value": "instance"
- },
- "hide": 0,
- "includeAll": false,
- "label": "by",
- "multi": false,
- "name": "by",
- "options": [
- {
- "selected": false,
- "text": "Cluster",
- "value": "cluster"
- },
- {
- "selected": false,
- "text": "DC",
- "value": "dc"
- },
- {
- "selected": true,
- "text": "Instance",
- "value": "instance"
- },
- {
- "selected": true,
- "text": "Shard",
- "value": "instance,cpu"
- }
- ],
- "query": "Cluster,DC,Instance",
- "type": "custom"
- },
- {
- "allValue": null,
- "current": {
- "isNone": true,
- "text": "None",
- "value": ""
- },
- "datasource": "prometheus",
- "hide": 0,
- "includeAll": false,
- "label": null,
- "multi": true,
- "name": "monitor_disk",
- "options": [],
- "query": "node_disk_read_bytes_total",
- "refresh": 2,
- "regex": "/.*device=\"([^\\\"]*)\".*/",
- "sort": 0,
- "tagValuesQuery": "",
- "tags": [],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "current": {
- "isNone": true,
- "text": "None",
- "value": ""
- },
- "datasource": "prometheus",
- "hide": 0,
- "includeAll": false,
- "label": null,
- "multi": true,
- "name": "monitor_network_interface",
- "options": [],
- "query": "node_network_receive_packets_total",
- "refresh": 2,
- "regex": "/.*device=\"([^\\\"]*)\".*/",
- "sort": 0,
- "tagValuesQuery": "",
- "tags": [],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "current": {
- "text": "/var/lib/scylla",
- "value": "/var/lib/scylla"
- },
- "datasource": "prometheus",
- "hide": 0,
- "includeAll": false,
- "label": "Mount path",
- "multi": true,
- "name": "mount_point",
- "options": [],
- "query": "node_filesystem_avail_bytes",
- "refresh": 2,
- "regex": "/mountpoint=\"([^\"]*)\".*/",
- "sort": 0,
- "tagValuesQuery": "",
- "tags": [],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "class": "template_variable_custom",
- "current": {
- "text": "__SCYLLA_VERSION_DOT__",
- "value": "__SCYLLA_VERSION_DOT__"
- },
- "name": "scylla_version",
- "options": [
- {
- "selected": true,
- "text": "__SCYLLA_VERSION_DOT__",
- "value": "__SCYLLA_VERSION_DOT__"
- }
- ],
- "query": "__SCYLLA_VERSION_DOT__"
- },
- {
- "class": "monitor_version_var"
- }
- ]
- },
- "annotations" :{
- "list": [
- {
- "builtIn": 1,
- "datasource": "-- Grafana --",
- "enable": true,
- "hide": true,
- "iconColor": "rgba(0, 211, 255, 1)",
- "name": "Annotations & Alerts",
- "type": "dashboard"
- },
- {
- "class" : "annotation_restart"
- },
- {
- "class" : "annotation_stall"
- },
- {
- "class" : "annotation_schema_changed"
- }
- ]
- },
- "time": {
- "from": "now-30m",
- "to": "now"
- },
- "title": "OS Metrics",
- "uid": "OS-__SCYLLA_VERSION_DASHED__",
- "version": 5
- }
- }`}}
- overview.json: |-
- {{`{
- "dashboard": {
- "class": "dashboard",
- "originalTitle": "Scylla Cluster Metrics",
- "overwrite": true,
- "rows": [
- {
- "class": "row",
- "panels": [
- {
- "collapsed": false,
- "datasource": null,
- "id": "auto",
- "gridPos": {
- "h": 1,
- "w": 24
- },
- "panels": [],
- "title": "Cluster overview $cluster",
- "type": "row"
- }
- ]
- },
- {
- "class" : "small_stat_rows"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "alert_table",
- "span": 4,
- "title": "Active Alerts"
- },
- {
- "class": "ops_panel",
- "span": 2,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_write_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]))",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "Write attempts - include all writes that reached the coordinator node, even if they will eventually fail",
- "title": "Writes"
- },
- {
- "class": "us_panel",
- "span": 2,
- "targets": [
- {
- "expr": "wlatencyp95{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name=~\"$sg\"}>0",
- "intervalFactor": 1,
- "legendFormat": "{{scheduling_group_name}} 95%",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "wlatencyp99{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name=~\"$sg\"}>0",
- "intervalFactor": 1,
- "legendFormat": "{{scheduling_group_name}} 99%",
- "refId": "B",
- "step": 1
- }
- ],
- "legend": {
- "class": "show_legend"
- },
- "title": "Write Latencies"
- },
- {
- "class": "ops_panel",
- "span": 2,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_read_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]))",
- "intervalFactor": 1,
- "legendFormat": "Reads",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "Read attempts - include all reads that reached the coordinator node, even if they will eventually fail",
- "title": "Reads"
- },
- {
- "class": "us_panel",
- "span": 2,
- "targets": [
- {
- "expr": "rlatencyp95{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name=~\"$sg\"}>0",
- "intervalFactor": 1,
- "legendFormat": "{{scheduling_group_name}} 95%",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "rlatencyp99{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name=~\"$sg\"}>0",
- "intervalFactor": 1,
- "legendFormat": "{{scheduling_group_name}} 99%",
- "refId": "B",
- "step": 1
- }
- ],
- "legend": {
- "class": "show_legend"
- },
- "title": "Read Latencies"
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "collapsed": false,
- "datasource": null,
- "id": "auto",
- "gridPos": {
- "h": 1,
- "w": 24
- },
- "panels": [],
- "title": "",
- "repeat": "dc",
- "type": "row"
- }
- ]
- },
- {
- "class": "header_row",
- "panels": [
- {
- "class": "plain_text",
- "content": "Information for $dc
"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class": "vertical_lcd",
- "targets": [
- {
- "expr": "avg(scylla_reactor_utilization{instance=~\"[[node]]\",cluster=~\"$cluster\", dc=~\"$dc\", shard=~\"[[shard]]\"} )",
- "interval": "",
- "legendFormat": "",
- "instant": true,
- "refId": "A"
- }
- ],
- "title": "Load"
- },
- {
- "class": "bytes_panel",
- "gridPos": {
- "w": 3
- },
- "targets": [
- {
- "expr": "Avg(node_filesystem_size_bytes{mountpoint=\"$mount_point\", dc=~\"$dc\", instance=~\"$node\"}) by ([[by]])-avg(node_filesystem_avail_bytes{mountpoint=\"$mount_point\", dc=~\"$dc\", instance=~\"$node\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "Avg Usage {{[[by]]}}",
- "metric": "",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "avg(node_filesystem_size_bytes{mountpoint=\"$mount_point\", dc=~\"$dc\", instance=~\"$node\"}) by ([[by]])",
- "legendFormat": "Size {{[[by]]}}",
- "interval": "",
- "refId": "B"
- }
- ],
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "unit": "bytes"
- },
- "overrides": [
- {
- "matcher": {
- "id": "byFrameRefID",
- "options": "B"
- },
- "properties": [
- {
- "id": "custom.lineStyle",
- "value": {
- "fill": "dash",
- "dash": [
- 10,
- 10
- ]
- }
- },
- {
- "id": "custom.lineWidth",
- "value": 2
- }
- ]
- }
- ]
- },
- "options": {
- "class":"desc_tooltip_options"
- },
- "description": "The average Disk usage per [[by]].\n\n The dashed line represent the total size.",
- "title": "Average Disk Usage"
- },
- {
- "class": "graph_panel_int",
- "span": 2,
- "targets": [
- {
- "expr": "$func(scylla_compaction_manager_compactions{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "description": "scylla_compaction_manager_compactions",
- "title": "Running Compactions"
- },
- {
- "class": "ops_panel",
- "description": "The Hits and Misses",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_row_hits{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "Hit {{[[by]]}}",
- "refId": "A",
- "step": 10
- },
- {
- "expr": "$func(rate(scylla_cache_row_misses{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "Misses {{[[by]]}}",
- "refId": "B",
- "step": 10
- }
- ],
- "legend": {
- "class": "show_legend"
- },
- "title": "Cache Hits/Misses"
- },
- {
- "class":"small_nodes_table",
- "gridPos": {
- "h": 17,
- "w": 10
- }
- },
- {
- "class": "ops_panel",
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) or on([[by]]) $func(rate(scylla_storage_proxy_coordinator_write_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "Writes",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m] offset 1d)) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_write_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m] offset 1d))",
- "legendFormat": "1 Day Ago",
- "interval": "",
- "intervalFactor": 1,
- "refId": "B",
- "step": 1
- },
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m] offset 1w)) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_write_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m] offset 1w))",
- "legendFormat": "1 Week Ago",
- "interval": "",
- "intervalFactor": 1,
- "refId": "C",
- "step": 1
- }
- ],
- "legend": {
- "class": "show_legend"
- },
- "seriesOverrides": [
- {
- "alias": "1 Day Ago",
- "dashes": true,
- "dashLength": 4
- },
- {
- "alias": "1 Week Ago",
- "dashes": true,
- "dashLength": 2
- }
- ],
- "description": "Write attempts - include all writes that reached the coordinator node, even if they will eventually fail",
- "title": "Writes"
- },
- {
- "class": "us_panel",
- "span": 2,
- "targets": [
- {
- "expr": "avg(wlatencyp95{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$sg\"}>0) by (scheduling_group_name, [[by]])",
- "intervalFactor": 1,
- "legendFormat": "{{scheduling_group_name}} 95% {{[[by]]}}",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "avg(wlatencyp99{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$sg\"}>0) by (scheduling_group_name, [[by]])",
- "intervalFactor": 1,
- "legendFormat": "{{scheduling_group_name}} 99% {{[[by]]}}",
- "refId": "B",
- "step": 1
- }
- ],
- "legend": {
- "class": "show_legend"
- },
- "title": "Write Latencies"
- },
- {
- "class": "ops_panel",
- "description": "Requests that Scylla tried to write but timed out. Timeouts are counted in the node that received the request (the coordinator), not at the replicas.",
- "span": 2,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_write_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "Writes {{[[by]]}}",
- "refId": "A",
- "step": 10
- }
- ],
- "legend": {
- "class": "show_legend"
- },
- "title": "Write Timeouts by [[by]]"
- },
- {
- "class": "ops_panel",
- "span": 3,
- "gridPos": {
- "x": 0
- },
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$sg\"}[1m])) by ([[by]]) or on([[by]]) $func(rate(scylla_storage_proxy_coordinator_read_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$sg\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "Reads",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$sg\"}[1m] offset 1d)) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_read_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$sg\"}[1m] offset 1d))",
- "intervalFactor": 1,
- "legendFormat": "1 Day Ago",
- "refId": "B",
- "step": 1
- },
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$sg\"}[1m] offset 1w)) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_read_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$sg\"}[1m] offset 1w))",
- "intervalFactor": 1,
- "legendFormat": "1 Week Ago",
- "refId": "C",
- "step": 1
- }
- ],
- "legend": {
- "class": "show_legend"
- },
- "seriesOverrides": [
- {
- "alias": "1 Day Ago",
- "dashes": true,
- "dashLength": 4
- },
- {
- "alias": "1 Week Ago",
- "dashes": true,
- "dashLength": 2
- }
- ],
- "description": "Read attempts - include all reads that reached the coordinator node, even if they will eventually fail",
- "title": "Reads"
- },
- {
- "class": "us_panel",
- "span": 2,
- "gridPos": {
- "x": 6
- },
- "targets": [
- {
- "expr": "avg(rlatencyp95{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$sg\"}>0) by(scheduling_group_name, [[by]])",
- "intervalFactor": 1,
- "legendFormat": "{{scheduling_group_name}} 95% {{[[by]]}}",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "avg(rlatencyp99{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$sg\"}>0) by(scheduling_group_name, [[by]])",
- "intervalFactor": 1,
- "legendFormat": "{{scheduling_group_name}} 99% {{[[by]]}}",
- "refId": "B",
- "step": 1
- }
- ],
- "legend": {
- "class": "show_legend"
- },
- "title": "Read Latencies"
- },
- {
- "class": "ops_panel",
- "description": "Requests that Scylla tried to read but timed out. Timeouts are counted in the node that received the request (the coordinator), not at the replicas.",
- "span": 2,
- "gridPos": {
- "x": 10
- },
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_read_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])+rate(scylla_storage_proxy_coordinator_cas_read_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])+rate(scylla_storage_proxy_coordinator_range_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "Read {{[[by]]}}",
- "refId": "A",
- "step": 10
- }
- ],
- "legend": {
- "class": "show_legend"
- },
- "title": "Read Timeouts by [[by]]"
- },
- {
- "class": "plain_text",
- "dashproductreject": "no-version-check",
- "gridPos": {
- "w": 10,
- "x": 14,
- "h": 1
- },
- "options": {
- "mode": "html",
- "content": "
"
- }
- },
- {
- "class": "plain_text",
- "dashproduc": "no-version-check",
- "gridPos": {
- "w": 10,
- "x": 14,
- "h": 1
- },
- "options": {
- "mode": "html",
- "content": ""
- }
- }
- ]
- },
- {
- "class": "row",
- "panels": [
- {
- "collapsed": false,
- "datasource": null,
- "id": "auto",
- "gridPos": {
- "h": 1,
- "w": 24
- },
- "panels": [],
- "title": "",
- "type": "row"
- }
- ]
- },
- {
- "class": "header_row",
- "panels": [
- {
- "class": "plain_text",
- "content": "Advisor
"
- }
- ],
- "title": "New row"
- },
- {
- "class": "row",
- "panels": [
- {
- "class":"advisor_table",
- "dashversion":">4.1"
- },
- {
- "class":"enterprise_advisor_table",
- "dashversion":">2019.1"
- }
- ]
- },
- {
- "class": "user_panels_collapse"
- },
- {
- "class": "user_panel_row_header"
- },
- {
- "class": "user_panels_row"
- },
- {
- "class": "monitoring_version_row"
- }
- ],
- "tags": [
- "__SCYLLA_VERSION_DOT__"
- ],
- "templating": {
- "list": [
- {
- "class": "by_template_var",
- "current": {
- "tags": [],
- "text": "DC",
- "value": "dc"
- },
- "options": [
- {
- "selected": false,
- "text": "Cluster",
- "value": "cluster"
- },
- {
- "selected": true,
- "text": "DC",
- "value": "dc"
- },
- {
- "selected": false,
- "text": "Instance",
- "value": "instance"
- },
- {
- "selected": false,
- "text": "instance,shard",
- "value": "instance,shard"
- }
- ]
- },
- {
- "class": "template_variable_single",
- "label": "cluster",
- "name": "cluster",
- "query": "label_values(scylla_reactor_utilization, cluster)"
- },
- {
- "class": "template_variable_all",
- "label": "dc",
- "name": "dc",
- "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster\"}, dc)"
- },
- {
- "class": "template_variable_all",
- "label": "node",
- "name": "node",
- "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster|$^\", dc=~\"$dc\"}, instance)"
- },
- {
- "class": "template_variable_all",
- "label": "shard",
- "name": "shard",
- "allValue":".+",
- "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster|$^\"},shard)",
- "sort": 3
- },
- {
- "class": "template_variable_single",
- "current": {
- "text": "/var/lib/scylla",
- "value": "/var/lib/scylla"
- },
- "label": "Mount path",
- "name": "mount_point",
- "query": "node_filesystem_avail_bytes",
- "regex": "/mountpoint=\"([^\"]*)\".*/",
- "sort": 0
- },
- {
- "class": "template_variable_single",
- "current": {
- "selected": true,
- "text": [
- "statement"
- ],
- "value": [
- "statement"
- ]
- },
- "label": "SG",
- "name": "sg",
- "includeAll":true,
- "multi":true,
- "dashversion":[">4.3"],
- "query": "label_values(rlatencyp99{cluster=~\"$cluster\", scheduling_group_name!~\"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache\"},scheduling_group_name)",
- "sort": 3
- },
- {
- "class": "template_variable_single",
- "dashversion":[">2019.1"],
- "current": {
- "selected": true,
- "text": [
- "sl:default"
- ],
- "value": [
- "sl:default"
- ]
- },
- "label": "SG",
- "name": "sg",
- "includeAll":true,
- "multi":true,
- "query": "label_values(rlatencyp99{cluster=~\"$cluster\", scheduling_group_name!~\"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache\"},scheduling_group_name)",
- "sort": 3
- },
- {
- "class": "aggregation_function"
- },
- {
- "class": "template_variable_custom",
- "current": {
- "text": "__SCYLLA_VERSION_DASHED__",
- "value": "__SCYLLA_VERSION_DASHED__"
- },
- "name": "dash_version",
- "options": [
- {
- "selected": true,
- "text": "__SCYLLA_VERSION_DASHED__",
- "value": "__SCYLLA_VERSION_DASHED__"
- }
- ],
- "query": "__SCYLLA_VERSION_DASHED__"
- },
- {
- "class": "template_variable_all",
- "hide":2,
- "name": "all_scyllas_versions",
- "current":{
- "selected":true,
- "text":[
- "All"
- ],
- "value":[
- "$__all"
- ]
- },
- "query": "label_values(scylla_scylladb_current_version{cluster=~\"$cluster|$^\"}, version)"
- },
- {
- "class": "template_variable_all",
- "hide":2,
- "name": "count_dc",
- "definition": "query_result(count(up{job=\"scylla\"}) by (dc))",
- "query": {
- "query": "query_result(count(up{job=\"scylla\"}) by (dc))",
- "refId": "StandardVariableQuery"
- },
- "regex": "/(?\\{dc=\"[^\"]+\".* \\d+) .*/"
- },
- {
- "class": "template_variable_custom",
- "current": {
- "text": "__SCYLLA_VERSION_DOT__",
- "value": "__SCYLLA_VERSION_DOT__"
- },
- "name": "scylla_version",
- "options": [
- {
- "selected": true,
- "text": "__SCYLLA_VERSION_DOT__",
- "value": "__SCYLLA_VERSION_DOT__"
- }
- ],
- "query": "__SCYLLA_VERSION_DOT__"
- },
- {
- "class": "monitor_version_var"
- }
- ]
- },
- "time": {
- "from": "now-30m",
- "to": "now"
- },
- "title": "Overview",
- "uid": "overview-__SCYLLA_VERSION_DASHED__",
- "version": 1
- }
- }`}}
diff --git a/assets/monitoring/grafana/v1alpha1/dashboards-saas.cm.yaml b/assets/monitoring/grafana/v1alpha1/dashboards-saas.cm.yaml
deleted file mode 100644
index ac7949e6bf7..00000000000
--- a/assets/monitoring/grafana/v1alpha1/dashboards-saas.cm.yaml
+++ /dev/null
@@ -1,4686 +0,0 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
- name: '{{ .scyllaDBMonitoringName }}-grafana-scylladb-dashboards'
-data:
- overview.json: |-
- {{`{
- "annotations": {
- "list": [
- {
- "builtIn": 1,
- "datasource": "-- Grafana --",
- "enable": true,
- "hide": true,
- "iconColor": "rgba(0, 211, 255, 1)",
- "name": "Annotations & Alerts",
- "type": "dashboard"
- },
- {
- "class": "annotation_manager_task",
- "datasource": "prometheus",
- "enable": true,
- "expr": "scylla_manager_task_active_count{type=~\"repair|backup\",cluster=~\"$cluster|$^\"}>0",
- "hide": false,
- "iconColor": "#73BF69",
- "limit": 100,
- "name": "Task",
- "showIn": 0,
- "tagKeys": "type",
- "tags": [],
- "titleFormat": "Running",
- "type": "tags"
- },
- {
- "class": "mv_building",
- "datasource": "prometheus",
- "enable": true,
- "expr": "sum(scylla_view_builder_builds_in_progress)>0",
- "hide": false,
- "iconColor": "rgb(50, 176, 0, 128)",
- "limit": 100,
- "name": "MV",
- "showIn": 0,
- "tagKeys": "instance,dc,cluster",
- "tags": [],
- "titleFormat": "Materialized View built",
- "type": "tags"
- },
- {
- "class": "ops_annotation",
- "datasource": "prometheus",
- "enable": true,
- "expr": "10*min(scylla_node_ops_finished_percentage) by (ops, dc,instance) < 10",
- "hide": false,
- "iconColor": "rgb(50, 176, 0, 128)",
- "limit": 100,
- "name": "ops",
- "showIn": 0,
- "tagKeys": "ops,dc,instance",
- "tags": [],
- "titleFormat": "Operation",
- "type": "tags"
- },
- {
- "class": "annotation_schema_changed",
- "datasource": "prometheus",
- "enable": false,
- "expr": "changes(scylla_database_schema_changed[$__rate_interval])>0",
- "hide": false,
- "iconColor": "rgba(255, 96, 96, 1)",
- "limit": 100,
- "name": "Schema Changed",
- "showIn": 0,
- "tagKeys": "instance,dc,cluster",
- "tags": [],
- "titleFormat": "schema changed",
- "type": "tags"
- }
- ]
- },
- "class": "dashboard",
- "editable": true,
- "gnetId": null,
- "graphTooltip": 1,
- "hideControls": true,
- "id": null,
- "links": [
- {
- "asDropdown": true,
- "icon": "external link",
- "includeVars": true,
- "keepTime": true,
- "tags": [],
- "type": "dashboards"
- }
- ],
- "originalTitle": "Scylla Cluster Metrics",
- "overwrite": true,
- "panels": [
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 0
- },
- "id": 1,
- "panels": [],
- "title": "Cluster overview $cluster",
- "type": "row"
- },
- {
- "class": "small_stat",
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "decimals": 1,
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- }
- ]
- },
- "unit": "si:"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 4,
- "w": 3,
- "x": 0,
- "y": 1
- },
- "id": 2,
- "options": {
- "colorMode": "value",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "auto",
- "reduceOptions": {
- "calcs": [
- "last"
- ],
- "fields": "",
- "values": false
- },
- "textMode": "auto"
- },
- "pluginVersion": "7.1.3",
- "targets": [
- {
- "expr": "sum(rate(scylla_transport_requests_served{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[60s])) + (sum(rate(scylla_thrift_served{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[60s])) or on() vector(0))",
- "instant": true,
- "intervalFactor": 1,
- "refId": "A",
- "step": 40
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Requests/s",
- "type": "stat"
- },
- {
- "class": "small_stat",
- "datasource": "prometheus",
- "description": "Average Write Latency",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "decimals": 0,
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 50000
- }
- ]
- },
- "unit": "\u00b5s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 4,
- "w": 2,
- "x": 3,
- "y": 1
- },
- "id": 3,
- "options": {
- "colorMode": "value",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "auto",
- "reduceOptions": {
- "calcs": [
- "last"
- ],
- "fields": "",
- "values": false
- },
- "textMode": "auto"
- },
- "pluginVersion": "7.1.3",
- "targets": [
- {
- "expr": "avg(wlatencya{by=\"cluster\", cluster=~\"$cluster|^$\",scheduling_group_name!=\"streaming\"}>0)",
- "instant": true,
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 4
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Avg Write",
- "type": "stat"
- },
- {
- "class": "small_stat",
- "datasource": "prometheus",
- "description": "99% write Latency",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "decimals": 0,
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 100000
- }
- ]
- },
- "unit": "\u00b5s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 4,
- "w": 2,
- "x": 5,
- "y": 1
- },
- "id": 4,
- "options": {
- "colorMode": "value",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "auto",
- "reduceOptions": {
- "calcs": [
- "last"
- ],
- "fields": "",
- "values": false
- },
- "textMode": "auto"
- },
- "pluginVersion": "7.1.3",
- "targets": [
- {
- "expr": "avg(wlatencyp95{by=\"cluster\", cluster=~\"$cluster|^$\",scheduling_group_name!=\"streaming\"}>0)",
- "instant": true,
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 4
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "95% Write",
- "type": "stat"
- },
- {
- "class": "small_stat",
- "datasource": "prometheus",
- "description": "99% write Latency",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "decimals": 0,
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 100000
- }
- ]
- },
- "unit": "\u00b5s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 4,
- "w": 2,
- "x": 7,
- "y": 1
- },
- "id": 5,
- "options": {
- "colorMode": "value",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "auto",
- "reduceOptions": {
- "calcs": [
- "last"
- ],
- "fields": "",
- "values": false
- },
- "textMode": "auto"
- },
- "pluginVersion": "7.1.3",
- "targets": [
- {
- "expr": "avg(wlatencyp99{by=\"cluster\", cluster=~\"$cluster|^$\",scheduling_group_name!=\"streaming\"}>0)",
- "instant": true,
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 4
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "99% Write",
- "type": "stat"
- },
- {
- "class": "small_stat",
- "datasource": "prometheus",
- "description": "Average Read Latency",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "decimals": 0,
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 50000
- }
- ]
- },
- "unit": "\u00b5s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 4,
- "w": 2,
- "x": 9,
- "y": 1
- },
- "id": 6,
- "options": {
- "colorMode": "value",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "auto",
- "reduceOptions": {
- "calcs": [
- "last"
- ],
- "fields": "",
- "values": false
- },
- "textMode": "auto"
- },
- "pluginVersion": "7.1.3",
- "targets": [
- {
- "expr": "avg(rlatencya{by=\"cluster\", cluster=~\"$cluster|^$\",scheduling_group_name!=\"streaming\"}>0)",
- "instant": true,
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 4
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Avg Read",
- "type": "stat"
- },
- {
- "class": "small_stat",
- "datasource": "prometheus",
- "description": "99% read Latency",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "decimals": 0,
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 100000
- }
- ]
- },
- "unit": "\u00b5s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 4,
- "w": 2,
- "x": 11,
- "y": 1
- },
- "id": 7,
- "options": {
- "colorMode": "value",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "auto",
- "reduceOptions": {
- "calcs": [
- "last"
- ],
- "fields": "",
- "values": false
- },
- "textMode": "auto"
- },
- "pluginVersion": "7.1.3",
- "targets": [
- {
- "expr": "avg(rlatencyp95{by=\"cluster\", cluster=~\"$cluster|^$\",scheduling_group_name!=\"streaming\"}>0)",
- "instant": true,
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 4
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "95% Read",
- "type": "stat"
- },
- {
- "class": "small_stat",
- "datasource": "prometheus",
- "description": "99% read Latency",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "decimals": 0,
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 100000
- }
- ]
- },
- "unit": "\u00b5s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 4,
- "w": 2,
- "x": 13,
- "y": 1
- },
- "id": 8,
- "options": {
- "colorMode": "value",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "auto",
- "reduceOptions": {
- "calcs": [
- "last"
- ],
- "fields": "",
- "values": false
- },
- "textMode": "auto"
- },
- "pluginVersion": "7.1.3",
- "targets": [
- {
- "expr": "avg(rlatencyp99{by=\"cluster\", cluster=~\"$cluster|^$\",scheduling_group_name!=\"streaming\"}>0)",
- "instant": true,
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 4
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "99% Read",
- "type": "stat"
- },
- {
- "class": "small_stat",
- "datasource": "prometheus",
- "description": "The percentage of the time during which Scylla utilized the CPU. Note that because Scylla does busy polling for some time before going idle, CPU utilization as seen by the operating system may be much higher. Your system is not yet CPU-bottlenecked until this metric is high.",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "decimals": 0,
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- },
- "unit": "percent"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 4,
- "w": 2,
- "x": 15,
- "y": 1
- },
- "id": 9,
- "options": {
- "colorMode": "value",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "auto",
- "reduceOptions": {
- "calcs": [
- "last"
- ],
- "fields": "",
- "values": false
- },
- "textMode": "auto"
- },
- "pluginVersion": "7.1.3",
- "targets": [
- {
- "expr": "avg(scylla_reactor_utilization{cluster=~\"$cluster|$^\", dc=~\"$dc\"} )",
- "instant": true,
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 4
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Load",
- "type": "stat"
- },
- {
- "class": "small_stat",
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 1
- }
- ]
- }
- },
- "overrides": []
- },
- "gridPos": {
- "h": 4,
- "w": 2,
- "x": 17,
- "y": 1
- },
- "id": 10,
- "options": {
- "colorMode": "value",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "auto",
- "reduceOptions": {
- "calcs": [
- "last"
- ],
- "fields": "",
- "values": false
- },
- "textMode": "auto"
- },
- "pluginVersion": "7.1.3",
- "targets": [
- {
- "expr": "$func(rate(scylla_database_total_reads_failed{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
- "instant": true,
- "intervalFactor": 1,
- "refId": "A",
- "step": 40
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "R Failed",
- "type": "stat"
- },
- {
- "class": "small_stat",
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 1
- }
- ]
- }
- },
- "overrides": []
- },
- "gridPos": {
- "h": 4,
- "w": 2,
- "x": 19,
- "y": 1
- },
- "id": 11,
- "options": {
- "colorMode": "value",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "auto",
- "reduceOptions": {
- "calcs": [
- "last"
- ],
- "fields": "",
- "values": false
- },
- "textMode": "auto"
- },
- "pluginVersion": "7.1.3",
- "targets": [
- {
- "expr": "$func(rate(scylla_database_total_writes_failed{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
- "instant": true,
- "intervalFactor": 1,
- "refId": "A",
- "step": 40
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "W Failed",
- "type": "stat"
- },
- {
- "class": "small_stat",
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 1
- }
- ]
- }
- },
- "overrides": []
- },
- "gridPos": {
- "h": 4,
- "w": 2,
- "x": 21,
- "y": 1
- },
- "id": 12,
- "options": {
- "colorMode": "value",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "auto",
- "reduceOptions": {
- "calcs": [
- "last"
- ],
- "fields": "",
- "values": false
- },
- "textMode": "auto"
- },
- "pluginVersion": "7.1.3",
- "targets": [
- {
- "expr": "sum(rate(scylla_storage_proxy_coordinator_write_timeouts{cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m]))",
- "instant": true,
- "intervalFactor": 1,
- "refId": "A",
- "step": 40
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Timeouts",
- "type": "stat"
- },
- {
- "class": "alert_table",
- "columns": [],
- "datasource": "alertmanager",
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "thresholds"
- },
- "custom": {
- "align": null,
- "filterable": false
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute"
- }
- },
- "overrides": [
- {
- "matcher": {
- "id": "byName",
- "options": "Time"
- },
- "properties": [
- {
- "id": "custom.width",
- "value": 150
- }
- ]
- },
- {
- "matcher": {
- "id": "byName",
- "options": "instance"
- },
- "properties": [
- {
- "id": "custom.width",
- "value": 100
- }
- ]
- }
- ]
- },
- "fontSize": "100%",
- "gridPos": {
- "h": 6,
- "w": 8,
- "x": 0,
- "y": 5
- },
- "id": 13,
- "links": [],
- "options": {
- "showHeader": true
- },
- "pageSize": null,
- "scroll": true,
- "showHeader": true,
- "sort": {
- "col": 0,
- "desc": true
- },
- "span": 4,
- "targets": [
- {
- "active": true,
- "annotations": true,
- "filters": "job!=\"scylla_manager\",advisor=\"\"",
- "legendFormat": "{{description}}",
- "refId": "A",
- "target": "Query"
- }
- ],
- "title": "Active Alerts",
- "transform": "table",
- "transformations": [
- {
- "id": "filterFieldsByName",
- "options": {
- "include": {
- "names": [
- "Time",
- "summary",
- "instance"
- ]
- }
- }
- },
- {
- "id": "organize",
- "options": {
- "excludeByName": {},
- "indexByName": {
- "Time": 0,
- "instance": 1,
- "summary": 2
- },
- "renameByName": {}
- }
- }
- ],
- "type": "table"
- },
- {
- "class": "ops_panel",
- "datasource": "prometheus",
- "description": "Write attempts - include all writes that reached the coordinator node, even if they will eventually fail",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "si:ops/s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 4,
- "x": 8,
- "y": 5
- },
- "id": 14,
- "isNew": true,
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "seriesOverrides": [
- {}
- ],
- "span": 2,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m]))",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Writes",
- "type": "timeseries"
- },
- {
- "class": "us_panel",
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "\u00b5s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 4,
- "x": 12,
- "y": 5
- },
- "id": 15,
- "isNew": true,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "class": "show_legend",
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "total": false,
- "values": false
- },
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "seriesOverrides": [
- {}
- ],
- "span": 2,
- "targets": [
- {
- "expr": "avg(wlatencyp95{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0)",
- "intervalFactor": 1,
- "legendFormat": "95%",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "avg(wlatencyp99{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0)",
- "intervalFactor": 1,
- "legendFormat": "99%",
- "refId": "B",
- "step": 1
- }
- ],
- "title": "Write Latencies",
- "type": "timeseries"
- },
- {
- "class": "ops_panel",
- "datasource": "prometheus",
- "description": "Read attempts - include all reads that reached the coordinator node, even if they will eventually fail",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "si:ops/s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 4,
- "x": 16,
- "y": 5
- },
- "id": 16,
- "isNew": true,
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "seriesOverrides": [
- {}
- ],
- "span": 2,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m]))",
- "intervalFactor": 1,
- "legendFormat": "Reads",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Reads",
- "type": "timeseries"
- },
- {
- "class": "us_panel",
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "\u00b5s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 4,
- "x": 20,
- "y": 5
- },
- "id": 17,
- "isNew": true,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "class": "show_legend",
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "total": false,
- "values": false
- },
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "seriesOverrides": [
- {}
- ],
- "span": 2,
- "targets": [
- {
- "expr": "avg(rlatencyp95{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0)",
- "intervalFactor": 1,
- "legendFormat": "95%",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "avg(rlatencyp99{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0)",
- "intervalFactor": 1,
- "legendFormat": "99%",
- "refId": "B",
- "step": 1
- }
- ],
- "title": "Read Latencies",
- "type": "timeseries"
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 11
- },
- "id": 18,
- "panels": [],
- "title": "",
- "type": "row"
- },
- {
- "class": "plain_text",
- "content": "Advisor
",
- "datasource": null,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {}
- },
- "overrides": []
- },
- "gridPos": {
- "h": 2,
- "w": 24,
- "x": 0,
- "y": 12
- },
- "id": 19,
- "isNew": true,
- "links": [],
- "mode": "html",
- "options": {},
- "span": 12,
- "style": {},
- "title": "",
- "transparent": true,
- "type": "text"
- },
- {
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "custom": {}
- },
- "overrides": []
- },
- "gridPos": {
- "h": 2,
- "w": 12,
- "x": 12,
- "y": 14
- },
- "id": 20,
- "options": {
- "content": "Balance
\nAn imbalance between shards or nodes may indicate a potential problem",
- "mode": "html"
- },
- "pluginVersion": "7.3.4",
- "targets": [
- {
- "queryType": "randomWalk",
- "refId": "A"
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "",
- "type": "text"
- },
- {
- "class": "advisor_table",
- "dashversion": ">4.1",
- "datasource": "alertmanager",
- "fieldConfig": {
- "defaults": {
- "custom": {
- "align": null,
- "filterable": false
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- }
- },
- "overrides": [
- {
- "matcher": {
- "id": "byName",
- "options": "dashboard"
- },
- "properties": [
- {
- "id": "links",
- "value": [
- {
- "title": "",
- "url": "/d/${__data.fields.dashboard}-[[dash_version]]?refresh=30s&orgId=1&var-by=instance&from=${__from}&to=${__to}"
- }
- ]
- },
- {
- "id": "custom.width",
- "value": 100
- }
- ]
- },
- {
- "matcher": {
- "id": "byName",
- "options": "advisor"
- },
- "properties": [
- {
- "id": "custom.width",
- "value": 120
- },
- {
- "id": "displayName",
- "value": "Category"
- }
- ]
- },
- {
- "matcher": {
- "id": "byName",
- "options": "severity"
- },
- "properties": [
- {
- "id": "links",
- "value": [
- {
- "targetBlank": true,
- "title": "Open an issue",
- "url": "https://github.com/scylladb/scylla/issues/new?body=description%3D${__data.fields[4]}%0ASource%3DAdvisor%0AScylla-versions%3D${all_scyllas_versions}%0Ascylla-monitoring%3D${monitoring_version}%0Acluster%3D${count_dc}%0Aname%3D${cluster}%0A%0A"
- }
- ]
- },
- {
- "id": "mappings",
- "value": [
- {
- "from": "0",
- "id": 1,
- "text": "\ud83d\udd14",
- "to": "10",
- "type": 2,
- "value": ""
- }
- ]
- },
- {
- "id": "displayName",
- "value": "Report"
- },
- {
- "id": "custom.width",
- "value": 65
- }
- ]
- },
- {
- "matcher": {
- "id": "byName",
- "options": "summary"
- },
- "properties": [
- {
- "id": "links",
- "value": [
- {
- "targetBlank": true,
- "title": "${__data.fields.description}\n\n click for more information",
- "url": "https://monitoring.docs.scylladb.com/branch-master/use-monitoring/advisor/${__data.fields.alertname}"
- }
- ]
- }
- ]
- },
- {
- "matcher": {
- "id": "byName",
- "options": "alertname"
- },
- "properties": [
- {
- "id": "displayName",
- "value": "."
- },
- {
- "id": "custom.width",
- "value": 1
- }
- ]
- },
- {
- "matcher": {
- "id": "byName",
- "options": "description"
- },
- "properties": [
- {
- "id": "displayName",
- "value": "."
- },
- {
- "id": "custom.width",
- "value": 1
- }
- ]
- },
- {
- "matcher": {
- "id": "byName",
- "options": "Time"
- },
- "properties": [
- {
- "id": "custom.width",
- "value": 150
- }
- ]
- }
- ]
- },
- "gridPos": {
- "h": 6,
- "w": 12,
- "x": 0,
- "y": 16
- },
- "id": 21,
- "links": [],
- "options": {
- "showHeader": true
- },
- "pluginVersion": "7.3.4",
- "targets": [
- {
- "active": true,
- "annotations": true,
- "filters": "advisor!=\"\"",
- "legendFormat": "{{description}}",
- "refId": "A",
- "target": "Query"
- }
- ],
- "title": "",
- "transformations": [
- {
- "id": "filterFieldsByName",
- "options": {
- "include": {
- "names": [
- "advisor",
- "dashboard",
- "description",
- "severity",
- "alertname",
- "summary",
- "Time"
- ]
- }
- }
- },
- {
- "id": "organize",
- "options": {
- "excludeByName": {},
- "indexByName": {
- "Time": 1,
- "advisor": 2,
- "dashboard": 3,
- "severity": 0,
- "summary": 4
- },
- "renameByName": {}
- }
- }
- ],
- "type": "table"
- },
- {
- "class": "small_stat_error",
- "datasource": "prometheus",
- "description": "",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "mappings": [
- {
- "from": "-1000",
- "id": 1,
- "text": "\u2713",
- "to": "0.001",
- "type": 2,
- "value": ""
- },
- {
- "from": "0.001",
- "id": 2,
- "text": "\u26a0",
- "to": "10000",
- "type": 2,
- "value": "0.001"
- }
- ],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "orange",
- "value": 0.001
- }
- ]
- }
- },
- "overrides": []
- },
- "gridPos": {
- "h": 4,
- "w": 2,
- "x": 12,
- "y": 16
- },
- "id": 22,
- "links": [
- {
- "title": "The number of connections per shard should be balanced"
- }
- ],
- "options": {
- "colorMode": "value",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "auto",
- "reduceOptions": {
- "calcs": [
- "last"
- ],
- "fields": "",
- "values": false
- },
- "textMode": "auto"
- },
- "pluginVersion": "7.1.3",
- "targets": [
- {
- "expr": "max(abs(sum(scylla_transport_current_connections{cluster=~\"$cluster|$^\", dc=~\"$dc\"}) by (instance,shard)-scalar(avg(scylla_transport_current_connections{cluster=~\"$cluster|$^\", dc=~\"$dc\"})))) - 8",
- "hide": false,
- "refId": "A"
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Connections",
- "transformations": [
- {
- "id": "calculateField",
- "options": {
- "mode": "reduceRow",
- "reduce": {
- "reducer": "max"
- },
- "replaceFields": true
- }
- }
- ],
- "type": "stat"
- },
- {
- "class": "small_stat_error",
- "datasource": "prometheus",
- "description": "",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "mappings": [
- {
- "from": "-1000",
- "id": 1,
- "text": "\u2713",
- "to": "0.001",
- "type": 2,
- "value": ""
- },
- {
- "from": "0.001",
- "id": 2,
- "text": "\u26a0",
- "to": "10000",
- "type": 2,
- "value": "0.001"
- }
- ],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "orange",
- "value": 0.001
- }
- ]
- }
- },
- "overrides": []
- },
- "gridPos": {
- "h": 4,
- "w": 2,
- "x": 14,
- "y": 16
- },
- "id": 23,
- "links": [
- {
- "title": "Indicates that the number of CQL operations (inserts, updates, deletes, reads) is not balanced between shards in one of the nodes"
- }
- ],
- "options": {
- "colorMode": "value",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "auto",
- "reduceOptions": {
- "calcs": [
- "last"
- ],
- "fields": "",
- "values": false
- },
- "textMode": "auto"
- },
- "pluginVersion": "7.1.3",
- "targets": [
- {
- "expr": "max(abs(rate(scylla_cql_updates{conditional=\"no\", dc=~\"$dc\"}[1m]) - on(dc) group_left avg(rate(scylla_cql_updates{conditional=\"no\", dc=~\"$dc\"}[1m])) by (dc))/on(dc) group_left sum(stddev(rate(scylla_cql_updates{conditional=\"no\", dc=~\"$dc\"}[1m])) by(dc)+100) by(dc))-3",
- "format": "time_series",
- "hide": false,
- "interval": "",
- "legendFormat": "",
- "refId": "A"
- },
- {
- "expr": "max(abs(rate(scylla_cql_inserts{conditional=\"no\", dc=~\"$dc\"}[1m]) - on(dc) group_left avg(rate(scylla_cql_inserts{conditional=\"no\", dc=~\"$dc\"}[1m])) by (dc))/on(dc) group_left sum(stddev(rate(scylla_cql_inserts{conditional=\"no\", dc=~\"$dc\"}[1m])) by(dc)+100) by(dc))-3",
- "hide": false,
- "interval": "",
- "legendFormat": "",
- "refId": "B"
- },
- {
- "expr": "max(abs(rate(scylla_cql_reads{ dc=~\"$dc\"}[1m]) - on(dc) group_left avg(rate(scylla_cql_reads{ dc=~\"$dc\"}[1m])) by (dc))/on(dc) group_left sum(stddev(rate(scylla_cql_reads{ dc=~\"$dc\"}[1m])) by(dc)+100) by(dc))-3",
- "hide": false,
- "interval": "",
- "legendFormat": "",
- "refId": "C"
- },
- {
- "expr": "max(abs(rate(scylla_cql_deletes{conditional=\"no\", dc=~\"$dc\"}[1m]) - on(dc) group_left avg(rate(scylla_cql_deletes{conditional=\"no\", dc=~\"$dc\"}[1m])) by (dc))/on(dc) group_left sum(stddev(rate(scylla_cql_deletes{conditional=\"no\", dc=~\"$dc\"}[1m])) by(dc)+100) by(dc))-3",
- "hide": false,
- "interval": "",
- "legendFormat": "",
- "refId": "D"
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "CQL OPs",
- "transformations": [
- {
- "id": "calculateField",
- "options": {
- "mode": "reduceRow",
- "reduce": {
- "reducer": "max"
- },
- "replaceFields": true
- }
- }
- ],
- "type": "stat"
- },
- {
- "class": "small_stat_error",
- "datasource": "prometheus",
- "description": "",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "mappings": [
- {
- "from": "-1000",
- "id": 1,
- "text": "\u2713",
- "to": "0.001",
- "type": 2,
- "value": ""
- },
- {
- "from": "0.001",
- "id": 2,
- "text": "\u26a0",
- "to": "10000",
- "type": 2,
- "value": "0.001"
- }
- ],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "orange",
- "value": 0.001
- }
- ]
- }
- },
- "overrides": []
- },
- "gridPos": {
- "h": 4,
- "w": 2,
- "x": 16,
- "y": 16
- },
- "id": 24,
- "links": [
- {
- "title": "A single node with higher latency is an indication of a node-related issue"
- }
- ],
- "options": {
- "colorMode": "value",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "auto",
- "reduceOptions": {
- "calcs": [
- "last"
- ],
- "fields": "",
- "values": false
- },
- "textMode": "auto"
- },
- "pluginVersion": "7.1.3",
- "targets": [
- {
- "expr": "((max(wlatencyp99{by=\"instance\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"})-scalar(avg(wlatencyp99{by=\"instance\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0)))/(scalar(stddev(wlatencyp99{by=\"instance\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0))+100)-3)",
- "legendFormat": "",
- "refId": "A"
- },
- {
- "expr": "((max(rlatencyp99{by=\"instance\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"})-scalar(avg(rlatencyp99{by=\"instance\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0)))/(scalar(stddev(rlatencyp99{by=\"instance\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0))+100)-3)",
- "legendFormat": "",
- "refId": "B"
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Node Latency",
- "transformations": [
- {
- "id": "calculateField",
- "options": {
- "mode": "reduceRow",
- "reduce": {
- "reducer": "max"
- },
- "replaceFields": true
- }
- }
- ],
- "type": "stat"
- },
- {
- "class": "small_stat_error",
- "datasource": "prometheus",
- "description": "",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "mappings": [
- {
- "from": "-1000",
- "id": 1,
- "text": "\u2713",
- "to": "0.001",
- "type": 2,
- "value": ""
- },
- {
- "from": "0.001",
- "id": 2,
- "text": "\u26a0",
- "to": "10000",
- "type": 2,
- "value": "0.001"
- }
- ],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "orange",
- "value": 0.001
- }
- ]
- }
- },
- "overrides": []
- },
- "gridPos": {
- "h": 4,
- "w": 2,
- "x": 18,
- "y": 16
- },
- "id": 25,
- "links": [
- {
- "title": "A single shard with high latency is an indication of a hot-partition, or a large row/cell/partition"
- }
- ],
- "options": {
- "colorMode": "value",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "auto",
- "reduceOptions": {
- "calcs": [
- "last"
- ],
- "fields": "",
- "values": false
- },
- "textMode": "auto"
- },
- "pluginVersion": "7.1.3",
- "targets": [
- {
- "expr": "((max(wlatencyp99{by=\"instance,shard\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"})-scalar(avg(wlatencyp99{by=\"instance,shard\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0)))/(scalar(stddev(wlatencyp99{by=\"instance,shard\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0))+100)-3)",
- "legendFormat": "",
- "refId": "A"
- },
- {
- "expr": "((max(rlatencyp99{by=\"instance,shard\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"})-scalar(avg(rlatencyp99{by=\"instance,shard\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0)))/(scalar(stddev(rlatencyp99{by=\"instance,shard\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0))+100)-3)",
- "legendFormat": "",
- "refId": "B"
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Shard Latency",
- "transformations": [
- {
- "id": "calculateField",
- "options": {
- "mode": "reduceRow",
- "reduce": {
- "reducer": "max"
- },
- "replaceFields": true
- }
- }
- ],
- "type": "stat"
- },
- {
- "class": "small_stat_error",
- "datasource": "prometheus",
- "description": "A shard that reads more from the cache could be an indication of a hot partition",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "mappings": [
- {
- "from": "-1000",
- "id": 1,
- "text": "\u2713",
- "to": "0.001",
- "type": 2,
- "value": ""
- },
- {
- "from": "0.001",
- "id": 2,
- "text": "\u26a0",
- "to": "10000",
- "type": 2,
- "value": "0.001"
- }
- ],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "orange",
- "value": 0.001
- }
- ]
- }
- },
- "overrides": []
- },
- "gridPos": {
- "h": 4,
- "w": 2,
- "x": 20,
- "y": 16
- },
- "id": 26,
- "options": {
- "colorMode": "value",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "auto",
- "reduceOptions": {
- "calcs": [
- "last"
- ],
- "fields": "",
- "values": false
- },
- "textMode": "auto"
- },
- "pluginVersion": "7.1.3",
- "targets": [
- {
- "expr": "((rate(scylla_cache_reads{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m]) - rate(scylla_cache_reads_with_misses{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m]))- scalar(avg(rate(scylla_cache_reads{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m]) - rate(scylla_cache_reads_with_misses{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m]))))/scalar(stddev(rate(scylla_cache_reads{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m]) - rate(scylla_cache_reads_with_misses{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m]))+100)-3",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Cache",
- "transformations": [
- {
- "id": "calculateField",
- "options": {
- "mode": "reduceRow",
- "reduce": {
- "reducer": "max"
- },
- "replaceFields": true
- }
- }
- ],
- "type": "stat"
- },
- {
- "class": "small_stat_error",
- "datasource": "prometheus",
- "description": "A single shard that reads more from sstables could indicate that a node is slow",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "mappings": [
- {
- "from": "-1000",
- "id": 1,
- "text": "\u2713",
- "to": "0.001",
- "type": 2,
- "value": ""
- },
- {
- "from": "0.001",
- "id": 2,
- "text": "\u26a0",
- "to": "10000",
- "type": 2,
- "value": "0.001"
- }
- ],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "orange",
- "value": 0.001
- }
- ]
- }
- },
- "overrides": []
- },
- "gridPos": {
- "h": 4,
- "w": 2,
- "x": 22,
- "y": 16
- },
- "id": 27,
- "options": {
- "colorMode": "value",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "auto",
- "reduceOptions": {
- "calcs": [
- "last"
- ],
- "fields": "",
- "values": false
- },
- "textMode": "auto"
- },
- "pluginVersion": "7.1.3",
- "targets": [
- {
- "expr": "max(abs(scylla_database_active_reads{ dc=~\"$dc\"} - scalar(avg(scylla_database_active_reads{ dc=~\"$dc\"})))/scalar(stddev(scylla_database_active_reads{ dc=~\"$dc\"})+0.001))-3",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A",
- "step": 1
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "SSTable",
- "transformations": [
- {
- "id": "calculateField",
- "options": {
- "mode": "reduceRow",
- "reduce": {
- "reducer": "max"
- },
- "replaceFields": true
- }
- }
- ],
- "type": "stat"
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 22
- },
- "id": 28,
- "panels": [],
- "repeat": "dc",
- "title": "",
- "type": "row"
- },
- {
- "class": "plain_text",
- "content": "Information for $dc
",
- "datasource": null,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {}
- },
- "overrides": []
- },
- "gridPos": {
- "h": 2,
- "w": 24,
- "x": 0,
- "y": 23
- },
- "id": 29,
- "isNew": true,
- "links": [],
- "mode": "html",
- "options": {},
- "span": 12,
- "style": {},
- "title": "",
- "transparent": true,
- "type": "text"
- },
- {
- "class": "vertical_lcd",
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "decimals": 0,
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "orange",
- "value": 85
- }
- ]
- },
- "unit": "percent"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 1,
- "x": 0,
- "y": 25
- },
- "id": 30,
- "options": {
- "displayMode": "lcd",
- "orientation": "vertical",
- "reduceOptions": {
- "calcs": [
- "last"
- ],
- "fields": "",
- "values": false
- },
- "showUnfilled": true
- },
- "pluginVersion": "7.1.3",
- "targets": [
- {
- "expr": "avg(scylla_reactor_utilization{cluster=~\"$cluster\", dc=~\"$dc\"} )",
- "instant": true,
- "interval": "",
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Load",
- "type": "bargauge"
- },
- {
- "class": "bytes_panel",
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "bytes"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 3,
- "x": 1,
- "y": 25
- },
- "id": 31,
- "isNew": true,
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "span": 5,
- "targets": [
- {
- "expr": "Avg(node_filesystem_size_bytes{mountpoint=\"$mount_point\", dc=~\"$dc\"}) by ([[by]])-avg(node_filesystem_avail_bytes{mountpoint=\"$mount_point\", dc=~\"$dc\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "Avg Usage {{[[by]]}}",
- "metric": "",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "avg(node_filesystem_size_bytes{mountpoint=\"$mount_point\", dc=~\"$dc\"}) by ([[by]])",
- "interval": "",
- "legendFormat": "Size {{[[by]]}}",
- "refId": "B"
- }
- ],
- "title": "Disk Size by $by",
- "type": "timeseries"
- },
- {
- "class": "graph_panel_int",
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "short"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 4,
- "x": 4,
- "y": 25
- },
- "id": 32,
- "isNew": true,
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "span": 2,
- "targets": [
- {
- "expr": "$func(scylla_compaction_manager_compactions{cluster=~\"$cluster|$^\", dc=~\"$dc\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "Running Compactions",
- "type": "timeseries"
- },
- {
- "class": "ops_panel",
- "datasource": "prometheus",
- "description": "The Hits and Misses",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "si:ops/s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 6,
- "x": 8,
- "y": 25
- },
- "id": 33,
- "isNew": true,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "class": "show_legend",
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "total": false,
- "values": false
- },
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "seriesOverrides": [
- {}
- ],
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_row_hits{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "Hit {{[[by]]}}",
- "refId": "A",
- "step": 10
- },
- {
- "expr": "$func(rate(scylla_cache_row_misses{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "Misses {{[[by]]}}",
- "refId": "B",
- "step": 10
- }
- ],
- "title": "Cache Rows Hits/Misses",
- "type": "timeseries"
- },
- {
- "class": "ops_panel",
- "datasource": "prometheus",
- "description": "Write attempts - include all writes that reached the coordinator node, even if they will eventually fail",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "si:ops/s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 6,
- "x": 14,
- "y": 25
- },
- "id": 34,
- "isNew": true,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "class": "show_legend",
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "total": false,
- "values": false
- },
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "seriesOverrides": [
- {
- "alias": "1 Day Ago",
- "dashLength": 4,
- "dashes": true
- },
- {
- "alias": "1 Week Ago",
- "dashLength": 2,
- "dashes": true
- }
- ],
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "Writes",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m] offset 1d))",
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "1 Day Ago",
- "refId": "B",
- "step": 1
- },
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m] offset 1w))",
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "1 Week Ago",
- "refId": "C",
- "step": 1
- }
- ],
- "title": "Writes",
- "type": "timeseries"
- },
- {
- "class": "us_panel",
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "\u00b5s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 4,
- "x": 20,
- "y": 25
- },
- "id": 35,
- "isNew": true,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "class": "show_legend",
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "total": false,
- "values": false
- },
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "seriesOverrides": [
- {}
- ],
- "span": 2,
- "targets": [
- {
- "expr": "avg(wlatencyp95{by=\"[[by]]\", cluster=~\"$cluster|$^\", dc=~\"$dc\",scheduling_group_name!=\"streaming\"}>0) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "95% {{[[by]]}}",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "avg(wlatencyp99{by=\"[[by]]\", cluster=~\"$cluster|$^\", dc=~\"$dc\",scheduling_group_name!=\"streaming\"}>0) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "99% {{[[by]]}}",
- "refId": "B",
- "step": 1
- }
- ],
- "title": "Write Latencies",
- "type": "timeseries"
- },
- {
- "class": "ops_panel",
- "datasource": "prometheus",
- "description": "Requests that Scylla tried to write but timed out. Timeouts are counted in the node that received the request (the coordinator), not at the replicas.",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "si:ops/s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 4,
- "x": 0,
- "y": 31
- },
- "id": 36,
- "isNew": true,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "class": "show_legend",
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "total": false,
- "values": false
- },
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "seriesOverrides": [
- {}
- ],
- "span": 2,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_write_timeouts{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "Writes {{[[by]]}}",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Write Timeouts by [[by]]",
- "type": "timeseries"
- },
- {
- "class": "ops_panel",
- "datasource": "prometheus",
- "description": "Requests that Scylla tried to read but timed out. Timeouts are counted in the node that received the request (the coordinator), not at the replicas.",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "si:ops/s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 4,
- "x": 4,
- "y": 31
- },
- "id": 37,
- "isNew": true,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "class": "show_legend",
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "total": false,
- "values": false
- },
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "seriesOverrides": [
- {}
- ],
- "span": 2,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_read_timeouts{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])+rate(scylla_storage_proxy_coordinator_cas_read_timeouts{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])+rate(scylla_storage_proxy_coordinator_range_timeouts{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "Read {{[[by]]}}",
- "refId": "A",
- "step": 10
- }
- ],
- "title": "Read Timeouts by [[by]]",
- "type": "timeseries"
- },
- {
- "class": "ops_panel",
- "datasource": "prometheus",
- "description": "The Hits and Misses",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "si:ops/s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 6,
- "x": 8,
- "y": 31
- },
- "id": 38,
- "isNew": true,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "class": "show_legend",
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "total": false,
- "values": false
- },
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "seriesOverrides": [
- {}
- ],
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_cache_reads{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])-$func(rate(scylla_cache_reads_with_misses{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "Hit {{[[by]]}}",
- "refId": "A",
- "step": 10
- },
- {
- "expr": "$func(rate(scylla_cache_reads_with_misses{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "Misses {{[[by]]}}",
- "refId": "B",
- "step": 10
- }
- ],
- "title": "Cache Reads Hits/Misses",
- "type": "timeseries"
- },
- {
- "class": "ops_panel",
- "datasource": "prometheus",
- "description": "Read attempts - include all reads that reached the coordinator node, even if they will eventually fail",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "si:ops/s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 6,
- "x": 14,
- "y": 31
- },
- "id": 39,
- "isNew": true,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "class": "show_legend",
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "total": false,
- "values": false
- },
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "seriesOverrides": [
- {
- "alias": "1 Day Ago",
- "dashLength": 4,
- "dashes": true
- },
- {
- "alias": "1 Week Ago",
- "dashLength": 2,
- "dashes": true
- }
- ],
- "span": 3,
- "targets": [
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{cluster=~\"$cluster|$^\", dc=~\"$dc\",scheduling_group_name!=\"streaming\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "Reads",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{cluster=~\"$cluster|$^\", dc=~\"$dc\",scheduling_group_name!=\"streaming\"}[1m] offset 1d))",
- "intervalFactor": 1,
- "legendFormat": "1 Day Ago",
- "refId": "B",
- "step": 1
- },
- {
- "expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{cluster=~\"$cluster|$^\", dc=~\"$dc\",scheduling_group_name!=\"streaming\"}[1m] offset 1w))",
- "intervalFactor": 1,
- "legendFormat": "1 Week Ago",
- "refId": "C",
- "step": 1
- }
- ],
- "title": "Reads",
- "type": "timeseries"
- },
- {
- "class": "us_panel",
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "\u00b5s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 4,
- "x": 20,
- "y": 31
- },
- "id": 40,
- "isNew": true,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "class": "show_legend",
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "total": false,
- "values": false
- },
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "seriesOverrides": [
- {}
- ],
- "span": 2,
- "targets": [
- {
- "expr": "avg(rlatencyp95{by=\"[[by]]\", cluster=~\"$cluster|$^\", dc=~\"$dc\",scheduling_group_name!=\"streaming\"}>0) by([[by]])",
- "intervalFactor": 1,
- "legendFormat": "95% {{[[by]]}}",
- "refId": "A",
- "step": 1
- },
- {
- "expr": "avg(rlatencyp99{by=\"[[by]]\", cluster=~\"$cluster|$^\", dc=~\"$dc\",scheduling_group_name!=\"streaming\"}>0) by([[by]])",
- "intervalFactor": 1,
- "legendFormat": "99% {{[[by]]}}",
- "refId": "B",
- "step": 1
- }
- ],
- "title": "Read Latencies",
- "type": "timeseries"
- },
- {
- "class": "ops_panel",
- "datasource": "prometheus",
- "description": "Number of CQL INSERT commands generated by the user",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "si:ops/s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 6,
- "x": 0,
- "y": 37
- },
- "id": 41,
- "isNew": true,
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "seriesOverrides": [
- {}
- ],
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(scylla_cql_inserts{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]]) - sum(rate(scylla_cql_inserts_per_ks{cluster=~\"$cluster|$^\", dc=~\"$dc\", who=\"internal\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "CQL Insert",
- "type": "timeseries"
- },
- {
- "class": "ops_panel",
- "datasource": "prometheus",
- "description": "Number of CQL SELECT commands generated by the user",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "si:ops/s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 6,
- "x": 6,
- "y": 37
- },
- "id": 42,
- "isNew": true,
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "seriesOverrides": [
- {}
- ],
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(scylla_cql_reads{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]]) - sum(rate(scylla_cql_reads_per_ks{cluster=~\"$cluster|$^\", dc=~\"$dc\", who=\"internal\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "CQL Reads",
- "type": "timeseries"
- },
- {
- "class": "ops_panel",
- "datasource": "prometheus",
- "description": "Number of CQL DELETE commands generated by the user",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "si:ops/s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 6,
- "x": 12,
- "y": 37
- },
- "id": 43,
- "isNew": true,
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "seriesOverrides": [
- {}
- ],
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(scylla_cql_deletes{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])-sum(rate(scylla_cql_deletes_per_ks{cluster=~\"$cluster|$^\", dc=~\"$dc\", who=\"internal\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "CQL Deletes",
- "type": "timeseries"
- },
- {
- "class": "ops_panel",
- "datasource": "prometheus",
- "description": "Number of CQL UPDATE commands generated by the user",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "si:ops/s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 6,
- "x": 18,
- "y": 37
- },
- "id": 44,
- "isNew": true,
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "seriesOverrides": [
- {}
- ],
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(scylla_cql_updates{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])-sum(rate(scylla_cql_updates_per_ks{cluster=~\"$cluster|$^\", dc=~\"$dc\", who=\"internal\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 1
- }
- ],
- "title": "CQL Updates",
- "type": "timeseries"
- },
- {
- "class": "graph_panel",
- "datasource": "prometheus",
- "description": "amount of CQL connections currently established",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "short"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 6,
- "x": 0,
- "y": 43
- },
- "id": 45,
- "isNew": true,
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "pointradius": 1,
- "span": 3,
- "targets": [
- {
- "expr": "sum(scylla_transport_current_connections{cluster=~\"$cluster|$^\", dc=~\"$dc\"}) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "title": "Client CQL connections by [[by]]",
- "type": "timeseries"
- },
- {
- "class": "graph_panel",
- "datasource": "prometheus",
- "description": "Number of CQL batches command, each batched command is counted once",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "short"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 6,
- "x": 6,
- "y": 43
- },
- "id": 46,
- "isNew": true,
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "pointradius": 1,
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(scylla_cql_batches{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "title": "CQL Batches by [[by]]",
- "type": "timeseries"
- },
- {
- "class": "graph_panel",
- "datasource": "prometheus",
- "description": "Number of CQL command batched. Each batch would add the number of commands inside the batch",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "short"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 6,
- "x": 12,
- "y": 43
- },
- "id": 47,
- "isNew": true,
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "pointradius": 1,
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(scylla_cql_statements_in_batches{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "title": "CQL Command In Batches by [[by]]",
- "type": "timeseries"
- },
- {
- "class": "ops_panel",
- "datasource": "prometheus",
- "description": "Counts the number of SELECT statements with BYPASS CACHE option",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "si:ops/s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 6,
- "x": 18,
- "y": 43
- },
- "id": 48,
- "isNew": true,
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "seriesOverrides": [
- {}
- ],
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(scylla_cql_select_bypass_caches{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
- "format": "time_series",
- "hide": false,
- "intervalFactor": 2,
- "refId": "A"
- }
- ],
- "title": "BYPASS CACHE",
- "type": "timeseries"
- },
- {
- "class": "graph_panel",
- "dashversion": [
- ">4.4",
- ">2021.1"
- ],
- "datasource": "prometheus",
- "description": "CQL errors by type, only active errors are shown",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "short"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 6,
- "x": 0,
- "y": 49
- },
- "id": 49,
- "isNew": true,
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "pointradius": 1,
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(scylla_transport_cql_errors_total{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]],type) >0",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "title": "CQL Errors [[by]]",
- "type": "timeseries"
- },
- {
- "class": "graph_panel",
- "datasource": "prometheus",
- "description": "Number of CQL row reads",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "short"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 6,
- "x": 6,
- "y": 49
- },
- "id": 50,
- "isNew": true,
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "pointradius": 1,
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(scylla_cql_rows_read{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "title": "CQL Row Reads [[by]]",
- "type": "timeseries"
- },
- {
- "class": "graph_panel",
- "datasource": "prometheus",
- "description": "Number of reads using secondary indexes",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "short"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 6,
- "x": 12,
- "y": 49
- },
- "id": 51,
- "isNew": true,
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "pointradius": 1,
- "span": 3,
- "targets": [
- {
- "expr": "sum(rate(scylla_cql_secondary_index_reads{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
- "intervalFactor": 1,
- "legendFormat": "",
- "metric": "",
- "refId": "A",
- "step": 30
- }
- ],
- "title": "Secondary indexes Reads [[by]]",
- "type": "timeseries"
- },
- {
- "class": "collapsible_row_panel",
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 55
- },
- "id": 52,
- "panels": [],
- "repeat": "",
- "title": "Your panels",
- "type": "row"
- },
- {
- "class": "plain_text",
- "datasource": null,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {}
- },
- "overrides": []
- },
- "gridPos": {
- "h": 2,
- "w": 24,
- "x": 0,
- "y": 56
- },
- "id": 53,
- "isNew": true,
- "links": [],
- "mode": "html",
- "options": {
- "content": "Your Panels
",
- "mode": "html"
- },
- "span": 12,
- "style": {},
- "title": "",
- "transparent": true,
- "type": "text"
- },
- {
- "class": "user_panel",
- "datasource": "prometheus",
- "description": "This graph panel was left empty on purpose for ad-hoc usage. Change it when needed. Pay attention that changes to the panel will not be saved.\n\nIf you do need a panel that can be saved, create a new dashboard, or edit the panel inside the json file",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "si:ops/s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 12,
- "x": 0,
- "y": 58
- },
- "id": 54,
- "isNew": true,
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "seriesOverrides": [
- {}
- ],
- "span": 6,
- "title": "Your Graph here",
- "type": "timeseries"
- },
- {
- "class": "user_panel",
- "datasource": "prometheus",
- "description": "This graph panel was left empty on purpose for ad-hoc usage. Change it when needed. Pay attention that changes to the panel will not be saved.\n\nIf you do need a panel that can be saved, create a new dashboard, or edit the panel inside the json file",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "class": "fieldConfig_defaults",
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "axisSoftMin": 0,
- "barAlignment": 0,
- "class": "fieldConfig_defaults_custom",
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "unit": "si:ops/s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 6,
- "w": 12,
- "x": 12,
- "y": 58
- },
- "id": 55,
- "isNew": true,
- "links": [],
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "hidden",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "multi",
- "sort": "asc"
- }
- },
- "seriesOverrides": [
- {}
- ],
- "span": 6,
- "title": "Your Graph here",
- "type": "timeseries"
- },
- {
- "class": "plain_text",
- "datasource": null,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {}
- },
- "overrides": []
- },
- "gridPos": {
- "h": 3,
- "w": 24,
- "x": 0,
- "y": 64
- },
- "id": 56,
- "isNew": true,
- "links": [],
- "mode": "html",
- "options": {
- "content": "Scylla Monitoring version - master
",
- "mode": "html"
- },
- "span": 12,
- "style": {},
- "title": "",
- "transparent": true,
- "type": "text"
- }
- ],
- "refresh": "30s",
- "schemaVersion": 26,
- "style": "dark",
- "tags": [],
- "templating": {
- "list": [
- {
- "allValue": null,
- "class": "by_template_var",
- "current": {
- "tags": [],
- "text": "DC",
- "value": "dc"
- },
- "error": null,
- "hide": 0,
- "includeAll": false,
- "label": "by",
- "multi": false,
- "name": "by",
- "options": [
- {
- "selected": false,
- "text": "Cluster",
- "value": "cluster"
- },
- {
- "selected": true,
- "text": "DC",
- "value": "dc"
- }
- ],
- "query": "Cluster,DC,Instance,Shard",
- "skipUrlSync": false,
- "type": "custom"
- },
- {
- "allValue": null,
- "class": "template_variable_single",
- "current": {
- "isNone": true,
- "selected": false,
- "text": "None",
- "value": ""
- },
- "datasource": "prometheus",
- "definition": "",
- "error": null,
- "hide": 0,
- "includeAll": false,
- "label": "cluster",
- "multi": false,
- "name": "cluster",
- "options": [],
- "query": "label_values(scylla_reactor_utilization, cluster)",
- "refresh": 2,
- "regex": "",
- "skipUrlSync": false,
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "class": "template_variable_all",
- "current": {
- "selected": true,
- "text": [
- "All"
- ],
- "value": [
- "$__all"
- ]
- },
- "datasource": "prometheus",
- "definition": "",
- "error": null,
- "hide": 0,
- "includeAll": true,
- "label": "dc",
- "multi": true,
- "name": "dc",
- "options": [],
- "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster\"}, dc)",
- "refresh": 2,
- "regex": "",
- "skipUrlSync": false,
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "class": "template_variable_single",
- "current": {
- "text": "/var/lib/scylla",
- "value": "/var/lib/scylla"
- },
- "datasource": "prometheus",
- "definition": "",
- "error": null,
- "hide": 0,
- "includeAll": false,
- "label": "Mount path",
- "multi": false,
- "name": "mount_point",
- "options": [
- {
- "selected": true,
- "text": "/var/lib/scylla",
- "value": "/var/lib/scylla"
- }
- ],
- "query": "/var/lib/scylla",
- "skipUrlSync": false,
- "sort": 0,
- "tagValuesQuery": "",
- "tags": [],
- "tagsQuery": "",
- "type": "custom",
- "useTags": false
- },
- {
- "allValue": null,
- "class": "aggregation_function",
- "current": {
- "tags": [],
- "text": "sum",
- "value": "sum"
- },
- "hide": 0,
- "includeAll": false,
- "label": "Function",
- "multi": false,
- "name": "func",
- "options": [
- {
- "selected": true,
- "text": "sum",
- "value": "sum"
- },
- {
- "selected": false,
- "text": "avg",
- "value": "avg"
- },
- {
- "selected": false,
- "text": "max",
- "value": "max"
- },
- {
- "selected": false,
- "text": "min",
- "value": "min"
- },
- {
- "selected": false,
- "text": "stddev",
- "value": "stddev"
- },
- {
- "selected": false,
- "text": "stdvar",
- "value": "stdvar"
- }
- ],
- "query": "sum,avg,max,min,stddev,stdvar",
- "skipUrlSync": false,
- "type": "custom"
- },
- {
- "allValue": null,
- "class": "template_variable_all",
- "current": {
- "selected": true,
- "text": [
- "All"
- ],
- "value": [
- "$__all"
- ]
- },
- "datasource": "prometheus",
- "definition": "",
- "error": null,
- "hide": 2,
- "includeAll": true,
- "multi": true,
- "name": "all_scyllas_versions",
- "options": [],
- "query": "label_values(scylla_scylladb_current_version{cluster=~\"$cluster|$^\"}, version)",
- "refresh": 2,
- "regex": "",
- "skipUrlSync": false,
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "class": "template_variable_all",
- "current": {
- "selected": true,
- "text": [
- "All"
- ],
- "value": [
- "$__all"
- ]
- },
- "datasource": "prometheus",
- "definition": "query_result(count(up{job=~\"$cluster|$^\"}) by (dc))",
- "error": null,
- "hide": 2,
- "includeAll": true,
- "multi": true,
- "name": "count_dc",
- "options": [],
- "query": {
- "query": "query_result(count(up{job=~\"$cluster|$^\"}) by (dc))",
- "refId": "StandardVariableQuery"
- },
- "refresh": 2,
- "regex": "/(?\\{dc=\"[^\"]+\".* \\d+) .*/",
- "skipUrlSync": false,
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "class": "monitor_version_var",
- "current": {
- "text": "master",
- "value": "master"
- },
- "error": null,
- "hide": 2,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "monitoring_version",
- "options": [
- {
- "selected": true,
- "text": "master",
- "value": "master"
- }
- ],
- "query": "master",
- "skipUrlSync": false,
- "type": "custom"
- }
- ]
- },
- "time": {
- "from": "now-30m",
- "to": "now"
- },
- "timepicker": {
- "now": true,
- "refresh_intervals": [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ]
- },
- "timezone": "utc",
- "title": "CQL Overview",
- "uid": "cql-overview",
- "version": 1
- }`}}
diff --git a/assets/monitoring/grafana/v1alpha1/dashboards.cm.yaml b/assets/monitoring/grafana/v1alpha1/dashboards.cm.yaml
new file mode 100644
index 00000000000..bce7181cde7
--- /dev/null
+++ b/assets/monitoring/grafana/v1alpha1/dashboards.cm.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: '{{ .scyllaDBMonitoringName }}-grafana-scylladb-dashboards'
+data:
+{{- range $key, $value := .dashboards }}
+ "{{ $key }}": "{{ $value }}"
+{{ else }}
+--- Can't read dashboards ---
+{{ end }}
diff --git a/assets/monitoring/grafana/v1alpha1/dashboards/platform/alternator.template.json b/assets/monitoring/grafana/v1alpha1/dashboards/platform/alternator.template.json
new file mode 100644
index 00000000000..5c9200cf079
--- /dev/null
+++ b/assets/monitoring/grafana/v1alpha1/dashboards/platform/alternator.template.json
@@ -0,0 +1,1055 @@
+{
+ "dashboard": {
+ "class": "dashboard",
+ "originalTitle": "Scylla Cluster Metrics",
+ "overwrite": true,
+ "rows": [
+ {
+ "class": "alternator_logo_row"
+ },
+ {
+ "class": "row",
+ "height": "200px",
+ "panels": [
+ {
+ "class": "single_stat_panel",
+ "targets": [
+ {
+ "expr": "count(scylla_scylladb_current_version{job=\"scylla\", cluster=~\"$cluster|$^\"})",
+ "intervalFactor": 1,
+ "legendFormat": "Total Nodes",
+ "refId": "A",
+ "step": 40
+ }
+ ],
+ "title": "Total Nodes"
+ },
+ {
+ "class": "single_stat_panel_fail",
+ "targets": [
+ {
+ "expr": "count(scrape_samples_scraped{job=\"scylla\", cluster=~\"$cluster|$^\"}==0) OR vector(0)",
+ "intervalFactor": 1,
+ "legendFormat": "Unreachable",
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "1,2",
+ "title": "Unreachable"
+ },
+ {
+ "class": "single_stat_panel_fail",
+ "description": "Number of nodes that reported their status as Starting or Joining",
+ "targets": [
+ {
+ "expr": "count(scylla_node_operation_mode<=2)OR vector(0)",
+ "intervalFactor": 1,
+ "legendFormat": "Joining",
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "1,2",
+ "title": "Joining"
+ },
+ {
+ "class": "single_stat_panel_fail",
+ "description": "Number of nodes that reported their status as Leaving, Decommissioned, Draining or Drained",
+ "targets": [
+ {
+ "expr": "count(scylla_node_operation_mode>3)OR vector(0)",
+ "intervalFactor": 1,
+ "legendFormat": "Leaving",
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "1,2",
+ "title": "Leaving"
+ },
+ {
+ "class": "percent_panel",
+ "description": "The percentage of the time during which Scylla utilized the CPU. Note that because Scylla does busy polling for some time before going idle, CPU utilization as seen by the operating system may be much higher. Your system is not yet CPU-bottlenecked until this metric is high.",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "avg(scylla_reactor_utilization{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"} ) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "title": "Load"
+ },
+ {
+ "class": "small_nodes_table",
+ "span": 4,
+ "transformations":[
+ {
+ "id":"filterFieldsByName",
+ "options":{
+ "include":{
+ "names":[
+ "instance",
+ "svr",
+ "Value #A",
+ "Value #B",
+ "Value #C",
+ "Value #D"
+ ]
+ }
+ }
+ },
+ {
+ "id":"seriesToColumns",
+ "options":{
+ "byField":"instance"
+ }
+ },
+ {
+ "id":"organize",
+ "options":{
+ "excludeByName":{
+ },
+ "indexByName":{
+ "instance":0,
+ "Value #D":1,
+ "Value #C":2,
+ "svr":3,
+ "Value #A":4,
+ "Value #B":5
+ },
+ "renameByName":{
+ }
+ }
+ }
+ ]
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "bytes_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "sum(node_filesystem_size_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])-sum(node_filesystem_avail_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "sum(node_filesystem_size{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])-sum(node_filesystem_avail{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "title": "Disk Size by $by"
+ },
+ {
+ "class": "ops_panel",
+ "description": "Number of Alternator Actions",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_total_operations{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "title": "Total Actions"
+ },
+ {
+ "class": "alert_table",
+ "span": 4,
+ "styles": [
+ {
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "link": true,
+ "linkTooltip": "Jump to see the node",
+ "linkUrl": "/d/detailed-[[dash_version]]/detailed?refresh=30s&orgId=1&var-by=instance&var-node=${__cell_4}&from=${__cell_0}",
+ "pattern": "Time",
+ "type": "date"
+ },
+ {
+ "class": "hidden_column",
+ "pattern": "severity"
+ },
+ {
+ "class": "hidden_column",
+ "pattern": "alertname"
+ },
+ {
+ "class": "hidden_column",
+ "pattern": "cluster"
+ },
+ {
+ "class": "hidden_column",
+ "pattern": "monitor"
+ },
+ {
+ "class": "hidden_column",
+ "pattern": "summary"
+ },
+ {
+ "alias": "Instance",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": true,
+ "linkTooltip": "Jump to see the node",
+ "linkUrl": "/d/detailed-[[dash_version]]/detailed?refresh=30s&orgId=1&var-by=instance&var-node=${__cell}",
+ "mappingType": 1,
+ "pattern": "instance",
+ "thresholds": [],
+ "type": "string",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [],
+ "type": "number",
+ "unit": "short"
+ }
+ ],
+ "title": "Active Alerts"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": "Data Plane Actions"
+ }
+ ]
+ },
+ {
+ "class": "header_row",
+ "panels": [
+ {
+ "class": "plain_text",
+ "content": "Data Plane Actions"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"GetItem\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "GetItem by [[by]]"
+ },
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"PutItem\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "PutItem by [[by]]"
+ },
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"UpdateItem\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "UpdateItem by [[by]]"
+ },
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"DeleteItem\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "DeleteItem by [[by]]"
+ },
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"BatchWriteItem\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "BatchWriteItem by [[by]]"
+ },
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "dashversion":[">4.4", ">2021.1"],
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"BatchGetItem\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "BatchGetItem by [[by]]"
+ },
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"Query\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Query by [[by]]"
+ },
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"Scan\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Scan by [[by]]"
+ }
+ ]
+ },
+ {
+ "class": "header_row",
+ "panels": [
+ {
+ "class": "plain_text",
+ "content": "Data Plane Latencies"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": null,
+ "id": "auto",
+ "gridPos": {
+ "h": 1,
+ "w": 24
+ },
+ "panels": [],
+ "repeat": "alternator_latency_ops",
+ "title": "$alternator_latency_ops",
+ "type": "row"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_op_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_latency_ops\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Completed $alternator_latency_ops"
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_op_latency_sum{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_latency_ops\"}[60s])) by ([[by]])/($func(rate(scylla_alternator_op_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_latency_ops\"}[60s])) by ([[by]]) + 1)",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Average $alternator_latency_ops latency by [[by]]"
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(scylla_alternator_op_latency_bucket{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_latency_ops\"}[60s])) by ([[by]], le))",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "95th percentile $alternator_latency_ops latency by [[by]]"
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(scylla_alternator_op_latency_bucket{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_latency_ops\"}[60s])) by ([[by]], le))",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "99th percentile $alternator_latency_ops latency by [[by]]"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": "Streams"
+ }
+ ]
+ },
+ {
+ "class": "header_row",
+ "panels": [
+ {
+ "class": "plain_text",
+ "content": "Streams Actions"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"ListStreams\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "ListStreams by [[by]]"
+ },
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"DescribeStream\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "DescribeStream by [[by]]"
+ },
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"GetShardIterator\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "GetShardIterator by [[by]]"
+ },
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"GetRecords\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "GetRecords by [[by]]"
+ }
+ ]
+ },
+ {
+ "class": "header_row",
+ "panels": [
+ {
+ "class": "plain_text",
+ "content": "Streams Latencies"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": null,
+ "id": "auto",
+ "gridPos": {
+ "h": 1,
+ "w": 24
+ },
+ "panels": [],
+ "repeat": "alternator_streams_latency_ops",
+ "title": "$alternator_streams_latency_ops",
+ "type": "row"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_op_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_streams_latency_ops\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Completed $alternator_streams_latency_ops"
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_op_latency_sum{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_streams_latency_ops\"}[60s])) by ([[by]])/($func(rate(scylla_alternator_op_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_streams_latency_ops\"}[60s])) by ([[by]]) + 1)",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Average $alternator_streams_latency_ops latency by [[by]]"
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(scylla_alternator_op_latency_bucket{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_streams_latency_ops\"}[60s])) by ([[by]], le))",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "95th percentile $alternator_streams_latency_ops latency by [[by]]"
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(scylla_alternator_op_latency_bucket{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"$alternator_streams_latency_ops\"}[60s])) by ([[by]], le))",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "99th percentile $alternator_streams_latency_ops latency by [[by]]"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": "Control plane"
+ }
+ ]
+ },
+ {
+ "class": "header_row",
+ "panels": [
+ {
+ "class": "plain_text",
+ "content": "Control Plane Actions"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"CreateTable\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "CreateTable by [[by]]"
+ },
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"DeleteTable\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "DeleteTable by [[by]]"
+ },
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"DescribeTable\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "DescribeTable by [[by]]"
+ },
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"ListTables\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "ListTables by [[by]]"
+ },
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_alternator_operation{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", op=\"DescribeEndpoints\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "DescribeEndpoints by [[by]]"
+ },
+ {
+ "class": "text_panel",
+ "dashversion":["<5.1", "<2022.2"],
+ "content": "## ",
+ "mode": "markdown",
+ "span": 4,
+ "style": {}
+ },
+ {
+ "class": "ops_panel",
+ "description": "The number of items deleted by their TTL",
+ "dashversion":[">5.1", ">2022.2"],
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_expiration_items_deleted{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Expired Items Deleted by [[by]]"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "gridPos": {
+ "h": 2
+ },
+ "height": "25px",
+ "panels": [
+ {
+ "class": "plain_text",
+ "content": "Cache",
+ "span": 6
+ },
+ {
+ "content": "Timeouts",
+ "class": "plain_text",
+ "span": 6
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "ops_panel",
+ "description": "Number of rows that were read from the cache, without needing to be fetched from storage.",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_row_hits{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Cache Hits"
+ },
+ {
+ "class": "ops_panel",
+ "description": "Number of rows that were not present in the cache, and had to be fetched from storage.",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_row_misses{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Cache Misses"
+ },
+ {
+ "class": "wpm_panel",
+ "description": "Requests that Scylla tried to write but timed out. Timeouts are counted in the node that received the request (the coordinator), not at the replicas.",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(delta(scylla_storage_proxy_coordinator_write_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Write Timeouts/Minutes by [[by]]"
+ },
+ {
+ "class": "rpm_panel",
+ "description": "Requests that Scylla tried to read but timed out. Timeouts are counted in the node that received the request (the coordinator), not at the replicas.",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(delta(scylla_storage_proxy_coordinator_read_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Read Timeouts/Minutes by [[by]]"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "user_panel_row_header"
+ },
+ {
+ "class": "user_panels_row"
+ },
+ {
+ "class": "monitoring_version_row"
+ }
+ ],
+ "tags": [
+ "__SCYLLA_VERSION_DOT__"
+ ],
+ "templating": {
+ "list": [
+ {
+ "class": "by_template_var"
+ },
+ {
+ "class": "template_variable_single",
+ "label": "cluster",
+ "name": "cluster",
+ "query": "label_values(scylla_reactor_utilization, cluster)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "dc",
+ "name": "dc",
+ "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster\"}, dc)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "node",
+ "name": "node",
+ "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster|$^\", dc=~\"$dc\"}, instance)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "shard",
+ "name": "shard",
+ "query": "label_values(scylla_reactor_utilization,shard)",
+ "sort": 3
+ },
+ {
+ "class": "template_variable_single",
+ "current": {
+ "text": "/var/lib/scylla",
+ "value": "/var/lib/scylla"
+ },
+ "label": "Mount path",
+ "name": "mount_point",
+ "query": "node_filesystem_avail_bytes",
+ "regex": "/mountpoint=\"([^\"]*)\".*/",
+ "sort": 0
+ },
+ {
+ "class": "aggregation_function"
+ },
+ {
+ "class": "template_variable_custom",
+ "current": {
+ "text": "__SCYLLA_VERSION_DASHED__",
+ "value": "__SCYLLA_VERSION_DASHED__"
+ },
+ "name": "dash_version",
+ "options": [
+ {
+ "selected": true,
+ "text": "__SCYLLA_VERSION_DASHED__",
+ "value": "__SCYLLA_VERSION_DASHED__"
+ }
+ ],
+ "query": "__SCYLLA_VERSION_DASHED__"
+ },
+ {
+ "class": "template_variable_custom",
+ "current": {
+ "text": "__SCYLLA_VERSION_DOT__",
+ "value": "__SCYLLA_VERSION_DOT__"
+ },
+ "name": "scylla_version",
+ "options": [
+ {
+ "selected": true,
+ "text": "__SCYLLA_VERSION_DOT__",
+ "value": "__SCYLLA_VERSION_DOT__"
+ }
+ ],
+ "query": "__SCYLLA_VERSION_DOT__"
+ },
+ {
+ "class": "monitor_version_var"
+ },
+ {
+ "class": "template_variable_custom",
+ "name": "alternator_latency_ops",
+ "multi": true,
+ "includeAll": true,
+ "current": {
+ "text": "All",
+ "value": "$__all"
+ },
+ "options": [
+ {
+ "selected": true,
+ "text": "All",
+ "value": "$__all"
+ },
+ {
+ "selected": false,
+ "text": "GetItem",
+ "value": "GetItem"
+ },
+ {
+ "selected": false,
+ "text": "PutItem",
+ "value": "PutItem"
+ },
+ {
+ "selected": false,
+ "text": "UpdateItem",
+ "value": "UpdateItem"
+ },
+ {
+ "selected": false,
+ "text": "DeleteItem",
+ "value": "DeleteItem"
+ }
+ ],
+ "query": "GetItem,PutItem,UpdateItem,DeleteItem"
+ },
+ {
+ "class": "template_variable_custom",
+ "name": "alternator_streams_latency_ops",
+ "multi": true,
+ "includeAll": true,
+ "current": {
+ "text": "All",
+ "value": "$__all"
+ },
+ "options": [
+ {
+ "selected": true,
+ "text": "All",
+ "value": "$__all"
+ },
+ {
+ "selected": false,
+ "text": "GetRecords",
+ "value": "GetRecords"
+ }
+ ],
+ "query": "GetRecords"
+ },
+ {
+ "allValue": null,
+ "datasource": "prometheus",
+ "definition": "scylla_alternator_total_operations{cluster=~\"$cluster|$^\"}",
+ "description": null,
+ "error": null,
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "no_alternator",
+ "options": [],
+ "query": {
+ "query": "scylla_alternator_total_operations{cluster=~\"$cluster|$^\"}",
+ "refId": "StandardVariableQuery"
+ },
+ "refresh": 1,
+ "regex": "/^(scylla_alternator_total_operations)/",
+ "skipUrlSync": false,
+ "sort": 0,
+ "type": "query"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-30m",
+ "to": "now"
+ },
+ "annotations" :{
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ },
+ {
+ "class" : "annotation_restart"
+ },
+ {
+ "class" : "annotation_stall"
+ },
+ {
+ "class" : "annotation_schema_changed"
+ }
+ ]
+ },
+ "title": "Alternator",
+ "uid": "alternator-__SCYLLA_VERSION_DASHED__",
+ "version": 1
+ }
+}
diff --git a/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-advanced.template.json b/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-advanced.template.json
new file mode 100644
index 00000000000..34e651e7812
--- /dev/null
+++ b/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-advanced.template.json
@@ -0,0 +1,860 @@
+{
+ "dashboard": {
+ "class": "dashboard",
+ "uid": "advanced-__SCYLLA_VERSION_DASHED__",
+ "rows": [
+ {
+ "class": "small_stat_rows"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": ""
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "height": "25px",
+ "gridPos": {"h": 2},
+ "panels": [
+ {
+ "class": "plain_text",
+ "content": "IO Queue Information
"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": null,
+ "id": "auto",
+ "gridPos": {
+ "h": 1,
+ "w": 24
+ },
+ "panels": [],
+ "repeat": "classes",
+ "title": "$classes",
+ "type": "row"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "seconds_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "max(rate(scylla_io_queue_total_delay_sec{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])/rate(scylla_io_queue_total_operations{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) or on() max(scylla_io_queue_delay{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"} ) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "seastar_io_queue_delay",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "description": "seastar_io_queue_delay",
+ "title": "$classes I/O Queue delay by [[by]]"
+ },
+ {
+ "class": "graph_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "max(scylla_io_queue_queue_length{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "seastar_io_queue_delay",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "description": "scylla_io_queue_queue_length",
+ "title": "$classes Queue length by [[by]]"
+ },
+ {
+ "class": "bps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_io_queue_total_bytes{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "seastar_io_queue_delay",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "description": "scylla_io_queue_total_bytes",
+ "title": "$classes I/O Queue bandwidth by [[by]]"
+ },
+ {
+ "class": "iops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_io_queue_total_operations{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "seastar_io_queue_delay",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "description": "scylla_io_queue_total_operations",
+ "title": "$classes I/O Queue IOPS by [[by]]"
+ },
+ {
+ "class": "seconds_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "max(rate(scylla_io_queue_total_exec_sec{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])/rate(scylla_io_queue_total_operations{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) or on() max(scylla_io_queue_delay{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"} ) by ([[by]])",
+ "intervalFactor": 1,
+ "metric": "scylla_io_queue_total_exec_sec",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "description": "scylla_io_queue_total_exec_sec",
+ "title": "Disk $classes I/O Queue delay by [[by]]"
+ },
+ {
+ "class": "graph_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "max(scylla_io_queue_disk_queue_length{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "metric": "scylla_io_queue_disk_queue_length",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "description": "scylla_io_queue_disk_queue_length",
+ "title": "DISK $classes Queue length by [[by]]"
+ },
+ {
+ "class": "seconds_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_io_queue_starvation_time_sec{class=\"$classes\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "metric": "scylla_io_queue_starvation_time_sec",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "description": "The time the class waited for being dispatched with non-empty software queue.\n\nLarge IO delays coupled with small starvation time denotes that scheduler is doing its job properly, and it's upper layer that overflows disk capacity.\n\nLarge IO delays coupled with large starvation time denotes that there might be some problem on the scheduler level that it cannot deliver IO requests from that class into disk in timely manner or the disk is slow and cannot afford timely dispatching.",
+ "title": "DISK $classes starvation time by [[by]]"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": ""
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "height": "25px",
+ "gridPos": {"h": 2},
+ "panels": [
+ {
+ "class": "plain_text",
+ "content": "Information by Task Group
"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": null,
+ "id": "auto",
+ "gridPos": {
+ "h": 1,
+ "w": 24
+ },
+ "panels": [],
+ "repeat": "group",
+ "title": "$group",
+ "type": "row"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "percentunit_panel",
+ "span":3,
+ "pointradius": 1,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_scheduler_runtime_ms{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\",group=~\"$group\", shard=~\"[[shard]]\"}[1m])/1000) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "Time used by [[by]] - $group",
+ "description": "Scylla employs an event-loop like reactor that alternates between the execution of different groups of tasks periodically. This graph shows how much time was spent in $group group"
+ },
+ {
+ "class": "percentunit_panel",
+ "span":3,
+ "pointradius": 1,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_scheduler_time_spent_on_task_quota_violations_ms{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\",group=~\"$group\", shard=~\"[[shard]]\"}[1m])/1000) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "Time spent in task quota violations by [[by]] - $group",
+ "description": "Scylla employs an event-loop like reactor that alternates between the execution of different groups of tasks periodically. The maximum amount of time during which a task group can run is called the \"task quota\". Some task groups may disrespect that and run for longer. This may cause latency issues"
+ },
+ {
+ "class": "percentunit_panel",
+ "span":3,
+ "pointradius": 1,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_scheduler_starvetime_ms{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\",group=~\"$group\", shard=~\"[[shard]]\"}[1m])/1000) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "Starvation time by [[by]] - $group",
+ "description": "Scylla employs an event-loop like reactor that alternates between the execution of different groups of tasks periodically. The maximum amount of time during which a task group can run is called the \"task quota\". Some task groups may disrespect that and run for longer.\n\n This graph shows the amount of time the group was waiting to get CPU time."
+ },
+ {
+ "class": "graph_panel",
+ "span":3,
+ "pointradius": 1,
+ "targets": [
+ {
+ "expr": "$func(scylla_scheduler_shares{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\",group=~\"$group\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "Scheduler shares [[by]] - $group",
+ "description": "Shares assigned to the $group. Shares determine how Scylla reactor distributes the task quotas between groups (Higher share gets more quotas)"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": "Internal node errors"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "height": "25px",
+ "gridPos": {"h": 2},
+ "panels": [
+ {
+ "class": "plain_text",
+ "content": "Internal node Errors - $cluster
"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "rps_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "avg(rate(scylla_storage_proxy_coordinator_read_errors_local_node{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "Local Reads Error by [[by]]",
+ "description": "Number of read requests that failed on a local node"
+ },
+ {
+ "class": "wps_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "avg(rate(scylla_storage_proxy_coordinator_write_errors_local_node{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "Local Write Error by [[by]]",
+ "description": "Number of write requests that failed on a local node"
+ },
+ {
+ "class": "text_panel",
+ "content": "## ",
+ "mode": "markdown",
+ "span": 4,
+ "style": {}
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "rps_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "avg(rate(scylla_storage_proxy_coordinator_read_unavailable{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "Reads Unavailable Error by [[by]]",
+ "description": "Number of Read requests that failed due to an 'unavailable' error"
+ },
+ {
+ "class": "wps_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "avg(rate(scylla_storage_proxy_coordinator_write_unavailable{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "Write Unavailable Error by [[by]]",
+ "description": "Number of write requests that failed due to an 'unavailable' error"
+ },
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "avg(rate(scylla_storage_proxy_coordinator_range_unavailable{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "Range Unavailable Error by [[by]]",
+ "description": "Number of range read requests that failed due to an 'unavailable' error"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "avg(rate(scylla_reactor_aio_errors{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "AIO Error by [[by]]",
+ "description": "Number of AIO Errors"
+ },
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_reactor_abandoned_failed_futures{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "Ignored Future By [[by]]",
+ "description": "Total number of abandoned failed futures, futures destroyed while still containing an exception."
+ },
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_reactor_cpp_exceptions{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "C++ Exceptions [[by]]",
+ "description": "Number of C++ exceptions thrown.\n\n An exception by itself does not indicate a problem"
+ }
+
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": "Commit Log"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "height": "25px",
+ "gridPos": {"h": 2},
+ "panels": [
+ {
+ "class": "plain_text",
+ "content": "Commit log Information
"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "bytes_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "avg(scylla_commitlog_disk_total_bytes{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "Avg reserved disk space by [[by]]",
+ "description": "Holds the size of disk space in bytes reserved for data so far. A too high value indicates that we have some bottleneck in the writing to sstables path"
+ },
+ {
+ "class": "bytes_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "avg(scylla_commitlog_disk_active_bytes{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "Avg used disk space by [[by]]",
+ "description": "Holds the size of disk space in bytes used for data so far. A too high value indicates that we have some bottleneck in the writing to sstables path"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "avg(rate(scylla_commitlog_flush{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "Avg flush by [[by]]",
+ "description": "Counts a number of times the flush() method was called for a file"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "avg(scylla_commitlog_segments{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "Segments by [[by]]",
+ "description": "Holds the current number of segments"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "avg(rate(scylla_commitlog_flush_limit_exceeded{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "Avg flush limit exceeded by [[by]]",
+ "description": "Counts a number of times a flush limit was exceeded. A non-zero value indicates that there are too many pending flush operations (see pending_flushes) and some of them will be blocked till the total amount of pending flush operations drops below 5."
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "avg(scylla_commitlog_pending_allocations{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "Pending allocations by [[by]]",
+ "description": "Holds the number of currently pending allocations. A non-zero value indicates that we have a bottleneck in the disk write flow."
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "avg(scylla_commitlog_pending_flushes{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "Pending flush by [[by]]",
+ "description": "Holds the number of currently pending flushes."
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "avg(scylla_commitlog_unused_segments{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "Unused segments by [[by]]",
+ "description": "Holds the current number of unused segments. A non-zero value indicates that the disk write path became temporarily slow."
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "avg(scylla_commitlog_allocating_segments{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "title": "Allocating segments by [[by]]",
+ "description": "Holds the number of not closed segments that still have some free space. This value should not get too high."
+ }
+ ]
+ },
+ {
+ "class": "user_panels_collapse"
+ },
+ {
+ "class": "user_panel_row_header"
+ },
+ {
+ "class": "user_panels_row"
+ },
+ {
+ "class": "monitoring_version_row"
+ }
+ ],
+ "templating": {
+ "list": [
+ {
+ "allValue": null,
+ "current": {
+ "tags": [],
+ "text": "Instance",
+ "value": "instance"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "by",
+ "multi": false,
+ "name": "by",
+ "options": [
+ {
+ "selected": false,
+ "text": "Cluster",
+ "value": "cluster"
+ },
+ {
+ "selected": false,
+ "text": "DC",
+ "value": "dc"
+ },
+ {
+ "selected": true,
+ "text": "Instance",
+ "value": "instance"
+ },
+ {
+ "selected": false,
+ "text": "Shard",
+ "value": "instance,shard"
+ }
+ ],
+ "query": "Cluster,DC,Instance,Shard",
+ "type": "custom"
+ },
+ {
+ "class": "template_variable_single",
+ "label": "cluster",
+ "name": "cluster",
+ "query": "label_values(scylla_reactor_utilization, cluster)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "dc",
+ "name": "dc",
+ "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster\"}, dc)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "node",
+ "name": "node",
+ "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster|$^\", dc=~\"$dc\"}, instance)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "shard",
+ "name": "shard",
+ "query": "label_values(scylla_reactor_utilization,shard)",
+ "sort": 3
+ },
+ {
+ "class": "template_variable_all",
+ "label": "classes",
+ "name": "classes",
+ "hide": 0,
+ "query": "label_values(scylla_io_queue_delay,class)",
+ "sort": 1
+ },
+ {
+ "class": "template_variable_all",
+ "label": "group",
+ "name": "group",
+ "hide": 0,
+ "query": "label_values(scylla_scheduler_time_spent_on_task_quota_violations_ms,group)",
+ "sort": 1
+ },
+ {
+ "class": "aggregation_function",
+ "current": {
+ "tags": [],
+ "text": "avg",
+ "value": "avg"
+ }
+ },
+ {
+ "class": "template_variable_custom",
+ "current": {
+ "text": "__SCYLLA_VERSION_DOT__",
+ "value": "__SCYLLA_VERSION_DOT__"
+ },
+ "name": "scylla_version",
+ "options": [
+ {
+ "selected": true,
+ "text": "__SCYLLA_VERSION_DOT__",
+ "value": "__SCYLLA_VERSION_DOT__"
+ }
+ ],
+ "query": "__SCYLLA_VERSION_DOT__"
+ },
+ {
+ "class": "monitor_version_var"
+ }
+ ]
+ },
+ "tags": [
+ "__SCYLLA_VERSION_DOT__"
+ ],
+ "time": {
+ "from": "now-30m",
+ "to": "now"
+ },
+ "annotations" :{
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ },
+ {
+ "class" : "annotation_restart"
+ },
+ {
+ "class" : "annotation_stall"
+ },
+ {
+ "class" : "annotation_schema_changed"
+ }
+ ]
+ },
+ "title": "Advanced",
+ "overwrite": true,
+ "version": 5
+ }
+}
diff --git a/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-cql.template.json b/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-cql.template.json
new file mode 100644
index 00000000000..ffa5a95bcc4
--- /dev/null
+++ b/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-cql.template.json
@@ -0,0 +1,1247 @@
+{
+ "dashboard": {
+ "class": "dashboard",
+ "uid": "cql-__SCYLLA_VERSION_DASHED__",
+ "originalTitle": "CQL",
+ "rows": [
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": ""
+ }
+ ]
+ },
+ {
+ "class": "logo_row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": "CQL By User"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "height": "25px",
+ "gridPos": {"h": 2},
+ "panels": [
+ {
+ "class": "text_panel",
+ "options": {
+ "content": "CQL By User - Coordinator
",
+ "mode": "html"
+ },
+ "style": {}
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_inserts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) - sum(rate(scylla_cql_inserts_per_ks{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", who=\"internal\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "Number of CQL INSERT commands generated by the user",
+ "description": "scylla_cql_inserts",
+ "title": "CQL Insert"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) - sum(rate(scylla_cql_reads_per_ks{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", who=\"internal\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "Number of CQL SELECT commands generated by the user",
+ "title": "CQL Reads"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_deletes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])-sum(rate(scylla_cql_deletes_per_ks{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", who=\"internal\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "Number of CQL DELETE commands generated by the user",
+ "description": "scylla_cql_deletes",
+ "title": "CQL Deletes"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_updates{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])-sum(rate(scylla_cql_updates_per_ks{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", who=\"internal\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "Number of CQL UPDATE commands generated by the user",
+ "description": "scylla_cql_updates",
+ "title": "CQL Updates"
+ },
+ {
+ "class": "graph_panel",
+ "span": 3,
+ "pointradius": 1,
+ "targets": [
+ {
+ "expr": "sum(scylla_transport_current_connections{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "title": "Client CQL connections by [[by]]",
+ "description": "Number of CQL connections currently established"
+ },
+ {
+ "class": "graph_panel",
+ "span": 3,
+ "pointradius": 1,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_batches{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "title": "CQL Batches by [[by]]",
+ "description": "Number of CQL batch commands; each batch command is counted once"
+ },
+ {
+ "class": "graph_panel",
+ "span": 3,
+ "pointradius": 1,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_statements_in_batches{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "title": "CQL Command In Batches by [[by]]",
+ "description": "Number of CQL commands executed inside batches. Each batch adds the number of commands it contains"
+ },
+ {
+ "class": "ops_panel",
+ "description": "Counts the number of SELECT statements with BYPASS CACHE option",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_select_bypass_caches{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "title": "BYPASS CACHE"
+ },
+ {
+ "class": "graph_panel",
+ "span": 3,
+ "pointradius": 1,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_transport_cql_errors_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]],type) >0",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "title": "CQL Errors [[by]]",
+ "description": "CQL errors by type, only active errors are shown",
+ "dashversion":[">4.4", ">2021.1"]
+ },
+ {
+ "class": "graph_panel",
+ "span": 3,
+ "pointradius": 1,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_rows_read{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "title": "CQL Row Reads [[by]]",
+ "description": "Number of CQL row reads"
+ },
+ {
+ "class": "graph_panel",
+ "span": 3,
+ "pointradius": 1,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_secondary_index_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "title": "Secondary indexes Reads [[by]]",
+ "description": "Number of reads using secondary indexes"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "height": "25px",
+ "gridPos": {"h": 3},
+ "dashproductreject": "no-cql-connection",
+ "panels": [
+ {
+ "class": "text_panel",
+ "options": {
+ "content": "CQL System tablesThe following information is based on Scylla Plugin configuration, check the documentation for more details on how to enable it.",
+ "mode": "html"
+ },
+ "style": {}
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "dashproductreject": "no-cql-connection",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "collapsed": true,
+ "title": "Connection"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "dashproductreject": "no-cql-connection",
+ "panels": [
+ {
+ "class": "single_value_table",
+ "datasource": "scylla-datasource",
+ "span":12,
+ "targets": [
+ {
+ "refId": "A",
+ "queryText": "select address, port, shard_id, ssl_enabled, username from system.clients",
+ "queryHost": "$node",
+ "dashversion":["<4.4", "<2020.1"]
+
+ },
+ {
+ "refId": "A",
+ "queryText": "select address, port, shard_id, connection_stage, client_type, ssl_enabled, username, driver_name, driver_version, protocol_version from system.clients",
+ "queryHost": "$node",
+ "dashversion":[">4.4", ">2020.1"]
+ }
+ ],
+ "title": "Connection Table"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "dashproductreject": "no-cql-connection",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "collapsed": true,
+ "title": "Large Rows"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "dashproductreject": "no-cql-connection",
+ "panels": [
+ {
+ "class": "single_value_table",
+ "datasource": "scylla-datasource",
+ "span":12,
+ "targets": [
+ {
+ "refId": "A",
+ "queryText": "select keyspace_name, table_name,partition_key, clustering_key, row_size,compaction_time from system.large_rows",
+ "queryHost": "$node"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "align": null,
+ "filterable": false
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "value": null,
+ "color": "green"
+ },
+ {
+ "value": 80,
+ "color": "red"
+ }
+ ]
+ },
+ "mappings": []
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "row_size"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ }
+ ]
+ },
+ "title": "Large Rows"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "dashproductreject": "no-cql-connection",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "collapsed": true,
+ "title": "Large Cells"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "dashproductreject": "no-cql-connection",
+ "panels": [
+ {
+ "class": "single_value_table",
+ "datasource": "scylla-datasource",
+ "span":12,
+ "targets": [
+ {
+ "refId": "A",
+ "queryText": "select keyspace_name, table_name,partition_key, clustering_key, column_name,cell_size, collection_elements, compaction_time from system.large_cells",
+ "queryHost": "$node"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "align": null,
+ "filterable": false
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "value": null,
+ "color": "green"
+ },
+ {
+ "value": 80,
+ "color": "red"
+ }
+ ]
+ },
+ "mappings": []
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "cell_size"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ }
+ ]
+ },
+ "title": "Large Cells"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "dashproductreject": "no-cql-connection",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "collapsed": true,
+ "title": "Large Partitions"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "dashproductreject": "no-cql-connection",
+ "panels": [
+ {
+ "class": "single_value_table",
+ "datasource": "scylla-datasource",
+ "span":12,
+ "targets": [
+ {
+ "refId": "A",
+ "queryText": "select keyspace_name, table_name,partition_key, partition_size, compaction_time, rows from system.large_partitions",
+ "queryHost": "$node"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "align": null,
+ "filterable": false
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "value": null,
+ "color": "green"
+ },
+ {
+ "value": 80,
+ "color": "red"
+ }
+ ]
+ },
+ "mappings": []
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "partition_size"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ }
+ ]
+ },
+ "title": "Large Partitions"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "collapsed": true,
+ "title": "CQL Internal"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "height": "25px",
+ "gridPos": {"h": 2},
+ "panels": [
+ {
+ "class": "text_panel",
+ "options": {
+ "content": "CQL Internal - Coordinator
",
+ "mode": "html"
+ },
+ "style": {}
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_inserts_per_ks{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", who=\"internal\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "Number of CQL INSERT commands generated by internal operations",
+ "title": "CQL Internal Insert"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_reads_per_ks{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", who=\"internal\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "Number of CQL SELECT commands generated by internal operations",
+ "title": "CQL Internal Reads"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_deletes_per_ks{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", who=\"internal\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "Number of CQL DELETE commands generated by internal operations",
+ "description": "scylla_cql_deletes",
+ "title": "CQL Deletes"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_updates_per_ks{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", who=\"internal\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "Number of CQL UPDATE commands generated by internal operations",
+ "description": "scylla_cql_updates",
+ "title": "CQL Updates"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": "LWT"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "height": "25px",
+ "gridPos": {"h": 2},
+ "panels": [
+ {
+ "class": "text_panel",
+ "options": {
+ "content": "LWT
",
+ "mode": "html"
+ },
+ "style": {}
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_inserts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", conditional=\"yes\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cql_inserts",
+ "title": "CQL Insert"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_deletes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", conditional=\"yes\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cql_deletes",
+ "title": "CQL Deletes"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_updates{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", conditional=\"yes\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cql_updates",
+ "title": "CQL Updates"
+ },
+ {
+ "class": "graph_panel",
+ "span": 3,
+ "pointradius": 1,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_batches{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", conditional=\"yes\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "title": "CQL Batches by [[by]]",
+ "description": "Number of CQL batches command, each batched command is counted once"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": "Optimization"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "height": "25px",
+ "gridPos": {"h": 2},
+ "panels": [
+ {
+ "class": "text_panel",
+ "options": {
+ "content": "Optimization
",
+ "mode": "html"
+ },
+ "style": {}
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+
+ {
+ "class": "gauge_errors_panel",
+ "description": "All of the requests should be prepared\n\nPrepared statements remove the overhead of parsing the query every time and allow optimal routing of requests from client to server",
+ "targets": [
+ {
+ "expr": "floor(100 *sum(cql:non_system_prepared1m)/ (sum(cql:all_shardrate1m{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) - sum(cql:all_system_shardrate1m{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}))) OR vector(0)",
+ "format": "time_series",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "CQL Non-Prepared Statements"
+ },
+ {
+ "class": "ops_panel",
+ "description": "All of the requests should be prepared\n",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(cql:non_system_prepared1m{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "title": "CQL Non-Prepared Statements"
+ },
+ {
+ "class": "gauge_errors_panel",
+ "description": "All requests should be paged\n\nNon Paged request sources:\n- Client modifying the fetch size\n\nNon Paged requests require reading all the results and returning them in a single request.",
+ "targets": [
+ {
+ "expr": "100 * ((sum(rate(scylla_cql_unpaged_select_queries{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]))-sum(rate(scylla_cql_unpaged_select_queries_per_ks{ks=\"system\",instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]))/sum(rate(scylla_cql_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]))) OR vector(0)",
+ "format": "time_series",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Non-Paged CQL Reads"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "description": "Non-Paged requests require reading all the results and returning them in a single request",
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_unpaged_select_queries{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])-sum(rate(scylla_cql_unpaged_select_queries_per_ks{ks=\"system\",instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "title": "Non-Paged CQL Reads"
+ },
+ {
+ "class": "gauge_errors_panel",
+ "description": "All of the requests should be Token Aware\n\nNon Token Aware requests sources:\n* Non-Prepared Statements\n* Client not using a Token Aware load balancing policy\n\nTokenAware requests are sent to a Scylla node that is also a replica. Token Un-Aware requests require extra hop and additional processing.\n\nNote that the metric shows incorrect values when batches are used.",
+ "targets": [
+ {
+ "expr": "scalar(sum(rate(scylla_storage_proxy_coordinator_cas_total_operations{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) <=bool 0)*scalar(sum(rate(scylla_storage_proxy_replica_received_counter_updates{cluster=~\"$cluster|$^\"}[1m]))<=bool 0) *(100 - clamp_max(100*(sum(rate(scylla_storage_proxy_coordinator_reads_local_node{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) + sum(rate(scylla_storage_proxy_coordinator_total_write_attempts_local_node{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])))/(sum(rate(scylla_cql_inserts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) +sum(rate(scylla_cql_updates{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) +sum(rate(scylla_cql_deletes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) +sum(rate(scylla_cql_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]))) + 1,100)) OR vector(0)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Non-Token Aware"
+ },
+ {
+ "class": "ops_panel",
+ "description": "Requests that are not token aware indicates that requests are not routed to the right node, which require extra hop and additional processing.\n\nNote that the metric shows incorrect values when batches are used.",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "scalar(sum(rate(scylla_storage_proxy_coordinator_cas_total_operations{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) <=bool 0)*scalar(sum(rate(scylla_storage_proxy_replica_received_counter_updates{cluster=~\"$cluster|$^\"}[1m]))<=bool 0) *clamp_min(sum(rate(scylla_cql_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) - sum(rate(scylla_storage_proxy_coordinator_reads_local_node{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) + sum(rate(scylla_cql_inserts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]) + rate(scylla_cql_updates{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]) + rate(scylla_cql_deletes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) - sum(rate(scylla_storage_proxy_coordinator_total_write_attempts_local_node{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]),0)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "title": "Non-Token Aware Queries"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "gauge_errors_panel",
+ "description": "Reversed CQL Reads entail additional processing on server side\n\nSources: CQL Read requests with ORDER BY that is different from the \"CLUSTERING ORDER BY\" of the table\nAlternatives:\n\n* Denormalize your data (use a Materialized View)",
+ "targets": [
+ {
+ "expr": "100 * sum(rate(scylla_cql_reverse_queries{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) / sum(rate(scylla_cql_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) OR vector(0)",
+ "format": "time_series",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Reversed CQL Reads"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "description": "Reversed CQL Reads entail additional processing on server side and should be avoided",
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_reverse_queries{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "title": "Reversed CQL Reads"
+ },
+ {
+ "class": "gauge_errors_panel",
+ "description": "ALLOW FILTERING CQL Reads, the percentage of read requests with 'ALLOW FILTERING'\n\nALLOW FILTERING CQL Reads entail additional processing on server side\n\nSources: CQL Read requests with \"ALLOW FILTERING\"\n\nALLOW FILTERING should be used when large parts of the filtered data is returned - check \n\"ALLOW FILTERING CQL Read Filtered Rows to check what percentage of the data is used\"\n\nAlternatives:\n- Use a Secondary Index\n- Denormalize your data (use a Materialized View)",
+ "targets": [
+ {
+ "expr": "100 * sum(rate(scylla_cql_filtered_read_requests{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) / sum(rate(scylla_cql_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) OR vector(0)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "ALLOW FILTERING CQL Reads"
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "description": "Read requests with ALLOW FILTERING\n\nALLOW FILTERING CQL Reads entail additional processing on server side and should be avoided",
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_filtered_read_requests{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "title": "ALLOW FILTERING CQL Reads"
+ },
+ {
+ "class": "gauge_errors_panel",
+ "description": "ALLOW FILTERING Filtered rows, the percentage of rows that were read and then filtered.\n\nALLOW FILTERING CQL Reads entail additional processing on server side. \nReading a row and then filtering it is a waste of resources.\n\nSources: CQL Read requests with \"ALLOW FILTERING\"\n\nALLOW FILTERING should be used when large parts of the filtered data is returned\n\nAlternatives:\n- Use a Secondary Index\n- Denormalize your data (use a Materialized View)",
+ "targets": [
+ {
+ "expr": "100 * sum(rate(scylla_cql_filtered_rows_dropped_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) /sum(rate(scylla_cql_filtered_rows_read_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) OR vector(0)",
+ "format": "time_series",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "ALLOW FILTERING Filtered Rows"
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "description": "CQL Queries with ALLOW FILTERING should be avoided.\nDropped rows are rows that were read but were filtered by the server.\nWhen dropped rows is relatively high you should consider the alternatives",
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_filtered_rows_read_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "rows read $node $shard",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(rate(scylla_cql_filtered_rows_matched_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "rows matched $node $shard",
+ "refId": "B"
+ },
+ {
+ "expr": "sum(rate(scylla_cql_filtered_rows_dropped_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "rows dropped $node $shard",
+ "refId": "C"
+ }
+ ],
+ "title": "ALLOW FILTERING CQL Read Filtering"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "gauge_errors_panel",
+ "description": "Range scans should typically bypass the cache.\n\n Add BYPASS CACHE to your select queries.",
+ "targets": [
+ {
+ "expr": "floor(100 *sum(rate(scylla_cql_select_partition_range_scan_no_bypass_cache{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]))/(sum(rate(scylla_cql_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) - sum(rate(scylla_cql_reads_per_ks{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", who=\"internal\"}[1m])) )) OR vector(0)",
+ "format": "time_series",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Range Scans"
+ },
+ {
+ "class": "ops_panel",
+ "description": "Range scans should typically bypass the cache.\n\n Add BYPASS CACHE to your select queries.",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_select_partition_range_scan_no_bypass_cache{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "title": "Range Scans Without BYPASS CACHE "
+ },
+ {
+ "class": "gauge_errors_panel",
+ "description": "Using consistency level ANY in a query may hurt persistency; if the node receiving the request fails, the data may be lost",
+ "targets": [
+ {
+ "expr": "floor(100 *sum(rate(scylla_query_processor_queries{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", consistency_level=\"ANY\"}[1m]))/sum(rate(scylla_query_processor_queries{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]))) OR vector(0)",
+ "format": "time_series",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "CQL ANY Queries"
+ },
+ {
+ "class": "ops_panel",
+ "description": "Using consistency level ANY in a query may hurt persistency; if the node receiving the request fails, the data may be lost",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_query_processor_queries{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", consistency_level=\"ANY\"}[1m])) by ([[by]])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "title": "CQL ANY CL Queries"
+ },
+ {
+ "class": "gauge_errors_panel",
+ "description": "Using consistency level ALL in a query may hurt availability, if a node is unavailable operations will fail",
+ "targets": [
+ {
+ "expr": "floor(100 *sum(rate(scylla_query_processor_queries{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", consistency_level=\"ALL\"}[1m]))/sum(rate(scylla_query_processor_queries{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]))) OR vector(0)",
+ "format": "time_series",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "CQL ALL CL Queries"
+ },
+ {
+ "class": "ops_panel",
+ "description": "Using consistency level ALL in a query may hurt availability, if a node is unavailable operations will fail",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_query_processor_queries{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", consistency_level=\"ALL\"}[1m])) by ([[by]])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "title": "CQL ALL CL Queries"
+ }
+
+ ]
+ },
+ {
+ "class": "row",
+ "height": "25px",
+ "gridPos": {"h": 3},
+ "panels": [
+ {
+ "class": "text_panel",
+ "options": {
+ "content": "Cross DC Information
This section is relevant only if you have more than one DC
",
+ "mode": "html"
+ },
+ "style": {}
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "gauge_errors_panel",
+ "description": "How many Queries use Consistency level ONE\n\nThis is an issue when using multiple datacenters.\n\nUsing consistency level ONE in a query when there is more than one DC may hurt performance, queries may end in the non-local DC. Use LOCAL_ONE instead",
+ "targets": [
+ {
+ "expr": "floor(100 *sum(rate(scylla_query_processor_queries{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", consistency_level=\"ONE\"}[1m]))/sum(rate(scylla_query_processor_queries{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]))) OR vector(0)",
+ "format": "time_series",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "CQL ONE Queries"
+ },
+ {
+ "class": "ops_panel",
+ "description": "How many Queries use Consistency level ONE\n\nThis is an issue when using multiple datacenters.\n\nUsing consistency level ONE in a query when there is more than one DC may hurt performance, queries may end in the non-local DC. Use LOCAL_ONE instead",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_query_processor_queries{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", consistency_level=\"ONE\"}[1m])) by ([[by]])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "title": "CQL ONE CL Queries"
+ },
+ {
+ "class": "gauge_errors_panel",
+ "description": "How many Queries use Consistency level QUORUM\n\nThis is an issue when using multiple datacenters.\n\nUsing consistency level QUORUM in a query when there is more than one DC may hurt performance, queries may end in the non-local DC. Use LOCAL_QUORUM instead",
+ "targets": [
+ {
+ "expr": "floor(100 *sum(rate(scylla_query_processor_queries{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", consistency_level=\"QUORUM\"}[1m]))/sum(rate(scylla_query_processor_queries{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]))) OR vector(0)",
+ "format": "time_series",
+ "hide": false,
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "CQL QUORUM CL Queries"
+ },
+ {
+ "class": "ops_panel",
+ "description": "How many Queries use Consistency level QUORUM\n\nThis is an issue when using multiple datacenters.\n\nUsing consistency level QUORUM in a query when there is more than one DC may hurt performance, queries may end in the non-local DC. Use LOCAL_QUORUM instead",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_query_processor_queries{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", consistency_level=\"QUORUM\"}[1m])) by ([[by]])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "title": "CQL QUORUM CL Queries"
+ }
+
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "gauge_errors_panel",
+ "repeat": "dc",
+ "span": 2,
+ "description": "Cross DC traffic may cause additional latencies and network loads and in most cases, should be avoided.\n\nCross DC Read requests sources:\n- Consistency Level that is not LOCAL_XXX\n- Tables with read_repair_chance > 0\n\nNote:\n- If requests are supposed to be DC local - verify client is using a DCAware policy and a LOCAL_XX consistency level",
+ "targets": [
+ {
+ "expr": "100*(sum(rate(scylla_storage_proxy_coordinator_reads_remote_node{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) - sum(rate(scylla_storage_proxy_coordinator_reads_remote_node{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", datacenter=~\"$dc\", shard=~\"[[shard]]\"}[1m])))/sum(rate(scylla_storage_proxy_coordinator_reads_remote_node{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) OR vector(0)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "title": "Cross DC read requests $dc"
+ }
+ ]
+ },
+ {
+ "class": "monitoring_version_row"
+ }
+ ],
+ "tags": [
+ "__SCYLLA_VERSION_DOT__"
+ ],
+ "templating": {
+ "list": [
+ {
+ "class":"by_template_var"
+ },
+ {
+ "class": "template_variable_single",
+ "label": "cluster",
+ "name": "cluster",
+ "query": "label_values(scylla_reactor_utilization, cluster)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "dc",
+ "name": "dc",
+ "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster\"}, dc)"
+ },
+ {
+ "class":"template_variable_single",
+ "current":{
+ "selected":true,
+ "text":[
+ "All"
+ ],
+ "value":[
+ "$__all"
+ ]
+ },
+ "includeAll":true,
+ "multi":true,
+ "label": "node",
+ "name": "node",
+ "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster|$^\", dc=~\"$dc\"}, instance)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "shard",
+ "name": "shard",
+ "query": "label_values(scylla_reactor_utilization,shard)",
+ "sort": 3
+ },
+ {
+ "class": "template_variable_custom",
+ "current": {
+ "text": "__SCYLLA_VERSION_DASHED__",
+ "value": "__SCYLLA_VERSION_DASHED__"
+ },
+ "name": "dash_version",
+ "options": [
+ {
+ "selected": true,
+ "text": "__SCYLLA_VERSION_DASHED__",
+ "value": "__SCYLLA_VERSION_DASHED__"
+ }
+ ],
+ "query": "__SCYLLA_VERSION_DASHED__"
+ },
+ {
+ "class": "template_variable_custom",
+ "current": {
+ "text": "__SCYLLA_VERSION_DOT__",
+ "value": "__SCYLLA_VERSION_DOT__"
+ },
+ "name": "scylla_version",
+ "options": [
+ {
+ "selected": true,
+ "text": "__SCYLLA_VERSION_DOT__",
+ "value": "__SCYLLA_VERSION_DOT__"
+ }
+ ],
+ "query": "__SCYLLA_VERSION_DOT__"
+ },
+ {
+ "class": "monitor_version_var"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-5m",
+ "to": "now"
+ },
+ "annotations" :{
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ },
+ {
+ "class" : "annotation_restart"
+ },
+ {
+ "class" : "annotation_stall"
+ },
+ {
+ "class" : "annotation_schema_changed"
+ }
+ ]
+ },
+ "overwrite": true,
+ "title": "Scylla CQL"
+ }
+}
diff --git a/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-detailed.template.json b/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-detailed.template.json
new file mode 100644
index 00000000000..d6ad50bea75
--- /dev/null
+++ b/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-detailed.template.json
@@ -0,0 +1,2055 @@
+{
+ "dashboard": {
+ "class": "dashboard",
+ "uid": "detailed-__SCYLLA_VERSION_DASHED__",
+ "rows": [
+ {
+ "class": "logo_row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "percent_panel",
+ "description": "The percentage of the time during which Scylla utilized the CPU. Note that because Scylla does busy polling for some time before going idle, CPU utilization as seen by the operating system may be much higher. Your system is not yet CPU-bottlenecked until this metric is high.",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "avg(scylla_reactor_utilization{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"} ) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "title": "Load"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_transport_requests_served{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) + ($func(rate(scylla_thrift_served{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) or max(scylla_transport_requests_served{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by([[by]])*0)",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "Amount of requests served as the coordinator. Imbalances here represent dispersion at the connection level, not your data model.",
+ "title": "Requests Served per [[by]] - Coordinator"
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_database_total_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_database_total_reads",
+ "title": "Reads per [[by]] - Replica"
+ },
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_database_total_writes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_database_total_writes",
+ "title": "Writes per [[by]] - Replica"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": "reads and writes"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "height": "25px",
+ "gridPos": {"h": 2},
+ "panels": [
+ {
+ "class": "plain_text",
+ "content": "Reads and Writes - Coordinator"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "height": "200px",
+ "panels": [
+ {
+ "class": "writes_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(scylla_storage_proxy_coordinator_foreground_writes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "Foreground writes are writes that weren't acknowledged yet to the application. For instance, if a single replica responded and two are needed due to the consistency level. This metric represents a queue size, not a rate. High values here correlate with increased write latencies.",
+ "title": "Foreground Writes per [[by]]"
+ },
+ {
+ "class": "writes_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(scylla_storage_proxy_coordinator_background_writes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "Background writes are writes that are already acknowledged to the application but have additional work to be done. For instance, if a replica responded and only one is needed, this request is still listed as a background request until all replicas respond. This metric represents a queue size, not a rate. High values here correlate with increased write latencies.",
+ "title": "Background Writes per [[by]]"
+ },
+ {
+ "class": "reads_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(scylla_storage_proxy_coordinator_foreground_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "Foreground reads are reads that weren't acknowledged yet to the application. For instance, if a single replica responded and two are needed due to the consistency level. This metric represents a queue size, not a rate. High values here correlate with increased read latencies.",
+ "title": "Foreground Reads per [[by]]"
+ },
+ {
+ "class": "reads_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(scylla_storage_proxy_coordinator_background_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "description": "Background reads are reads that are already acknowledged to the application but have additional work to be done. For instance, if a replica responded and only one is needed, this request is still listed as a background request until all replicas respond. This metric represents a queue size, not a rate. High values here correlate with increased read latencies.",
+ "title": "Background Reads per [[by]]"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "height": "200px",
+ "panels": [
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_hints_manager_written{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_hints_manager_written",
+ "title": "Hints Written per [[by]]"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_hints_manager_sent{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "description": "scylla_hints_manager_sent",
+ "title": "Hints sent per [[by]]"
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_speculative_digest_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "description" : "Number of times a digest read was done on behalf of a speculative retry.\n\nSpeculative retry is a mechanism that causes the client or server to speculate that a request may fail, and send a new request.\n\nSpeculative retry may reduce latency in exchange for system load, but only if there is little activity.\n\nA lot of speculative retries increase load and can harm latency more than they help.",
+ "title": "Speculative Digest Reads By [[by]]"
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_speculative_data_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "description" : "Number of times a read was done on behalf of a speculative retry.\n\nSpeculative retry is a mechanism that causes the client or server to speculate that a request may fail, and send a new request.\n\nSpeculative retry may reduce latency in exchange for system load, but only if there is little activity.\n\nA lot of speculative retries increase load and can harm latency more than they help.",
+ "title": "Speculative Data Reads By [[by]]"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": "Timeouts and Errors"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "height": "25px",
+ "gridPos": {"h": 2},
+ "panels": [
+ {
+ "class": "plain_text",
+ "content": "Timeouts and Errors - Coordinator"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_write_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "Requests that Scylla tried to write but timed out. Timeouts are counted in the node that received the request (the coordinator), not at the replicas.",
+ "title": "Write Timeouts/Seconds per [[by]]"
+ },
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_write_unavailable{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "Requests that Scylla did not even try to write because replicas that were needed to execute this write were unavailable. Unavailable writes are counted in the node that received the request (the coordinator), not at the replicas.",
+ "title": "Write Unavailable/Seconds per [[by]]"
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_read_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]) + rate(scylla_storage_proxy_coordinator_cas_read_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])+rate(scylla_storage_proxy_coordinator_range_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "Requests that Scylla tried to read but timed out. Timeouts are counted in the node that received the request (the coordinator), not at the replicas.",
+ "title": "Read Timeouts/Seconds per [[by]]"
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_read_unavailable{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "description": "Requests that Scylla did not even try to read because replicas that were needed to execute this read were unavailable. Unavailable reads are counted in the node that received the request (the coordinator), not at the replicas.",
+ "title": "Read Unavailable/Seconds per [[by]]"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": "Replica"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "gridPos": {"h": 2},
+ "panels": [
+ {
+ "class": "plain_text",
+ "content": "Replica"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "height": "auto",
+ "panels": [
+ {
+ "class": "reads_panel",
+ "description" : "The number of currently active read operations",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(scylla_database_active_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Active reads"
+ },
+ {
+ "class": "reads_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(scylla_database_queued_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "The number of currently queued read operations",
+ "title": "Queued reads"
+ },
+ {
+ "class": "writes_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(scylla_database_requests_blocked_memory_current{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description" :"The current number of requests blocked due to reaching the memory quota. Non-zero value indicates that our bottleneck is memory",
+ "title": "Writes currently blocked on dirty"
+ },
+ {
+ "class": "writes_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(scylla_commitlog_pending_allocations{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description" :"The number of currently pending allocations. A non-zero value indicates that we have a bottleneck in the disk write flow.",
+ "title": "Writes currently blocked on commitlog"
+ },
+ {
+ "class": "graph_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "clamp_max(1 + sum((rate(scylla_cache_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]) - rate(scylla_cache_reads_with_misses{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]))) by ([[by]])/(sum(rate(scylla_cache_reads_with_misses{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) + 0.00001),100)",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description" :"Reciprocal Miss Rate is a score in the range of 1 to 100 that is used to decide the fraction of read requests to send to each replica - a replica with twice the RMR value of another replica will serve twice the number of read requests.\n\nRMR is calculated on a table level, this is an aggregate estimation of that score.",
+ "title": "Reciprocal Miss Rate (HWLB)"
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_database_total_reads_failed{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_database_total_reads_failed",
+ "title": "Reads failed"
+ },
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_database_requests_blocked_memory{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_database_requests_blocked_memory",
+ "title": "Writes blocked on dirty"
+ },
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_commitlog_requests_blocked_memory{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_commitlog_requests_blocked_memory",
+ "title": "Writes blocked on commitlog"
+ },
+ {
+ "class": "requestsps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_transport_requests_shed{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_transport_requests_shed",
+ "title": "Requests Shed"
+ },
+ {
+ "class": "text_panel",
+ "content": "",
+ "mode": "markdown",
+ "span": 3
+ },
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_database_total_writes_failed{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_database_total_writes_failed",
+ "title": "Writes failed"
+ },
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_database_total_writes_timedout{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_database_total_writes_timedout",
+ "title": "Writes timed out"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": "Cache"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "height": "25px",
+ "gridPos": {"h": 2},
+ "panels": [
+ {
+ "class": "text_panel",
+ "content": "Cache - Replica",
+ "style": {}
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "rps_panel",
+ "span": 6,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]) - rate(scylla_cache_reads_with_misses{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "number of reads that were served from the cache",
+ "title": "Reads with no misses"
+ },
+ {
+ "class": "rps_panel",
+ "span": 6,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_reads_with_misses{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cache_reads_with_misses",
+ "title": "Reads with misses"
+ }
+ ]
+ },
+ {
+ "class" : "row",
+ "panels": [
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_row_hits{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cache_row_hits",
+ "title": "Row Hits"
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_partition_hits{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cache_partition_hits",
+ "title": "Partition Hits"
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_row_misses{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cache_row_misses",
+ "title": "Row Misses"
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_partition_misses{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cache_partition_misses",
+ "title": "Partition Misses"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_row_insertions{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cache_row_insertions",
+ "title": "Row Insertions"
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_partition_insertions{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cache_partition_insertions",
+ "title": "Partition Insertions"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_row_evictions{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cache_row_evictions",
+ "title": "Row Evictions"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_partition_evictions{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cache_partition_evictions",
+ "title": "Partition Evictions"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_rows_merged_from_memtable{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cache_rows_merged_from_memtable",
+ "title": "Row Merges"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_partition_merges{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cache_partition_merges",
+ "title": "Partition Merges"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_row_removals{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cache_row_removals",
+ "title": "Row Removals"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_partition_removals{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cache_partition_removals",
+ "title": "Partition Removals"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "graph_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(scylla_cache_rows{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cache_rows",
+ "title": "Rows"
+ },
+ {
+ "class": "graph_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(scylla_cache_partitions{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cache_partitions",
+ "title": "Partitions"
+ },
+ {
+ "class": "bytes_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(scylla_cache_bytes_used{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cache_bytes_used",
+ "title": "Used Bytes"
+ },
+ {
+ "class": "bytes_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(scylla_cache_bytes_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cache_bytes_total",
+ "title": "Total Bytes"
+ },
+ {
+ "class": "graph_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cql_prepared_cache_evictions{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cql_prepared_cache_evictions",
+ "title": "Prepared Statements Cache Eviction"
+ },
+ {
+ "class": "graph_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cql_authorized_prepared_statements_cache_evictions{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_cql_authorized_prepared_statements_cache_evictions",
+ "title": "Authorized Prepared Statements Cache Eviction"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "collapsed": true,
+ "title": "Materialized Views"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "height": "25px",
+ "gridPos": {"h": 2},
+ "panels": [
+ {
+ "class": "text_panel",
+ "content": "Materialized Views - Replica",
+ "style": {}
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_database_total_view_updates_pushed_local{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "View Local Update",
+ "description" : "Number of view updates pushed locally"
+ },
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_database_total_view_updates_pushed_remote{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "View Remote Update",
+ "description" : "Number of view updates pushed remotely"
+ },
+ {
+ "class": "bytes_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(scylla_database_view_update_backlog{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "View Update Backlog",
+ "description" : "Size in bytes of the view update backlog at each base replica."
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_database_dropped_view_updates{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Dropped View Updates",
+ "description" : "Number of dropped view updates due to an excessive view update backlog."
+ } ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_hints_for_views_manager_sent{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Hints for view",
+ "description" : "Number of hints sent for view."
+ },
+ {
+ "class": "writes_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(scylla_storage_proxy_coordinator_current_throttled_base_writes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Throttled Base Writes",
+ "description" : "Currently throttled base writes, as a consequence of the respective view update backlog."
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "collapsed": true,
+ "dashversion":[">4.5", ">2021.1"],
+ "title": "Tombstones"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "dashversion":[">4.5", ">2021.1"],
+ "panels": [
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_sstables_range_tombstone_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Range Tombstones reads",
+ "dashversion":[">4.6", ">2021.1"],
+ "description" : "Amount of range tombstones processed during read."
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_range_tombstone_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Cache Range Tombstones Read",
+ "dashversion":[">4.6", ">2021.1"],
+ "description" : "Amount of range tombstones processed from the cache during read."
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_sstables_row_tombstone_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Row Tombstones reads",
+ "dashversion":[">4.6", ">2021.1"],
+ "description" : "Amount of row tombstones read"
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_row_tombstone_reads{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Cache Row Tombstones reads",
+ "dashversion":[">4.6", ">2021.1"],
+ "description" : "Amount of cache row tombstones read"
+ },
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_sstables_tombstone_writes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Tombstones Writes",
+ "description" : "Amount of tombstones writes."
+ },
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_sstables_range_tombstone_writes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Range Tombstones Writes",
+ "description" : "Amount of range tombstones writes."
+ },
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_sstables_cell_tombstone_writes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Cell Tombstones Writes",
+ "description" : "Amount of Cell Tombstones Writes."
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "collapsed": true,
+ "title": "LWT"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "height": "25px",
+ "gridPos": {"h": 2},
+ "panels": [
+ {
+ "class": "text_panel",
+ "content": "LWT - Coordinator",
+ "style": {}
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_read_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) or on([[by]]) $func(rate(scylla_storage_proxy_coordinator_cas_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "LWT Reads",
+ "description" : "LWT read rate."
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(casrlatencya{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", by=\"[[by]]\"} or on([[by]]) $func(rate(scylla_storage_proxy_coordinator_cas_read_latency_sum{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])/($func(rate(scylla_storage_proxy_coordinator_cas_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]]) + 1)",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "LWT Average Read latency",
+ "description" : "LWT Average Read latency."
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "casrlatencyp95{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "95% latency",
+ "description" : "LWT 95% Read latency."
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_read_timeouts{instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"statement|$\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "LWT Read Timeouts",
+ "description" : "LWT Read Timeouts"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_write_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) or on ([[by]]) ($func(rate(scylla_storage_proxy_coordinator_cas_write_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]))",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "LWT Writes",
+ "description" : "LWT write rate."
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(caswlatencya{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", by=\"[[by]]\"} or on([[by]]) ($func(rate(scylla_storage_proxy_coordinator_cas_write_latency_sum{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])/($func(rate(scylla_storage_proxy_coordinator_cas_write_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) + 1))",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "LWT Average Write latency",
+ "description" : "LWT Average Write latency."
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "caswlatencyp95{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "95% latency",
+ "description" : "LWT 95% write latency."
+ },
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_write_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "LWT Write Timeouts",
+ "description" : "LWT Write Timeouts"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "dashversion":[">4.2", ">2021.1"],
+ "panels": [
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_total_operations{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Paxos operations",
+ "description" : "A single Read/Write LWT will result in multiple paxos operations"
+ },
+ {
+ "class": "graph_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(scylla_storage_proxy_coordinator_cas_foreground{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Paxos Foreground operations",
+ "description" : "How many paxos operations that did not yet produce a result are running"
+ },
+ {
+ "class": "graph_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(scylla_storage_proxy_coordinator_cas_background{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Paxos Background operations",
+ "description" : "How many paxos operations are still running after a result was already returned"
+ },
+ {
+ "class": "text_panel",
+ "span": 3,
+ "options": {
+ "content": "# "
+ }
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_write_condition_not_met{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "LWT Condition-Not-Met",
+ "description" : "An LWT INSERT, UPDATE or DELETE command that involves a condition will be rejected if the condition is not met.\n\nWhile it is ok, a high value may indicate that there is a potential problem with data distribution"
+ },
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_write_contention_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "LWT Write Contention",
+ "description" : "Number of times some INSERT, UPDATE or DELETE request with conditions had to retry because there was a concurrent conditional statement against the same key. Each retry is performed after a randomized sleep interval, so it can lead to statement timing out completely.\n\nIt can indicate contention over a hot row or key"
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_read_contention_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]]) - $func(rate(scylla_storage_proxy_coordinator_cas_read_contention_bucket{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\", le=\"1.000000\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "LWT Read Contention",
+ "description" : "Number of times some SELECT with SERIAL consistency had to retry because there was a concurrent conditional statement against the same key. Each retry is performed after a randomized sleep interval, so it can lead to statement timing out completely.\n\nIt can indicate contention over a hot row or key"
+ },
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_write_timeout_due_to_uncertainty{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "LWT Write Timeout Due to Uncertainty",
+ "description" : "Number of partially succeeded conditional statements. These statements were not committed by the coordinator, due to some replicas responding with errors or timing out. The coordinator had to propagate the error to the client. However, the statement succeeded on a minority of replicas, so may later be propagated to the rest during repair."
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_write_unavailable{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "LWT Write Unavailable",
+ "description" : "Number of times an INSERT, UPDATE, or DELETE with conditions failed after being unable to contact enough replicas to match the consistency level"
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_read_unavailable{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "LWT Read Unavailable",
+ "description" : "Number of times a SELECT with SERIAL consistency failed after being unable to contact enough replicas to match the consistency level"
+ },
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_write_unfinished_commit{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "LWT Write Unfinished - Repair Attempts",
+ "description" : "Number of Paxos-repairs of INSERT, UPDATE, or DELETE with conditions.\n\nA repair is necessary when a previous Paxos statement was partially successful. A subsequent statement then may not proceed before completing the work of its predecessor. A repair is not guaranteed to succeed, the metric indicates the number of repair attempts made"
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_read_unfinished_commit{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "LWT Read Unfinished - Repair Attempts",
+ "description" : "Number of Paxos-repairs of SELECT statement with SERIAL consistency.\n\nA repair is necessary when a previous Paxos statement was partially successful. A subsequent statement then may not proceed before completing the work of its predecessor. A repair is not guaranteed to succeed, the metric indicates the number of repair attempts made"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_failed_read_round_optimization{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Failed Read-Round Optimization",
+ "description" : "Normally, a PREPARE Paxos-round piggy-backs the previous value along with the PREPARE response. When the coordinator is unable to obtain the previous value (or its digest) from some of the participants, or when the digests did not match, a separate repair round has to be performed.\n\nThis indicates that some Paxos queries did not run successfully to completion, e.g. because some node is overloaded, down, or there was contention around a key."
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_prune{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "LWT Prune",
+ "description" : "Number of pruning requests.\n\nA successful conditional statement deletes the intermediate state from system.paxos table using PRUNE command."
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_cas_dropped_prune{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "LWT Dropped Prune",
+ "description" : "Number of Dropped pruning requests.\n\nA successful conditional statement deletes the intermediate state from system.paxos table using PRUNE command. If the system is busy it may not keep up with the PRUNE requests, so such requests are dropped.\n\nHigh value suggests the system is overloaded and also that system.paxos table is taking up space. If a prune is dropped, system.paxos table key and value for respective LWT transaction will stay around until next transaction against the same key or until the gc_grace_period, when it's removed by compaction."
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "collapsed": true,
+ "title": "CDC"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "height": "25px",
+ "gridPos": {"h": 2},
+ "panels": [
+ {
+ "class": "text_panel",
+ "content": "CDC - Replica
",
+ "style": {}
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cdc_operations_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "CDC Operations",
+ "description" : "The rate of CDC operations."
+ },
+ {
+ "class": "ops_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cdc_operations_failed{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])/($func(rate(scylla_storage_proxy_coordinator_cas_read_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_cas_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) + 1)",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Failed CDC operations",
+ "description" : "The rate of failed CDC operations."
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": "Memory"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "height": "25px",
+ "gridPos": {"h": 2},
+ "panels": [
+ {
+ "class": "text_panel",
+ "content": "Memory - Replica
",
+ "style": {}
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "bytes_panel",
+ "span": 6,
+ "targets": [
+ {
+ "expr": "$func(scylla_lsa_total_space_bytes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_lsa_total_space_bytes",
+ "title": "LSA total memory"
+ },
+ {
+ "class": "bytes_panel",
+ "span": 6,
+ "targets": [
+ {
+ "expr": "$func(scylla_lsa_non_lsa_used_space_bytes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_lsa_non_lsa_used_space_bytes",
+ "title": "Non-LSA used memory"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": "Compaction"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "height": "25px",
+ "gridPos": {"h": 2},
+ "panels": [
+ {
+ "class": "text_panel",
+ "content": "Compaction - Replica
",
+ "style": {}
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "graph_panel_int",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "$func(scylla_compaction_manager_compactions{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_compaction_manager_compactions",
+ "title": "Running Compactions"
+ },
+ {
+ "class": "percent_panel",
+ "span": 4,
+ "targets": [
+ {
+ "expr": "($func(rate(scylla_scheduler_runtime_ms{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\", group=\"compaction\"}[1m])) by ([[by]]))/10",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "Percentage of CPU time used by compaction",
+ "title": "Compactions CPU Runtime"
+ },
+ {
+ "class": "graph_panel",
+ "span": 4,
+ "targets": [
+ {
+ "refId": "A",
+ "expr": "avg(scylla_scheduler_shares{group=\"compaction\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "format": "time_series"
+ }
+ ],
+ "description": "Shares assigned to the compaction",
+ "title": "Compactions Shares"
+ }
+
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "repeat":"scheduling_group",
+ "collapsed": false,
+ "title": "Latencies - $scheduling_group"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[60s])) by ([[by]]) or on([[by]]) $func(rate(scylla_storage_proxy_coordinator_write_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$scheduling_group\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_storage_proxy_coordinator_write_latency",
+ "title": "Writes by [[by]]"
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "wlatencya{by=\"[[by]]\", instance=~\"[[node]]|$^\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Average write latency by [[by]]"
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "wlatencyp95{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group|$\"}",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "95th percentile write latency by [[by]]"
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "wlatencyp99{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "99th percentile write latency by [[by]]"
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[60s])) by ([[by]]) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_storage_proxy_coordinator_read_latency",
+ "title": "Reads by [[by]]"
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "rlatencya{by=\"[[by]]\", instance=~\"[[node]]|$^\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Average read latency by [[by]]"
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "rlatencyp95{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "95th percentile read latency by [[by]]"
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "rlatencyp99{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "99th percentile read latency by [[by]]"
+ },
+ {
+ "class": "bytes_panel",
+ "span": 3,
+ "dashversion":[">5.3", ">2022.1"],
+ "description": "Bytes received in CQL messages",
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_transport_cql_request_bytes{kind=~\"$kind\",instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Received payload by [[by]]"
+ },
+ {
+ "class": "bytes_panel",
+ "span": 3,
+ "dashversion":[">5.3", ">2022.1"],
+ "description": "Average CQL message size (received)",
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_transport_cql_request_bytes{kind=~\"$kind\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]])/sum(rate(scylla_transport_cql_requests_count{kind=~\"$kind\",instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Average received payload size by [[by]]"
+ },
+ {
+ "class": "bytes_panel",
+ "span": 3,
+ "dashversion":[">5.3", ">2022.1"],
+ "description": "Bytes sent in CQL messages",
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_transport_cql_response_bytes{kind=~\"$kind\",instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Response payload by [[by]]"
+ },
+ {
+ "class": "bytes_panel",
+ "span": 3,
+ "dashversion":[">5.3", ">2022.1"],
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_transport_cql_response_bytes{kind=~\"$kind\",instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]])/sum(rate(scylla_transport_cql_requests_count{kind=~\"$kind\",instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "Average CQL message size (sent)",
+ "title": "Average response payload size by [[by]]"
+ },
+ {
+ "class": "bytes_panel",
+ "span": 3,
+ "dashversion":[">5.3", ">2022.1"],
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_transport_cql_request_bytes{kind=~\"QUERY|EXECUTE\",instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]])/(sum(rate(scylla_storage_proxy_coordinator_write_latency_summary_count{instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]]) or on([[by]]) sum(rate(scylla_storage_proxy_coordinator_write_latency_count{instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]]))",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "This is a ballpark estimation of the write-messages size (like insert and update).\n\nIt is based on the assumption that write-messages are responsible for most inwards traffic.",
+ "title": "Estimated write message size by [[by]]"
+ },
+ {
+ "class": "bytes_panel",
+ "span": 3,
+ "dashversion":[">5.3", ">2022.1"],
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_transport_cql_response_bytes{kind=~\"QUERY|EXECUTE\",instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]])/(sum(rate(scylla_storage_proxy_coordinator_read_latency_summary_count{instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]]) or on ([[by]]) sum(rate(scylla_storage_proxy_coordinator_read_latency_count{instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$scheduling_group\"}[1m])) by ([[by]]))",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "This is a ballpark estimation of the read-messages size (like select).\n\nIt is based on the assumption that read-messages are responsible for most outbound traffic.",
+ "title": "Estimated read message size by [[by]]"
+ }
+ ]
+ },
+ {
+ "class": "user_panels_collapse"
+ },
+ {
+ "class": "user_panel_row_header"
+ },
+ {
+ "class": "user_panels_row"
+ },
+ {
+ "class": "monitoring_version_row"
+ }
+ ],
+ "templating": {
+ "list": [
+ {
+ "class":"by_template_var"
+ },
+ {
+ "class": "template_variable_single",
+ "label": "cluster",
+ "name": "cluster",
+ "query": "label_values(scylla_reactor_utilization, cluster)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "dc",
+ "name": "dc",
+ "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster\"}, dc)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "node",
+ "name": "node",
+ "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster|$^\", dc=~\"$dc\"}, instance)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "shard",
+ "name": "shard",
+ "allValue":".+",
+ "query": "label_values(scylla_reactor_utilization,shard)",
+ "sort": 3
+ },
+ {
+ "class": "template_variable_all",
+ "label": "SG",
+ "dashversion":[">2021.1"],
+ "current": {
+ "selected": true,
+ "tags": [],
+ "text": [
+ "sl:default"
+ ],
+ "value": [
+ "sl:default"
+ ]
+ },
+ "name": "scheduling_group",
+ "query": "label_values(all_scheduling_group{cluster=~\"$cluster|$^\"}, scheduling_group_name)",
+ "sort": 3
+ },
+ {
+ "class": "template_variable_all",
+ "label": "SG",
+ "dashversion":[">4.3"],
+ "current": {
+ "selected": true,
+ "tags": [],
+ "text": [
+ "statement"
+ ],
+ "value": [
+ "statement"
+ ]
+ },
+ "name": "scheduling_group",
+ "query": "label_values(all_scheduling_group{cluster=~\"$cluster|$^\"}, scheduling_group_name)",
+ "sort": 3
+ },
+ {
+ "class": "template_variable_all",
+ "label": "cql_kind",
+ "dashversion":[">5.3", ">2022.1"],
+ "name": "kind",
+ "query": "label_values(scylla_transport_cql_requests_count{cluster=~\"$cluster|$^\"}, kind)",
+ "sort": 3
+ },
+ {
+ "class": "aggregation_function"
+ },
+ {
+ "class": "template_variable_custom",
+ "current": {
+ "text": "__SCYLLA_VERSION_DOT__",
+ "value": "__SCYLLA_VERSION_DOT__"
+ },
+ "name": "scylla_version",
+ "options": [
+ {
+ "selected": true,
+ "text": "__SCYLLA_VERSION_DOT__",
+ "value": "__SCYLLA_VERSION_DOT__"
+ }
+ ],
+ "query": "__SCYLLA_VERSION_DOT__"
+ },
+ {
+ "class": "monitor_version_var"
+ }
+ ]
+ },
+ "tags": [
+ "__SCYLLA_VERSION_DOT__"
+ ],
+ "time": {
+ "from": "now-30m",
+ "to": "now"
+ },
+ "title": "Detailed",
+ "overwrite": true,
+ "version": 5
+ }
+}
diff --git a/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-ks.template.json b/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-ks.template.json
new file mode 100644
index 00000000000..95ae29299f6
--- /dev/null
+++ b/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-ks.template.json
@@ -0,0 +1,276 @@
+{
+ "dashboard": {
+ "class": "dashboard",
+ "uid": "ks-__SCYLLA_VERSION_DASHED__",
+ "rows": [
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class":"text_panel",
+ "options": {
+ "mode": "html",
+ "content": "
[[cluster]]Keyspace and table level metrics are not available. To enable, set enable_keyspace_column_family_metrics to true in scylla.yaml. Note this has a significant effect on the monitoring stack sizing.
"
+ },
+ "gridPos":{
+ "x":0,
+ "y":1,
+ "w":24,
+ "h":3
+ }
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "repeat":"table",
+ "collapsed": true,
+ "title": "Latencies - $ks:$table"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_column_family_write_latency_count{ks=\"$ks\", cf=\"$table\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Writes by [[by]]"
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "wlatencyaks{ks=\"$ks\", cf=\"$table\", by=\"[[by]]\", instance=~\"[[node]]|$^\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Average write latency by [[by]]"
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "wlatencyp95ks{ks=\"$ks\", cf=\"$table\", by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"} ",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "95th percentile write latency by [[by]]"
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "wlatencyp99ks{ks=\"$ks\", cf=\"$table\", by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"} ",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "99th percentile write latency by [[by]]"
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_column_family_read_latency_count{ks=\"$ks\", cf=\"$table\", instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Reads by [[by]]"
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "rlatencyaks{ks=\"$ks\", cf=\"$table\", by=\"[[by]]\", instance=~\"[[node]]|$^\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Average read latency by [[by]]"
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "rlatencyp95ks{ks=\"$ks\", cf=\"$table\", by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"} ",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "95th percentile read latency by [[by]]"
+ },
+ {
+ "class": "us_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "rlatencyp99ks{ks=\"$ks\", cf=\"$table\", by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"} ",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "99th percentile read latency by [[by]]"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": ""
+ }
+ ]
+ },
+ {
+ "class": "user_panel_row_header"
+ },
+ {
+ "class": "user_panels_row"
+ },
+ {
+ "class": "monitoring_version_row"
+ }
+ ],
+ "templating": {
+ "list": [
+ {
+ "class":"by_template_var"
+ },
+ {
+ "class": "template_variable_single",
+ "label": "cluster",
+ "name": "cluster",
+ "query": "label_values(scylla_reactor_utilization, cluster)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "dc",
+ "name": "dc",
+ "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster\"}, dc)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "node",
+ "name": "node",
+ "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster|$^\", dc=~\"$dc\"}, instance)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "shard",
+ "name": "shard",
+ "query": "label_values(scylla_reactor_utilization,shard)",
+ "sort": 3
+ },
+ {
+ "class": "template_variable_single",
+ "label": "ks",
+ "name": "ks",
+ "query": "label_values(scylla_column_family_write_latency_count{cluster=~\"$cluster|$^\"},ks)",
+ "sort": 3
+ },
+ {
+ "class": "template_variable_all",
+ "label": "table",
+ "name": "table",
+ "query": "label_values(scylla_column_family_write_latency_count{cluster=~\"$cluster|$^\", ks=\"$ks\"},cf)",
+ "sort": 3
+ },
+ {
+ "class": "aggregation_function"
+ },
+ {
+ "class": "template_variable_custom",
+ "current": {
+ "text": "__SCYLLA_VERSION_DOT__",
+ "value": "__SCYLLA_VERSION_DOT__"
+ },
+ "name": "scylla_version",
+ "options": [
+ {
+ "selected": true,
+ "text": "__SCYLLA_VERSION_DOT__",
+ "value": "__SCYLLA_VERSION_DOT__"
+ }
+ ],
+ "query": "__SCYLLA_VERSION_DOT__"
+ },
+ {
+ "class": "monitor_version_var"
+ },
+ {
+ "allValue": null,
+ "datasource": "prometheus",
+ "definition": "scylla_column_family_write_latency_count{cluster=~\"$cluster|$^\"}",
+ "description": null,
+ "error": null,
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "no_ks",
+ "options": [],
+ "query": {
+ "query": "scylla_column_family_write_latency_count{cluster=~\"$cluster|$^\"}",
+ "refId": "StandardVariableQuery"
+ },
+ "refresh": 1,
+ "regex": "/^(scylla_column_family_write_latency_count)/",
+ "skipUrlSync": false,
+ "sort": 0,
+ "type": "query"
+ }
+ ]
+ },
+ "tags": [
+ "__SCYLLA_VERSION_DOT__"
+ ],
+ "time": {
+ "from": "now-30m",
+ "to": "now"
+ },
+ "title": "Keyspace",
+ "overwrite": true,
+ "version": 5
+ }
+}
diff --git a/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-manager.template.json b/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-manager.template.json
new file mode 100644
index 00000000000..f02573a4527
--- /dev/null
+++ b/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-manager.template.json
@@ -0,0 +1,710 @@
+{
+ "dashboard": {
+ "class": "dashboard",
+ "uid": "manager-__SCYLLA_VERSION_DASHED__",
+ "originalTitle": "Scylla Manager Metrics",
+ "rows": [
+ {
+ "class": "logo_row",
+ "panels":[
+ {
+ "class":"text_panel",
+ "options":{
+ "content":"\n

\n
[[cluster_name]]
",
+ "mode":"html"
+ },
+ "gridPos":{
+ "x":0,
+ "y":1,
+ "w":24,
+ "h":3
+ }
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "height": "150px",
+ "panels": [
+ {
+ "class": "small_stat",
+ "span": 3,
+ "title":"Manager",
+ "targets": [
+ {
+ "expr": "count(scrape_samples_scraped{job=\"scylla_manager\"}==0) OR vector(0)",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 40
+ }
+ ],
+ "fieldConfig":{
+ "defaults":{
+ "noValue":" Offline",
+ "thresholds":{
+ "mode":"absolute",
+ "steps":[
+ {
+ "color":"red",
+ "value":null
+ },
+ {
+ "color":"green",
+ "value":0
+ }
+ ]
+ },
+ "mappings":[
+ {
+ "from":"",
+ "id":0,
+ "text":"Online",
+ "to":"",
+ "type":1,
+ "value":"0"
+ },
+ {
+ "from":"",
+ "id":1,
+ "text":"Offline",
+ "to":"",
+ "type":1,
+ "value":"1"
+ }
+ ]
+ }
+ }
+ },
+ {
+ "class": "small_stat",
+ "targets": [
+ {
+ "expr": "count(scylla_manager_healthcheck_cql_rtt_ms{cluster=~\"$cluster|$^\"})",
+ "intervalFactor": 1,
+ "legendFormat": "Total Nodes",
+ "refId": "A",
+ "step": 40
+ }
+ ],
+ "title": "Total Nodes"
+ },
+ {
+ "class": "small_stat",
+ "targets": [
+ {
+ "expr": "count(scylla_manager_healthcheck_cql_status{cluster=~\"$cluster|$^\"}==-1) OR vector(0)",
+ "intervalFactor": 1,
+ "legendFormat": "Nodes without CQL connection",
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": "1,2",
+ "description": "The number of nodes that the Scylla Manager CQL probe failed to connect to. This could indicate a network or a node problem",
+ "fieldConfig":{
+ "defaults":{
+ "custom":{
+ },
+ "thresholds":{
+ "mode":"absolute",
+ "steps":[
+ {
+ "color":"green",
+ "value":null
+ },
+ {
+ "value":0,
+ "color":"green"
+ },
+ {
+ "value":1,
+ "color":"red"
+ }
+ ]
+ }
+ }
+ },
+ "title": "CQL Fail"
+ },
+ {
+ "class": "vertical_lcd",
+ "span": 2,
+ "targets": [
+ {
+ "expr": "manager:repair_progress{cluster=~\"[[cluster]]\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "fieldConfig":{
+ "defaults":{
+ "custom":{
+ },
+ "unit":"percentunit",
+ "decimals":0,
+ "thresholds":{
+ "mode":"absolute",
+ "steps":[
+ {
+ "color":"green",
+ "value":null
+ }
+ ]
+ },
+ "mappings":[]
+ },
+ "overrides":[]
+ },
+ "gridPos":{
+ "w":2,
+ "h":4
+ },
+ "title": "Repair"
+ },
+ {
+ "class": "small_stat",
+ "description": "The time of the last successful repair",
+ "targets": [
+ {
+ "expr": "manager:repair_done_ts{cluster=\"$cluster\"}*1000",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 40
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "thresholds": {
+ "mode": "absolute",
+ "steps": []
+ },
+ "mappings": [],
+ "color": {
+ "mode": "fixed",
+ "fixedColor": "green"
+ },
+ "unit": "dateTimeAsIsoNoDateIfToday",
+ "noValue": "Never"
+ },
+ "overrides": []
+ },
+ "gridPos":{
+ "w":2,
+ "h":4
+ },
+ "title": "Last repair"
+ },
+ {
+ "class": "small_stat",
+ "description": "The time of the last failed repair",
+ "targets": [
+ {
+ "expr": "manager:repair_fail_ts{cluster=\"$cluster\"}*1000",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 40
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "thresholds": {
+ "mode": "absolute",
+ "steps": []
+ },
+ "mappings": [],
+ "color": {
+ "mode": "fixed",
+ "fixedColor": "green"
+ },
+ "unit": "dateTimeAsIsoNoDateIfToday",
+ "noValue": "Never"
+ },
+ "overrides": []
+ },
+ "gridPos":{
+ "w":2,
+ "h":4
+ },
+ "title": "Last Failure"
+ },
+ {
+ "class": "vertical_lcd",
+ "span": 2,
+ "targets": [
+ {
+ "expr": "manager:backup_progress{cluster=~\"[[cluster]]\"}*100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "fieldConfig":{
+ "defaults":{
+ "custom":{
+ },
+ "unit":"percent",
+ "decimals":0,
+ "thresholds":{
+ "mode":"absolute",
+ "steps":[
+ {
+ "color":"green",
+ "value":null
+ }
+ ]
+ },
+ "mappings":[]
+ },
+ "overrides":[]
+ },
+ "gridPos":{
+ "w":2,
+ "h":4
+ },
+ "title": "Backup"
+ },
+ {
+ "class": "small_stat",
+ "description": "The time of the last successful backup",
+ "targets": [
+ {
+ "expr": "manager:backup_done_ts{cluster=\"$cluster\"}*1000",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 40
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "thresholds": {
+ "mode": "absolute",
+ "steps": []
+ },
+ "mappings": [],
+ "color": {
+ "mode": "fixed",
+ "fixedColor": "green"
+ },
+ "unit": "dateTimeAsIsoNoDateIfToday",
+ "noValue": "Never"
+ },
+ "overrides": []
+ },
+ "gridPos":{
+ "w":2,
+ "h":4
+ },
+ "title": "Last Backup"
+ },
+ {
+ "class": "small_stat",
+ "description": "The time of the last failed backup",
+ "targets": [
+ {
+ "expr": "manager:backup_fail_ts{cluster=\"$cluster\"}*1000",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 40
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "thresholds": {
+ "mode": "absolute",
+ "steps": []
+ },
+ "mappings": [],
+ "color": {
+ "mode": "fixed",
+ "fixedColor": "green"
+ },
+ "unit": "dateTimeAsIsoNoDateIfToday",
+ "noValue": "Never"
+ },
+ "overrides": []
+ },
+ "gridPos":{
+ "w":2,
+ "h":4
+ },
+ "title": "Last Failure"
+ }
+ ],
+ "title": "Summary row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "percentunit_panel",
+ "targets": [
+ {
+ "expr": "manager:repair_progress{cluster=~\"[[cluster]]\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "span":2,
+ "title": "Repair Progress"
+ },
+ {
+ "class": "ops_panel",
+ "fieldConfig": {
+ "defaults": {
+ "links": [],
+ "unit": "si:tr/s"
+ },
+ "overrides": []
+ },
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_manager_repair_token_ranges_success{cluster=~\"[[cluster]]\", instance=~\"$instance\", shard=~\"$shard\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "title": "Repair Token-Range Rate"
+ },
+ {
+ "class": "ops_panel",
+ "fieldConfig": {
+ "defaults": {
+ "links": [],
+ "unit": "si:tr/s"
+ },
+ "overrides": []
+ },
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_manager_repair_token_ranges_error{cluster=~\"[[cluster]]\", instance=~\"$instance\", shard=~\"$shard\"}[60s])) by ([[by]])",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "title": "Repair Token-Range Error Rate"
+ }
+ ],
+ "title": "Repair rate Row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "percentunit_panel",
+ "targets": [
+ {
+ "expr": "manager:backup_progress{cluster=~\"[[cluster]]\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "span":2,
+ "title": "Backup Progress"
+ },
+ {
+ "class": "ops_panel",
+ "fieldConfig": {
+ "defaults": {
+ "links": [],
+ "unit": "decbytes"
+ },
+ "overrides": []
+ },
+ "targets": [
+ {
+ "expr": "sum(scylla_manager_backup_files_uploaded_bytes{cluster=~\"[[cluster]]\", instance=~\"$instance\", shard=~\"$shard\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "span":3,
+ "title": "Uploaded bytes"
+ },
+ {
+ "class": "ops_panel",
+ "fieldConfig": {
+ "defaults": {
+ "links": [],
+ "unit": "decbytes"
+ },
+ "overrides": []
+ },
+ "targets": [
+ {
+ "expr": "sum(scylla_manager_backup_files_skipped_bytes{cluster=~\"[[cluster]]\", instance=~\"$instance\", shard=~\"$shard\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "span":3,
+ "title": "Skipped bytes"
+ },
+ {
+ "class": "ops_panel",
+ "fieldConfig": {
+ "defaults": {
+ "links": [],
+ "unit": "decbytes"
+ },
+ "overrides": []
+ },
+ "targets": [
+ {
+ "expr": "sum(scylla_manager_backup_files_failed_bytes{cluster=~\"[[cluster]]\", instance=~\"$instance\", shard=~\"$shard\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "span":3,
+ "title": "Failed bytes"
+ }
+ ],
+ "title": "Backup rate Row"
+ },
+ {
+ "class": "row",
+ "dashversion":">3.1",
+ "panels": [
+ {
+ "class": "percent_panel",
+ "dashversion":">3.2",
+ "targets": [
+ {
+ "expr": "scylla_manager_restore_progress{cluster=~\"[[cluster]]\"} < 100",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "span":4,
+ "description": "Shows current restore progress",
+ "title": "Restore Progress"
+ },
+ {
+ "class": "ops_panel",
+ "fieldConfig": {
+ "defaults": {
+ "links": [],
+ "unit": "decbytes"
+ },
+ "overrides": []
+ },
+ "targets": [
+ {
+ "expr": "sum(scylla_manager_restore_remaining_bytes{cluster=~\"[[cluster]]\", instance=~\"$instance\", shard=~\"$shard\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "span":4,
+ "description": "Shows restore progress as the number of bytes remaining to complete",
+ "title": "Restore Remaining Bytes"
+ }
+ ],
+ "title": "Backup Restore Bytes"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "ms_panel",
+ "targets": [
+ {
+ "expr": "avg(scylla_manager_healthcheck_cql_rtt_ms{instance=~\"$instance\", cluster=~\"[[cluster]]\"}) by ($by)",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "description": "Average duration of a CQL ping operation",
+ "title": "CQL probe duration by $by"
+ }
+ ],
+ "title": "CQL probe Row"
+ },
+ {
+ "class": "row",
+ "height": "25px",
+ "gridPos": {"h": 2},
+ "panels": [
+ {
+ "class": "text_panel",
+ "content": "Manager Agent
",
+ "style": {}
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "bytes_panel",
+ "targets": [
+ {
+ "expr": "sum(go_memstats_heap_inuse_bytes{job=~\"manager_agent.?\", instance=~\"$instance\"}) by ($by)",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "description": "An increase in memory can indicate an issue with the Manager agent",
+ "title": "Agent Memory Heap usage"
+ },
+ {
+ "class": "graph_panel",
+ "targets": [
+ {
+ "expr": "avg(go_threads{job=~\"manager_agent.?\", instance=~\"$instance\"}) by ($by)",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "description": "An increase in the number of threads can indicate an issue with the Manager agent",
+ "title": "Agent Threads"
+ }
+ ],
+ "title": "Manager Agent Row"
+ },
+ {
+ "class": "monitoring_version_row"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ },
+ {
+ "class" : "annotation_restart"
+ },
+ {
+ "class" : "annotation_manager_task"
+ },
+ {
+ "class" : "annotation_manager_task_failed"
+ }
+ ]
+ },
+ "templating": {
+ "list": [
+ {
+ "allValue": null,
+ "current": {
+ "tags": [],
+ "text": "Cluster",
+ "value": "cluster"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "by",
+ "multi": false,
+ "name": "by",
+ "options": [
+ {
+ "selected": true,
+ "text": "Cluster",
+ "value": "cluster"
+ },
+ {
+ "selected": false,
+ "text": "Instance",
+ "value": "instance"
+ },
+ {
+ "selected": false,
+ "text": "Keyspace",
+ "value": "keyspace"
+ },
+ {
+ "selected": false,
+ "text": "Shard",
+ "value": "shard"
+ }
+ ],
+ "query": "Instance,Shard,Keyspace,Cluster",
+ "type": "custom"
+ },
+ {
+ "class": "template_variable_single",
+ "label": "cluster",
+ "name": "cluster_name",
+ "query": "label_values(scylla_manager_cluster_name,name)"
+ },
+ {
+ "class": "template_variable_single",
+ "hide": 2,
+ "label": "cluster_id",
+ "name": "cluster",
+ "query": "label_values(scylla_manager_cluster_name{name=\"$cluster_name\"}, cluster)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "instance",
+ "name": "instance",
+ "query": "label_values(scylla_manager_healthcheck_cql_rtt_ms{cluster=~\"$cluster|$^\"}, instance)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "shard",
+ "name": "shard",
+ "query": "label_values(scylla_manager_repair_segments_total, shard)",
+ "sort": 3
+ },
+ {
+ "class": "template_variable_custom",
+ "current": {
+ "text": "__SCYLLA_VERSION_DOT__",
+ "value": "__SCYLLA_VERSION_DOT__"
+ },
+ "name": "scylla_version",
+ "options": [
+ {
+ "selected": true,
+ "text": "__SCYLLA_VERSION_DOT__",
+ "value": "__SCYLLA_VERSION_DOT__"
+ }
+ ],
+ "query": "__SCYLLA_VERSION_DOT__"
+ },
+ {
+ "class": "monitor_version_var"
+ }
+ ]
+ },
+ "tags": [
+ "__SCYLLA_VERSION_DOT__"
+ ],
+ "time": {
+ "from": "now-30m",
+ "to": "now"
+ },
+ "title": "Scylla Manager Metrics",
+ "overwrite": true,
+ "version": 3
+ }
+}
diff --git a/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-os.template.json b/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-os.template.json
new file mode 100644
index 00000000000..4002194d4bf
--- /dev/null
+++ b/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-os.template.json
@@ -0,0 +1,741 @@
+{
+ "dashboard": {
+ "class": "dashboard",
+ "overwrite": true,
+ "rows": [
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": ""
+ }
+ ]
+ },
+ {
+ "class": "logo_row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "collapsible_row_panel",
+ "title": ""
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "height": "200px",
+ "panels": [
+ {
+ "class": "piechart_panel_percent",
+ "height": "250px",
+ "repeat": "node",
+ "targets": [
+ {
+ "expr": "sum(node_filesystem_avail_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"})",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Free",
+ "metric": "",
+ "refId": "A",
+ "instant": true,
+ "step": 7200
+ },
+ {
+ "expr": "(sum(node_filesystem_size_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"})-sum(node_filesystem_avail_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"}))",
+ "intervalFactor": 1,
+ "legendFormat": "Used",
+ "refId": "B",
+ "instant": true,
+ "step": 7200
+ }
+ ],
+ "title": "Total Storage $node"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": null,
+ "id": "auto",
+ "gridPos": {
+ "h": 1,
+ "w": 24
+ },
+ "panels": [],
+ "repeat": "mount_point",
+ "title": "Partition $mount_point",
+ "type": "row"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "percentunit_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "1-sum(node_filesystem_avail_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])/sum(node_filesystem_size_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Used disk by $by"
+ },
+ {
+ "class": "bytes_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(node_filesystem_size_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])-sum(node_filesystem_avail_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "sum(node_filesystem_size{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])-sum(node_filesystem_avail{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "title": "Used Bytes by $by"
+ },
+ {
+ "class": "bytes_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(node_filesystem_free_bytes{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Free Bytes by $by"
+ },
+ {
+ "class": "graph_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(node_filesystem_files{mountpoint=\"$mount_point\", instance=~\"$node\"}- node_filesystem_files_free{mountpoint=\"$mount_point\", instance=~\"$node\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Number of files by $by"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": null,
+ "id": "auto",
+ "gridPos": {
+ "h": 1,
+ "w": 24
+ },
+ "panels": [],
+ "repeat": "monitor_disk",
+ "title": "Disk $monitor_disk",
+ "type": "row"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "gridPos": {
+ "h": 2
+ },
+ "height": "25px",
+ "panels": [
+ {
+ "class": "text_panel",
+ "content": "Disk $monitor_disk
",
+ "style": {}
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "wps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(node_disk_writes_completed_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "sum(rate(node_disk_writes_completed{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "title": "Disk Writes per $by"
+ },
+ {
+ "class": "rps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(node_disk_reads_completed_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "sum(rate(node_disk_reads_completed{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "title": "Disk Reads per $by"
+ },
+ {
+ "class": "bps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(node_disk_written_bytes_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "sum(rate(node_disk_bytes_written{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "title": "Disk Writes Bps per $by"
+ },
+ {
+ "class": "bps_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(node_disk_read_bytes_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "sum(rate(node_disk_bytes_read{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "title": "Disk Read Bps per $by"
+ },
+ {
+ "class": "seconds_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "rate(node_disk_read_time_seconds_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])/rate(node_disk_reads_completed_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "The average read time from disk",
+ "title": "Read AWait per instance"
+ },
+ {
+ "class": "seconds_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "rate(node_disk_write_time_seconds_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])/rate(node_disk_writes_completed_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_disk\"}[4m])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "The average write time to disk",
+ "title": "Write AWait per instance"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": null,
+ "id": "auto",
+ "gridPos": {
+ "h": 1,
+ "w": 24
+ },
+ "panels": [],
+ "repeat": "monitor_network_interface",
+ "title": "Network Interface $monitor_network_interface",
+ "type": "row"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "gridPos": {
+ "h": 2
+ },
+ "height": "25px",
+ "panels": [
+ {
+ "class": "text_panel",
+ "content": "Network $monitor_network_interface
",
+ "style": {}
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "pps_panel",
+ "span": 6,
+ "targets": [
+ {
+ "expr": "sum(rate(node_network_receive_packets_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_network_interface\"}[4m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "sum(rate(node_network_receive_packets{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_network_interface\"}[4m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "title": "Interface Rx Packets"
+ },
+ {
+ "class": "pps_panel",
+ "span": 6,
+ "targets": [
+ {
+ "expr": "sum(rate(node_network_transmit_packets_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_network_interface\"}[4m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "sum(rate(node_network_transmit_packets{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_network_interface\"}[4m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "title": "Interface Tx Packets"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "bps_panel",
+ "span": 6,
+ "targets": [
+ {
+ "expr": "sum(rate(node_network_receive_bytes_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_network_interface\"}[4m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "sum(rate(node_network_receive_bytes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_network_interface\"}[4m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "title": "Interface Rx Bps"
+ },
+ {
+ "class": "bps_panel",
+ "span": 6,
+ "targets": [
+ {
+ "expr": "sum(rate(node_network_transmit_bytes_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_network_interface\"}[4m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "sum(rate(node_network_transmit_bytes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", device=\"$monitor_network_interface\"}[4m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "title": "Interface Tx Bps"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": null,
+ "id": "auto",
+ "gridPos": {
+ "h": 1,
+ "w": 24
+ },
+ "panels": [],
+ "title": "CPU and Memory",
+ "type": "row"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "bps_panel",
+ "span": 3,
+ "description": "The available memory. Note that in a production environment we expect this to be low: Scylla uses most of the available memory when possible",
+ "targets": [
+ {
+ "expr": "sum(node_memory_MemAvailable_bytes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Available memory"
+ },
+ {
+ "class": "percentunit_panel",
+ "span": 3,
+ "description": "Percent of available memory. Note that in a production environment we expect this to be low: Scylla uses most of the available memory when possible",
+ "targets": [
+ {
+ "expr": "sum(node_memory_MemAvailable_bytes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\"}) by ([[by]])/sum(node_memory_MemTotal_bytes{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Available memory percent"
+ },
+ {
+ "class": "percentunit_panel",
+ "span": 3,
+ "description": "Percent of CPU used. Note that in production Scylla tries to use most of the CPU, and this is not a problem",
+ "targets": [
+ {
+ "expr": "1-sum(rate(node_cpu_seconds_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", mode=\"idle\"}[3m])) by ([[by]])/count(node_cpu_seconds_total{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", mode=\"idle\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "CPU used"
+ },
+ {
+ "class": "graph_panel",
+ "span": 3,
+ "description": "CPU frequency should be set for performance.\n\n The current frequency should match the max frequency. If that is not the case, check your host configuration.",
+ "targets": [
+ {
+ "expr": "max(node_cpu_scaling_frequency_max_hertz{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\"}) or on() max(node_cpu_frequency_max_hertz{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\"})",
+ "intervalFactor": 1,
+ "legendFormat": "Max",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "min(node_cpu_scaling_frequency_hertz{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\"}) by ([[by]]) or on() min(node_cpu_frequency_hertz{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:211",
+ "alias": "Max",
+ "color": "#F2495C"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "links": [],
+ "unit": "hertz"
+ },
+ "overrides": []
+ },
+ "title": "CPU Frequency"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "monitoring_version_row"
+ }
+ ],
+ "tags": [
+ "__SCYLLA_VERSION_DOT__"
+ ],
+ "templating": {
+ "list": [
+ {
+ "class": "template_variable_single",
+ "label": "cluster",
+ "name": "cluster",
+ "query": "label_values(node_filesystem_avail_bytes, cluster)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "dc",
+ "name": "dc",
+ "query": "label_values(node_filesystem_avail_bytes{cluster=~\"$cluster\"}, dc)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "node",
+ "name": "node",
+ "query": "label_values(node_filesystem_avail_bytes{cluster=~\"$cluster|$^\", dc=~\"$dc\"}, instance)"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "tags": [],
+ "text": "Instance",
+ "value": "instance"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "by",
+ "multi": false,
+ "name": "by",
+ "options": [
+ {
+ "selected": false,
+ "text": "Cluster",
+ "value": "cluster"
+ },
+ {
+ "selected": false,
+ "text": "DC",
+ "value": "dc"
+ },
+ {
+ "selected": true,
+ "text": "Instance",
+ "value": "instance"
+ },
+ {
+ "selected": true,
+ "text": "Shard",
+ "value": "instance,cpu"
+ }
+ ],
+ "query": "Cluster,DC,Instance",
+ "type": "custom"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "isNone": true,
+ "text": "None",
+ "value": ""
+ },
+ "datasource": "prometheus",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": true,
+ "name": "monitor_disk",
+ "options": [],
+ "query": "node_disk_read_bytes_total",
+ "refresh": 2,
+ "regex": "/.*device=\"([^\\\"]*)\".*/",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "isNone": true,
+ "text": "None",
+ "value": ""
+ },
+ "datasource": "prometheus",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": true,
+ "name": "monitor_network_interface",
+ "options": [],
+ "query": "node_network_receive_packets_total",
+ "refresh": 2,
+ "regex": "/.*device=\"([^\\\"]*)\".*/",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "/var/lib/scylla",
+ "value": "/var/lib/scylla"
+ },
+ "datasource": "prometheus",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Mount path",
+ "multi": true,
+ "name": "mount_point",
+ "options": [],
+ "query": "node_filesystem_avail_bytes",
+ "refresh": 2,
+ "regex": "/mountpoint=\"([^\"]*)\".*/",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "class": "template_variable_custom",
+ "current": {
+ "text": "__SCYLLA_VERSION_DOT__",
+ "value": "__SCYLLA_VERSION_DOT__"
+ },
+ "name": "scylla_version",
+ "options": [
+ {
+ "selected": true,
+ "text": "__SCYLLA_VERSION_DOT__",
+ "value": "__SCYLLA_VERSION_DOT__"
+ }
+ ],
+ "query": "__SCYLLA_VERSION_DOT__"
+ },
+ {
+ "class": "monitor_version_var"
+ }
+ ]
+ },
+ "annotations" :{
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ },
+ {
+ "class" : "annotation_restart"
+ },
+ {
+ "class" : "annotation_stall"
+ },
+ {
+ "class" : "annotation_schema_changed"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-30m",
+ "to": "now"
+ },
+ "title": "OS Metrics",
+ "uid": "OS-__SCYLLA_VERSION_DASHED__",
+ "version": 5
+ }
+}
\ No newline at end of file
diff --git a/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-overview.template.json b/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-overview.template.json
new file mode 100644
index 00000000000..a5c4abf948c
--- /dev/null
+++ b/assets/monitoring/grafana/v1alpha1/dashboards/platform/scylla-overview.template.json
@@ -0,0 +1,713 @@
+{
+ "dashboard": {
+ "class": "dashboard",
+ "originalTitle": "Scylla Cluster Metrics",
+ "overwrite": true,
+ "rows": [
+ {
+ "class": "row",
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": null,
+ "id": "auto",
+ "gridPos": {
+ "h": 1,
+ "w": 24
+ },
+ "panels": [],
+ "title": "Cluster overview $cluster",
+ "type": "row"
+ }
+ ]
+ },
+ {
+ "class" : "small_stat_rows"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "alert_table",
+ "span": 4,
+ "title": "Active Alerts"
+ },
+ {
+ "class": "ops_panel",
+ "span": 2,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_write_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]))",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "Write attempts - include all writes that reached the coordinator node, even if they will eventually fail",
+ "title": "Writes"
+ },
+ {
+ "class": "us_panel",
+ "span": 2,
+ "targets": [
+ {
+ "expr": "wlatencyp95{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name=~\"$sg\"}>0",
+ "intervalFactor": 1,
+ "legendFormat": "{{scheduling_group_name}} 95%",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "wlatencyp99{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name=~\"$sg\"}>0",
+ "intervalFactor": 1,
+ "legendFormat": "{{scheduling_group_name}} 99%",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "legend": {
+ "class": "show_legend"
+ },
+ "title": "Write Latencies"
+ },
+ {
+ "class": "ops_panel",
+ "span": 2,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_read_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]))",
+ "intervalFactor": 1,
+ "legendFormat": "Reads",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "Read attempts - include all reads that reached the coordinator node, even if they will eventually fail",
+ "title": "Reads"
+ },
+ {
+ "class": "us_panel",
+ "span": 2,
+ "targets": [
+ {
+ "expr": "rlatencyp95{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name=~\"$sg\"}>0",
+ "intervalFactor": 1,
+ "legendFormat": "{{scheduling_group_name}} 95%",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "rlatencyp99{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name=~\"$sg\"}>0",
+ "intervalFactor": 1,
+ "legendFormat": "{{scheduling_group_name}} 99%",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "legend": {
+ "class": "show_legend"
+ },
+ "title": "Read Latencies"
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": null,
+ "id": "auto",
+ "gridPos": {
+ "h": 1,
+ "w": 24
+ },
+ "panels": [],
+ "title": "",
+ "repeat": "dc",
+ "type": "row"
+ }
+ ]
+ },
+ {
+ "class": "header_row",
+ "panels": [
+ {
+ "class": "plain_text",
+ "content": "Information for $dc
"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class": "vertical_lcd",
+ "targets": [
+ {
+ "expr": "avg(scylla_reactor_utilization{instance=~\"[[node]]\",cluster=~\"$cluster\", dc=~\"$dc\", shard=~\"[[shard]]\"} )",
+ "interval": "",
+ "legendFormat": "",
+ "instant": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Load"
+ },
+ {
+ "class": "bytes_panel",
+ "gridPos": {
+ "w": 3
+ },
+ "targets": [
+ {
+ "expr": "Avg(node_filesystem_size_bytes{mountpoint=\"$mount_point\", dc=~\"$dc\", instance=~\"$node\"}) by ([[by]])-avg(node_filesystem_avail_bytes{mountpoint=\"$mount_point\", dc=~\"$dc\", instance=~\"$node\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "Avg Usage {{[[by]]}}",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "avg(node_filesystem_size_bytes{mountpoint=\"$mount_point\", dc=~\"$dc\", instance=~\"$node\"}) by ([[by]])",
+ "legendFormat": "Size {{[[by]]}}",
+ "interval": "",
+ "refId": "B"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "unit": "bytes"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byFrameRefID",
+ "options": "B"
+ },
+ "properties": [
+ {
+ "id": "custom.lineStyle",
+ "value": {
+ "fill": "dash",
+ "dash": [
+ 10,
+ 10
+ ]
+ }
+ },
+ {
+ "id": "custom.lineWidth",
+ "value": 2
+ }
+ ]
+ }
+ ]
+ },
+ "options": {
+ "class":"desc_tooltip_options"
+ },
+ "description": "The average Disk usage per [[by]].\n\n The dashed line represents the total size.",
+ "title": "Average Disk Usage"
+ },
+ {
+ "class": "graph_panel_int",
+ "span": 2,
+ "targets": [
+ {
+ "expr": "$func(scylla_compaction_manager_compactions{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "description": "scylla_compaction_manager_compactions",
+ "title": "Running Compactions"
+ },
+ {
+ "class": "ops_panel",
+ "description": "The Hits and Misses",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_row_hits{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "Hit {{[[by]]}}",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "$func(rate(scylla_cache_row_misses{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "Misses {{[[by]]}}",
+ "refId": "B",
+ "step": 10
+ }
+ ],
+ "legend": {
+ "class": "show_legend"
+ },
+ "title": "Cache Hits/Misses"
+ },
+ {
+ "class":"small_nodes_table",
+ "gridPos": {
+ "h": 17,
+ "w": 10
+ }
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) or on([[by]]) $func(rate(scylla_storage_proxy_coordinator_write_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "Writes",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m] offset 1d)) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_write_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m] offset 1d))",
+ "legendFormat": "1 Day Ago",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "B",
+ "step": 1
+ },
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m] offset 1w)) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_write_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m] offset 1w))",
+ "legendFormat": "1 Week Ago",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "C",
+ "step": 1
+ }
+ ],
+ "legend": {
+ "class": "show_legend"
+ },
+ "seriesOverrides": [
+ {
+ "alias": "1 Day Ago",
+ "dashes": true,
+ "dashLength": 4
+ },
+ {
+ "alias": "1 Week Ago",
+ "dashes": true,
+ "dashLength": 2
+ }
+ ],
+ "description": "Write attempts - include all writes that reached the coordinator node, even if they will eventually fail",
+ "title": "Writes"
+ },
+ {
+ "class": "us_panel",
+ "span": 2,
+ "targets": [
+ {
+ "expr": "avg(wlatencyp95{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$sg\"}>0) by (scheduling_group_name, [[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "{{scheduling_group_name}} 95% {{[[by]]}}",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "avg(wlatencyp99{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$sg\"}>0) by (scheduling_group_name, [[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "{{scheduling_group_name}} 99% {{[[by]]}}",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "legend": {
+ "class": "show_legend"
+ },
+ "title": "Write Latencies"
+ },
+ {
+ "class": "ops_panel",
+ "description": "Requests that Scylla tried to write but timed out. Timeouts are counted in the node that received the request (the coordinator), not at the replicas.",
+ "span": 2,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_write_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "Writes {{[[by]]}}",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "legend": {
+ "class": "show_legend"
+ },
+ "title": "Write Timeouts by [[by]]"
+ },
+ {
+ "class": "ops_panel",
+ "span": 3,
+ "gridPos": {
+ "x": 0
+ },
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$sg\"}[1m])) by ([[by]]) or on([[by]]) $func(rate(scylla_storage_proxy_coordinator_read_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$sg\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "Reads",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$sg\"}[1m] offset 1d)) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_read_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$sg\"}[1m] offset 1d))",
+ "intervalFactor": 1,
+ "legendFormat": "1 Day Ago",
+ "refId": "B",
+ "step": 1
+ },
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$sg\"}[1m] offset 1w)) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_read_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$sg\"}[1m] offset 1w))",
+ "intervalFactor": 1,
+ "legendFormat": "1 Week Ago",
+ "refId": "C",
+ "step": 1
+ }
+ ],
+ "legend": {
+ "class": "show_legend"
+ },
+ "seriesOverrides": [
+ {
+ "alias": "1 Day Ago",
+ "dashes": true,
+ "dashLength": 4
+ },
+ {
+ "alias": "1 Week Ago",
+ "dashes": true,
+ "dashLength": 2
+ }
+ ],
+ "description": "Read attempts - include all reads that reached the coordinator node, even if they will eventually fail",
+ "title": "Reads"
+ },
+ {
+ "class": "us_panel",
+ "span": 2,
+ "gridPos": {
+ "x": 6
+ },
+ "targets": [
+ {
+ "expr": "avg(rlatencyp95{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$sg\"}>0) by(scheduling_group_name, [[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "{{scheduling_group_name}} 95% {{[[by]]}}",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "avg(rlatencyp99{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$sg\"}>0) by(scheduling_group_name, [[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "{{scheduling_group_name}} 99% {{[[by]]}}",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "legend": {
+ "class": "show_legend"
+ },
+ "title": "Read Latencies"
+ },
+ {
+ "class": "ops_panel",
+ "description": "Requests that Scylla tried to read but timed out. Timeouts are counted in the node that received the request (the coordinator), not at the replicas.",
+ "span": 2,
+ "gridPos": {
+ "x": 10
+ },
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_read_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])+rate(scylla_storage_proxy_coordinator_cas_read_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])+rate(scylla_storage_proxy_coordinator_range_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "Read {{[[by]]}}",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "legend": {
+ "class": "show_legend"
+ },
+ "title": "Read Timeouts by [[by]]"
+ },
+ {
+ "class": "plain_text",
+ "dashproductreject": "no-version-check",
+ "gridPos": {
+ "w": 10,
+ "x": 14,
+ "h": 1
+ },
+ "options": {
+ "mode": "html",
+ "content": "
"
+ }
+ },
+ {
+ "class": "plain_text",
+ "dashproduc": "no-version-check",
+ "gridPos": {
+ "w": 10,
+ "x": 14,
+ "h": 1
+ },
+ "options": {
+ "mode": "html",
+ "content": ""
+ }
+ }
+ ]
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": null,
+ "id": "auto",
+ "gridPos": {
+ "h": 1,
+ "w": 24
+ },
+ "panels": [],
+ "title": "",
+ "type": "row"
+ }
+ ]
+ },
+ {
+ "class": "header_row",
+ "panels": [
+ {
+ "class": "plain_text",
+ "content": "Advisor
"
+ }
+ ],
+ "title": "New row"
+ },
+ {
+ "class": "row",
+ "panels": [
+ {
+ "class":"advisor_table",
+ "dashversion":">4.1"
+ },
+ {
+ "class":"enterprise_advisor_table",
+ "dashversion":">2019.1"
+ }
+ ]
+ },
+ {
+ "class": "user_panels_collapse"
+ },
+ {
+ "class": "user_panel_row_header"
+ },
+ {
+ "class": "user_panels_row"
+ },
+ {
+ "class": "monitoring_version_row"
+ }
+ ],
+ "tags": [
+ "__SCYLLA_VERSION_DOT__"
+ ],
+ "templating": {
+ "list": [
+ {
+ "class": "by_template_var",
+ "current": {
+ "tags": [],
+ "text": "DC",
+ "value": "dc"
+ },
+ "options": [
+ {
+ "selected": false,
+ "text": "Cluster",
+ "value": "cluster"
+ },
+ {
+ "selected": true,
+ "text": "DC",
+ "value": "dc"
+ },
+ {
+ "selected": false,
+ "text": "Instance",
+ "value": "instance"
+ },
+ {
+ "selected": false,
+ "text": "instance,shard",
+ "value": "instance,shard"
+ }
+ ]
+ },
+ {
+ "class": "template_variable_single",
+ "label": "cluster",
+ "name": "cluster",
+ "query": "label_values(scylla_reactor_utilization, cluster)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "dc",
+ "name": "dc",
+ "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster\"}, dc)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "node",
+ "name": "node",
+ "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster|$^\", dc=~\"$dc\"}, instance)"
+ },
+ {
+ "class": "template_variable_all",
+ "label": "shard",
+ "name": "shard",
+ "allValue":".+",
+ "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster|$^\"},shard)",
+ "sort": 3
+ },
+ {
+ "class": "template_variable_single",
+ "current": {
+ "text": "/var/lib/scylla",
+ "value": "/var/lib/scylla"
+ },
+ "label": "Mount path",
+ "name": "mount_point",
+ "query": "node_filesystem_avail_bytes",
+ "regex": "/mountpoint=\"([^\"]*)\".*/",
+ "sort": 0
+ },
+ {
+ "class": "template_variable_single",
+ "current": {
+ "selected": true,
+ "text": [
+ "statement"
+ ],
+ "value": [
+ "statement"
+ ]
+ },
+ "label": "SG",
+ "name": "sg",
+ "includeAll":true,
+ "multi":true,
+ "dashversion":[">4.3"],
+ "query": "label_values(rlatencyp99{cluster=~\"$cluster\", scheduling_group_name!~\"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache\"},scheduling_group_name)",
+ "sort": 3
+ },
+ {
+ "class": "template_variable_single",
+ "dashversion":[">2019.1"],
+ "current": {
+ "selected": true,
+ "text": [
+ "sl:default"
+ ],
+ "value": [
+ "sl:default"
+ ]
+ },
+ "label": "SG",
+ "name": "sg",
+ "includeAll":true,
+ "multi":true,
+ "query": "label_values(rlatencyp99{cluster=~\"$cluster\", scheduling_group_name!~\"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache\"},scheduling_group_name)",
+ "sort": 3
+ },
+ {
+ "class": "aggregation_function"
+ },
+ {
+ "class": "template_variable_custom",
+ "current": {
+ "text": "__SCYLLA_VERSION_DASHED__",
+ "value": "__SCYLLA_VERSION_DASHED__"
+ },
+ "name": "dash_version",
+ "options": [
+ {
+ "selected": true,
+ "text": "__SCYLLA_VERSION_DASHED__",
+ "value": "__SCYLLA_VERSION_DASHED__"
+ }
+ ],
+ "query": "__SCYLLA_VERSION_DASHED__"
+ },
+ {
+ "class": "template_variable_all",
+ "hide":2,
+ "name": "all_scyllas_versions",
+ "current":{
+ "selected":true,
+ "text":[
+ "All"
+ ],
+ "value":[
+ "$__all"
+ ]
+ },
+ "query": "label_values(scylla_scylladb_current_version{cluster=~\"$cluster|$^\"}, version)"
+ },
+ {
+ "class": "template_variable_all",
+ "hide":2,
+ "name": "count_dc",
+ "definition": "query_result(count(up{job=\"scylla\"}) by (dc))",
+ "query": {
+ "query": "query_result(count(up{job=\"scylla\"}) by (dc))",
+ "refId": "StandardVariableQuery"
+ },
+ "regex": "/(?\\{dc=\"[^\"]+\".* \\d+) .*/"
+ },
+ {
+ "class": "template_variable_custom",
+ "current": {
+ "text": "__SCYLLA_VERSION_DOT__",
+ "value": "__SCYLLA_VERSION_DOT__"
+ },
+ "name": "scylla_version",
+ "options": [
+ {
+ "selected": true,
+ "text": "__SCYLLA_VERSION_DOT__",
+ "value": "__SCYLLA_VERSION_DOT__"
+ }
+ ],
+ "query": "__SCYLLA_VERSION_DOT__"
+ },
+ {
+ "class": "monitor_version_var"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-30m",
+ "to": "now"
+ },
+ "title": "Overview",
+ "uid": "overview-__SCYLLA_VERSION_DASHED__",
+ "version": 1
+ }
+}
diff --git a/assets/monitoring/grafana/v1alpha1/dashboards/saas/scylla-saas.template.json b/assets/monitoring/grafana/v1alpha1/dashboards/saas/scylla-saas.template.json
new file mode 100644
index 00000000000..64366bde14f
--- /dev/null
+++ b/assets/monitoring/grafana/v1alpha1/dashboards/saas/scylla-saas.template.json
@@ -0,0 +1,4680 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ },
+ {
+ "class": "annotation_manager_task",
+ "datasource": "prometheus",
+ "enable": true,
+ "expr": "scylla_manager_task_active_count{type=~\"repair|backup\",cluster=~\"$cluster|$^\"}>0",
+ "hide": false,
+ "iconColor": "#73BF69",
+ "limit": 100,
+ "name": "Task",
+ "showIn": 0,
+ "tagKeys": "type",
+ "tags": [],
+ "titleFormat": "Running",
+ "type": "tags"
+ },
+ {
+ "class": "mv_building",
+ "datasource": "prometheus",
+ "enable": true,
+ "expr": "sum(scylla_view_builder_builds_in_progress)>0",
+ "hide": false,
+ "iconColor": "rgb(50, 176, 0, 128)",
+ "limit": 100,
+ "name": "MV",
+ "showIn": 0,
+ "tagKeys": "instance,dc,cluster",
+ "tags": [],
+ "titleFormat": "Materialized View built",
+ "type": "tags"
+ },
+ {
+ "class": "ops_annotation",
+ "datasource": "prometheus",
+ "enable": true,
+ "expr": "10*min(scylla_node_ops_finished_percentage) by (ops, dc,instance) < 10",
+ "hide": false,
+ "iconColor": "rgb(50, 176, 0, 128)",
+ "limit": 100,
+ "name": "ops",
+ "showIn": 0,
+ "tagKeys": "ops,dc,instance",
+ "tags": [],
+ "titleFormat": "Operation",
+ "type": "tags"
+ },
+ {
+ "class": "annotation_schema_changed",
+ "datasource": "prometheus",
+ "enable": false,
+ "expr": "changes(scylla_database_schema_changed[$__rate_interval])>0",
+ "hide": false,
+ "iconColor": "rgba(255, 96, 96, 1)",
+ "limit": 100,
+ "name": "Schema Changed",
+ "showIn": 0,
+ "tagKeys": "instance,dc,cluster",
+ "tags": [],
+ "titleFormat": "schema changed",
+ "type": "tags"
+ }
+ ]
+ },
+ "class": "dashboard",
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 1,
+ "hideControls": true,
+ "id": null,
+ "links": [
+ {
+ "asDropdown": true,
+ "icon": "external link",
+ "includeVars": true,
+ "keepTime": true,
+ "tags": [],
+ "type": "dashboards"
+ }
+ ],
+ "originalTitle": "Scylla Cluster Metrics",
+ "overwrite": true,
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 1,
+ "panels": [],
+ "title": "Cluster overview $cluster",
+ "type": "row"
+ },
+ {
+ "class": "small_stat",
+ "datasource": "prometheus",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "decimals": 1,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "si:"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 0,
+ "y": 1
+ },
+ "id": 2,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7.1.3",
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_transport_requests_served{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[60s])) + (sum(rate(scylla_thrift_served{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[60s])) or on() vector(0))",
+ "instant": true,
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 40
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Requests/s",
+ "type": "stat"
+ },
+ {
+ "class": "small_stat",
+ "datasource": "prometheus",
+ "description": "Average Write Latency",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 50000
+ }
+ ]
+ },
+ "unit": "µs"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 2,
+ "x": 3,
+ "y": 1
+ },
+ "id": 3,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7.1.3",
+ "targets": [
+ {
+ "expr": "avg(wlatencya{by=\"cluster\", cluster=~\"$cluster|^$\",scheduling_group_name!=\"streaming\"}>0)",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Avg Write",
+ "type": "stat"
+ },
+ {
+ "class": "small_stat",
+ "datasource": "prometheus",
+ "description": "95% write Latency",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 100000
+ }
+ ]
+ },
+ "unit": "µs"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 2,
+ "x": 5,
+ "y": 1
+ },
+ "id": 4,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7.1.3",
+ "targets": [
+ {
+ "expr": "avg(wlatencyp95{by=\"cluster\", cluster=~\"$cluster|^$\",scheduling_group_name!=\"streaming\"}>0)",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "95% Write",
+ "type": "stat"
+ },
+ {
+ "class": "small_stat",
+ "datasource": "prometheus",
+ "description": "99% write Latency",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 100000
+ }
+ ]
+ },
+ "unit": "µs"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 2,
+ "x": 7,
+ "y": 1
+ },
+ "id": 5,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7.1.3",
+ "targets": [
+ {
+ "expr": "avg(wlatencyp99{by=\"cluster\", cluster=~\"$cluster|^$\",scheduling_group_name!=\"streaming\"}>0)",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "99% Write",
+ "type": "stat"
+ },
+ {
+ "class": "small_stat",
+ "datasource": "prometheus",
+ "description": "Average Read Latency",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 50000
+ }
+ ]
+ },
+ "unit": "µs"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 2,
+ "x": 9,
+ "y": 1
+ },
+ "id": 6,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7.1.3",
+ "targets": [
+ {
+ "expr": "avg(rlatencya{by=\"cluster\", cluster=~\"$cluster|^$\",scheduling_group_name!=\"streaming\"}>0)",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Avg Read",
+ "type": "stat"
+ },
+ {
+ "class": "small_stat",
+ "datasource": "prometheus",
+ "description": "95% read Latency",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 100000
+ }
+ ]
+ },
+ "unit": "µs"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 2,
+ "x": 11,
+ "y": 1
+ },
+ "id": 7,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7.1.3",
+ "targets": [
+ {
+ "expr": "avg(rlatencyp95{by=\"cluster\", cluster=~\"$cluster|^$\",scheduling_group_name!=\"streaming\"}>0)",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "95% Read",
+ "type": "stat"
+ },
+ {
+ "class": "small_stat",
+ "datasource": "prometheus",
+ "description": "99% read Latency",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 100000
+ }
+ ]
+ },
+ "unit": "µs"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 2,
+ "x": 13,
+ "y": 1
+ },
+ "id": 8,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7.1.3",
+ "targets": [
+ {
+ "expr": "avg(rlatencyp99{by=\"cluster\", cluster=~\"$cluster|^$\",scheduling_group_name!=\"streaming\"}>0)",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "99% Read",
+ "type": "stat"
+ },
+ {
+ "class": "small_stat",
+ "datasource": "prometheus",
+ "description": "The percentage of the time during which Scylla utilized the CPU. Note that because Scylla does busy polling for some time before going idle, CPU utilization as seen by the operating system may be much higher. Your system is not yet CPU-bottlenecked until this metric is high.",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 2,
+ "x": 15,
+ "y": 1
+ },
+ "id": 9,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7.1.3",
+ "targets": [
+ {
+ "expr": "avg(scylla_reactor_utilization{cluster=~\"$cluster|$^\", dc=~\"$dc\"} )",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Load",
+ "type": "stat"
+ },
+ {
+ "class": "small_stat",
+ "datasource": "prometheus",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 1
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 2,
+ "x": 17,
+ "y": 1
+ },
+ "id": 10,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7.1.3",
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_database_total_reads_failed{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
+ "instant": true,
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 40
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "R Failed",
+ "type": "stat"
+ },
+ {
+ "class": "small_stat",
+ "datasource": "prometheus",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 1
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 2,
+ "x": 19,
+ "y": 1
+ },
+ "id": 11,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7.1.3",
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_database_total_writes_failed{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
+ "instant": true,
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 40
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "W Failed",
+ "type": "stat"
+ },
+ {
+ "class": "small_stat",
+ "datasource": "prometheus",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 1
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 2,
+ "x": 21,
+ "y": 1
+ },
+ "id": 12,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7.1.3",
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_storage_proxy_coordinator_write_timeouts{cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m]))",
+ "instant": true,
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 40
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Timeouts",
+ "type": "stat"
+ },
+ {
+ "class": "alert_table",
+ "columns": [],
+ "datasource": "alertmanager",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": null,
+ "filterable": false
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute"
+ }
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Time"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 150
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "instance"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 100
+ }
+ ]
+ }
+ ]
+ },
+ "fontSize": "100%",
+ "gridPos": {
+ "h": 6,
+ "w": 8,
+ "x": 0,
+ "y": 5
+ },
+ "id": 13,
+ "links": [],
+ "options": {
+ "showHeader": true
+ },
+ "pageSize": null,
+ "scroll": true,
+ "showHeader": true,
+ "sort": {
+ "col": 0,
+ "desc": true
+ },
+ "span": 4,
+ "targets": [
+ {
+ "active": true,
+ "annotations": true,
+ "filters": "job!=\"scylla_manager\",advisor=\"\"",
+ "legendFormat": "{{description}}",
+ "refId": "A",
+ "target": "Query"
+ }
+ ],
+ "title": "Active Alerts",
+ "transform": "table",
+ "transformations": [
+ {
+ "id": "filterFieldsByName",
+ "options": {
+ "include": {
+ "names": [
+ "Time",
+ "summary",
+ "instance"
+ ]
+ }
+ }
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {},
+ "indexByName": {
+ "Time": 0,
+ "instance": 1,
+ "summary": 2
+ },
+ "renameByName": {}
+ }
+ }
+ ],
+ "type": "table"
+ },
+ {
+ "class": "ops_panel",
+ "datasource": "prometheus",
+ "description": "Write attempts - include all writes that reached the coordinator node, even if they will eventually fail",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "si:ops/s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 8,
+ "y": 5
+ },
+ "id": 14,
+ "isNew": true,
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "seriesOverrides": [
+ {}
+ ],
+ "span": 2,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m]))",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Writes",
+ "type": "timeseries"
+ },
+ {
+ "class": "us_panel",
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "µs"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 12,
+ "y": 5
+ },
+ "id": 15,
+ "isNew": true,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "class": "show_legend",
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "seriesOverrides": [
+ {}
+ ],
+ "span": 2,
+ "targets": [
+ {
+ "expr": "avg(wlatencyp95{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0)",
+ "intervalFactor": 1,
+ "legendFormat": "95%",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "avg(wlatencyp99{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0)",
+ "intervalFactor": 1,
+ "legendFormat": "99%",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "title": "Write Latencies",
+ "type": "timeseries"
+ },
+ {
+ "class": "ops_panel",
+ "datasource": "prometheus",
+ "description": "Read attempts - include all reads that reached the coordinator node, even if they will eventually fail",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "si:ops/s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 16,
+ "y": 5
+ },
+ "id": 16,
+ "isNew": true,
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "seriesOverrides": [
+ {}
+ ],
+ "span": 2,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m]))",
+ "intervalFactor": 1,
+ "legendFormat": "Reads",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Reads",
+ "type": "timeseries"
+ },
+ {
+ "class": "us_panel",
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "µs"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 20,
+ "y": 5
+ },
+ "id": 17,
+ "isNew": true,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "class": "show_legend",
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "seriesOverrides": [
+ {}
+ ],
+ "span": 2,
+ "targets": [
+ {
+ "expr": "avg(rlatencyp95{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0)",
+ "intervalFactor": 1,
+ "legendFormat": "95%",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "avg(rlatencyp99{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0)",
+ "intervalFactor": 1,
+ "legendFormat": "99%",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "title": "Read Latencies",
+ "type": "timeseries"
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 11
+ },
+ "id": 18,
+ "panels": [],
+ "title": "",
+ "type": "row"
+ },
+ {
+ "class": "plain_text",
+ "content": "Advisor
",
+ "datasource": null,
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 12
+ },
+ "id": 19,
+ "isNew": true,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "span": 12,
+ "style": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "datasource": null,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 12,
+ "x": 12,
+ "y": 14
+ },
+ "id": 20,
+ "options": {
+ "content": "Balance
\nAn Imbalance between shards or nodes may indicate a potential problem",
+ "mode": "html"
+ },
+ "pluginVersion": "7.3.4",
+ "targets": [
+ {
+ "queryType": "randomWalk",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "",
+ "type": "text"
+ },
+ {
+ "class": "advisor_table",
+ "dashversion": ">4.1",
+ "datasource": "alertmanager",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "align": null,
+ "filterable": false
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "dashboard"
+ },
+ "properties": [
+ {
+ "id": "links",
+ "value": [
+ {
+ "title": "",
+ "url": "/d/${__data.fields.dashboard}-[[dash_version]]?refresh=30s&orgId=1&var-by=instance&from=${__from}&to=${__to}"
+ }
+ ]
+ },
+ {
+ "id": "custom.width",
+ "value": 100
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "advisor"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 120
+ },
+ {
+ "id": "displayName",
+ "value": "Category"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "severity"
+ },
+ "properties": [
+ {
+ "id": "links",
+ "value": [
+ {
+ "targetBlank": true,
+ "title": "Open an issue",
+ "url": "https://github.com/scylladb/scylla/issues/new?body=description%3D${__data.fields[4]}%0ASource%3DAdvisor%0AScylla-versions%3D${all_scyllas_versions}%0Ascylla-monitoring%3D${monitoring_version}%0Acluster%3D${count_dc}%0Aname%3D${cluster}%0A%0A"
+ }
+ ]
+ },
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "from": "0",
+ "id": 1,
+ "text": "🔔",
+ "to": "10",
+ "type": 2,
+ "value": ""
+ }
+ ]
+ },
+ {
+ "id": "displayName",
+ "value": "Report"
+ },
+ {
+ "id": "custom.width",
+ "value": 65
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "summary"
+ },
+ "properties": [
+ {
+ "id": "links",
+ "value": [
+ {
+ "targetBlank": true,
+ "title": "${__data.fields.description}\n\n click for more information",
+ "url": "https://monitoring.docs.scylladb.com/branch-master/use-monitoring/advisor/${__data.fields.alertname}"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "alertname"
+ },
+ "properties": [
+ {
+ "id": "displayName",
+ "value": "."
+ },
+ {
+ "id": "custom.width",
+ "value": 1
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "description"
+ },
+ "properties": [
+ {
+ "id": "displayName",
+ "value": "."
+ },
+ {
+ "id": "custom.width",
+ "value": 1
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Time"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 150
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 12,
+ "x": 0,
+ "y": 16
+ },
+ "id": 21,
+ "links": [],
+ "options": {
+ "showHeader": true
+ },
+ "pluginVersion": "7.3.4",
+ "targets": [
+ {
+ "active": true,
+ "annotations": true,
+ "filters": "advisor!=\"\"",
+ "legendFormat": "{{description}}",
+ "refId": "A",
+ "target": "Query"
+ }
+ ],
+ "title": "",
+ "transformations": [
+ {
+ "id": "filterFieldsByName",
+ "options": {
+ "include": {
+ "names": [
+ "advisor",
+ "dashboard",
+ "description",
+ "severity",
+ "alertname",
+ "summary",
+ "Time"
+ ]
+ }
+ }
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {},
+ "indexByName": {
+ "Time": 1,
+ "advisor": 2,
+ "dashboard": 3,
+ "severity": 0,
+ "summary": 4
+ },
+ "renameByName": {}
+ }
+ }
+ ],
+ "type": "table"
+ },
+ {
+ "class": "small_stat_error",
+ "datasource": "prometheus",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "mappings": [
+ {
+ "from": "-1000",
+ "id": 1,
+ "text": "✓",
+ "to": "0.001",
+ "type": 2,
+ "value": ""
+ },
+ {
+ "from": "0.001",
+ "id": 2,
+ "text": "⚠",
+ "to": "10000",
+ "type": 2,
+ "value": "0.001"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "orange",
+ "value": 0.001
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 2,
+ "x": 12,
+ "y": 16
+ },
+ "id": 22,
+ "links": [
+ {
+ "title": "The number of connections per shard should be balanced"
+ }
+ ],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7.1.3",
+ "targets": [
+ {
+ "expr": "max(abs(sum(scylla_transport_current_connections{cluster=~\"$cluster|$^\", dc=~\"$dc\"}) by (instance,shard)-scalar(avg(scylla_transport_current_connections{cluster=~\"$cluster|$^\", dc=~\"$dc\"})))) - 8",
+ "hide": false,
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Connections",
+ "transformations": [
+ {
+ "id": "calculateField",
+ "options": {
+ "mode": "reduceRow",
+ "reduce": {
+ "reducer": "max"
+ },
+ "replaceFields": true
+ }
+ }
+ ],
+ "type": "stat"
+ },
+ {
+ "class": "small_stat_error",
+ "datasource": "prometheus",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "mappings": [
+ {
+ "from": "-1000",
+ "id": 1,
+ "text": "✓",
+ "to": "0.001",
+ "type": 2,
+ "value": ""
+ },
+ {
+ "from": "0.001",
+ "id": 2,
+ "text": "⚠",
+ "to": "10000",
+ "type": 2,
+ "value": "0.001"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "orange",
+ "value": 0.001
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 2,
+ "x": 14,
+ "y": 16
+ },
+ "id": 23,
+ "links": [
+ {
+ "title": "Indicates that the number of CQL operations (inserts, updates, deletes, reads) is not balanced between shards in one of the nodes"
+ }
+ ],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7.1.3",
+ "targets": [
+ {
+ "expr": "max(abs(rate(scylla_cql_updates{conditional=\"no\", dc=~\"$dc\"}[1m]) - on(dc) group_left avg(rate(scylla_cql_updates{conditional=\"no\", dc=~\"$dc\"}[1m])) by (dc))/on(dc) group_left sum(stddev(rate(scylla_cql_updates{conditional=\"no\", dc=~\"$dc\"}[1m])) by(dc)+100) by(dc))-3",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ },
+ {
+ "expr": "max(abs(rate(scylla_cql_inserts{conditional=\"no\", dc=~\"$dc\"}[1m]) - on(dc) group_left avg(rate(scylla_cql_inserts{conditional=\"no\", dc=~\"$dc\"}[1m])) by (dc))/on(dc) group_left sum(stddev(rate(scylla_cql_inserts{conditional=\"no\", dc=~\"$dc\"}[1m])) by(dc)+100) by(dc))-3",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "B"
+ },
+ {
+ "expr": "max(abs(rate(scylla_cql_reads{ dc=~\"$dc\"}[1m]) - on(dc) group_left avg(rate(scylla_cql_reads{ dc=~\"$dc\"}[1m])) by (dc))/on(dc) group_left sum(stddev(rate(scylla_cql_reads{ dc=~\"$dc\"}[1m])) by(dc)+100) by(dc))-3",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "C"
+ },
+ {
+ "expr": "max(abs(rate(scylla_cql_deletes{conditional=\"no\", dc=~\"$dc\"}[1m]) - on(dc) group_left avg(rate(scylla_cql_deletes{conditional=\"no\", dc=~\"$dc\"}[1m])) by (dc))/on(dc) group_left sum(stddev(rate(scylla_cql_deletes{conditional=\"no\", dc=~\"$dc\"}[1m])) by(dc)+100) by(dc))-3",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "D"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CQL OPs",
+ "transformations": [
+ {
+ "id": "calculateField",
+ "options": {
+ "mode": "reduceRow",
+ "reduce": {
+ "reducer": "max"
+ },
+ "replaceFields": true
+ }
+ }
+ ],
+ "type": "stat"
+ },
+ {
+ "class": "small_stat_error",
+ "datasource": "prometheus",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "mappings": [
+ {
+ "from": "-1000",
+ "id": 1,
+ "text": "✓",
+ "to": "0.001",
+ "type": 2,
+ "value": ""
+ },
+ {
+ "from": "0.001",
+ "id": 2,
+ "text": "⚠",
+ "to": "10000",
+ "type": 2,
+ "value": "0.001"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "orange",
+ "value": 0.001
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 2,
+ "x": 16,
+ "y": 16
+ },
+ "id": 24,
+ "links": [
+ {
+ "title": "A single node with higher latency is an indication of a node-related issue"
+ }
+ ],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7.1.3",
+ "targets": [
+ {
+ "expr": "((max(wlatencyp99{by=\"instance\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"})-scalar(avg(wlatencyp99{by=\"instance\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0)))/(scalar(stddev(wlatencyp99{by=\"instance\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0))+100)-3)",
+ "legendFormat": "",
+ "refId": "A"
+ },
+ {
+ "expr": "((max(rlatencyp99{by=\"instance\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"})-scalar(avg(rlatencyp99{by=\"instance\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0)))/(scalar(stddev(rlatencyp99{by=\"instance\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0))+100)-3)",
+ "legendFormat": "",
+ "refId": "B"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Node Latency",
+ "transformations": [
+ {
+ "id": "calculateField",
+ "options": {
+ "mode": "reduceRow",
+ "reduce": {
+ "reducer": "max"
+ },
+ "replaceFields": true
+ }
+ }
+ ],
+ "type": "stat"
+ },
+ {
+ "class": "small_stat_error",
+ "datasource": "prometheus",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "mappings": [
+ {
+ "from": "-1000",
+ "id": 1,
+ "text": "✓",
+ "to": "0.001",
+ "type": 2,
+ "value": ""
+ },
+ {
+ "from": "0.001",
+ "id": 2,
+ "text": "⚠",
+ "to": "10000",
+ "type": 2,
+ "value": "0.001"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "orange",
+ "value": 0.001
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 2,
+ "x": 18,
+ "y": 16
+ },
+ "id": 25,
+ "links": [
+ {
+ "title": "A single shard with high latency is an indication of a hot-partition, or a large row/cell/partition"
+ }
+ ],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7.1.3",
+ "targets": [
+ {
+ "expr": "((max(wlatencyp99{by=\"instance,shard\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"})-scalar(avg(wlatencyp99{by=\"instance,shard\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0)))/(scalar(stddev(wlatencyp99{by=\"instance,shard\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0))+100)-3)",
+ "legendFormat": "",
+ "refId": "A"
+ },
+ {
+ "expr": "((max(rlatencyp99{by=\"instance,shard\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"})-scalar(avg(rlatencyp99{by=\"instance,shard\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0)))/(scalar(stddev(rlatencyp99{by=\"instance,shard\", cluster=~\"$cluster|$^\",scheduling_group_name!=\"streaming\"}>0))+100)-3)",
+ "legendFormat": "",
+ "refId": "B"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Shard Latency",
+ "transformations": [
+ {
+ "id": "calculateField",
+ "options": {
+ "mode": "reduceRow",
+ "reduce": {
+ "reducer": "max"
+ },
+ "replaceFields": true
+ }
+ }
+ ],
+ "type": "stat"
+ },
+ {
+ "class": "small_stat_error",
+ "datasource": "prometheus",
+ "description": "A shard that reads more from the cache could be an indication of a hot partition",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "mappings": [
+ {
+ "from": "-1000",
+ "id": 1,
+ "text": "✓",
+ "to": "0.001",
+ "type": 2,
+ "value": ""
+ },
+ {
+ "from": "0.001",
+ "id": 2,
+ "text": "⚠",
+ "to": "10000",
+ "type": 2,
+ "value": "0.001"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "orange",
+ "value": 0.001
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 2,
+ "x": 20,
+ "y": 16
+ },
+ "id": 26,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7.1.3",
+ "targets": [
+ {
+ "expr": "((rate(scylla_cache_reads{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m]) - rate(scylla_cache_reads_with_misses{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m]))- scalar(avg(rate(scylla_cache_reads{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m]) - rate(scylla_cache_reads_with_misses{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m]))))/scalar(stddev(rate(scylla_cache_reads{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m]) - rate(scylla_cache_reads_with_misses{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m]))+100)-3",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cache",
+ "transformations": [
+ {
+ "id": "calculateField",
+ "options": {
+ "mode": "reduceRow",
+ "reduce": {
+ "reducer": "max"
+ },
+ "replaceFields": true
+ }
+ }
+ ],
+ "type": "stat"
+ },
+ {
+ "class": "small_stat_error",
+ "datasource": "prometheus",
+ "description": "A single shard that reads more from sstables, could be that a node is slow",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "mappings": [
+ {
+ "from": "-1000",
+ "id": 1,
+ "text": "✓",
+ "to": "0.001",
+ "type": 2,
+ "value": ""
+ },
+ {
+ "from": "0.001",
+ "id": 2,
+ "text": "⚠",
+ "to": "10000",
+ "type": 2,
+ "value": "0.001"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "orange",
+ "value": 0.001
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 2,
+ "x": 22,
+ "y": 16
+ },
+ "id": 27,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7.1.3",
+ "targets": [
+ {
+ "expr": "max(abs(scylla_database_active_reads{ dc=~\"$dc\"} - scalar(avg(scylla_database_active_reads{ dc=~\"$dc\"})))/scalar(stddev(scylla_database_active_reads{ dc=~\"$dc\"})+0.001))-3",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "SSTable",
+ "transformations": [
+ {
+ "id": "calculateField",
+ "options": {
+ "mode": "reduceRow",
+ "reduce": {
+ "reducer": "max"
+ },
+ "replaceFields": true
+ }
+ }
+ ],
+ "type": "stat"
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 22
+ },
+ "id": 28,
+ "panels": [],
+ "repeat": "dc",
+ "title": "",
+ "type": "row"
+ },
+ {
+ "class": "plain_text",
+ "content": "Information for $dc
",
+ "datasource": null,
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 23
+ },
+ "id": 29,
+ "isNew": true,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "span": 12,
+ "style": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "class": "vertical_lcd",
+ "datasource": "prometheus",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "orange",
+ "value": 85
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 1,
+ "x": 0,
+ "y": 25
+ },
+ "id": 30,
+ "options": {
+ "displayMode": "lcd",
+ "orientation": "vertical",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showUnfilled": true
+ },
+ "pluginVersion": "7.1.3",
+ "targets": [
+ {
+ "expr": "avg(scylla_reactor_utilization{cluster=~\"$cluster\", dc=~\"$dc\"} )",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Load",
+ "type": "bargauge"
+ },
+ {
+ "class": "bytes_panel",
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "bytes"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 3,
+ "x": 1,
+ "y": 25
+ },
+ "id": 31,
+ "isNew": true,
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "span": 5,
+ "targets": [
+ {
+ "expr": "Avg(node_filesystem_size_bytes{mountpoint=\"$mount_point\", dc=~\"$dc\"}) by ([[by]])-avg(node_filesystem_avail_bytes{mountpoint=\"$mount_point\", dc=~\"$dc\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "Avg Usage {{[[by]]}}",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "avg(node_filesystem_size_bytes{mountpoint=\"$mount_point\", dc=~\"$dc\"}) by ([[by]])",
+ "interval": "",
+ "legendFormat": "Size {{[[by]]}}",
+ "refId": "B"
+ }
+ ],
+ "title": "Disk Size by $by",
+ "type": "timeseries"
+ },
+ {
+ "class": "graph_panel_int",
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 4,
+ "y": 25
+ },
+ "id": 32,
+ "isNew": true,
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "span": 2,
+ "targets": [
+ {
+ "expr": "$func(scylla_compaction_manager_compactions{cluster=~\"$cluster|$^\", dc=~\"$dc\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "Running Compactions",
+ "type": "timeseries"
+ },
+ {
+ "class": "ops_panel",
+ "datasource": "prometheus",
+ "description": "The Hits and Misses",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "si:ops/s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 8,
+ "y": 25
+ },
+ "id": 33,
+ "isNew": true,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "class": "show_legend",
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "seriesOverrides": [
+ {}
+ ],
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_row_hits{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "Hit {{[[by]]}}",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "$func(rate(scylla_cache_row_misses{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "Misses {{[[by]]}}",
+ "refId": "B",
+ "step": 10
+ }
+ ],
+ "title": "Cache Rows Hits/Misses",
+ "type": "timeseries"
+ },
+ {
+ "class": "ops_panel",
+ "datasource": "prometheus",
+ "description": "Write attempts - include all writes that reached the coordinator node, even if they will eventually fail",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "si:ops/s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 14,
+ "y": 25
+ },
+ "id": 34,
+ "isNew": true,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "class": "show_legend",
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "seriesOverrides": [
+ {
+ "alias": "1 Day Ago",
+ "dashLength": 4,
+ "dashes": true
+ },
+ {
+ "alias": "1 Week Ago",
+ "dashLength": 2,
+ "dashes": true
+ }
+ ],
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "Writes",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m] offset 1d))",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "1 Day Ago",
+ "refId": "B",
+ "step": 1
+ },
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m] offset 1w))",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "1 Week Ago",
+ "refId": "C",
+ "step": 1
+ }
+ ],
+ "title": "Writes",
+ "type": "timeseries"
+ },
+ {
+ "class": "us_panel",
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "µs"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 20,
+ "y": 25
+ },
+ "id": 35,
+ "isNew": true,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "class": "show_legend",
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "seriesOverrides": [
+ {}
+ ],
+ "span": 2,
+ "targets": [
+ {
+ "expr": "avg(wlatencyp95{by=\"[[by]]\", cluster=~\"$cluster|$^\", dc=~\"$dc\",scheduling_group_name!=\"streaming\"}>0) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "95% {{[[by]]}}",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "avg(wlatencyp99{by=\"[[by]]\", cluster=~\"$cluster|$^\", dc=~\"$dc\",scheduling_group_name!=\"streaming\"}>0) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "99% {{[[by]]}}",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "title": "Write Latencies",
+ "type": "timeseries"
+ },
+ {
+ "class": "ops_panel",
+ "datasource": "prometheus",
+ "description": "Requests that Scylla tried to write but timed out. Timeouts are counted in the node that received the request (the coordinator), not at the replicas.",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "si:ops/s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 0,
+ "y": 31
+ },
+ "id": 36,
+ "isNew": true,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "class": "show_legend",
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "seriesOverrides": [
+ {}
+ ],
+ "span": 2,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_write_timeouts{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "Writes {{[[by]]}}",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Write Timeouts by [[by]]",
+ "type": "timeseries"
+ },
+ {
+ "class": "ops_panel",
+ "datasource": "prometheus",
+ "description": "Requests that Scylla tried to read but timed out. Timeouts are counted in the node that received the request (the coordinator), not at the replicas.",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "si:ops/s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 4,
+ "y": 31
+ },
+ "id": 37,
+ "isNew": true,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "class": "show_legend",
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "seriesOverrides": [
+ {}
+ ],
+ "span": 2,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_read_timeouts{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])+rate(scylla_storage_proxy_coordinator_cas_read_timeouts{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])+rate(scylla_storage_proxy_coordinator_range_timeouts{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "Read {{[[by]]}}",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "title": "Read Timeouts by [[by]]",
+ "type": "timeseries"
+ },
+ {
+ "class": "ops_panel",
+ "datasource": "prometheus",
+ "description": "The Hits and Misses",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "si:ops/s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 8,
+ "y": 31
+ },
+ "id": 38,
+ "isNew": true,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "class": "show_legend",
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "seriesOverrides": [
+ {}
+ ],
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_cache_reads{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])-$func(rate(scylla_cache_reads_with_misses{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "Hit {{[[by]]}}",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "$func(rate(scylla_cache_reads_with_misses{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "Misses {{[[by]]}}",
+ "refId": "B",
+ "step": 10
+ }
+ ],
+ "title": "Cache Reads Hits/Misses",
+ "type": "timeseries"
+ },
+ {
+ "class": "ops_panel",
+ "datasource": "prometheus",
+ "description": "Read attempts - include all reads that reached the coordinator node, even if they will eventually fail",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "si:ops/s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 14,
+ "y": 31
+ },
+ "id": 39,
+ "isNew": true,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "class": "show_legend",
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "seriesOverrides": [
+ {
+ "alias": "1 Day Ago",
+ "dashLength": 4,
+ "dashes": true
+ },
+ {
+ "alias": "1 Week Ago",
+ "dashLength": 2,
+ "dashes": true
+ }
+ ],
+ "span": 3,
+ "targets": [
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{cluster=~\"$cluster|$^\", dc=~\"$dc\",scheduling_group_name!=\"streaming\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "Reads",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{cluster=~\"$cluster|$^\", dc=~\"$dc\",scheduling_group_name!=\"streaming\"}[1m] offset 1d))",
+ "intervalFactor": 1,
+ "legendFormat": "1 Day Ago",
+ "refId": "B",
+ "step": 1
+ },
+ {
+ "expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{cluster=~\"$cluster|$^\", dc=~\"$dc\",scheduling_group_name!=\"streaming\"}[1m] offset 1w))",
+ "intervalFactor": 1,
+ "legendFormat": "1 Week Ago",
+ "refId": "C",
+ "step": 1
+ }
+ ],
+ "title": "Reads",
+ "type": "timeseries"
+ },
+ {
+ "class": "us_panel",
+ "datasource": "prometheus",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "µs"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 20,
+ "y": 31
+ },
+ "id": 40,
+ "isNew": true,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "class": "show_legend",
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "seriesOverrides": [
+ {}
+ ],
+ "span": 2,
+ "targets": [
+ {
+ "expr": "avg(rlatencyp95{by=\"[[by]]\", cluster=~\"$cluster|$^\", dc=~\"$dc\",scheduling_group_name!=\"streaming\"}>0) by([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "95% {{[[by]]}}",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "avg(rlatencyp99{by=\"[[by]]\", cluster=~\"$cluster|$^\", dc=~\"$dc\",scheduling_group_name!=\"streaming\"}>0) by([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "99% {{[[by]]}}",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "title": "Read Latencies",
+ "type": "timeseries"
+ },
+ {
+ "class": "ops_panel",
+ "datasource": "prometheus",
+ "description": "Number of CQL INSERT commands generated by the user",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "si:ops/s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 0,
+ "y": 37
+ },
+ "id": 41,
+ "isNew": true,
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "seriesOverrides": [
+ {}
+ ],
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_inserts{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]]) - sum(rate(scylla_cql_inserts_per_ks{cluster=~\"$cluster|$^\", dc=~\"$dc\", who=\"internal\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "CQL Insert",
+ "type": "timeseries"
+ },
+ {
+ "class": "ops_panel",
+ "datasource": "prometheus",
+ "description": "Number of CQL SELECT commands generated by the user",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "si:ops/s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 6,
+ "y": 37
+ },
+ "id": 42,
+ "isNew": true,
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "seriesOverrides": [
+ {}
+ ],
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_reads{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]]) - sum(rate(scylla_cql_reads_per_ks{cluster=~\"$cluster|$^\", dc=~\"$dc\", who=\"internal\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "CQL Reads",
+ "type": "timeseries"
+ },
+ {
+ "class": "ops_panel",
+ "datasource": "prometheus",
+ "description": "Number of CQL DELETE commands generated by the user",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "si:ops/s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 12,
+ "y": 37
+ },
+ "id": 43,
+ "isNew": true,
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "seriesOverrides": [
+ {}
+ ],
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_deletes{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])-sum(rate(scylla_cql_deletes_per_ks{cluster=~\"$cluster|$^\", dc=~\"$dc\", who=\"internal\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "CQL Deletes",
+ "type": "timeseries"
+ },
+ {
+ "class": "ops_panel",
+ "datasource": "prometheus",
+ "description": "Number of CQL UPDATE commands generated by the user",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "si:ops/s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 18,
+ "y": 37
+ },
+ "id": 44,
+ "isNew": true,
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "seriesOverrides": [
+ {}
+ ],
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_updates{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])-sum(rate(scylla_cql_updates_per_ks{cluster=~\"$cluster|$^\", dc=~\"$dc\", who=\"internal\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "title": "CQL Updates",
+ "type": "timeseries"
+ },
+ {
+ "class": "graph_panel",
+ "datasource": "prometheus",
+ "description": "amount of CQL connections currently established",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 0,
+ "y": 43
+ },
+ "id": 45,
+ "isNew": true,
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "pointradius": 1,
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(scylla_transport_current_connections{cluster=~\"$cluster|$^\", dc=~\"$dc\"}) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "title": "Client CQL connections by [[by]]",
+ "type": "timeseries"
+ },
+ {
+ "class": "graph_panel",
+ "datasource": "prometheus",
+ "description": "Number of CQL batches command, each batched command is counted once",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 6,
+ "y": 43
+ },
+ "id": 46,
+ "isNew": true,
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "pointradius": 1,
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_batches{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "title": "CQL Batches by [[by]]",
+ "type": "timeseries"
+ },
+ {
+ "class": "graph_panel",
+ "datasource": "prometheus",
+ "description": "Number of CQL command batched. Each batch would add the number of commands inside the batch",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 12,
+ "y": 43
+ },
+ "id": 47,
+ "isNew": true,
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "pointradius": 1,
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_statements_in_batches{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "title": "CQL Command In Batches by [[by]]",
+ "type": "timeseries"
+ },
+ {
+ "class": "ops_panel",
+ "datasource": "prometheus",
+ "description": "Counts the number of SELECT statements with BYPASS CACHE option",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "si:ops/s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 18,
+ "y": 43
+ },
+ "id": 48,
+ "isNew": true,
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "seriesOverrides": [
+ {}
+ ],
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_select_bypass_caches{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "title": "BYPASS CACHE",
+ "type": "timeseries"
+ },
+ {
+ "class": "graph_panel",
+ "dashversion": [
+ ">4.4",
+ ">2021.1"
+ ],
+ "datasource": "prometheus",
+ "description": "CQL errors by type, only active errors are shown",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 0,
+ "y": 49
+ },
+ "id": 49,
+ "isNew": true,
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "pointradius": 1,
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_transport_cql_errors_total{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]],type) >0",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "title": "CQL Errors [[by]]",
+ "type": "timeseries"
+ },
+ {
+ "class": "graph_panel",
+ "datasource": "prometheus",
+ "description": "Number of CQL row reads",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 6,
+ "y": 49
+ },
+ "id": 50,
+ "isNew": true,
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "pointradius": 1,
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_rows_read{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "title": "CQL Row Reads [[by]]",
+ "type": "timeseries"
+ },
+ {
+ "class": "graph_panel",
+ "datasource": "prometheus",
+ "description": "Number of reads using secondary indexes",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 12,
+ "y": 49
+ },
+ "id": 51,
+ "isNew": true,
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "pointradius": 1,
+ "span": 3,
+ "targets": [
+ {
+ "expr": "sum(rate(scylla_cql_secondary_index_reads{cluster=~\"$cluster|$^\", dc=~\"$dc\"}[1m])) by ([[by]])",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "title": "Secondary indexes Reads [[by]]",
+ "type": "timeseries"
+ },
+ {
+ "class": "collapsible_row_panel",
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 55
+ },
+ "id": 52,
+ "panels": [],
+ "repeat": "",
+ "title": "Your panels",
+ "type": "row"
+ },
+ {
+ "class": "plain_text",
+ "datasource": null,
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 56
+ },
+ "id": 53,
+ "isNew": true,
+ "links": [],
+ "mode": "html",
+ "options": {
+ "content": "Your Panels
",
+ "mode": "html"
+ },
+ "span": 12,
+ "style": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "class": "user_panel",
+ "datasource": "prometheus",
+ "description": "This graph panel was left empty on purpose for ad-hoc usage. Change it when needed. Pay attention that changes to the panel will not be saved.\n\nIf you do need a panel that can be saved, create a new dashboard, or edit the panel inside the json file",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "si:ops/s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 12,
+ "x": 0,
+ "y": 58
+ },
+ "id": 54,
+ "isNew": true,
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "seriesOverrides": [
+ {}
+ ],
+ "span": 6,
+ "title": "Your Graph here",
+ "type": "timeseries"
+ },
+ {
+ "class": "user_panel",
+ "datasource": "prometheus",
+ "description": "This graph panel was left empty on purpose for ad-hoc usage. Change it when needed. Pay attention that changes to the panel will not be saved.\n\nIf you do need a panel that can be saved, create a new dashboard, or edit the panel inside the json file",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "class": "fieldConfig_defaults",
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "axisSoftMin": 0,
+ "barAlignment": 0,
+ "class": "fieldConfig_defaults_custom",
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "unit": "si:ops/s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 12,
+ "x": 12,
+ "y": 58
+ },
+ "id": 55,
+ "isNew": true,
+ "links": [],
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "hidden",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "asc"
+ }
+ },
+ "seriesOverrides": [
+ {}
+ ],
+ "span": 6,
+ "title": "Your Graph here",
+ "type": "timeseries"
+ },
+ {
+ "class": "plain_text",
+ "datasource": null,
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 24,
+ "x": 0,
+ "y": 64
+ },
+ "id": 56,
+ "isNew": true,
+ "links": [],
+ "mode": "html",
+ "options": {
+ "content": "Scylla Monitoring version - master
",
+ "mode": "html"
+ },
+ "span": 12,
+ "style": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ }
+ ],
+ "refresh": "30s",
+ "schemaVersion": 26,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "allValue": null,
+ "class": "by_template_var",
+ "current": {
+ "tags": [],
+ "text": "DC",
+ "value": "dc"
+ },
+ "error": null,
+ "hide": 0,
+ "includeAll": false,
+ "label": "by",
+ "multi": false,
+ "name": "by",
+ "options": [
+ {
+ "selected": false,
+ "text": "Cluster",
+ "value": "cluster"
+ },
+ {
+ "selected": true,
+ "text": "DC",
+ "value": "dc"
+ }
+ ],
+ "query": "Cluster,DC,Instance,Shard",
+ "skipUrlSync": false,
+ "type": "custom"
+ },
+ {
+ "allValue": null,
+ "class": "template_variable_single",
+ "current": {
+ "isNone": true,
+ "selected": false,
+ "text": "None",
+ "value": ""
+ },
+ "datasource": "prometheus",
+ "definition": "",
+ "error": null,
+ "hide": 0,
+ "includeAll": false,
+ "label": "cluster",
+ "multi": false,
+ "name": "cluster",
+ "options": [],
+ "query": "label_values(scylla_reactor_utilization, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "class": "template_variable_all",
+ "current": {
+ "selected": true,
+ "text": [
+ "All"
+ ],
+ "value": [
+ "$__all"
+ ]
+ },
+ "datasource": "prometheus",
+ "definition": "",
+ "error": null,
+ "hide": 0,
+ "includeAll": true,
+ "label": "dc",
+ "multi": true,
+ "name": "dc",
+ "options": [],
+ "query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster\"}, dc)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "class": "template_variable_single",
+ "current": {
+ "text": "/var/lib/scylla",
+ "value": "/var/lib/scylla"
+ },
+ "datasource": "prometheus",
+ "definition": "",
+ "error": null,
+ "hide": 0,
+ "includeAll": false,
+ "label": "Mount path",
+ "multi": false,
+ "name": "mount_point",
+ "options": [
+ {
+ "selected": true,
+ "text": "/var/lib/scylla",
+ "value": "/var/lib/scylla"
+ }
+ ],
+ "query": "/var/lib/scylla",
+ "skipUrlSync": false,
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "custom",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "class": "aggregation_function",
+ "current": {
+ "tags": [],
+ "text": "sum",
+ "value": "sum"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "Function",
+ "multi": false,
+ "name": "func",
+ "options": [
+ {
+ "selected": true,
+ "text": "sum",
+ "value": "sum"
+ },
+ {
+ "selected": false,
+ "text": "avg",
+ "value": "avg"
+ },
+ {
+ "selected": false,
+ "text": "max",
+ "value": "max"
+ },
+ {
+ "selected": false,
+ "text": "min",
+ "value": "min"
+ },
+ {
+ "selected": false,
+ "text": "stddev",
+ "value": "stddev"
+ },
+ {
+ "selected": false,
+ "text": "stdvar",
+ "value": "stdvar"
+ }
+ ],
+ "query": "sum,avg,max,min,stddev,stdvar",
+ "skipUrlSync": false,
+ "type": "custom"
+ },
+ {
+ "allValue": null,
+ "class": "template_variable_all",
+ "current": {
+ "selected": true,
+ "text": [
+ "All"
+ ],
+ "value": [
+ "$__all"
+ ]
+ },
+ "datasource": "prometheus",
+ "definition": "",
+ "error": null,
+ "hide": 2,
+ "includeAll": true,
+ "multi": true,
+ "name": "all_scyllas_versions",
+ "options": [],
+ "query": "label_values(scylla_scylladb_current_version{cluster=~\"$cluster|$^\"}, version)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "class": "template_variable_all",
+ "current": {
+ "selected": true,
+ "text": [
+ "All"
+ ],
+ "value": [
+ "$__all"
+ ]
+ },
+ "datasource": "prometheus",
+ "definition": "query_result(count(up{job=~\"$cluster|$^\"}) by (dc))",
+ "error": null,
+ "hide": 2,
+ "includeAll": true,
+ "multi": true,
+ "name": "count_dc",
+ "options": [],
+ "query": {
+ "query": "query_result(count(up{job=~\"$cluster|$^\"}) by (dc))",
+ "refId": "StandardVariableQuery"
+ },
+ "refresh": 2,
+ "regex": "/(?\\{dc=\"[^\"]+\".* \\d+) .*/",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "class": "monitor_version_var",
+ "current": {
+ "text": "master",
+ "value": "master"
+ },
+ "error": null,
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "monitoring_version",
+ "options": [
+ {
+ "selected": true,
+ "text": "master",
+ "value": "master"
+ }
+ ],
+ "query": "master",
+ "skipUrlSync": false,
+ "type": "custom"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-30m",
+ "to": "now"
+ },
+ "timepicker": {
+ "now": true,
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "utc",
+ "title": "CQL Overview",
+ "uid": "cql-overview",
+ "version": 1
+}
diff --git a/assets/monitoring/grafana/v1alpha1/registry.go b/assets/monitoring/grafana/v1alpha1/registry.go
index 84bfb1fddb5..5a09834fd25 100644
--- a/assets/monitoring/grafana/v1alpha1/registry.go
+++ b/assets/monitoring/grafana/v1alpha1/registry.go
@@ -1,9 +1,14 @@
package v1alpha1
import (
+ "embed"
_ "embed"
+ "fmt"
+ "io/fs"
+ "path/filepath"
"github.com/scylladb/scylla-operator/pkg/assets"
+ "github.com/scylladb/scylla-operator/pkg/helpers"
"github.com/scylladb/scylla-operator/pkg/scheme"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
@@ -36,13 +41,17 @@ var (
grafanaProvisioningConfigMapTemplateString string
GrafanaProvisioningConfigMapTemplate = ParseObjectTemplateOrDie[*corev1.ConfigMap]("grafana-provisioning-cm", grafanaProvisioningConfigMapTemplateString)
- //go:embed "dashboards-platform.cm.yaml"
- grafanaDashboardsPlatformConfigMapTemplateString string
- GrafanaDashboardsPlatformConfigMapTemplate = ParseObjectTemplateOrDie[*corev1.ConfigMap]("grafana-dashboards-platform-cm", grafanaDashboardsPlatformConfigMapTemplateString)
+ //go:embed "dashboards.cm.yaml"
+ grafanaDashboardsConfigMapTemplateString string
+ GrafanaDashboardsConfigMapTemplate = ParseObjectTemplateOrDie[*corev1.ConfigMap]("grafana-dashboards-cm", grafanaDashboardsConfigMapTemplateString)
- //go:embed "dashboards-saas.cm.yaml"
- grafanaDashboardsSAASConfigMapTemplateString string
- GrafanaDashboardsSAASConfigMapTemplate = ParseObjectTemplateOrDie[*corev1.ConfigMap]("grafana-dashboards-saas-cm", grafanaDashboardsSAASConfigMapTemplateString)
+ //go:embed "dashboards/platform/*.template.json"
+ grafanaDashboardsPlatformFS embed.FS
+ GrafanaDashboardsPlatform = helpers.Must(parseDashboardsFromFS(grafanaDashboardsPlatformFS, "dashboards/platform"))
+
+ //go:embed "dashboards/saas/*.template.json"
+ grafanaDashboardsSAASFS embed.FS
+ GrafanaDashboardsSAAS = helpers.Must(parseDashboardsFromFS(grafanaDashboardsSAASFS, "dashboards/saas"))
//go:embed "service.yaml"
grafanaServiceTemplateString string
@@ -52,3 +61,34 @@ var (
grafanaIngressTemplateString string
GrafanaIngressTemplate = ParseObjectTemplateOrDie[*networkingv1.Ingress]("grafana-ingress", grafanaIngressTemplateString)
)
+
+// parseDashboardsFromFS reads every file found under root in files and returns
+// the file contents keyed by the file's path relative to root.
+//
+// Keys always use forward slashes, matching the slash-separated names used by
+// io/fs, so they do not depend on the host OS. The root entry itself and all
+// directories are skipped. If walking, reading, or computing a relative path
+// fails, a nil map is returned together with the wrapped error.
+func parseDashboardsFromFS(files embed.FS, root string) (map[string][]byte, error) {
+	res := map[string][]byte{}
+
+	err := fs.WalkDir(files, root, func(p string, de fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+
+		if p == root || de.IsDir() {
+			return nil
+		}
+
+		data, err := fs.ReadFile(files, p)
+		if err != nil {
+			return fmt.Errorf("can't read file %q: %w", p, err)
+		}
+
+		relPath, err := filepath.Rel(root, p)
+		if err != nil {
+			return fmt.Errorf("can't compute relative path to file %q: %w", p, err)
+		}
+
+		// filepath.Rel yields OS-specific separators; convert back to the
+		// slash-separated form so keys are identical on every platform.
+		res[filepath.ToSlash(relPath)] = data
+
+		return nil
+	})
+	if err != nil {
+		return nil, fmt.Errorf("can't walk fs: %w", err)
+	}
+
+	return res, nil
+}
diff --git a/assets/monitoring/prometheus/v1/alerts.prometheusrule.yaml b/assets/monitoring/prometheus/v1/alerts.prometheusrule.yaml
index 66a7151997d..b00df56a325 100644
--- a/assets/monitoring/prometheus/v1/alerts.prometheusrule.yaml
+++ b/assets/monitoring/prometheus/v1/alerts.prometheusrule.yaml
@@ -5,174 +5,151 @@ metadata:
labels:
scylla-operator.scylladb.com/scylladbmonitoring-name: "{{ .scyllaDBMonitoringName }}"
spec:
+{{-`
groups:
- name: scylla.rules
- rules: {{`
+ rules:
- alert: cqlNonPrepared
expr: cql:non_prepared > 0
for: 10s
labels:
- severity: "1"
+ severity: "info"
advisor: "cqlOptimization"
dashboard: "cql"
- annotations:
description: 'Some queries are non-prepared'
summary: non prepared statments
- alert: cql:non_paged_no_system
expr: cql:non_paged > 0
for: 10s
labels:
- severity: "1"
+ severity: "info"
advisor: "cqlOptimization"
dashboard: "cql"
status: "1"
- annotations:
description: 'Some SELECT queries are non-paged'
summary: non paged statments
- alert: cqlNoTokenAware
expr: cql:non_token_aware > 0
for: 10s
labels:
- severity: "1"
+ severity: "info"
advisor: "cqlOptimization"
dashboard: "cql"
- annotations:
description: 'Some queries are not token-aware'
summary: non token aware statments
- - alert: cqlReverseOrder
- expr: cql:reverse_queries > 0
- for: 10s
- labels:
- severity: "1"
- advisor: "cqlOptimization"
- dashboard: "cql"
- annotations:
- description: 'Some queries use reverse order'
- summary: reverse order queries
- alert: cqlAllowFiltering
expr: cql:allow_filtering > 0
for: 10s
labels:
- severity: "1"
+ severity: "info"
advisor: "cqlOptimization"
dashboard: "cql"
- annotations:
description: 'Some queries use ALLOW FILTERING'
summary: Allow filtering queries
- alert: cqlCLAny
expr: cql:any_queries > 0
for: 10s
labels:
- severity: "1"
+ severity: "info"
advisor: "cqlOptimization"
dashboard: "cql"
- annotations:
description: 'Some queries use Consistency Level: ANY'
summary: non prepared statments
- alert: cqlCLAll
expr: cql:all_queries > 0
for: 10s
labels:
- severity: "1"
+ severity: "info"
advisor: "cqlOptimization"
dashboard: "cql"
- annotations:
description: 'Some queries use Consistency Level: ALL'
summary: non prepared statments
- alert: nonBalancedcqlTraffic
expr: abs(rate(scylla_cql_updates{conditional="no"}[1m]) - scalar(avg(rate(scylla_cql_updates{conditional="no"}[1m]))))/scalar(stddev(rate(scylla_cql_updates{conditional="no"}[1m]))+100) > 2
- for: 10s
+ for: 3m
labels:
- severity: "1"
+ severity: "info"
status: "1"
advisor: "balanced"
dashboard: "cql"
- annotations:
description: 'CQL queries are not balanced among shards {{ $labels.instance }} shard {{ $labels.shard }}'
summary: CQL queries are not balanced
- alert: nodeLocalErrors
expr: sum(errors:local_failed) by (cluster, instance) > 0
for: 10s
labels:
- severity: "1"
+ severity: "info"
advisor: "operationError"
dashboard: "scylla-detailed"
- annotations:
description: 'Some operation failed at the replica side'
summary: Replica side Level error
- alert: nodeIOErrors
expr: sum(rate(scylla_reactor_aio_errors[60s])) by (cluster, instance) > 0
for: 10s
labels:
- severity: "1"
+ severity: "info"
advisor: "operationError"
dashboard: "OS-master"
- annotations:
description: 'IO Errors can indicate a node with a faulty disk {{ $labels.instance }}'
summary: IO Disk Error
- alert: nodeCLErrors
expr: sum(errors:operation_unavailable) by (cluster) > 0
for: 10s
labels:
- severity: "1"
+ severity: "info"
advisor: "operationError"
dashboard: "scylla-detailed"
- annotations:
description: 'Some operation failed due to consistency level'
summary: Consistency Level error
- alert: preparedCacheEviction
expr: sum(rate(scylla_cql_prepared_cache_evictions[2m])) by (cluster) + sum(rate(scylla_cql_authorized_prepared_statements_cache_evictions[2m])) by (cluster) > 100
for: 5m
labels:
- severity: "1"
+ severity: "info"
advisor: "preparedEviction"
dashboard: "scylla-detailed"
- annotations:
description: 'The prepared-statement cache is being continuously evicted, which could indicate a problem in your prepared-statement usage logic.'
summary: Prepared cache eviction
- alert: heavyCompaction
expr: max(scylla_scheduler_shares{group="compaction"}) by (cluster) >= 1000
for: 20m
labels:
- severity: "1"
+ severity: "info"
advisor: "heavyCompaction"
dashboard: "scylla-detailed"
- annotations:
description: 'Compaction load increases to a level it can interfere with the system behaviour. If this persists set the compaction share to a static level.'
summary: Heavy compaction load
- alert: shedRequests
expr: max(sum(rate(scylla_transport_requests_shed[60s])) by (instance,cluster)/sum(rate(scylla_transport_requests_served{}[60s])) by (instance, cluster)) by(cluster) > 0.01
for: 5m
labels:
- severity: "1"
+ severity: "info"
advisor: "systemOverload"
dashboard: "scylla-detailed"
- annotations:
description: 'More than 1% of the requests got shed, this is an indication of an overload, consider system resize.'
summary: System is overloaded
- alert: cappedTombstone
expr: changes(scylla_sstables_capped_tombstone_deletion_time[1h]) > 0
for: 1m
labels:
- severity: "1"
+ severity: "info"
advisor: "cappedTombstone"
dashboard: "scylla-detailed"
- annotations:
description: 'Tombstone delete time was set too far in the future and was capped'
summary: Tobmstone delete time is capped
- alert: InstanceDown
expr: up{job="scylla"} == 0
for: 30s
labels:
- severity: "2"
+ severity: "warn"
annotations:
description: '{{ $labels.instance }} has been down for more than 30 seconds.'
summary: Instance {{ $labels.instance }} down
- alert: InstanceDown
- expr: absent(scylla_transport_requests_served{job="scylla", shard="0"})
+ expr: sum(up{job="scylla"}>0)by(instance) unless sum(scylla_transport_requests_served{shard="0"}) by(instance)
for: 1m
labels:
- severity: "2"
+ severity: "warn"
annotations:
description: '{{ $labels.instance }} instance is shutting down.'
summary: Instance {{ $labels.instance }} down
@@ -180,7 +157,7 @@ spec:
expr: scylla_node_operation_mode > 3
for: 30s
labels:
- severity: "2"
+ severity: "warn"
annotations:
description: '{{ $labels.instance }} instance is shutting down.'
summary: Instance {{ $labels.instance }} down
@@ -189,7 +166,7 @@ spec:
* 100 < 35
for: 30s
labels:
- severity: "2"
+ severity: "warn"
annotations:
description: '{{ $labels.instance }} has less than 35% free disk space.'
summary: Instance {{ $labels.instance }} low disk space
@@ -198,7 +175,7 @@ spec:
* 100 < 25
for: 30s
labels:
- severity: "3"
+ severity: "error"
annotations:
description: '{{ $labels.instance }} has less than 25% free disk space.'
summary: Instance {{ $labels.instance }} low disk space
@@ -207,7 +184,7 @@ spec:
* 100 < 15
for: 30s
labels:
- severity: "4"
+ severity: "critical"
annotations:
description: '{{ $labels.instance }} has less than 15% free disk space.'
summary: Instance {{ $labels.instance }} low disk space
@@ -216,7 +193,7 @@ spec:
* 100 < 20
for: 30s
labels:
- severity: "3"
+ severity: "error"
annotations:
description: '{{ $labels.instance }} has less than 20% free disk space on the root partition.'
summary: Instance {{ $labels.instance }} low disk space
@@ -224,7 +201,7 @@ spec:
expr: scylla_manager_healthcheck_cql_status == -1
for: 30s
labels:
- severity: "2"
+ severity: "warn"
annotations:
description: '{{ $labels.host }} has denied CQL connection for more than 30 seconds.'
summary: Instance {{ $labels.host }} no CQL connection
@@ -232,7 +209,7 @@ spec:
expr: wlatencyp95{by="instance"} > 100000
for: 5m
labels:
- severity: "1"
+ severity: "info"
annotations:
description: '{{ $labels.instance }} has 95% high latency for more than 5 minutes.'
summary: Instance {{ $labels.instance }} High Write Latency
@@ -240,7 +217,7 @@ spec:
expr: wlatencya{by="instance"} >10000
for: 5m
labels:
- severity: "1"
+ severity: "info"
annotations:
description: '{{ $labels.instance }} has average high latency for more than 5 minutes.'
summary: Instance {{ $labels.instance }} High Write Latency
@@ -248,7 +225,7 @@ spec:
expr: rlatencyp95{by="instance"} > 100000
for: 5m
labels:
- severity: "1"
+ severity: "info"
annotations:
description: '{{ $labels.instance }} has 95% high latency for more than 5 minutes.'
summary: Instance {{ $labels.instance }} High Read Latency
@@ -256,7 +233,7 @@ spec:
expr: rlatencya{by="instance"} >10000
for: 5m
labels:
- severity: "1"
+ severity: "info"
annotations:
description: '{{ $labels.instance }} has average high latency for more than 5 minutes.'
summary: Instance {{ $labels.instance }} High Read Latency
@@ -264,7 +241,7 @@ spec:
expr: (sum(scylla_manager_scheduler_run_total{type=~"backup", status="ERROR"}) or vector(0)) - (sum(scylla_manager_scheduler_run_total{type=~"backup", status="ERROR"} offset 3m) or vector(0)) > 0
for: 10s
labels:
- severity: "1"
+ severity: "info"
annotations:
description: 'Backup failed'
summary: Backup task failed
@@ -272,7 +249,7 @@ spec:
expr: (sum(scylla_manager_scheduler_run_total{type=~"repair", status="ERROR"}) or vector(0)) - (sum(scylla_manager_scheduler_run_total{type=~"repair", status="ERROR"} offset 3m) or vector(0)) > 0
for: 10s
labels:
- severity: "1"
+ severity: "info"
annotations:
description: 'Repair failed'
summary: Repair task failed
@@ -280,7 +257,7 @@ spec:
expr: resets(scylla_gossip_heart_beat[1h])>0
for: 10s
labels:
- severity: "1"
+ severity: "info"
annotations:
description: 'Node restarted'
summary: Instance {{ $labels.instance }} restarted
@@ -288,8 +265,29 @@ spec:
expr: changes(node_vmstat_oom_kill[1h])>0
for: 10s
labels:
- severity: "2"
+ severity: "warn"
annotations:
description: 'OOM Kill on {{ $labels.instance }}'
summary: A process was terminated on Instance {{ $labels.instance }}
+ - alert: tooManyFiles
+ expr: (node_filesystem_files - node_filesystem_files_free) / on(instance) group_left count(scylla_reactor_cpu_busy_ms) by (instance)>20000
+ for: 10s
+ labels:
+ severity: "info"
+ description: 'Over 20k open files per shard {{ $labels.instance }}'
+ summary: There are over 20K open files per shard on Insace {{ $labels.instance }}
+ - alert: tooManyFiles
+ expr: (node_filesystem_files - node_filesystem_files_free) / on(instance) group_left count(scylla_reactor_cpu_busy_ms) by (instance)>30000
+ for: 10s
+ labels:
+ severity: "warn"
+ description: 'Over 30k open files per shard {{ $labels.instance }}'
+ summary: There are over 30K open files per shard on Insace {{ $labels.instance }}
+ - alert: tooManyFiles
+ expr: (node_filesystem_files - node_filesystem_files_free) / on(instance) group_left count(scylla_reactor_cpu_busy_ms) by (instance)>40000
+ for: 10s
+ labels:
+ severity: "error"
+ description: 'Over 40k open files per shard {{ $labels.instance }}'
+ summary: There are over 40K open files per shard on Insace {{ $labels.instance }}
`}}
diff --git a/assets/monitoring/prometheus/v1/recording.prometheusrule.yaml b/assets/monitoring/prometheus/v1/recording.prometheusrule.yaml
index 9b060674808..a81dbb267a3 100644
--- a/assets/monitoring/prometheus/v1/recording.prometheusrule.yaml
+++ b/assets/monitoring/prometheus/v1/recording.prometheusrule.yaml
@@ -5,285 +5,501 @@ metadata:
labels:
scylla-operator.scylladb.com/scylladbmonitoring-name: "{{ .scyllaDBMonitoringName }}"
spec:
-{{`
+{{- `
groups:
- name: scylla.rules
rules:
- - alert: cqlNonPrepared
- expr: cql:non_prepared > 0
- for: 10s
- labels:
- severity: "info"
- advisor: "cqlOptimization"
- dashboard: "cql"
- description: 'Some queries are non-prepared'
- summary: non prepared statments
- - alert: cql:non_paged_no_system
- expr: cql:non_paged > 0
- for: 10s
- labels:
- severity: "info"
- advisor: "cqlOptimization"
- dashboard: "cql"
- status: "1"
- description: 'Some SELECT queries are non-paged'
- summary: non paged statments
- - alert: cqlNoTokenAware
- expr: cql:non_token_aware > 0
- for: 10s
- labels:
- severity: "info"
- advisor: "cqlOptimization"
- dashboard: "cql"
- description: 'Some queries are not token-aware'
- summary: non token aware statments
- - alert: cqlAllowFiltering
- expr: cql:allow_filtering > 0
- for: 10s
- labels:
- severity: "info"
- advisor: "cqlOptimization"
- dashboard: "cql"
- description: 'Some queries use ALLOW FILTERING'
- summary: Allow filtering queries
- - alert: cqlCLAny
- expr: cql:any_queries > 0
- for: 10s
- labels:
- severity: "info"
- advisor: "cqlOptimization"
- dashboard: "cql"
- description: 'Some queries use Consistency Level: ANY'
- summary: non prepared statments
- - alert: cqlCLAll
- expr: cql:all_queries > 0
- for: 10s
- labels:
- severity: "info"
- advisor: "cqlOptimization"
- dashboard: "cql"
- description: 'Some queries use Consistency Level: ALL'
- summary: non prepared statments
- - alert: nonBalancedcqlTraffic
- expr: abs(rate(scylla_cql_updates{conditional="no"}[1m]) - scalar(avg(rate(scylla_cql_updates{conditional="no"}[1m]))))/scalar(stddev(rate(scylla_cql_updates{conditional="no"}[1m]))+100) > 2
- for: 3m
- labels:
- severity: "info"
- status: "1"
- advisor: "balanced"
- dashboard: "cql"
- description: 'CQL queries are not balanced among shards {{ $labels.instance }} shard {{ $labels.shard }}'
- summary: CQL queries are not balanced
- - alert: nodeLocalErrors
- expr: sum(errors:local_failed) by (cluster, instance) > 0
- for: 10s
- labels:
- severity: "info"
- advisor: "operationError"
- dashboard: "scylla-detailed"
- description: 'Some operation failed at the replica side'
- summary: Replica side Level error
- - alert: nodeIOErrors
- expr: sum(rate(scylla_reactor_aio_errors[60s])) by (cluster, instance) > 0
- for: 10s
- labels:
- severity: "info"
- advisor: "operationError"
- dashboard: "OS-master"
- description: 'IO Errors can indicate a node with a faulty disk {{ $labels.instance }}'
- summary: IO Disk Error
- - alert: nodeCLErrors
- expr: sum(errors:operation_unavailable) by (cluster) > 0
- for: 10s
- labels:
- severity: "info"
- advisor: "operationError"
- dashboard: "scylla-detailed"
- description: 'Some operation failed due to consistency level'
- summary: Consistency Level error
- - alert: preparedCacheEviction
- expr: sum(rate(scylla_cql_prepared_cache_evictions[2m])) by (cluster) + sum(rate(scylla_cql_authorized_prepared_statements_cache_evictions[2m])) by (cluster) > 100
- for: 5m
- labels:
- severity: "info"
- advisor: "preparedEviction"
- dashboard: "scylla-detailed"
- description: 'The prepared-statement cache is being continuously evicted, which could indicate a problem in your prepared-statement usage logic.'
- summary: Prepared cache eviction
- - alert: heavyCompaction
- expr: max(scylla_scheduler_shares{group="compaction"}) by (cluster) >= 1000
- for: 20m
- labels:
- severity: "info"
- advisor: "heavyCompaction"
- dashboard: "scylla-detailed"
- description: 'Compaction load increases to a level it can interfere with the system behaviour. If this persists set the compaction share to a static level.'
- summary: Heavy compaction load
- - alert: shedRequests
- expr: max(sum(rate(scylla_transport_requests_shed[60s])) by (instance,cluster)/sum(rate(scylla_transport_requests_served{}[60s])) by (instance, cluster)) by(cluster) > 0.01
- for: 5m
- labels:
- severity: "info"
- advisor: "systemOverload"
- dashboard: "scylla-detailed"
- description: 'More than 1% of the requests got shed, this is an indication of an overload, consider system resize.'
- summary: System is overloaded
- - alert: cappedTombstone
- expr: changes(scylla_sstables_capped_tombstone_deletion_time[1h]) > 0
- for: 1m
- labels:
- severity: "info"
- advisor: "cappedTombstone"
- dashboard: "scylla-detailed"
- description: 'Tombstone delete time was set too far in the future and was capped'
- summary: Tobmstone delete time is capped
- - alert: InstanceDown
- expr: up{job="scylla"} == 0
- for: 30s
- labels:
- severity: "warn"
- annotations:
- description: '{{ $labels.instance }} has been down for more than 30 seconds.'
- summary: Instance {{ $labels.instance }} down
- - alert: InstanceDown
- expr: sum(up{job="scylla"}>0)by(instance) unless sum(scylla_transport_requests_served{shard="0"}) by(instance)
- for: 1m
- labels:
- severity: "warn"
- annotations:
- description: '{{ $labels.instance }} instance is shutting down.'
- summary: Instance {{ $labels.instance }} down
- - alert: InstanceDown
- expr: scylla_node_operation_mode > 3
- for: 30s
- labels:
- severity: "warn"
- annotations:
- description: '{{ $labels.instance }} instance is shutting down.'
- summary: Instance {{ $labels.instance }} down
- - alert: DiskFull
- expr: node_filesystem_avail_bytes{mountpoint="/var/lib/scylla"} / node_filesystem_size_bytes{mountpoint="/var/lib/scylla"} * 100 < 35
- for: 30s
- labels:
- severity: "warn"
- annotations:
- description: '{{ $labels.instance }} has less than 35% free disk space.'
- summary: Instance {{ $labels.instance }} low disk space
- - alert: DiskFull
- expr: node_filesystem_avail_bytes{mountpoint="/var/lib/scylla"} / node_filesystem_size_bytes{mountpoint="/var/lib/scylla"} * 100 < 25
- for: 30s
- labels:
- severity: "error"
- annotations:
- description: '{{ $labels.instance }} has less than 25% free disk space.'
- summary: Instance {{ $labels.instance }} low disk space
- - alert: DiskFull
- expr: node_filesystem_avail_bytes{mountpoint="/var/lib/scylla"} / node_filesystem_size_bytes{mountpoint="/var/lib/scylla"} * 100 < 15
- for: 30s
- labels:
- severity: "critical"
- annotations:
- description: '{{ $labels.instance }} has less than 15% free disk space.'
- summary: Instance {{ $labels.instance }} low disk space
- - alert: DiskFull
- expr: node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"} * 100 < 20
- for: 30s
- labels:
- severity: "error"
- annotations:
- description: '{{ $labels.instance }} has less than 20% free disk space on the root partition.'
- summary: Instance {{ $labels.instance }} low disk space
- - alert: NoCql
- expr: scylla_manager_healthcheck_cql_status == -1
- for: 30s
- labels:
- severity: "warn"
- annotations:
- description: '{{ $labels.host }} has denied CQL connection for more than 30 seconds.'
- summary: Instance {{ $labels.host }} no CQL connection
- - alert: HighLatencies
- expr: wlatencyp95{by="instance"} > 100000
- for: 5m
- labels:
- severity: "info"
- annotations:
- description: '{{ $labels.instance }} has 95% high latency for more than 5 minutes.'
- summary: Instance {{ $labels.instance }} High Write Latency
- - alert: HighLatencies
- expr: wlatencya{by="instance"} >10000
- for: 5m
- labels:
- severity: "info"
- annotations:
- description: '{{ $labels.instance }} has average high latency for more than 5 minutes.'
- summary: Instance {{ $labels.instance }} High Write Latency
- - alert: HighLatencies
- expr: rlatencyp95{by="instance"} > 100000
- for: 5m
- labels:
- severity: "info"
- annotations:
- description: '{{ $labels.instance }} has 95% high latency for more than 5 minutes.'
- summary: Instance {{ $labels.instance }} High Read Latency
- - alert: HighLatencies
- expr: rlatencya{by="instance"} >10000
- for: 5m
- labels:
- severity: "info"
- annotations:
- description: '{{ $labels.instance }} has average high latency for more than 5 minutes.'
- summary: Instance {{ $labels.instance }} High Read Latency
- - alert: BackupFailed
- expr: (sum(scylla_manager_scheduler_run_total{type=~"backup", status="ERROR"}) or vector(0)) - (sum(scylla_manager_scheduler_run_total{type=~"backup", status="ERROR"} offset 3m) or vector(0)) > 0
- for: 10s
- labels:
- severity: "info"
- annotations:
- description: 'Backup failed'
- summary: Backup task failed
- - alert: RepairFailed
- expr: (sum(scylla_manager_scheduler_run_total{type=~"repair", status="ERROR"}) or vector(0)) - (sum(scylla_manager_scheduler_run_total{type=~"repair", status="ERROR"} offset 3m) or vector(0)) > 0
- for: 10s
- labels:
- severity: "info"
- annotations:
- description: 'Repair failed'
- summary: Repair task failed
- - alert: restart
- expr: resets(scylla_gossip_heart_beat[1h])>0
- for: 10s
- labels:
- severity: "info"
- annotations:
- description: 'Node restarted'
- summary: Instance {{ $labels.instance }} restarted
- - alert: oomKill
- expr: changes(node_vmstat_oom_kill[1h])>0
- for: 10s
- labels:
- severity: "warn"
- annotations:
- description: 'OOM Kill on {{ $labels.instance }}'
- summary: A process was terminated on Instance {{ $labels.instance }}
- - alert: tooManyFiles
- expr: (node_filesystem_files - node_filesystem_files_free) / on(instance) group_left count(scylla_reactor_cpu_busy_ms) by (instance)>20000
- for: 10s
- labels:
- severity: "info"
- description: 'Over 20k open files per shard {{ $labels.instance }}'
- summary: There are over 20K open files per shard on Insace {{ $labels.instance }}
- - alert: tooManyFiles
- expr: (node_filesystem_files - node_filesystem_files_free) / on(instance) group_left count(scylla_reactor_cpu_busy_ms) by (instance)>30000
- for: 10s
- labels:
- severity: "warn"
- description: 'Over 30k open files per shard {{ $labels.instance }}'
- summary: There are over 30K open files per shard on Insace {{ $labels.instance }}
- - alert: tooManyFiles
- expr: (node_filesystem_files - node_filesystem_files_free) / on(instance) group_left count(scylla_reactor_cpu_busy_ms) by (instance)>40000
- for: 10s
- labels:
- severity: "error"
- description: 'Over 40k open files per shard {{ $labels.instance }}'
- summary: There are over 40K open files per shard on Insace {{ $labels.instance }}
-`}}
+ - record: cql:all_shardrate1m
+ expr: sum(rate(scylla_cql_reads[60s])) by (cluster, dc, instance, shard) + sum(rate(scylla_cql_inserts[60s]) ) by (cluster, dc, instance, shard) + sum( rate(scylla_cql_updates[60s]) ) by (cluster, dc, instance, shard) + sum( rate(scylla_cql_deletes[60s])) by (cluster, dc, instance, shard)
+ - record: cql:all_system_shardrate1m
+ expr: sum(rate(scylla_cql_reads_per_ks{ks="system"}[60s])) by (cluster, dc, instance, shard) + sum(rate(scylla_cql_inserts_per_ks{ks="system"}[60s]) ) by (cluster, dc, instance, shard) + sum( rate(scylla_cql_updates_per_ks{ks="system"}[60s]) ) by (cluster, dc, instance, shard) + sum( rate(scylla_cql_deletes_per_ks{ks="system"}[60s])) by (cluster, dc, instance, shard)
+ - record: cql:local_shardrate1m
+ expr: sum(rate(scylla_storage_proxy_coordinator_reads_local_node[60s])) by (cluster, dc, instance, shard) + sum(rate(scylla_storage_proxy_coordinator_total_write_attempts_local_node[60s]) ) by (cluster, dc, instance, shard)
+ - record: cql:all_rate1m
+ expr: sum(cql:all_shardrate1m) by (cluster, dc, instance)
+ - record: cql:non_token_aware
+ expr: (sum(cql:all_rate1m) by (cluster) >bool 100) * clamp_min(1-(sum(cql:local_shardrate1m) by (cluster) / sum(cql:all_rate1m) by (cluster)), 0)
+ - record: cql:non_system_prepared1m
+ expr: clamp_min(sum(rate(scylla_query_processor_statements_prepared[1m])) by (cluster, dc, instance, shard) - cql:all_system_shardrate1m, 0)
+ - record: cql:non_prepared
+ expr: (sum(cql:non_system_prepared1m) by (cluster) >bool 100) * (sum(cql:non_system_prepared1m) by (cluster) / clamp_min(sum(cql:all_rate1m) by (cluster)- sum(cql:all_system_shardrate1m) by (cluster), 0.001))
+ - record: cql:non_paged_no_system1m
+ expr: clamp_min(sum(rate(scylla_cql_unpaged_select_queries[60s])) by (cluster, dc, instance) - sum(rate(scylla_cql_unpaged_select_queries_per_ks{ks="system"}[60s])) by (cluster, dc, instance), 0)
+ - record: cql:non_paged_no_system
+ expr: (sum(cql:non_paged_no_system1m) by (cluster, dc, instance) >bool 100) * sum(cql:non_paged_no_system1m) by (cluster, dc, instance)/clamp_min(sum(rate(scylla_cql_reads[60s]))by (cluster, dc, instance) - sum(rate(scylla_cql_unpaged_select_queries_per_ks{ks="system"}[60s])) by (cluster, dc, instance), 0.01)
+ - record: cql:non_paged
+ expr: (sum(cql:non_paged_no_system1m) by (cluster) >bool 100) * sum(cql:non_paged_no_system1m) by (cluster)/clamp_min(sum(rate(scylla_cql_reads[60s]))by (cluster) - sum(rate(scylla_cql_unpaged_select_queries_per_ks{ks="system"}[60s])) by (cluster), 0.01)
+ - record: cql:reverse_queries
+ expr: sum(rate(scylla_cql_reverse_queries[60s])) by (cluster)/ sum(rate(scylla_cql_reads[60s])) by (cluster)
+ - record: cql:allow_filtering
+ expr: sum(rate(scylla_cql_filtered_read_requests[60s])) by (cluster)/ sum(rate(scylla_cql_reads[60s])) by (cluster)
+ - record: cql:any_queries
+ expr: sum(rate(scylla_query_processor_queries{consistency_level="ANY"}[60s])) by (cluster) >bool 0
+ - record: cql:all_queries
+ expr: sum(rate(scylla_query_processor_queries{consistency_level="ALL"}[60s])) by (cluster) >bool 0
+ - record: errors:operation_unavailable
+ expr: sum(rate(scylla_storage_proxy_coordinator_read_unavailable[60s])) by (cluster, dc, instance) + sum(rate(scylla_storage_proxy_coordinator_write_unavailable[60s])) by (cluster, dc, instance) + sum(rate(scylla_storage_proxy_coordinator_range_unavailable[60s])) by (cluster, dc, instance)
+ - record: errors:local_failed
+ expr: sum(rate(scylla_storage_proxy_coordinator_read_errors_local_node[60s])) by (cluster, dc, instance) + sum(rate(scylla_storage_proxy_coordinator_write_errors_local_node[60s])) by (cluster, dc, instance)
+ - record: errors:nodes_total
+ expr: errors:local_failed + errors:operation_unavailable
+ - record: manager:repair_done_ts
+ expr: timestamp(sum(changes(scylla_manager_scheduler_run_total{status="DONE",type="repair"}[2m])) by (cluster) > 0) or on(cluster) manager:repair_done_ts
+ - record: manager:backup_done_ts
+ expr: timestamp(sum(changes(scylla_manager_scheduler_run_total{status="DONE",type="backup"}[2m])) by (cluster) > 0) or on(cluster) manager:backup_done_ts
+ - record: manager:repair_fail_ts
+ expr: timestamp(sum(changes(scylla_manager_scheduler_run_total{status="ERROR",type="repair"}[2m])) by (cluster) > 0) or on(cluster) manager:repair_fail_ts
+ - record: manager:backup_fail_ts
+ expr: timestamp(sum(changes(scylla_manager_scheduler_run_total{status="ERROR",type="backup"}[2m])) by (cluster) > 0) or on(cluster) manager:backup_fail_ts
+ - record: manager:repair_progress
+ expr: (max(scylla_manager_scheduler_run_indicator{type="repair"}) by (cluster) >bool 0)*((max(scylla_manager_repair_token_ranges_total) by(cluster)<= 0)*0 or on(cluster) (sum(scylla_manager_repair_token_ranges_success>=0) by (cluster) + sum(scylla_manager_repair_token_ranges_error>=0) by (cluster))/sum(scylla_manager_repair_token_ranges_total>=0) by (cluster))
+ - record: scylla_manager_repair_progress
+ expr: sum(manager:repair_progress) by (cluster)
+ labels:
+ level: "1"
+ dd: "1"
+ by: "cluster"
+ - record: manager:backup_progress
+ expr: (max(scylla_manager_scheduler_run_indicator{type="backup"}) by (cluster) >bool 0)*((max(scylla_manager_backup_files_size_bytes) by(cluster)<= 0)*0 or on(cluster) (sum(scylla_manager_backup_files_uploaded_bytes) by (cluster) + sum(scylla_manager_backup_files_skipped_bytes) by (cluster) + sum(scylla_manager_backup_files_failed_bytes)by(cluster))/sum(scylla_manager_backup_files_size_bytes>=0) by (cluster))
+ - record: scylla_manager_backup_progress
+ expr: sum(manager:backup_progress) by (cluster)
+ labels:
+ level: "1"
+ dd: "1"
+ by: "cluster"
+ - record: wlatencyp99
+ expr: histogram_quantile(0.99, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{shard=~".+", scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, shard, scheduling_group_name, le))
+ labels:
+ by: "instance,shard"
+ level: "2"
+ dd: "2"
+ - record: wlatencyp99
+ expr: histogram_quantile(0.99, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, scheduling_group_name, le))
+ labels:
+ by: "instance"
+ level: "1"
+ dd: "1"
+ - record: wlatencyp99
+ expr: histogram_quantile(0.99, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, scheduling_group_name, le))
+ labels:
+ by: "dc"
+ level: "1"
+ dd: "1"
+ - record: wlatencyp99
+ expr: histogram_quantile(0.99, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, scheduling_group_name, le))
+ labels:
+ by: "cluster"
+ level: "1"
+ dd: "1"
+ - record: rlatencyp99
+ expr: histogram_quantile(0.99, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{shard=~".+", scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, shard, scheduling_group_name, le))
+ labels:
+ by: "instance,shard"
+ level: "2"
+ dd: "2"
+ - record: rlatencyp99
+ expr: histogram_quantile(0.99, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, scheduling_group_name, le))
+ labels:
+ by: "instance"
+ level: "1"
+ dd: "1"
+ - record: rlatencyp99
+ expr: histogram_quantile(0.99, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, scheduling_group_name, le))
+ labels:
+ by: "dc"
+ level: "1"
+ dd: "1"
+ - record: rlatencyp99
+ expr: histogram_quantile(0.99, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, scheduling_group_name, le))
+ labels:
+ by: "cluster"
+ level: "1"
+ dd: "1"
+ - record: wlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{shard=~".+", scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, shard, scheduling_group_name, le))
+ labels:
+ by: "instance,shard"
+ level: "2"
+ dd: "2"
+ - record: wlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, scheduling_group_name, le))
+ labels:
+ by: "instance"
+ level: "1"
+ dd: "1"
+ - record: wlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, scheduling_group_name, le))
+ labels:
+ by: "dc"
+ level: "1"
+ dd: "1"
+ - record: wlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, scheduling_group_name, le))
+ labels:
+ by: "cluster"
+ level: "1"
+ dd: "1"
+ - record: rlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{shard=~".+", scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, shard, scheduling_group_name, le))
+ labels:
+ by: "instance,shard"
+ level: "2"
+ dd: "2"
+ - record: rlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, scheduling_group_name, le))
+ labels:
+ by: "instance"
+ level: "1"
+ dd: "1"
+ - record: rlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, scheduling_group_name, le))
+ labels:
+ by: "dc"
+ level: "1"
+ dd: "1"
+ - record: rlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, scheduling_group_name, le))
+ labels:
+ by: "cluster"
+ level: "1"
+ dd: "1"
+ - record: wlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{shard=~".+", scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, shard, scheduling_group_name, le))
+ labels:
+ by: "instance,shard"
+ level: "2"
+ dd: "2"
+ - record: wlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, scheduling_group_name, le))
+ labels:
+ by: "instance"
+ level: "1"
+ dd: "1"
+ - record: wlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, scheduling_group_name, le))
+ labels:
+ by: "dc"
+ level: "1"
+ dd: "1"
+ - record: wlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, scheduling_group_name, le))
+ labels:
+ by: "cluster"
+ level: "1"
+ dd: "1"
+ - record: rlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{shard=~".+", scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, shard, scheduling_group_name, le))
+ labels:
+ by: "instance,shard"
+ level: "2"
+ dd: "2"
+ - record: rlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, scheduling_group_name, le))
+ labels:
+ by: "instance"
+ level: "1"
+ dd: "1"
+ - record: rlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, scheduling_group_name, le))
+ labels:
+ by: "dc"
+ level: "1"
+ dd: "1"
+ - record: rlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, scheduling_group_name, le))
+ labels:
+ by: "cluster"
+ level: "1"
+ dd: "1"
+ - record: wlatencyp99
+ expr: histogram_quantile(0.99, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{shard=~".+", scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, shard, scheduling_group_name, le))
+ labels:
+ by: "instance,shard"
+ - record: wlatencyp99
+ expr: histogram_quantile(0.99, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, scheduling_group_name, le))
+ labels:
+ by: "instance"
+
+ - record: wlatencyp99
+ expr: histogram_quantile(0.99, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, scheduling_group_name, le))
+ labels:
+ by: "dc"
+ - record: wlatencyp99
+ expr: histogram_quantile(0.99, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, scheduling_group_name, le))
+ labels:
+ by: "cluster"
+ - record: rlatencyp99
+ expr: histogram_quantile(0.99, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{shard=~".+", scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, shard, scheduling_group_name, le))
+ labels:
+ by: "instance,shard"
+ - record: rlatencyp99
+ expr: histogram_quantile(0.99, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, scheduling_group_name, le))
+ labels:
+ by: "instance"
+ - record: rlatencyp99
+ expr: histogram_quantile(0.99, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, scheduling_group_name, le))
+ labels:
+ by: "dc"
+ - record: rlatencyp99
+ expr: histogram_quantile(0.99, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, scheduling_group_name, le))
+ labels:
+ by: "cluster"
+ - record: wlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{shard=~".+", scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, shard, scheduling_group_name, le))
+ labels:
+ by: "instance,shard"
+ - record: wlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, scheduling_group_name, le))
+ labels:
+ by: "instance"
+ - record: wlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, scheduling_group_name, le))
+ labels:
+ by: "dc"
+ - record: wlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, scheduling_group_name, le))
+ labels:
+ by: "cluster"
+ - record: rlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{shard=~".+", scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, shard, scheduling_group_name, le))
+ labels:
+ by: "instance,shard"
+ - record: rlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, scheduling_group_name, le))
+ labels:
+ by: "instance"
+ - record: rlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, scheduling_group_name, le))
+ labels:
+ by: "dc"
+ - record: rlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, scheduling_group_name, le))
+ labels:
+ by: "cluster"
+ - record: wlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{shard=~".+", scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, shard, scheduling_group_name, le))
+ labels:
+ by: "instance,shard"
+ - record: wlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, scheduling_group_name, le))
+ labels:
+ by: "instance"
+ - record: wlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, scheduling_group_name, le))
+ labels:
+ by: "dc"
+ - record: wlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_write_latency_bucket{scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, scheduling_group_name, le))
+ labels:
+ by: "cluster"
+ - record: rlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{shard=~".+", scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, shard, scheduling_group_name, le))
+ labels:
+ by: "instance,shard"
+ - record: rlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, scheduling_group_name, le))
+ labels:
+ by: "instance"
+ - record: rlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, scheduling_group_name, le))
+ labels:
+ by: "dc"
+ - record: rlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_read_latency_bucket{scheduling_group_name=~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, scheduling_group_name, le))
+ labels:
+ by: "cluster"
+ - record: casrlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_cas_read_latency_bucket{shard=~".+", scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, shard, le, scheduling_group_name))
+ labels:
+ by: "instance,shard"
+ level: "2"
+ dd: "2"
+ - record: casrlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_cas_read_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, le, scheduling_group_name))
+ labels:
+ by: "instance"
+ level: "1"
+ dd: "1"
+ - record: casrlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_cas_read_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, le, scheduling_group_name))
+ labels:
+ by: "dc"
+ level: "1"
+ dd: "1"
+ - record: casrlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_cas_read_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, le, scheduling_group_name))
+ labels:
+ by: "cluster"
+ level: "1"
+ dd: "1"
+ - record: caswlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_cas_write_latency_bucket{shard=~".+", scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, shard, le, scheduling_group_name))
+ labels:
+ by: "instance,shard"
+ level: "2"
+ dd: "2"
+ - record: caswlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_cas_write_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, le, scheduling_group_name))
+ labels:
+ by: "instance"
+ level: "1"
+ dd: "1"
+ - record: caswlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_cas_write_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, le, scheduling_group_name))
+ labels:
+ by: "dc"
+ level: "1"
+ dd: "1"
+ - record: caswlatencyp95
+ expr: histogram_quantile(0.95, sum(rate(scylla_storage_proxy_coordinator_cas_write_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, le, scheduling_group_name))
+ labels:
+ by: "cluster"
+ level: "1"
+ dd: "1"
+ - record: casrlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_cas_read_latency_bucket{shard=~".+", scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, shard, le, scheduling_group_name))
+ labels:
+ by: "instance,shard"
+ level: "2"
+ dd: "2"
+ - record: casrlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_cas_read_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, le, scheduling_group_name))
+ labels:
+ by: "instance"
+ level: "1"
+ dd: "1"
+ - record: casrlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_cas_read_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, le, scheduling_group_name))
+ labels:
+ by: "dc"
+ level: "1"
+ dd: "1"
+ - record: casrlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_cas_read_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, le, scheduling_group_name))
+ labels:
+ by: "cluster"
+ level: "1"
+ dd: "1"
+ - record: caswlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_cas_write_latency_bucket{shard=~".+", scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, shard, le, scheduling_group_name))
+ labels:
+ by: "instance,shard"
+ level: "2"
+ dd: "2"
+ - record: caswlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_cas_write_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, instance, le, scheduling_group_name))
+ labels:
+ by: "instance"
+ level: "1"
+ dd: "1"
+ - record: caswlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_cas_write_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, dc, le, scheduling_group_name))
+ labels:
+ by: "dc"
+ level: "1"
+ dd: "1"
+ - record: caswlatencya
+ expr: histogram_quantile(0.5, sum(rate(scylla_storage_proxy_coordinator_cas_write_latency_bucket{scheduling_group_name!~"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache"}[60s])) by (cluster, le, scheduling_group_name))
+ labels:
+ by: "cluster"
+ level: "1"
+ dd: "1"
+ - record: all_scheduling_group
+ expr: sum by (cluster, scheduling_group_name) (scylla_storage_proxy_coordinator_write_latency_count + scylla_storage_proxy_coordinator_read_latency_count) > 0
+ - record: wlatencyp99ks
+ expr: histogram_quantile(0.99, sum(rate(scylla_column_family_write_latency_bucket{}[60s])) by (cluster, dc, instance, shard, le,ks,cf))
+ labels:
+ by: "instance,shard"
+ - record: wlatencyp99ks
+ expr: histogram_quantile(0.99, sum(rate(scylla_column_family_write_latency_bucket{}[60s])) by (cluster, dc, instance, le,ks,cf))
+ labels:
+ by: "instance"
+ - record: wlatencyp99ks
+ expr: histogram_quantile(0.99, sum(rate(scylla_column_family_write_latency_bucket{}[60s])) by (cluster, dc, le,ks,cf))
+ labels:
+ by: "dc"
+ - record: wlatencyp99ks
+ expr: histogram_quantile(0.99, sum(rate(scylla_column_family_write_latency_bucket{}[60s])) by (cluster, le,ks,cf))
+ labels:
+ by: "cluster"
+ - record: rlatencyp99ks
+ expr: histogram_quantile(0.99, sum(rate(scylla_column_family_read_latency_bucket{}[60s])) by (cluster, dc, instance, shard, le,ks,cf))
+ labels:
+ by: "instance,shard"
+ - record: rlatencyp99ks
+ expr: histogram_quantile(0.99, sum(rate(scylla_column_family_read_latency_bucket{}[60s])) by (cluster, dc, instance, le,ks,cf))
+ labels:
+ by: "instance"
+ - record: rlatencyp99ks
+ expr: histogram_quantile(0.99, sum(rate(scylla_column_family_read_latency_bucket{}[60s])) by (cluster, dc, le,ks,cf))
+ labels:
+ by: "dc"
+ - record: rlatencyp99ks
+ expr: histogram_quantile(0.99, sum(rate(scylla_column_family_read_latency_bucket{}[60s])) by (cluster, le,ks,cf))
+ labels:
+ by: "cluster"
+ - record: wlatencyp95ks
+ expr: histogram_quantile(0.95, sum(rate(scylla_column_family_write_latency_bucket{}[60s])) by (cluster, dc, instance, shard, le,ks,cf))
+ labels:
+ by: "instance,shard"
+ - record: wlatencyp95ks
+ expr: histogram_quantile(0.95, sum(rate(scylla_column_family_write_latency_bucket{}[60s])) by (cluster, dc, instance, le,ks,cf))
+ labels:
+ by: "instance"
+ - record: wlatencyp95ks
+ expr: histogram_quantile(0.95, sum(rate(scylla_column_family_write_latency_bucket{}[60s])) by (cluster, dc, le,ks,cf))
+ labels:
+ by: "dc"
+ - record: wlatencyp95ks
+ expr: histogram_quantile(0.95, sum(rate(scylla_column_family_write_latency_bucket{}[60s])) by (cluster, le,ks,cf))
+ labels:
+ by: "cluster"
+ - record: rlatencyp95ks
+ expr: histogram_quantile(0.95, sum(rate(scylla_column_family_read_latency_bucket{}[60s])) by (cluster, dc, instance, shard, le,ks,cf))
+ labels:
+ by: "instance,shard"
+ - record: rlatencyp95ks
+ expr: histogram_quantile(0.95, sum(rate(scylla_column_family_read_latency_bucket{}[60s])) by (cluster, dc, instance, le,ks,cf))
+ labels:
+ by: "instance"
+ - record: rlatencyp95ks
+ expr: histogram_quantile(0.95, sum(rate(scylla_column_family_read_latency_bucket{}[60s])) by (cluster, dc, le,ks,cf))
+ labels:
+ by: "dc"
+ - record: rlatencyp95ks
+ expr: histogram_quantile(0.95, sum(rate(scylla_column_family_read_latency_bucket{}[60s])) by (cluster, le,ks,cf))
+ labels:
+ by: "cluster"
+ - record: wlatencyaks
+ expr: sum(rate(scylla_column_family_write_latency_sum{}[60s])) by (cluster, dc, instance, shard,ks,cf)/sum(rate(scylla_column_family_write_latency_count{}[60s])) by (cluster, dc, instance, shard,ks,cf)
+ labels:
+ by: "instance,shard"
+ - record: wlatencyaks
+ expr: sum(rate(scylla_column_family_write_latency_sum{}[60s])) by (cluster, dc, instance,ks,cf)/sum(rate(scylla_column_family_write_latency_count{}[60s])) by (cluster, dc, instance,ks,cf)
+ labels:
+ by: "instance"
+ - record: wlatencyaks
+ expr: sum(rate(scylla_column_family_write_latency_sum{}[60s])) by (cluster, dc,ks,cf)/sum(rate(scylla_column_family_write_latency_count{}[60s])) by (cluster, dc,ks,cf)
+ labels:
+ by: "dc"
+ - record: wlatencyaks
+ expr: sum(rate(scylla_column_family_write_latency_sum{}[60s])) by (cluster,ks,cf)/sum(rate(scylla_column_family_write_latency_count{}[60s])) by (cluster,ks,cf)
+ labels:
+ by: "cluster"
+ - record: rlatencyaks
+ expr: sum(rate(scylla_column_family_read_latency_sum{}[60s])) by (cluster, dc, instance, shard,ks,cf)/sum(rate(scylla_column_family_read_latency_count{}[60s])) by (cluster, dc, instance, shard,ks,cf)
+ labels:
+ by: "instance,shard"
+ - record: rlatencyaks
+ expr: sum(rate(scylla_column_family_read_latency_sum{}[60s])) by (cluster, dc, instance,ks,cf)/sum(rate(scylla_column_family_read_latency_count{}[60s])) by (cluster, dc, instance,ks,cf)
+ labels:
+ by: "instance"
+ - record: rlatencyaks
+ expr: sum(rate(scylla_column_family_read_latency_sum{}[60s])) by (cluster, dc,ks,cf)/sum(rate(scylla_column_family_read_latency_count{}[60s])) by (cluster, dc,ks,cf)
+ labels:
+ by: "dc"
+ - record: rlatencyaks
+ expr: sum(rate(scylla_column_family_read_latency_sum{}[60s])) by (cluster,ks,cf)/sum(rate(scylla_column_family_read_latency_count{}[60s])) by (cluster,ks,cf)
+ labels:
+ by: "cluster"
+`-}}
diff --git a/pkg/controller/scylladbmonitoring/sync_grafana.go b/pkg/controller/scylladbmonitoring/sync_grafana.go
index 77f8707733f..a8fc1a34432 100644
--- a/pkg/controller/scylladbmonitoring/sync_grafana.go
+++ b/pkg/controller/scylladbmonitoring/sync_grafana.go
@@ -147,18 +147,20 @@ func makeGrafanaDashboards(sm *scyllav1alpha1.ScyllaDBMonitoring) (*corev1.Confi
t = *sm.Spec.Type
}
+ var dashboards map[string][]byte
switch t {
case scyllav1alpha1.ScyllaDBMonitoringTypePlatform:
- return grafanav1alpha1assets.GrafanaDashboardsPlatformConfigMapTemplate.RenderObject(map[string]any{
- "scyllaDBMonitoringName": sm.Name,
- })
+ dashboards = grafanav1alpha1assets.GrafanaDashboardsPlatform
case scyllav1alpha1.ScyllaDBMonitoringTypeSAAS:
- return grafanav1alpha1assets.GrafanaDashboardsSAASConfigMapTemplate.RenderObject(map[string]any{
- "scyllaDBMonitoringName": sm.Name,
- })
+ dashboards = grafanav1alpha1assets.GrafanaDashboardsSAAS
default:
return nil, "", fmt.Errorf("unkown monitoring type: %q", t)
}
+
+ return grafanav1alpha1assets.GrafanaDashboardsConfigMapTemplate.RenderObject(map[string]any{
+ "scyllaDBMonitoringName": sm.Name,
+ "dashboards": dashboards,
+ })
}
func makeGrafanaProvisionings(sm *scyllav1alpha1.ScyllaDBMonitoring) (*corev1.ConfigMap, string, error) {
diff --git a/pkg/controller/scylladbmonitoring/sync_grafana_test.go b/pkg/controller/scylladbmonitoring/sync_grafana_test.go
index d52160bca15..4d5ba5fc426 100644
--- a/pkg/controller/scylladbmonitoring/sync_grafana_test.go
+++ b/pkg/controller/scylladbmonitoring/sync_grafana_test.go
@@ -1,13 +1,18 @@
package scylladbmonitoring
import (
+ "crypto/sha512"
+ "encoding/base64"
"reflect"
"strings"
"testing"
"github.com/google/go-cmp/cmp"
scyllav1alpha1 "github.com/scylladb/scylla-operator/pkg/api/scylla/v1alpha1"
+ "github.com/scylladb/scylla-operator/pkg/pointer"
+ "github.com/scylladb/scylla-operator/pkg/scheme"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/runtime"
)
func Test_makeGrafanaIngress(t *testing.T) {
@@ -125,3 +130,89 @@ spec:
})
}
}
+
+// Test_makeGrafanaDashboards verifies the dashboards ConfigMap rendered for the
+// default (SaaS) and platform monitoring types. Each dashboard payload is replaced
+// by a base64-encoded SHA-512/224 digest so the expectations stay small.
+func Test_makeGrafanaDashboards(t *testing.T) {
+	tt := []struct {
+		name           string
+		sm             *scyllav1alpha1.ScyllaDBMonitoring
+		expectedString string
+		expectedErr    error
+	}{
+		{
+			name: "renders data for default SaaS type",
+			sm: &scyllav1alpha1.ScyllaDBMonitoring{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: "sm-name",
+				},
+				Spec: scyllav1alpha1.ScyllaDBMonitoringSpec{
+					Type: nil,
+				},
+			},
+			expectedString: strings.TrimLeft(`
+apiVersion: v1
+data:
+ scylla-saas.template.json: T18i6tRKmd6smL1Fm4iHOjB2xPDPwpHVKS4wrw==
+kind: ConfigMap
+metadata:
+ creationTimestamp: null
+ name: sm-name-grafana-scylladb-dashboards
+`, "\n"),
+			expectedErr: nil,
+		},
+		{
+			name: "renders data for platform type",
+			sm: &scyllav1alpha1.ScyllaDBMonitoring{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: "sm-name",
+				},
+				Spec: scyllav1alpha1.ScyllaDBMonitoringSpec{
+					Type: pointer.Ptr(scyllav1alpha1.ScyllaDBMonitoringTypePlatform),
+				},
+			},
+			expectedString: strings.TrimLeft(`
+apiVersion: v1
+data:
+ alternator.template.json: WE2o3/jcBu0LR/Bj38f9WoeiLFmApRZtlxyiVw==
+ scylla-advanced.template.json: g6WNXOpTxSQSotbyW7iQGtg912Wyd0iDOtd6zw==
+ scylla-cql.template.json: A2M03naWWLAgMog9v9LTMVF/DwOERgkpmkDfRg==
+ scylla-detailed.template.json: wYCu+KYu+EuZ7LJB881YZB7SSOOCmAhcLU8JcQ==
+ scylla-ks.template.json: SkiMoLQRA1kzoq7MWUOk4+UWWnuWF6nc7t5ekA==
+ scylla-manager.template.json: fxCrzhfsl0Qm3yMAtJp52KjrcbrOT/BEcbJcOg==
+ scylla-os.template.json: NrK2sGRFLF9nlXmUb02xI/bfDHlQHFiIRgQaVw==
+ scylla-overview.template.json: VGwTPpiOqs8d+0gcZlyzX+u/JBPFvgbsu6UDtg==
+kind: ConfigMap
+metadata:
+ creationTimestamp: null
+ name: sm-name-grafana-scylladb-dashboards
+`, "\n"),
+			expectedErr: nil,
+		},
+	}
+	for _, tc := range tt {
+		t.Run(tc.name, func(t *testing.T) {
+			cm, _, err := makeGrafanaDashboards(tc.sm)
+			if !reflect.DeepEqual(err, tc.expectedErr) {
+				t.Fatalf("expected and got errors differ:\n%s", cmp.Diff(tc.expectedErr, err))
+			}
+			if err != nil {
+				return // the expected error was returned; no ConfigMap accompanies it, so nothing is left to compare
+			}
+
+			// To avoid embedding thousands of lines we hash the value.
+			for k, v := range cm.Data {
+				s := sha512.Sum512_224([]byte(v))
+				cm.Data[k] = base64.StdEncoding.EncodeToString(s[:])
+			}
+			objStringBytes, err := runtime.Encode(scheme.DefaultYamlSerializer, cm)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if objString := string(objStringBytes); objString != tc.expectedString {
+				t.Errorf("expected and got strings differ:\n%s", cmp.Diff(strings.Split(tc.expectedString, "\n"), strings.Split(objString, "\n")))
+			}
+		})
+	}
+}
diff --git a/pkg/controller/scylladbmonitoring/sync_prometheus_test.go b/pkg/controller/scylladbmonitoring/sync_prometheus_test.go
index 3cf6168fa3e..b5a6a3d5c11 100644
--- a/pkg/controller/scylladbmonitoring/sync_prometheus_test.go
+++ b/pkg/controller/scylladbmonitoring/sync_prometheus_test.go
@@ -248,6 +248,7 @@ kind: Prometheus
metadata:
name: "sm-name"
spec:
+ version: "v2.45.1"
serviceAccountName: "sm-name-prometheus"
securityContext:
runAsNonRoot: true
@@ -323,6 +324,7 @@ kind: Prometheus
metadata:
name: "sm-name"
spec:
+ version: "v2.45.1"
serviceAccountName: "sm-name-prometheus"
securityContext:
runAsNonRoot: true