-
Notifications
You must be signed in to change notification settings - Fork 146
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
(cherry picked from commit 9daecce)
- Loading branch information
Showing
8 changed files
with
4,628 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
{ | ||
"dashboard": { | ||
"class": "dashboard", | ||
"uid": "cpu-3-2", | ||
"rows": [ | ||
{ | ||
"class": "logo_row" | ||
}, | ||
{ | ||
"class": "row", | ||
"panels": [ | ||
{ | ||
"class": "percent_panel", | ||
"targets": [ | ||
{ | ||
"expr": "avg(scylla_reactor_utilization{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"} ) by ([[by]])", | ||
"intervalFactor": 1, | ||
"legendFormat": "", | ||
"refId": "A", | ||
"step": 1 | ||
} | ||
], | ||
"title": "CPU Utilization per [[by]]", | ||
"description": "The percentage of the time during which the CPU is utilized by Scylla. Note that because Scylla does busy polling for some time before going idle, CPU utilization as seen by the operating system may be much higher. Your system is not CPU-bottlenecked until this metric is high." | ||
}, | ||
{ | ||
"class": "percent_panel", | ||
"pointradius": 1, | ||
"targets": [ | ||
{ | ||
"expr": "avg(irate(scylla_scheduler_runtime_ms{group=\"main\",instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[60s])) by ([[by]])/10 + avg(irate(scylla_scheduler_runtime_ms{group=\"statement\",instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[60s])) by ([[by]])/10", | ||
"intervalFactor": 1, | ||
"legendFormat": "", | ||
"metric": "", | ||
"refId": "A", | ||
"step": 30 | ||
} | ||
], | ||
"title": "Foreground CPU Utilization by [[by]]", | ||
"description": "Time spent handling foreground requests (like reads, writes, and some system tasks). The remaining time is either idle, or used by background load like compactions and repairs. Background load in Scylla is opportunistic: background requests will try to use all resources available to complete as fast as possible and rely on the schedulers to provide isolation. This graph is better understood in conjunction with the main CPU load graph. For example, if there are spikes in CPU load that are not present in this graph, that indicates that the foreground load itself is stable." | ||
} | ||
] | ||
}, | ||
{ | ||
"class": "row", | ||
"height": "25px", | ||
"gridPos": {"h": 2}, | ||
"panels": [ | ||
{ | ||
"class": "plain_text", | ||
"content": "<h1 style=\"color:#5881c2; border-bottom: 3px solid #5881c2;\">Task Quota Violation</h1>" | ||
} | ||
], | ||
"title": "New row" | ||
}, | ||
{ | ||
"class": "row", | ||
"panels": [ | ||
{ | ||
"class": "ms_panel", | ||
"repeat": "group", | ||
"span":3, | ||
"pointradius": 1, | ||
"targets": [ | ||
{ | ||
"expr": "sum(irate(scylla_scheduler_time_spent_on_task_quota_violations_ms{instance=~\"[[node]]\" ,cluster=~\"$cluster|$^\", dc=~\"$dc\",group=~\"$group\", shard=~\"[[shard]]\"}[60s])) by ([[by]])", | ||
"intervalFactor": 1, | ||
"legendFormat": "", | ||
"metric": "", | ||
"refId": "A", | ||
"step": 30 | ||
} | ||
], | ||
"title": "Time spent in task quota violations by [[by]] - $group", | ||
"description": "Scylla employs an event-loop-like reactor that alternates between the execution of different groups of tasks periodically. The maximum amount of time during which a task group can run is called the \"task quota\". Some task groups may not respect that quota and run for longer. This may cause latency issues." | ||
} | ||
], | ||
"title": "New row" | ||
} | ||
], | ||
"templating": { | ||
"list": [ | ||
{ | ||
"class":"by_template_var" | ||
}, | ||
{ | ||
"class": "template_variable_single", | ||
"label": "cluster", | ||
"name": "cluster", | ||
"query": "label_values(scylla_reactor_utilization, cluster)" | ||
}, | ||
{ | ||
"class": "template_variable_all", | ||
"label": "dc", | ||
"name": "dc", | ||
"query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster\"}, dc)" | ||
}, | ||
{ | ||
"class": "template_variable_all", | ||
"label": "node", | ||
"name": "node", | ||
"query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster|$^\", dc=~\"$dc\"}, instance)" | ||
}, | ||
{ | ||
"class": "template_variable_all", | ||
"label": "shard", | ||
"name": "shard", | ||
"query": "label_values(scylla_reactor_utilization,shard)", | ||
"sort": 3 | ||
} | ||
, | ||
{ | ||
"class": "template_variable_all", | ||
"label": "group", | ||
"name": "group", | ||
"hide": 2, | ||
"query": "label_values(scylla_scheduler_time_spent_on_task_quota_violations_ms,group)", | ||
"sort": 3 | ||
}, | ||
{ | ||
"class": "template_variable_custom", | ||
"current": { | ||
"text": "3.2", | ||
"value": "3.2" | ||
}, | ||
"name": "scylla_dash_version", | ||
"options": [ | ||
{ | ||
"selected": true, | ||
"text": "3.2", | ||
"value": "3.2" | ||
} | ||
], | ||
"query": "3.2" | ||
}, | ||
{ | ||
"class": "monitor_version_var" | ||
} | ||
] | ||
}, | ||
"tags": [ | ||
"3.2" | ||
], | ||
"time": { | ||
"from": "now-30m", | ||
"to": "now" | ||
}, | ||
"title": "CPU Metrics", | ||
"overwrite": true, | ||
"version": 5 | ||
} | ||
} |
Oops, something went wrong.