diff --git a/grafana/provisioning/dashboards-py/cadvisor.dashboard.py b/grafana/provisioning/dashboards-py/cadvisor.dashboard.py index b518e50..f75c109 100644 --- a/grafana/provisioning/dashboards-py/cadvisor.dashboard.py +++ b/grafana/provisioning/dashboards-py/cadvisor.dashboard.py @@ -1,14 +1,9 @@ -from grafanalib.core import ( - Dashboard, TimeSeries, GaugePanel, - Target, GridPos, - OPS_FORMAT, Templating, Template, REFRESH_ON_TIME_RANGE_CHANGE, Logs -) +from grafanalib.core import GridPos, Templating, Template, Logs from grafanalib.formatunits import BYTES_IEC, SECONDS, BYTES_SEC_IEC -from common import PROMETHEUS_DATASOURCE_NAME +from common import PROMETHEUS_DATASOURCE_NAME, MyDashboard, MyTimeSeries, PromTarget - -dashboard = Dashboard( +dashboard = MyDashboard( title='Cadvisor', uid='cadvisor', description='Container metrics collected by Cadvisor', @@ -24,7 +19,6 @@ query='label_values({__name__=~"container.*"}, container_label_com_docker_compose_project)', includeAll=True, multi=True, - refresh=REFRESH_ON_TIME_RANGE_CHANGE, ), Template( name='container_name', @@ -33,67 +27,49 @@ query='label_values({__name__=~"container.*", container_label_com_docker_compose_project=~"$compose_project"}, name)', includeAll=True, multi=True, - refresh=REFRESH_ON_TIME_RANGE_CHANGE, ), ]), - timezone='browser', panels=[ - TimeSeries( + MyTimeSeries( title='Container Memory Usage', unit=BYTES_IEC, gridPos=GridPos(h=8, w=12, x=0, y=0), - lineWidth=2, - fillOpacity=10, - showPoints='never', - stacking={'mode': 'normal'}, - tooltipMode='all', tooltipSort='desc', + stacking={'mode': 'normal'}, targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='max by (name) (container_memory_usage_bytes{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"})', legendFormat='{{ name }}', - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Container CPU Usage', unit=SECONDS, gridPos=GridPos(h=8, w=12, x=12, y=0), - lineWidth=2, - fillOpacity=10, - showPoints='never', + tooltipSort='desc', + stacking={'mode': 'normal'}, targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, - expr='max by (name) (rate(container_cpu_usage_seconds_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))', + PromTarget( + expr='max by (name) (irate(container_cpu_usage_seconds_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))', legendFormat='{{ name }}', - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Container Network Traffic', unit=BYTES_SEC_IEC, gridPos=GridPos(h=8, w=12, x=0, y=8), - lineWidth=2, - fillOpacity=10, - showPoints='never', - tooltipMode='all', tooltipSort='desc', + axisCenteredZero=True, targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, - expr='max by (name) (rate(container_network_receive_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))', + PromTarget( + expr='max by (name) (irate(container_network_receive_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))', legendFormat="rx {{ name }}", - refId='A', ), - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, - expr='-max by (name) (rate(container_network_transmit_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))', + PromTarget( + expr='-max by (name) (irate(container_network_transmit_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))', legendFormat="tx {{ name }}", - refId='B', ), ], ), diff --git a/grafana/provisioning/dashboards-py/clark.dashboard.py b/grafana/provisioning/dashboards-py/clark.dashboard.py index b39fd73..8203130 100644 --- a/grafana/provisioning/dashboards-py/clark.dashboard.py +++ b/grafana/provisioning/dashboards-py/clark.dashboard.py @@ -1,60 +1,44 @@ -from grafanalib.core import Dashboard, Templating, Template, TimeSeries, Target, GridPos +from grafanalib.core import Templating, Template, Target, GridPos from grafanalib.formatunits import PERCENT_UNIT, SECONDS, NUMBER_FORMAT, TRUE_FALSE -from common import PROMETHEUS_DATASOURCE_NAME +from common import MyDashboard, MyTimeSeries, PromTarget -dashboard = Dashboard( +dashboard = MyDashboard( title='Clark', uid='clark', description='sce club website', - timezone='browser', panels=[ - TimeSeries( + MyTimeSeries( title='Clark main-endpoint hits', unit=NUMBER_FORMAT, gridPos=GridPos(h=8, w=12, x=0, y=0), - lineWidth=2, - stacking={'group': 'A','mode': 'none'}, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='endpoint_hits{route!="/metrics"}', - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Clark main-endpoint hits', unit=NUMBER_FORMAT, gridPos=GridPos(h=8, w=12, x=0, y=0), - lineWidth=2, - stacking={'group': 'A','mode': 'none'}, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='endpoint_hits{route=~"/(sendPasswordReset|validatePasswordReset|resetPassword)"}', - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Clark - messages', unit=NUMBER_FORMAT, gridPos=GridPos(h=8, w=12, x=0, y=0), - lineWidth=2, - stacking={'group': 'A','mode': 'none'}, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='endpoint_hits{route=~"/(send|listen|getLatestMessage)"}', - refId='A', ), ], ), diff --git a/grafana/provisioning/dashboards-py/cleezy.dashboard.py b/grafana/provisioning/dashboards-py/cleezy.dashboard.py index 915360c..186aa46 100644 --- a/grafana/provisioning/dashboards-py/cleezy.dashboard.py +++ b/grafana/provisioning/dashboards-py/cleezy.dashboard.py @@ -1,95 +1,69 @@ -from grafanalib.core import Dashboard, Templating, Template, TimeSeries, Target, GridPos +from grafanalib.core import Templating, Template, GridPos from grafanalib.formatunits import PERCENT_UNIT, SECONDS, NUMBER_FORMAT, TRUE_FALSE -from common import PROMETHEUS_DATASOURCE_NAME +from common import MyDashboard, MyTimeSeries, PromTarget -dashboard = Dashboard( +dashboard = MyDashboard( title='Cleezy', uid='leezy', description='sce club website', - timezone='browser', panels=[ - TimeSeries( + MyTimeSeries( title='Cache Size (entries)', unit=NUMBER_FORMAT, gridPos=GridPos(h=8, w=12, x=0, y=0), - lineWidth=2, - stacking={'group': 'A','mode': 'none'}, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='cache_size', - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='SQLite latency by query', unit=NUMBER_FORMAT, gridPos=GridPos(h=8, w=12, x=0, y=0), - lineWidth=2, - stacking={'group': 'A','mode': 'none'}, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( legendFormat="{{query_type}}", expr='query_time_sum / query_time_count', - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='SQLite latency by query', unit=NUMBER_FORMAT, gridPos=GridPos(h=8, w=12, x=0, y=0), - lineWidth=2, - stacking={'group': 'A','mode': 'none'}, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( legendFormat="{{code}} {{path}}", expr='http_code_total{path!="/metrics", job="cleezy"}', - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Cache hits and misses', unit=NUMBER_FORMAT, gridPos=GridPos(h=8, w=12, x=0, y=0), - lineWidth=2, - stacking={'group': 'A','mode': 'none'}, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( legendFormat="{{__name__}}", expr='cache_hits_total', - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Container Uptime', unit=NUMBER_FORMAT, gridPos=GridPos(h=8, w=12, x=0, y=0), - lineWidth=2, - stacking={'group': 'A','mode': 'none'}, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='time() - process_start_time_seconds{job="cleezy"}', - refId='A', ), ], ), diff --git a/grafana/provisioning/dashboards-py/common.py b/grafana/provisioning/dashboards-py/common.py index 1a78c07..69101fd 100644 --- a/grafana/provisioning/dashboards-py/common.py +++ b/grafana/provisioning/dashboards-py/common.py @@ -1 +1,35 @@ +from attrs import define +from grafanalib.core import Template, TimeSeries, Dashboard, HIDE_VARIABLE, Target + PROMETHEUS_DATASOURCE_NAME = 'Prometheus' + + +@define +class MyDashboard(Dashboard): + """Wrapper class for Dashboard with some default values""" + timezone: str = 'browser' + sharedCrosshair: bool = True + + +@define +class MyTimeSeries(TimeSeries): + """Wrapper class for TimeSeries with some default values and custom fields""" + fillOpacity: int = 10 + lineWidth: int = 1 + showPoints: str = 'never' + tooltipMode: str = 'multi' + maxDataPoints: int = None + + # new fields + axisCenteredZero: bool = False + + def to_json_data(self): + data = super().to_json_data() + data['fieldConfig']['defaults']['custom']['axisCenteredZero'] = self.axisCenteredZero + return data + + +@define +class PromTarget(Target): + """Wrapper class for Target with default prometheus datasource""" + datasource: str = PROMETHEUS_DATASOURCE_NAME diff --git a/grafana/provisioning/dashboards-py/node.dashboard.py b/grafana/provisioning/dashboards-py/node.dashboard.py index a9ef1ae..7154cbc 100644 --- a/grafana/provisioning/dashboards-py/node.dashboard.py +++ b/grafana/provisioning/dashboards-py/node.dashboard.py @@ -1,25 +1,24 @@ -from grafanalib.core import Dashboard, Templating, Template, TimeSeries, GridPos, Target -from grafanalib.formatunits import BYTES_IEC, PERCENT_UNIT +from grafanalib.core import Templating, Template, GridPos +from grafanalib.formatunits import BYTES_IEC, BITS_SEC, PERCENT_UNIT -from common import PROMETHEUS_DATASOURCE_NAME -from node_consts import CPU_BASIC_COLORS, MEMORY_BASIC_COLORS +from common import MyTimeSeries, MyDashboard, PromTarget, PROMETHEUS_DATASOURCE_NAME - -dashboard = Dashboard( +dashboard = MyDashboard( title='Node Exporter', uid='node', description='Node Exporter (not quite full)', tags=[ 'linux', ], - timezone='browser', templating=Templating(list=[ + # Job Template( name='job', label='Job', dataSource=PROMETHEUS_DATASOURCE_NAME, query='label_values(node_uname_info, job)', ), + # Instance Template( name='instance', label='Instance', @@ -29,108 +28,118 @@ ]), panels=[ # CPU Basic - TimeSeries( + MyTimeSeries( title='CPU Basic', description='Basic CPU usage info', unit=PERCENT_UNIT, gridPos=GridPos(h=8, w=12, x=0, y=0), - lineWidth=1, - fillOpacity=30, - showPoints='never', - stacking={'mode': 'percent', 'group': 'A'}, - tooltipMode='all', - tooltipSort='desc', + stacking={'mode': 'percent'}, targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="system"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))', legendFormat='Busy System', - refId='A', ), - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="user"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))', legendFormat='Busy User', - refId='B', ), - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="iowait"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))', legendFormat='Busy Iowait', - refId='C', ), - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode=~".*irq"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))', legendFormat='Busy IRQs', - refId='D', ), - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode!="idle",mode!="user",mode!="system",mode!="iowait",mode!="irq",mode!="softirq"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))', legendFormat='Busy Other', - refId='E', ), - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="idle"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))', legendFormat='Idle', - refId='F', ), ], - # Extra JSON for the colors - extraJson=CPU_BASIC_COLORS, ), # Memory Basic - TimeSeries( + MyTimeSeries( title='Memory Basic', description='Basic memory usage', unit=BYTES_IEC, gridPos=GridPos(h=8, w=12, x=12, y=0), - lineWidth=1, - fillOpacity=30, - showPoints='never', - stacking={'mode': 'normal', 'group': 'A'}, - tooltipMode='all', - tooltipSort='desc', + stacking={'mode': 'normal'}, + valueMin=0, targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='node_memory_MemTotal_bytes{instance="$instance",job="$job"}', format='time_series', legendFormat='RAM Total', - refId='A', ), - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='node_memory_MemTotal_bytes{instance="$instance",job="$job"} - node_memory_MemFree_bytes{instance="$instance",job="$job"} - (node_memory_Cached_bytes{instance="$instance",job="$job"} + node_memory_Buffers_bytes{instance="$instance",job="$job"} + node_memory_SReclaimable_bytes{instance="$instance",job="$job"})', format='time_series', legendFormat='RAM Used', - refId='B', ), - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='node_memory_Cached_bytes{instance="$instance",job="$job"} + node_memory_Buffers_bytes{instance="$instance",job="$job"} + node_memory_SReclaimable_bytes{instance="$instance",job="$job"}', legendFormat='RAM Cache + Buffer', - refId='C', ), - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='node_memory_MemFree_bytes{instance="$instance",job="$job"}', legendFormat='RAM Free', - refId='D', ), - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='(node_memory_SwapTotal_bytes{instance="$instance",job="$job"} - node_memory_SwapFree_bytes{instance="$instance",job="$job"})', legendFormat='SWAP Used', - refId='E', ), ], - # Extra JSON for the colors - extraJson=MEMORY_BASIC_COLORS, + overrides=[ + # Prevent total memory from being stacked + { + 'matcher': { + 'id': 'byName', + 'options': 'RAM Total' + }, + 'properties': [ + { + 'id': 'custom.stacking', + 'value': {'mode': 'none'} + } + ] + }, + ], + ), + # Network Traffic Basic + MyTimeSeries( + title='Network Traffic Basic', + description='Basic network usage info per interface', + unit=BITS_SEC, + gridPos=GridPos(h=8, w=12, x=0, y=8), + tooltipSort='desc', + axisCenteredZero=True, + targets=[ + PromTarget( + expr='irate(node_network_receive_bytes_total{instance="$instance",job="$job"}[$__rate_interval]) * 8', + legendFormat='rx {{ device }}', + ), + PromTarget( + expr='-irate(node_network_transmit_bytes_total{instance="$instance",job="$job"}[$__rate_interval]) * 8', + legendFormat='tx {{ device }}', + ), + ], + ), + # Disk Space Basic + MyTimeSeries( + title='Disk Space Used Basic', + description='Disk space used of all filesystems mounted', + unit=PERCENT_UNIT, + gridPos=GridPos(h=8, w=12, x=12, y=8), + targets=[ + PromTarget( + expr='1 - (node_filesystem_avail_bytes{instance="$instance",job="$job",device!~"rootfs"} / node_filesystem_size_bytes{instance="$instance",job="$job",device!~"rootfs"})', + legendFormat='{{ mountpoint }}', + ), + ], ), - # TODO: Network Basic - # TODO: Disk Basic ], ).auto_panel_ids() diff --git a/grafana/provisioning/dashboards-py/node_consts.py b/grafana/provisioning/dashboards-py/node_consts.py deleted file mode 100644 index a527781..0000000 --- a/grafana/provisioning/dashboards-py/node_consts.py +++ /dev/null @@ -1,487 +0,0 @@ -# TODO: Question life decisions (I'm not sure if this is good) - -CPU_BASIC_COLORS = { - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Busy Iowait" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Idle" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy Iowait" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Idle" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy System" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy User" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy Other" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - } - ] - }, -} - -MEMORY_BASIC_COLORS = { - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Apps" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "SWAP Used" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap Used" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM Total" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.stacking", - "value": { - "group": False, - "mode": "normal" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM Cache + Buffer" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Available" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#DEDAF7", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.stacking", - "value": { - "group": False, - "mode": "normal" - } - } - ] - } - ] - } -} diff --git a/grafana/provisioning/dashboards-py/printing.dashboard.py b/grafana/provisioning/dashboards-py/printing.dashboard.py index 818291a..4488b8a 100644 --- a/grafana/provisioning/dashboards-py/printing.dashboard.py +++ b/grafana/provisioning/dashboards-py/printing.dashboard.py @@ -1,96 +1,72 @@ -from grafanalib.core import Dashboard, Templating, Template, TimeSeries, Target, GridPos +from grafanalib.core import Templating, Template, GridPos from grafanalib.formatunits import PERCENT_UNIT, SECONDS -from common import PROMETHEUS_DATASOURCE_NAME +from common import MyDashboard, MyTimeSeries, PromTarget -dashboard = Dashboard( +dashboard = MyDashboard( title='Quasar', uid='quasar', description='Printer metrics', - timezone='browser', panels=[ - TimeSeries( + MyTimeSeries( title='Ink Level', unit=PERCENT_UNIT, gridPos=GridPos(h=8, w=12, x=0, y=0), - lineWidth=2, - stacking={'group': 'A','mode': 'none'}, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, - expr='snmp_metric{name=\"ink_level\",ip=\"192.168.69.149\"} / ignoring(name) group_left() snmp_metric{name=\"ink_capacity\",ip=\"192.168.69.149\"}', + PromTarget( + expr='snmp_metric{name="ink_level",ip="192.168.69.149"} / ignoring(name) group_left() snmp_metric{name="ink_capacity",ip="192.168.69.149"}', legendFormat='Left Printer {{ip}}', - refId='A', ), - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, - expr='snmp_metric{name=\"ink_level\",ip=\"192.168.69.208\"} / ignoring(name) group_left() snmp_metric{name=\"ink_capacity\",ip=\"192.168.69.208\"}', + PromTarget( + expr='snmp_metric{name="ink_level",ip="192.168.69.208"} / ignoring(name) group_left() snmp_metric{name="ink_capacity",ip="192.168.69.208"}', legendFormat='Right Printer {{ip}}', - refId='B', ), ], ), - TimeSeries( + MyTimeSeries( title='# of Pages Printed', gridPos=GridPos(h=8, w=12, x=12, y=0), - lineWidth=2, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, - expr='snmp_metric{name=\"page_count\"}', + PromTarget( + expr='snmp_metric{name="page_count"}', legendFormat='{{ip}}', - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='SNMP Request Duration', unit=SECONDS, gridPos=GridPos(h=8, w=12, x=0, y=8), - lineWidth=2, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='snmp_request_duration_sum/snmp_request_duration_count', legendFormat="__auto", - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Print Jobs Recieved', gridPos=GridPos(h=8, w=12, x=12, y=8), - lineWidth=2, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='rate(print_jobs_recieved_total[$__rate_interval])', legendFormat="__auto", - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Error', gridPos=GridPos(h=8, w=12, x=0, y=16), - lineWidth=2, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='snmp_error', legendFormat="__auto", - refId='A', ), ], ), diff --git a/grafana/provisioning/dashboards-py/sceinfra.dashboard.py b/grafana/provisioning/dashboards-py/sceinfra.dashboard.py index 67bdd88..dc76d55 100644 --- a/grafana/provisioning/dashboards-py/sceinfra.dashboard.py +++ b/grafana/provisioning/dashboards-py/sceinfra.dashboard.py @@ -1,10 +1,7 @@ from grafanalib.core import ( - Dashboard, Templating, Template, Threshold, - TimeSeries, - Target, GridPos, BarGauge, Stat, @@ -18,14 +15,13 @@ BITS_SEC, ) -from common import PROMETHEUS_DATASOURCE_NAME +from common import MyDashboard, MyTimeSeries, PromTarget -dashboard = Dashboard( +dashboard = MyDashboard( title='SCE Infra', uid='sceinfra', description='SCE services', - timezone='browser', panels=[ Stat( title='Container Uptime', @@ -36,49 +32,37 @@ ], format=SECONDS, targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='time() - process_start_time_seconds', legendFormat='{{job}}', - refId='A', ), - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, - expr='time() - container_start_time_seconds{image=~\"clark.*|nginx|mongo\"}', + PromTarget( + expr='time() - container_start_time_seconds{image=~"clark.*|nginx|mongo"}', legendFormat='{{name}}', - refId='B', ), ], ), - TimeSeries( + MyTimeSeries( title='Metric Health', unit=NUMBER_FORMAT, gridPos=GridPos(h=8, w=12, x=0, y=16), - lineWidth=2, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='up', legendFormat="{{instance}}", - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Container Last Seen (Clark Only)', unit=SECONDS, gridPos=GridPos(h=8, w=12, x=12, y=16), - lineWidth=2, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, - expr='time() - container_last_seen{image=~\"clark.*|nginx|mongo\"}', + PromTarget( + expr='time() - container_last_seen{image=~"clark.*|nginx|mongo"}', legendFormat="{{image}}", - refId='A', ), ], ), diff --git a/grafana/provisioning/dashboards-py/sceta.dashboard.py b/grafana/provisioning/dashboards-py/sceta.dashboard.py index 0adae71..f41aa77 100644 --- a/grafana/provisioning/dashboards-py/sceta.dashboard.py +++ b/grafana/provisioning/dashboards-py/sceta.dashboard.py @@ -1,93 +1,71 @@ -from grafanalib.core import Dashboard, Templating, Template, TimeSeries, Target, GridPos +from grafanalib.core import Templating, Template, GridPos from grafanalib.formatunits import PERCENT_UNIT, SECONDS, NUMBER_FORMAT -from common import PROMETHEUS_DATASOURCE_NAME +from common import MyDashboard, MyTimeSeries, PromTarget -dashboard = Dashboard( +dashboard = MyDashboard( title='SCEta', uid='sceta', description='Transit prediction service metrics', - timezone='browser', panels=[ - TimeSeries( + MyTimeSeries( title='Cache Update Errors', unit=NUMBER_FORMAT, gridPos=GridPos(h=8, w=12, x=0, y=0), - lineWidth=2, - stacking={'group': 'A','mode': 'none'}, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='cache_update_errors_total', legendFormat="__auto", - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='HTTP Response Codes', gridPos=GridPos(h=8, w=12, x=12, y=0), unit=NUMBER_FORMAT, - lineWidth=2, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, - expr='http_code_total{job=\"sceta-server\", path!=\"/metrics\"}', + PromTarget( + expr='http_code_total{job="sceta-server", path!="/metrics"}', legendFormat='{{code}} {{path}}', - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Cache Age', unit=SECONDS, gridPos=GridPos(h=8, w=12, x=0, y=8), - lineWidth=2, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='time() - cache_last_updated', legendFormat="__auto", - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='511 API Response Codes', gridPos=GridPos(h=8, w=12, x=12, y=8), unit=NUMBER_FORMAT, - lineWidth=2, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='api_response_codes_total', legendFormat="__auto", - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='511 API Latency', gridPos=GridPos(h=8, w=12, x=0, y=16), unit=SECONDS, - lineWidth=2, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='api_latency_sum / api_latency_count', legendFormat="__auto", - refId='A', ), ], ), diff --git a/grafana/provisioning/dashboards-py/scetv.dashboard.py b/grafana/provisioning/dashboards-py/scetv.dashboard.py index ddcebd3..c8b8d3f 100644 --- a/grafana/provisioning/dashboards-py/scetv.dashboard.py +++ b/grafana/provisioning/dashboards-py/scetv.dashboard.py @@ -1,10 +1,7 @@ from grafanalib.core import ( - Dashboard, Templating, Template, Threshold, - TimeSeries, - Target, GridPos, BarGauge, Stat, @@ -18,14 +15,13 @@ BITS_SEC, ) -from common import PROMETHEUS_DATASOURCE_NAME +from common import MyDashboard, MyTimeSeries, PromTarget -dashboard = Dashboard( +dashboard = MyDashboard( title='SCE TV', uid='scetv', description='SCE video streaming service', - timezone='browser', panels=[ BarGauge( title='HTTP Requests', @@ -35,11 +31,9 @@ ], gridPos=GridPos(h=8, w=12, x=0, y=0), targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, - expr='http_request_count_total{endpoint!=\"/metrics\"}', + PromTarget( + expr='http_request_count_total{endpoint!="/metrics"}', legendFormat='{{endpoint}}', - refId='A', ), ], ), @@ -52,139 +46,105 @@ ], format=SECONDS, targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, - expr='time() - process_start_time_seconds{job=\"sce-tv\"}', + PromTarget( + expr='time() - process_start_time_seconds{job="sce-tv"}', legendFormat='{{job}}', - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Data Downloaded', unit=BYTES, gridPos=GridPos(h=8, w=12, x=0, y=8), - lineWidth=2, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='data_downloaded_total', legendFormat="{{job}}", - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='API data rate', gridPos=GridPos(h=8, w=12, x=12, y=8), - lineWidth=2, - tooltipMode='all', tooltipSort='desc', unit=BITS_SEC, targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, - expr='data_downloaded_total{job=\"sce-tv\"} * 8 / download_time_sum', + PromTarget( + expr='data_downloaded_total{job="sce-tv"} * 8 / download_time_sum', legendFormat="__auto", - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Download Time', gridPos=GridPos(h=8, w=12, x=0, y=16), - lineWidth=2, - tooltipMode='all', tooltipSort='desc', unit=SECONDS, targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='download_time_sum', legendFormat="{{job}}", - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Cache Hit/Miss', gridPos=GridPos(h=8, w=12, x=12, y=16), - lineWidth=2, - tooltipMode='all', tooltipSort='desc', unit=NUMBER_FORMAT, targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='cache_miss_count_total', legendFormat="__auto", - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Total Videos Downloaded', gridPos=GridPos(h=8, w=12, x=0, y=24), - lineWidth=2, - tooltipMode='all', tooltipSort='desc', unit=NUMBER_FORMAT, targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='video_download_count_total', legendFormat="__auto", - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Cache Size Bytes', gridPos=GridPos(h=8, w=12, x=12, y=24), - lineWidth=2, - tooltipMode='all', tooltipSort='desc', unit=BYTES, targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='cache_size_bytes', legendFormat="__auto", - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Total YouTube Videos Played', gridPos=GridPos(h=8, w=12, x=0, y=32), - lineWidth=2, - tooltipMode='all', tooltipSort='desc', unit=BYTES, targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='video_count_total', legendFormat="__auto", - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Cache Size', gridPos=GridPos(h=8, w=12, x=12, y=32), - lineWidth=2, - tooltipMode='all', tooltipSort='desc', unit=NUMBER_FORMAT, targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, - expr='cache_size{job=\"sce-tv\"}', + PromTarget( + expr='cache_size{job="sce-tv"}', legendFormat="__auto", - refId='A', ), ], ), diff --git a/grafana/provisioning/dashboards-py/ssh.dashboard.py b/grafana/provisioning/dashboards-py/ssh.dashboard.py index 394c68c..93bd12f 100644 --- a/grafana/provisioning/dashboards-py/ssh.dashboard.py +++ b/grafana/provisioning/dashboards-py/ssh.dashboard.py @@ -1,7 +1,7 @@ -from grafanalib.core import Dashboard, Templating, Stat, TimeSeries, Target, GridPos +from grafanalib.core import Templating, Stat, GridPos from grafanalib.formatunits import SECONDS, TRUE_FALSE, DAYS -from common import PROMETHEUS_DATASOURCE_NAME +from common import MyDashboard, MyTimeSeries, PromTarget time_since_ssh_overrides=[ { @@ -37,63 +37,48 @@ } ] -dashboard = Dashboard( +dashboard = MyDashboard( title='SSH Tunnel Health', uid='ssh', description='Health of SSH Tunnel', - timezone='browser', panels=[ - TimeSeries( + MyTimeSeries( title='Time since last health check', unit=SECONDS, gridPos=GridPos(h=9, w=12, x=0, y=0), - lineWidth=2, - stacking={'group': 'A','mode': 'none'}, targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='time() - last_health_check_request', legendFormat='{{job}}', - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Time since SSH tunnel reopened', unit=SECONDS, gridPos=GridPos(h=9, w=12, x=12, y=0), overrides=time_since_ssh_overrides, - lineWidth=2, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='time() - ssh_tunnel_last_opened', legendFormat='{{job}}', - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Container Health', unit=TRUE_FALSE, # idk if we should have it as true/false instead of 1/0 lol gridPos=GridPos(h=8, w=12, x=0, y=8), - lineWidth=2, - tooltipMode='all', tooltipSort='desc', targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, + PromTarget( expr='up{job=~"led-sign|delen|sce-printer"}', legendFormat="{{job}}", - refId='A', ), - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, - expr='up{instance=\"prometheus-clark-sshtunnel:9090\"}', + PromTarget( + expr='up{instance="prometheus-clark-sshtunnel:9090"}', legendFormat="{{job}}", - refId='B', ), ], ), @@ -105,11 +90,9 @@ reduceCalc='lastNotNull', thresholds=uptime_thresholds, targets=[ - Target( - datasource=PROMETHEUS_DATASOURCE_NAME, - expr='time() - process_start_time_seconds{job=~\"led-sign|delen|sce-printer\"}', + PromTarget( + expr='time() - process_start_time_seconds{job=~"led-sign|delen|sce-printer"}', legendFormat="{{job}}", - refId='A', ), ], ),