Skip to content

Commit b042dc8

Browse files
Fixed node health check prometheus query (#51)
1 parent 43a499c commit b042dc8

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

tools/system-health-check/health_check.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,7 @@ def check_pods_with_resources():
111 111
AND
112 112
(
113 113
(
114-
100 * avg(1 - rate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance) <= """ + str(node_cpu_threshold) + """
114+
100 * avg(1 - rate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance) >= """ + str(node_cpu_threshold) + """
115 115
)
116 116
AND
117 117
(
@@ -121,7 +121,7 @@ def check_pods_with_resources():
121 121
AND
122 122
(
123 123
(
124-
100 * avg(1 - ((avg_over_time(node_memory_MemFree_bytes[5m]) + avg_over_time(node_memory_Cached_bytes[5m]) + avg_over_time(node_memory_Buffers_bytes[5m])) / avg_over_time(node_memory_MemTotal_bytes[5m] ))) by (instance) <= """ + str(node_memory_threshold) + """
124+
100 * avg(1 - ((avg_over_time(node_memory_MemFree_bytes[5m]) + avg_over_time(node_memory_Cached_bytes[5m]) + avg_over_time(node_memory_Buffers_bytes[5m])) / avg_over_time(node_memory_MemTotal_bytes[5m] ))) by (instance) >= """ + str(node_memory_threshold) + """
125 125
)
126 126
AND
127 127
(
@@ -135,7 +135,7 @@ def check_pods_with_resources():
135 135
)
136 136
OR
137 137
(
138-
((sum(kube_node_status_condition{condition="Ready", status="false"} * on (node) group_left(instance) label_replace(kube_node_info,"instance", "$1:9100", "internal_ip", "(.*)")) by (instance)) == 0)
138+
((sum(kube_node_status_condition{condition="Ready", status="true"} * on (node) group_left(instance) label_replace(kube_node_info,"instance", "$1:9100", "internal_ip", "(.*)")) by (instance)) == 0)
139 139
)
140 140
) == 0
141 141
"""
@@ -301,4 +301,4 @@ def main():
301 301
exit(1)
302 302

303 303
if __name__ == '__main__':
304-
main()
304+
main()

0 commit comments

Comments
 (0)