diff --git a/alerts/alerts.libsonnet b/alerts/alerts.libsonnet index 23f10aa..8cdb741 100644 --- a/alerts/alerts.libsonnet +++ b/alerts/alerts.libsonnet @@ -45,6 +45,38 @@ dashboard_url: $._config.openCostOverviewDashboardUrl, }, }, + { + alert: 'OpenCostAnomalyDetected', + expr: ||| + 1 - + ( + avg_over_time( + sum( + node_total_hourly_cost{ + %s + } + ) [7d:1h] + ) + / + avg_over_time( + sum( + node_total_hourly_cost{ + %s + } + ) [3h:30m] + ) + ) > %s + ||| % [$._config.openCostSelector, $._config.openCostSelector, $._config.alerts.anomaly.anomalyPercentageThreshold / 100], + labels: { + severity: 'warning', + }, + 'for': '10m', + annotations: { + summary: 'OpenCost Cost Anomaly Detected', + description: 'A significant increase in cluster costs has been detected. The average hourly cost over the last 3 hours exceeds the 7-day average by more than %s%%. This could indicate unexpected resource usage or cost-related changes in the cluster.' % $._config.alerts.anomaly.anomalyPercentageThreshold, + dashboard_url: $._config.openCostOverviewDashboardUrl, + }, + }, ], }, ]), diff --git a/config.libsonnet b/config.libsonnet index 8555503..6408f2a 100644 --- a/config.libsonnet +++ b/config.libsonnet @@ -24,8 +24,9 @@ local annotation = g.dashboard.annotation; enabled: true, monthlyCostThreshold: 200, }, - anomalies: { + anomaly: { enabled: true, + anomalyPercentageThreshold: 20, }, }, diff --git a/dashboards/opencost-namespace.libsonnet b/dashboards/opencost-namespace.libsonnet index 1e7570b..104a606 100644 --- a/dashboards/opencost-namespace.libsonnet +++ b/dashboards/opencost-namespace.libsonnet @@ -77,7 +77,7 @@ local pieQueryOptions = pieChartPanel.queryOptions; namespaceVariable, ], - local openCostNamespaceHourlyCostQuery = ||| + local openCostHourlyCostQuery = ||| sum( sum( container_memory_allocation_bytes{job=~"$job", namespace=~"$namespace"} @@ -109,14 +109,14 @@ local pieQueryOptions = pieChartPanel.queryOptions; ) by (namespace) |||, - local 
openCostNamespaceHourlyCostStatPanel = + local openCostHourlyCostStatPanel = statPanel.new( 'Hourly Cost', ) + stQueryOptions.withTargets( prometheus.new( '$datasource', - openCostNamespaceHourlyCostQuery, + openCostHourlyCostQuery, ) ) + stStandardOptions.withUnit('currencyUSD') + @@ -133,16 +133,16 @@ local pieQueryOptions = pieChartPanel.queryOptions; ]), - local openCostNamespaceDailyCostQuery = std.strReplace(openCostNamespaceHourlyCostQuery, '* 1', '* 24'), + local openCostDailyCostQuery = std.strReplace(openCostHourlyCostQuery, '* 1', '* 24'), - local openCostNamespaceDailyCostStatPanel = + local openCostDailyCostStatPanel = statPanel.new( 'Daily Cost', ) + stQueryOptions.withTargets( prometheus.new( '$datasource', - openCostNamespaceDailyCostQuery, + openCostDailyCostQuery, ) ) + stStandardOptions.withUnit('currencyUSD') + @@ -158,16 +158,16 @@ local pieQueryOptions = pieChartPanel.queryOptions; stStandardOptions.threshold.step.withColor('green'), ]), - local openCostNamespaceMonthlyCostQuery = std.strReplace(openCostNamespaceHourlyCostQuery, '* 1', '* 730'), + local openCostMonthlyCostQuery = std.strReplace(openCostHourlyCostQuery, '* 1', '* 730'), - local openCostNamespaceMonthlyCostStatPanel = + local openCostMonthlyCostStatPanel = statPanel.new( 'Monthly Cost', ) + stQueryOptions.withTargets( prometheus.new( '$datasource', - openCostNamespaceMonthlyCostQuery, + openCostMonthlyCostQuery, ) ) + stStandardOptions.withUnit('currencyUSD') + @@ -183,7 +183,7 @@ local pieQueryOptions = pieChartPanel.queryOptions; stStandardOptions.threshold.step.withColor('green'), ]), - local openCostNamespaceMonthlyRamCostQuery = ||| + local openCostMonthlyRamCostQuery = ||| sum( sum( container_memory_allocation_bytes{job=~"$job", namespace=~"$namespace"} @@ -201,14 +201,14 @@ local pieQueryOptions = pieChartPanel.queryOptions; ) |||, - local openCostNamespaceMonthlyRamCostStatPanel = + local openCostMonthlyRamCostStatPanel = statPanel.new( 'Monthly Ram Cost', ) + 
stQueryOptions.withTargets( prometheus.new( '$datasource', - openCostNamespaceMonthlyRamCostQuery, + openCostMonthlyRamCostQuery, ) ) + stStandardOptions.withUnit('currencyUSD') + @@ -224,7 +224,7 @@ local pieQueryOptions = pieChartPanel.queryOptions; stStandardOptions.threshold.step.withColor('green'), ]), - local openCostNamespaceMonthlyCpuCostQuery = ||| + local openCostMonthlyCpuCostQuery = ||| sum( sum( container_cpu_allocation{job=~"$job", namespace=~"$namespace"} @@ -242,14 +242,14 @@ local pieQueryOptions = pieChartPanel.queryOptions; ) |||, - local openCostNamespaceMonthlyCpuCostStatPanel = + local openCostMonthlyCpuCostStatPanel = statPanel.new( 'Monthly CPU Cost', ) + stQueryOptions.withTargets( prometheus.new( '$datasource', - openCostNamespaceMonthlyCpuCostQuery, + openCostMonthlyCpuCostQuery, ) ) + stStandardOptions.withUnit('currencyUSD') + @@ -265,31 +265,33 @@ local pieQueryOptions = pieChartPanel.queryOptions; stStandardOptions.threshold.step.withColor('green'), ]), - local openCostNamespaceMonthlyPVCostQuery = ||| + local openCostMonthlyPVCostQuery = ||| sum( sum( - label_replace( - kube_persistentvolume_capacity_bytes{job=~"$job"}, - "volumename", "$1", "persistentvolume", "(.*)" - ) / 1024 / 1024 / 1024 - ) by (volumename) + kube_persistentvolume_capacity_bytes{job=~"$job"} + / 1024 / 1024 / 1024 + ) by (persistentvolume) * sum( pv_hourly_cost{job=~"$job"} ) by (persistentvolume) - ) * on(volumename) group_left(namespace) ( - kube_persistentvolumeclaim_info{job=~"$job", namespace=~"$namespace"} + * on(persistentvolume) group_left(namespace) ( + label_replace( + kube_persistentvolumeclaim_info{job=~"$job", namespace=~"$namespace"}, + "persistentvolume", "$1", "volumename", "(.*)" + ) + ) ) * 730 |||, - local openCostNamespaceMonthlyPVCostStatPanel = + local openCostMonthlyPVCostStatPanel = statPanel.new( 'Monthly PV Cost', ) + stQueryOptions.withTargets( prometheus.new( '$datasource', - openCostNamespaceMonthlyPVCostQuery, + 
openCostMonthlyPVCostQuery, ) ) + stStandardOptions.withUnit('currencyUSD') + @@ -305,490 +307,497 @@ local pieQueryOptions = pieChartPanel.queryOptions; stStandardOptions.threshold.step.withColor('green'), ]), - // local openCostNodeMonthlyCpuCostQuery = ||| - // sum( - // kube_node_status_capacity{ - // job=~"$job", - // resource="cpu", - // unit="core" - // } - // ) by (node) - // * - // on(node) group_left(instance_type, arch) - // sum( - // node_cpu_hourly_cost{ - // job=~"$job" - // } - // ) by (node, instance_type, arch) - // * 730 - // |||, - // - // local openCostNodeMonthlyRamCostQuery = ||| - // sum( - // kube_node_status_capacity{ - // job=~"$job", - // resource="memory", - // unit="byte" - // } - // ) by (node) - // / 1024 / 1024 / 1024 - // * - // on(node) group_left(instance_type, arch) - // sum( - // node_ram_hourly_cost{ - // job=~"$job" - // } - // ) by (node, instance_type, arch) - // * 730 - // |||, - // - // local openCostHourlyCostTimeSeriesPanel = - // timeSeriesPanel.new( - // 'Hourly Cost', - // ) + - // tsQueryOptions.withTargets( - // [ - // prometheus.new( - // '$datasource', - // openCostHourlyCostQuery, - // ) + - // prometheus.withLegendFormat('Hourly Cost') + - // prometheus.withInterval('1m'), - // ] - // ) + - // tsStandardOptions.withUnit('currencyUSD') + - // tsLegend.withShowLegend(false) + - // tsCustom.withSpanNulls(false), - // - // local openCostDailyCostTimeSeriesPanel = - // timeSeriesPanel.new( - // 'Daily Cost', - // ) + - // tsQueryOptions.withTargets( - // [ - // prometheus.new( - // '$datasource', - // openCostDailyCostQuery, - // ) + - // prometheus.withLegendFormat('Daily Cost') + - // prometheus.withInterval('1m'), - // ] - // ) + - // tsStandardOptions.withUnit('currencyUSD') + - // tsLegend.withShowLegend(false) + - // tsCustom.withSpanNulls(false), - // - // local openCostMonthlyCostTimeSeriesPanel = - // timeSeriesPanel.new( - // 'Monthly Cost', - // ) + - // tsQueryOptions.withTargets( - // [ - // 
prometheus.new( - // '$datasource', - // openCostMonthlyCostQuery, - // ) + - // prometheus.withLegendFormat('Monthly Cost') + - // prometheus.withInterval('1m'), - // ] - // ) + - // tsStandardOptions.withUnit('currencyUSD') + - // tsLegend.withShowLegend(false) + - // tsCustom.withSpanNulls(false), - // - // - // local openCostResourceCostPieChartPanel = - // pieChartPanel.new( - // 'Cost by Resource' - // ) + - // pieQueryOptions.withTargets( - // [ - // prometheus.new( - // '$datasource', - // openCostMonthlyCpuCostQuery, - // ) + - // prometheus.withLegendFormat('CPU') + - // prometheus.withInstant(true), - // prometheus.new( - // '$datasource', - // openCostMonthlyRamCostQuery, - // ) + - // prometheus.withLegendFormat('RAM') + - // prometheus.withInstant(true), - // prometheus.new( - // '$datasource', - // openCostMonthlyPVCostQuery, - // ) + - // prometheus.withLegendFormat('PV') + - // prometheus.withInstant(true), - // ] - // ) + - // pieOptions.withPieType('pie') + - // pieStandardOptions.withUnit('currencyUSD') + - // pieOptions.legend.withAsTable(true) + - // pieOptions.legend.withPlacement('right') + - // pieOptions.legend.withDisplayMode('table') + - // pieOptions.legend.withValues(['value', 'percent']) + - // pieOptions.legend.withSortDesc(true), - // - // local openCostNamespaceMonthlyCostQuery = ||| - // topk(10, - // sum( - // sum( - // container_memory_allocation_bytes{job=~"$job"} - // ) - // by (namespace, instance) - // * on(instance) group_left() ( - // node_ram_hourly_cost{job=~"$job"} - // / 1024 / 1024 / 1024 * 730 - // + on(node,instance_type) group_left() - // label_replace - // ( - // kube_node_labels{job=~"$job"}, "instance_type", "$1", "label_node_kubernetes_io_instance_type", "(.*)" - // ) * 0 - // ) - // + - // sum( - // container_cpu_allocation{job=~"$job"} - // ) - // by (namespace,instance) - // * on(instance) group_left() ( - // node_cpu_hourly_cost{job=~"$job"} - // * 730 - // + on(node, instance_type) group_left() - // 
label_replace - // ( - // kube_node_labels{job=~"$job"}, "instance_type", "$1", "label_node_kubernetes_io_instance_type", "(.*)" - // ) * 0 - // ) - // ) by (namespace) - // ) - // |||, - // - // local openCostNamespaceCostPieChartPanel = - // pieChartPanel.new( - // 'Cost by Namespace' - // ) + - // pieQueryOptions.withTargets( - // prometheus.new( - // '$datasource', - // openCostNamespaceMonthlyCostQuery, - // ) + - // prometheus.withLegendFormat('{{ namespace }}') + - // prometheus.withInstant(true), - // ) + - // pieOptions.withPieType('pie') + - // pieStandardOptions.withUnit('currencyUSD') + - // pieOptions.legend.withAsTable(true) + - // pieOptions.legend.withPlacement('right') + - // pieOptions.legend.withDisplayMode('table') + - // pieOptions.legend.withValues(['value', 'percent']) + - // pieOptions.legend.withSortDesc(true), - // - // local openCostInstanceTypeCostQuery = ||| - // topk(10, - // sum( - // node_total_hourly_cost{ - // job=~"$job" - // } - // ) by (instance_type) * 730 - // ) - // |||, - // - // local openCostInstanceTypeCostPieChartPanel = - // pieChartPanel.new( - // 'Cost by Instance Type' - // ) + - // pieQueryOptions.withTargets( - // prometheus.new( - // '$datasource', - // openCostInstanceTypeCostQuery, - // ) + - // prometheus.withLegendFormat('{{ instance_type }}') + - // prometheus.withInstant(true), - // ) + - // pieOptions.withPieType('pie') + - // pieStandardOptions.withUnit('currencyUSD') + - // pieOptions.legend.withAsTable(true) + - // pieOptions.legend.withPlacement('right') + - // pieOptions.legend.withDisplayMode('table') + - // pieOptions.legend.withValues(['value', 'percent']) + - // pieOptions.legend.withSortDesc(true), - // - // local openCostNodeTotalCostQuery = ||| - // sum( - // node_total_hourly_cost{ - // job=~"$job" - // } - // ) by (node, instance_type, arch) - // * 730 - // |||, - // - // local openCostNodeTable = - // tablePanel.new( - // 'Nodes Monthly Cost', - // ) + - // 
tbStandardOptions.withUnit('currencyUSD') + - // tbOptions.withSortBy( - // tbOptions.sortBy.withDisplayName('Total Cost') + - // tbOptions.sortBy.withDesc(true) - // ) + - // tbOptions.footer.withEnablePagination(true) + - // tbQueryOptions.withTargets( - // [ - // prometheus.new( - // '$datasource', - // openCostNodeMonthlyCpuCostQuery, - // ) + - // prometheus.withInstant(true) + - // prometheus.withFormat('table'), - // prometheus.new( - // '$datasource', - // openCostNodeMonthlyRamCostQuery, - // ) + - // prometheus.withInstant(true) + - // prometheus.withFormat('table'), - // prometheus.new( - // '$datasource', - // openCostNodeTotalCostQuery, - // ) + - // prometheus.withInstant(true) + - // prometheus.withFormat('table'), - // ] - // ) + - // tbQueryOptions.withTransformations([ - // tbQueryOptions.transformation.withId( - // 'merge' - // ), - // tbQueryOptions.transformation.withId( - // 'organize' - // ) + - // tbQueryOptions.transformation.withOptions( - // { - // renameByName: { - // node: 'Node', - // instance_type: 'Instance Type', - // arch: 'Architecture', - // 'Value #A': 'CPU Cost', - // 'Value #B': 'RAM Cost', - // 'Value #C': 'Total Cost', - // }, - // indexByName: { - // node: 0, - // instance_type: 1, - // arch: 2, - // 'Value #A': 3, - // 'Value #B': 4, - // 'Value #C': 5, - // }, - // excludeByName: { - // Time: true, - // job: true, - // }, - // } - // ), - // ]), - // - // local openCostPVTotalGibQuery = ||| - // sum( - // kube_persistentvolume_capacity_bytes{ - // job=~"$job" - // } - // / 1024 / 1024 / 1024 - // ) by (persistentvolume) - // |||, - // - // local openCostPVMonthlyCostQuery = ||| - // sum( - // kube_persistentvolume_capacity_bytes{ - // job=~"$job" - // } - // / 1024 / 1024 / 1024 - // ) by (persistentvolume) - // * - // sum( - // pv_hourly_cost{ - // job=~"$job" - // } - // * 730 - // ) by (persistentvolume) - // |||, - // - // local openCostPVTable = - // tablePanel.new( - // 'Persistent Volumes Monthly Cost' - // ) + - 
// tbStandardOptions.withUnit('decgbytes') + - // tbOptions.withSortBy( - // tbOptions.sortBy.withDisplayName('Total Cost') + - // tbOptions.sortBy.withDesc(true) - // ) + - // tbOptions.footer.withEnablePagination(true) + - // tbQueryOptions.withTargets( - // [ - // prometheus.new( - // '$datasource', - // openCostPVTotalGibQuery, - // ) + - // prometheus.withInstant(true) + - // prometheus.withFormat('table'), - // prometheus.new( - // '$datasource', - // openCostPVMonthlyCostQuery, - // ) + - // prometheus.withInstant(true) + - // prometheus.withFormat('table'), - // ] - // ) + - // tbQueryOptions.withTransformations([ - // tbQueryOptions.transformation.withId( - // 'merge' - // ), - // tbQueryOptions.transformation.withId( - // 'organize' - // ) + - // tbQueryOptions.transformation.withOptions( - // { - // renameByName: { - // persistentvolume: 'Persistent Volume', - // 'Value #A': 'Total GiB', - // 'Value #B': 'Total Cost', - // }, - // indexByName: { - // persistentvolume: 0, - // 'Value #A': 1, - // 'Value #B': 2, - // }, - // excludeByName: { - // Time: true, - // job: true, - // }, - // } - // ), - // ]) + - // tbStandardOptions.withOverrides([ - // tbOverride.byName.new('Total Cost') + - // tbOverride.byName.withPropertiesFromOptions( - // tbStandardOptions.withUnit('currencyUSD') - // ), - // ]), - // - // local openCostNamespaceMonthlyCostQueryOffset7d = std.strReplace(openCostNamespaceMonthlyCostQuery, '{job=~"$job"}', '{job=~"$job"} offset 7d'), - // local openCostNamespaceMonthlyCostQueryOffset30d = std.strReplace(openCostNamespaceMonthlyCostQuery, '{job=~"$job"}', '{job=~"$job"} offset 30d'), - // - // local openCostNamespaceTable = - // tablePanel.new( - // 'Namespace Monthly Cost', - // ) + - // tbStandardOptions.withUnit('currencyUSD') + - // tbStandardOptions.thresholds.withSteps([ - // tbStandardOptions.threshold.step.withValue(0) + - // tbStandardOptions.threshold.step.withColor('green'), - // tbStandardOptions.threshold.step.withValue(5) + - 
// tbStandardOptions.threshold.step.withColor('yellow'), - // tbStandardOptions.threshold.step.withValue(10) + - // tbStandardOptions.threshold.step.withColor('red'), - // ]) + - // tbOptions.withSortBy( - // tbOptions.sortBy.withDisplayName('Total Cost (Today)') + - // tbOptions.sortBy.withDesc(true) - // ) + - // tbOptions.footer.withEnablePagination(true) + - // tbQueryOptions.withTargets( - // [ - // prometheus.new( - // '$datasource', - // openCostNamespaceMonthlyCostQuery, - // ) + - // prometheus.withInstant(true) + - // prometheus.withFormat('table'), - // prometheus.new( - // '$datasource', - // ||| - // %s - // / - // %s - // * 100 - // - 100 - // ||| % [ - // openCostNamespaceMonthlyCostQuery, - // openCostNamespaceMonthlyCostQueryOffset7d, - // ], - // ) + - // prometheus.withInstant(true) + - // prometheus.withFormat('table'), - // prometheus.new( - // '$datasource', - // ||| - // %s - // / - // %s - // * 100 - // - 100 - // ||| % [ - // openCostNamespaceMonthlyCostQuery, - // openCostNamespaceMonthlyCostQueryOffset30d, - // ], - // ) + - // prometheus.withInstant(true) + - // prometheus.withFormat('table'), - // ] - // ) + - // tbQueryOptions.withTransformations([ - // tbQueryOptions.transformation.withId( - // 'merge' - // ), - // tbQueryOptions.transformation.withId( - // 'organize' - // ) + - // tbQueryOptions.transformation.withOptions( - // { - // renameByName: { - // namespace: 'Namespace', - // 'Value #A': 'Total Cost (Today)', - // 'Value #B': 'Cost Difference (7d)', - // 'Value #C': 'Cost Difference (30d)', - // }, - // indexByName: { - // namespace: 0, - // 'Value #A': 1, - // 'Value #B': 2, - // 'Value #C': 3, - // }, - // excludeByName: { - // Time: true, - // job: true, - // }, - // } - // ), - // ]) + - // tbStandardOptions.withOverrides([ - // tbOverride.byName.new('Cost Difference (7d)') + - // tbOverride.byName.withPropertiesFromOptions( - // tbStandardOptions.withUnit('percent') + - // tbFieldConfig.defaults.custom.withCellOptions( - 
// { type: 'color-background' } // TODO(adinhodovic): Use jsonnet lib - // ) + - // tbStandardOptions.color.withMode('thresholds') - // ), - // tbOverride.byName.new('Cost Difference (30d)') + - // tbOverride.byName.withPropertiesFromOptions( - // tbStandardOptions.withUnit('percent') + - // tbFieldConfig.defaults.custom.withCellOptions( - // { type: 'color-background' } // TODO(adinhodovic): Use jsonnet lib - // ) + - // tbStandardOptions.color.withMode('thresholds') - // ), - // ]), - - local openCostNamespaceSummaryRow = + local openCostDailyCostTimeSeriesPanel = + timeSeriesPanel.new( + 'Daily Cost', + ) + + tsQueryOptions.withTargets( + [ + prometheus.new( + '$datasource', + openCostDailyCostQuery, + ) + + prometheus.withLegendFormat('Daily Cost') + + prometheus.withInterval('1m'), + ] + ) + + tsStandardOptions.withUnit('currencyUSD') + + tsLegend.withShowLegend(false) + + tsCustom.withSpanNulls(false), + + local openCostMonthlyCostTimeSeriesPanel = + timeSeriesPanel.new( + 'Monthly Cost', + ) + + tsQueryOptions.withTargets( + [ + prometheus.new( + '$datasource', + openCostMonthlyCostQuery, + ) + + prometheus.withLegendFormat('Monthly Cost') + + prometheus.withInterval('1m'), + ] + ) + + tsStandardOptions.withUnit('currencyUSD') + + tsLegend.withShowLegend(false) + + tsCustom.withSpanNulls(false), + + local openCostResourceCostPieChartPanel = + pieChartPanel.new( + 'Cost by Resource' + ) + + pieQueryOptions.withTargets( + [ + prometheus.new( + '$datasource', + openCostMonthlyCpuCostQuery, + ) + + prometheus.withLegendFormat('CPU') + + prometheus.withInstant(true), + prometheus.new( + '$datasource', + openCostMonthlyRamCostQuery, + ) + + prometheus.withLegendFormat('RAM') + + prometheus.withInstant(true), + prometheus.new( + '$datasource', + openCostMonthlyPVCostQuery, + ) + + prometheus.withLegendFormat('PV') + + prometheus.withInstant(true), + ] + ) + + pieOptions.withPieType('pie') + + pieStandardOptions.withUnit('currencyUSD') + + 
pieOptions.legend.withAsTable(true) + + pieOptions.legend.withPlacement('right') + + pieOptions.legend.withDisplayMode('table') + + pieOptions.legend.withValues(['value', 'percent']) + + pieOptions.legend.withSortDesc(true), + + local openCostPodMonthlyCostQuery = ||| + topk(10, + sum( + sum(container_memory_allocation_bytes{job=~"$job", namespace=~"$namespace"}) by (instance, pod) + * on(instance) group_left() ( + node_ram_hourly_cost{job=~"$job"} / 1024 / 1024 / 1024 * 730 + + on(node, instance_type) group_left() + label_replace + ( + kube_node_labels{job=~"$job"}, "instance_type", "$1", "label_node_kubernetes_io_instance_type", "(.*)" + ) * 0 + ) + + + sum(container_cpu_allocation{job=~"$job", namespace=~"$namespace"}) by (instance, pod) + * on(instance) group_left() ( + node_cpu_hourly_cost{job=~"$job"} * 730 + + on(node,instance_type) group_left() + label_replace + ( + kube_node_labels{job=~"$job"}, "instance_type", "$1", "label_node_kubernetes_io_instance_type", "(.*)" + ) * 0 + ) + ) by (pod) + ) + |||, + + local openCostPodMonthlyCostQueryOffset7d = std.strReplace(std.strReplace(openCostPodMonthlyCostQuery, '{job=~"$job"}', '{job=~"$job"} offset 7d'), 'namespace=~"$namespace"}', 'namespace=~"$namespace"} offset 7d'),  // offset is appended to both selector shapes so every series in the query is shifted + local openCostPodMonthlyCostQueryOffset30d = std.strReplace(std.strReplace(openCostPodMonthlyCostQuery, '{job=~"$job"}', '{job=~"$job"} offset 30d'), 'namespace=~"$namespace"}', 'namespace=~"$namespace"} offset 30d'), + + local openCostPodTable = + tablePanel.new( + 'Pod Monthly Cost', + ) + + tbStandardOptions.withUnit('currencyUSD') + + tbStandardOptions.thresholds.withSteps([ + tbStandardOptions.threshold.step.withValue(0) + + tbStandardOptions.threshold.step.withColor('green'), + tbStandardOptions.threshold.step.withValue(5) + + tbStandardOptions.threshold.step.withColor('yellow'), + tbStandardOptions.threshold.step.withValue(10) + + tbStandardOptions.threshold.step.withColor('red'), + ]) + + tbOptions.withSortBy( + tbOptions.sortBy.withDisplayName('Total Cost (Today)') + + tbOptions.sortBy.withDesc(true) + ) + + tbOptions.footer.withEnablePagination(true) + + tbQueryOptions.withTargets( + [ + prometheus.new( + 
'$datasource', + openCostPodMonthlyCostQuery, + ) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new( + '$datasource', + ||| + %s + / + %s + * 100 + - 100 + ||| % [ + openCostPodMonthlyCostQuery, + openCostPodMonthlyCostQueryOffset7d, + ], + ) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new( + '$datasource', + ||| + %s + / + %s + * 100 + - 100 + ||| % [ + openCostPodMonthlyCostQuery, + openCostPodMonthlyCostQueryOffset30d, + ], + ) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + ] + ) + + tbQueryOptions.withTransformations([ + tbQueryOptions.transformation.withId( + 'merge' + ), + tbQueryOptions.transformation.withId( + 'organize' + ) + + tbQueryOptions.transformation.withOptions( + { + renameByName: { + pod: 'Pod', + 'Value #A': 'Total Cost (Today)', + 'Value #B': 'Cost Difference (7d)', + 'Value #C': 'Cost Difference (30d)', + }, + indexByName: { + pod: 0, + 'Value #A': 1, + 'Value #B': 2, + 'Value #C': 3, + }, + excludeByName: { + Time: true, + job: true, + }, + } + ), + ]) + + tbStandardOptions.withOverrides([ + tbOverride.byName.new('Cost Difference (7d)') + + tbOverride.byName.withPropertiesFromOptions( + tbStandardOptions.withUnit('percent') + + tbFieldConfig.defaults.custom.withCellOptions( + { type: 'color-background' } // TODO(adinhodovic): Use jsonnet lib + ) + + tbStandardOptions.color.withMode('thresholds') + ), + tbOverride.byName.new('Cost Difference (30d)') + + tbOverride.byName.withPropertiesFromOptions( + tbStandardOptions.withUnit('percent') + + tbFieldConfig.defaults.custom.withCellOptions( + { type: 'color-background' } // TODO(adinhodovic): Use jsonnet lib + ) + + tbStandardOptions.color.withMode('thresholds') + ), + ]), + + local openCostPodCostPieChartPanel = + pieChartPanel.new( + 'Cost by Pod' + ) + + pieQueryOptions.withTargets( + prometheus.new( + '$datasource', + openCostPodMonthlyCostQuery, + ) + + prometheus.withLegendFormat('{{ pod }}') + 
+ prometheus.withInstant(true), + ) + + pieOptions.withPieType('pie') + + pieStandardOptions.withUnit('currencyUSD') + + pieOptions.legend.withAsTable(true) + + pieOptions.legend.withPlacement('right') + + pieOptions.legend.withDisplayMode('table') + + pieOptions.legend.withValues(['value', 'percent']) + + pieOptions.legend.withSortDesc(true), + + local openCostContainerMonthlyCostQuery = ||| + topk(10, + sum( + sum(container_memory_allocation_bytes{job=~"$job", namespace=~"$namespace"}) by (instance, container) + * on(instance) group_left() ( + node_ram_hourly_cost{job=~"$job"} / 1024 / 1024 / 1024 * 730 + + on(node,instance_type) group_left() + label_replace + ( + kube_node_labels{job=~"$job"}, "instance_type", "$1", "label_node_kubernetes_io_instance_type", "(.*)" + ) * 0 + ) + + + sum(container_cpu_allocation{job=~"$job", namespace=~"$namespace"}) by (instance, container) + * on(instance) group_left() ( + node_cpu_hourly_cost{job=~"$job"} * 730 + + on(node,instance_type) group_left() + label_replace + ( + kube_node_labels{job=~"$job"}, "instance_type", "$1", "label_node_kubernetes_io_instance_type", "(.*)" + ) * 0 + ) + ) by (container) + ) + |||, + + local openCostContainerMonthlyCostQueryOffset7d = std.strReplace(std.strReplace(openCostContainerMonthlyCostQuery, '{job=~"$job"}', '{job=~"$job"} offset 7d'), 'namespace=~"$namespace"}', 'namespace=~"$namespace"} offset 7d'),  // offset is appended to both selector shapes so every series in the query is shifted + local openCostContainerMonthlyCostQueryOffset30d = std.strReplace(std.strReplace(openCostContainerMonthlyCostQuery, '{job=~"$job"}', '{job=~"$job"} offset 30d'), 'namespace=~"$namespace"}', 'namespace=~"$namespace"} offset 30d'), + + local openCostContainerTable = + tablePanel.new( + 'Container Monthly Cost', + ) + + tbStandardOptions.withUnit('currencyUSD') + + tbStandardOptions.thresholds.withSteps([ + tbStandardOptions.threshold.step.withValue(0) + + tbStandardOptions.threshold.step.withColor('green'), + tbStandardOptions.threshold.step.withValue(5) + + tbStandardOptions.threshold.step.withColor('yellow'), + tbStandardOptions.threshold.step.withValue(10) + + tbStandardOptions.threshold.step.withColor('red'), + ]) + + tbOptions.withSortBy( + 
tbOptions.sortBy.withDisplayName('Total Cost (Today)') + + tbOptions.sortBy.withDesc(true) + ) + + tbOptions.footer.withEnablePagination(true) + + tbQueryOptions.withTargets( + [ + prometheus.new( + '$datasource', + openCostContainerMonthlyCostQuery, + ) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new( + '$datasource', + ||| + %s + / + %s + * 100 + - 100 + ||| % [ + openCostContainerMonthlyCostQuery, + openCostContainerMonthlyCostQueryOffset7d, + ], + ) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new( + '$datasource', + ||| + %s + / + %s + * 100 + - 100 + ||| % [ + openCostContainerMonthlyCostQuery, + openCostContainerMonthlyCostQueryOffset30d, + ], + ) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + ] + ) + + tbQueryOptions.withTransformations([ + tbQueryOptions.transformation.withId( + 'merge' + ), + tbQueryOptions.transformation.withId( + 'organize' + ) + + tbQueryOptions.transformation.withOptions( + { + renameByName: { + container: 'Container', + 'Value #A': 'Total Cost (Today)', + 'Value #B': 'Cost Difference (7d)', + 'Value #C': 'Cost Difference (30d)', + }, + indexByName: { + container: 0, + 'Value #A': 1, + 'Value #B': 2, + 'Value #C': 3, + }, + excludeByName: { + Time: true, + job: true, + }, + } + ), + ]) + + tbStandardOptions.withOverrides([ + tbOverride.byName.new('Cost Difference (7d)') + + tbOverride.byName.withPropertiesFromOptions( + tbStandardOptions.withUnit('percent') + + tbFieldConfig.defaults.custom.withCellOptions( + { type: 'color-background' } // TODO(adinhodovic): Use jsonnet lib + ) + + tbStandardOptions.color.withMode('thresholds') + ), + tbOverride.byName.new('Cost Difference (30d)') + + tbOverride.byName.withPropertiesFromOptions( + tbStandardOptions.withUnit('percent') + + tbFieldConfig.defaults.custom.withCellOptions( + { type: 'color-background' } // TODO(adinhodovic): Use jsonnet lib + ) + + 
tbStandardOptions.color.withMode('thresholds') + ), + ]), + + local openCostContainerCostPieChartPanel = + pieChartPanel.new( + 'Cost by Container' + ) + + pieQueryOptions.withTargets( + prometheus.new( + '$datasource', + openCostContainerMonthlyCostQuery, + ) + + prometheus.withLegendFormat('{{ container }}') + + prometheus.withInstant(true), + ) + + pieOptions.withPieType('pie') + + pieStandardOptions.withUnit('currencyUSD') + + pieOptions.legend.withAsTable(true) + + pieOptions.legend.withPlacement('right') + + pieOptions.legend.withDisplayMode('table') + + pieOptions.legend.withValues(['value', 'percent']) + + pieOptions.legend.withSortDesc(true), + + local openCostPVTotalGibByPvQuery = ||| + sum( + kube_persistentvolume_capacity_bytes{job=~"$job"} + / 1024 / 1024 / 1024 + ) by (persistentvolume) + * on(persistentvolume) group_left(namespace) ( + label_replace( + kube_persistentvolumeclaim_info{job=~"$job", namespace=~"$namespace"}, + "persistentvolume", "$1", "volumename", "(.*)" + ) + ) + |||, + + local openCostPVMonthlyCostByPvQuery = std.strReplace(openCostMonthlyPVCostQuery, '* 730', 'by (persistentvolume) * 730'), + + local openCostPvTable = + tablePanel.new( + 'Persistent Volumes Monthly Cost' + ) + + tbStandardOptions.withUnit('decgbytes') + + tbOptions.withSortBy( + tbOptions.sortBy.withDisplayName('Total Cost') + + tbOptions.sortBy.withDesc(true) + ) + + tbOptions.footer.withEnablePagination(true) + + tbQueryOptions.withTargets( + [ + prometheus.new( + '$datasource', + openCostPVTotalGibByPvQuery, + ) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new( + '$datasource', + openCostPVMonthlyCostByPvQuery, + ) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + ] + ) + + tbQueryOptions.withTransformations([ + tbQueryOptions.transformation.withId( + 'merge' + ), + tbQueryOptions.transformation.withId( + 'organize' + ) + + tbQueryOptions.transformation.withOptions( + { + renameByName: { + 
persistentvolume: 'Persistent Volume', + 'Value #A': 'Total GiB', + 'Value #B': 'Total Cost', + }, + indexByName: { + persistentvolume: 0, + 'Value #A': 1, + 'Value #B': 2, + }, + excludeByName: { + Time: true, + job: true, + namespace: true, + }, + } + ), + ]) + + tbStandardOptions.withOverrides([ + tbOverride.byName.new('Total Cost') + + tbOverride.byName.withPropertiesFromOptions( + tbStandardOptions.withUnit('currencyUSD') + ), + ]), + + local openCostPvCostPieChartPanel = + pieChartPanel.new( + 'Cost by PV' + ) + + pieQueryOptions.withTargets( + prometheus.new( + '$datasource', + openCostPVMonthlyCostByPvQuery, + ) + + prometheus.withLegendFormat('{{ persistentvolume }}') + + prometheus.withInstant(true), + ) + + pieOptions.withPieType('pie') + + pieStandardOptions.withUnit('currencyUSD') + + pieOptions.legend.withAsTable(true) + + pieOptions.legend.withPlacement('right') + + pieOptions.legend.withDisplayMode('table') + + pieOptions.legend.withValues(['value', 'percent']) + + pieOptions.legend.withSortDesc(true), + + local openCostSummaryRow = row.new( - title='Namespace Summary', + title='Summary', ), - local openCostCloudResourcesRow = + local openCostPodRow = row.new( - title='Cloud Resources', + title='Pod Summary', + ), + + local openCostContainerRow = + row.new( + title='Container Summary', + ), + + local openCostPvRow = + row.new( + title='PV Summary', ), 'opencost-mixin-namespace.json': @@ -796,7 +805,7 @@ local pieQueryOptions = pieChartPanel.queryOptions; dashboard.new( 'OpenCost / Namespace', ) + - dashboard.withDescription('A dashboard that monitors OpenCost and focuses on namespace costs. It is created using the [OpenCost-mixin](https://github.com/adinhodovic/opencost-mixin).') + + dashboard.withDescription('A dashboard that monitors OpenCost and focuses on namespace costs. 
It is created using the [opencost-mixin](https://github.com/adinhodovic/opencost-mixin).') + dashboard.withUid($._config.openCostNamespaceDashboardUid) + dashboard.withTags($._config.tags) + dashboard.withTimezone('utc') + @@ -812,7 +821,7 @@ local pieQueryOptions = pieChartPanel.queryOptions; ) + dashboard.withPanels( [ - openCostNamespaceSummaryRow + + openCostSummaryRow + row.gridPos.withX(0) + row.gridPos.withY(0) + row.gridPos.withW(24) + @@ -820,66 +829,85 @@ local pieQueryOptions = pieChartPanel.queryOptions; ] + grid.makeGrid( [ - openCostNamespaceHourlyCostStatPanel, - openCostNamespaceDailyCostStatPanel, - openCostNamespaceMonthlyCostStatPanel, - openCostNamespaceMonthlyCpuCostStatPanel, - openCostNamespaceMonthlyRamCostStatPanel, - openCostNamespaceMonthlyPVCostStatPanel, + openCostHourlyCostStatPanel, + openCostDailyCostStatPanel, + openCostMonthlyCostStatPanel, + openCostMonthlyCpuCostStatPanel, + openCostMonthlyRamCostStatPanel, + openCostMonthlyPVCostStatPanel, ], panelWidth=4, panelHeight=3, startY=1 - ) - // grid.makeGrid( - // [ - // openCostHourlyCostTimeSeriesPanel, - // openCostDailyCostTimeSeriesPanel, - // openCostMonthlyCostTimeSeriesPanel, - // ], - // panelWidth=8, - // panelHeight=5, - // startY=5 - // ) + - // grid.makeGrid( - // [ - // openCostResourceCostPieChartPanel, - // openCostNamespaceCostPieChartPanel, - // openCostInstanceTypeCostPieChartPanel, - // ], - // panelWidth=8, - // panelHeight=5, - // startY=5 - // ) + - // [ - // openCostCloudResourcesRow + - // row.gridPos.withX(0) + - // row.gridPos.withY(15) + - // row.gridPos.withW(24) + - // row.gridPos.withH(1), - // ] + - // [ - // openCostNodeTable + - // tablePanel.gridPos.withX(0) + - // tablePanel.gridPos.withY(16) + - // tablePanel.gridPos.withW(16) + - // tablePanel.gridPos.withH(10), - // openCostPVTable + - // tablePanel.gridPos.withX(16) + - // tablePanel.gridPos.withY(16) + - // tablePanel.gridPos.withW(8) + - // tablePanel.gridPos.withH(10), - // 
openCostNamespaceSummaryRow + - // row.gridPos.withX(0) + - // row.gridPos.withY(17) + - // row.gridPos.withW(24) + - // row.gridPos.withH(1), - // openCostNamespaceTable + - // tablePanel.gridPos.withX(0) + - // tablePanel.gridPos.withY(18) + - // tablePanel.gridPos.withW(24) + - // tablePanel.gridPos.withH(12), - // ] + ) + + [ + openCostDailyCostTimeSeriesPanel + + timeSeriesPanel.gridPos.withX(0) + + timeSeriesPanel.gridPos.withY(4) + + timeSeriesPanel.gridPos.withW(9) + + timeSeriesPanel.gridPos.withH(5), + openCostMonthlyCostTimeSeriesPanel + + timeSeriesPanel.gridPos.withX(9) + + timeSeriesPanel.gridPos.withY(4) + + timeSeriesPanel.gridPos.withW(9) + + timeSeriesPanel.gridPos.withH(5), + openCostResourceCostPieChartPanel + + pieChartPanel.gridPos.withX(18) + + pieChartPanel.gridPos.withY(4) + + pieChartPanel.gridPos.withW(6) + + pieChartPanel.gridPos.withH(5), + ] + + [ + openCostPodRow + + row.gridPos.withX(0) + + row.gridPos.withY(9) + + row.gridPos.withW(24) + + row.gridPos.withH(1), + openCostPodTable + + tablePanel.gridPos.withX(0) + + tablePanel.gridPos.withY(10) + + tablePanel.gridPos.withW(18) + + tablePanel.gridPos.withH(10), + openCostPodCostPieChartPanel + + pieChartPanel.gridPos.withX(18) + + pieChartPanel.gridPos.withY(10) + + pieChartPanel.gridPos.withW(6) + + pieChartPanel.gridPos.withH(10), + ] + + [ + openCostContainerRow + + row.gridPos.withX(0) + + row.gridPos.withY(20) + + row.gridPos.withW(24) + + row.gridPos.withH(1), + openCostContainerTable + + tablePanel.gridPos.withX(0) + + tablePanel.gridPos.withY(21) + + tablePanel.gridPos.withW(18) + + tablePanel.gridPos.withH(10), + openCostContainerCostPieChartPanel + + pieChartPanel.gridPos.withX(18) + + pieChartPanel.gridPos.withY(21) + + pieChartPanel.gridPos.withW(6) + + pieChartPanel.gridPos.withH(10), + ] + + [ + openCostPvRow + + row.gridPos.withX(0) + + row.gridPos.withY(31) + + row.gridPos.withW(24) + + row.gridPos.withH(1), + openCostPvTable + + tablePanel.gridPos.withX(0) + + 
tablePanel.gridPos.withY(32) + + tablePanel.gridPos.withW(18) + + tablePanel.gridPos.withH(10), + openCostPvCostPieChartPanel + + pieChartPanel.gridPos.withX(18) + + pieChartPanel.gridPos.withY(32) + + pieChartPanel.gridPos.withW(6) + + pieChartPanel.gridPos.withH(10), + ] ) + if $._config.annotation.enabled then dashboard.withAnnotations($._config.customAnnotation) diff --git a/dashboards/opencost-overview.libsonnet b/dashboards/opencost-overview.libsonnet index c38b67e..7d05635 100644 --- a/dashboards/opencost-overview.libsonnet +++ b/dashboards/opencost-overview.libsonnet @@ -32,6 +32,7 @@ local tbOptions = tablePanel.options; local tbStandardOptions = tablePanel.standardOptions; local tbQueryOptions = tablePanel.queryOptions; local tbFieldConfig = tablePanel.fieldConfig; +local tbPanelOptions = tablePanel.panelOptions; local tbOverride = tbStandardOptions.override; // Pie Chart @@ -60,21 +61,9 @@ local pieQueryOptions = pieChartPanel.queryOptions; query.refresh.onLoad() + query.refresh.onTime(), - local namespaceVariable = - query.new( - 'namespace', - 'label_values(kube_namespace_labels, namespace)' - ) + - query.withDatasourceFromVariable(datasourceVariable) + - query.withSort(1) + - query.generalOptions.withLabel('Namespace') + - query.refresh.onLoad() + - query.refresh.onTime(), - local variables = [ datasourceVariable, jobVariable, - namespaceVariable, ], local openCostDailyCostQuery = ||| @@ -390,6 +379,128 @@ local pieQueryOptions = pieChartPanel.queryOptions; tsLegend.withShowLegend(false) + tsCustom.withSpanNulls(false), + local openCostTotalCostVariance7dQuery = ||| + 1 - + ( + avg_over_time( + sum(node_total_hourly_cost{job="$job"}) [7d:1h] + ) + / + avg_over_time( + sum(node_total_hourly_cost{job="$job"}) [1d:1h] + ) + ) + |||, + + local openCostTotalCostVariance30dQuery = ||| + 1 - + ( + avg_over_time( + sum(node_total_hourly_cost{job="$job"}) [30d:1h] + ) + / + avg_over_time( + sum(node_total_hourly_cost{job="$job"}) [1d:1h] + ) + ) + |||, + + 
local openCostTotalCostVarianceTimeSeriesPanel = + timeSeriesPanel.new( + 'Total Cost Variance', + ) + + tsQueryOptions.withTargets( + [ + prometheus.new( + '$datasource', + openCostTotalCostVariance7dQuery, + ) + + prometheus.withLegendFormat('Current hourly cost vs. 7-day average') + + prometheus.withInterval('10m'), + prometheus.new( + '$datasource', + openCostTotalCostVariance30dQuery, + ) + + prometheus.withLegendFormat('Current hourly cost vs. 30-day average') + + prometheus.withInterval('10m'), + ] + ) + + tsStandardOptions.withUnit('percentunit') + + tsLegend.withShowLegend(true) + + tsOptions.tooltip.withMode('multi') + + tsOptions.tooltip.withSort('desc') + + tsCustom.withSpanNulls(false), + + local openCostCpuCostVariance30dQuery = ||| + 1 - + ( + avg_over_time( + %s [30d:1h] + ) + / + avg_over_time( + %s [1d:1h] + ) + ) + ||| % [openCostMonthlyCpuCostQuery, openCostMonthlyCpuCostQuery], + + local openCostRamCostVariance30dQuery = ||| + 1 - + ( + avg_over_time( + %s [30d:1h] + ) + / + avg_over_time( + %s [1d:1h] + ) + ) + ||| % [openCostMonthlyRamCostQuery, openCostMonthlyRamCostQuery], + + local openCostPVCostVariance30dQuery = ||| + 1 - + ( + avg_over_time( + (%s) [30d:1h] + ) + / + avg_over_time( + (%s) [1d:1h] + ) + ) + ||| % [openCostMonthlyPVCostQuery, openCostMonthlyPVCostQuery], + + local openCostResourceCostVarianceTimeSeriesPanel = + timeSeriesPanel.new( + 'Resource Cost Variance', + ) + + tsQueryOptions.withTargets( + [ + prometheus.new( + '$datasource', + openCostCpuCostVariance30dQuery, + ) + + prometheus.withLegendFormat('Current CPU hourly cost vs. 30-day average') + + prometheus.withInterval('10m'), + prometheus.new( + '$datasource', + openCostRamCostVariance30dQuery, + ) + + prometheus.withLegendFormat('Current RAM hourly cost vs. 30-day average') + + prometheus.withInterval('10m'), + prometheus.new( + '$datasource', + openCostPVCostVariance30dQuery, + ) + + prometheus.withLegendFormat('Current PV hourly cost vs. 
30-day average') + + prometheus.withInterval('10m'), + ] + ) + + tsStandardOptions.withUnit('percentunit') + + tsOptions.tooltip.withMode('multi') + + tsOptions.tooltip.withSort('desc') + + tsLegend.withShowLegend(true) + + tsCustom.withSpanNulls(false), local openCostResourceCostPieChartPanel = pieChartPanel.new( @@ -773,6 +884,19 @@ local pieQueryOptions = pieChartPanel.queryOptions; ) + tbStandardOptions.color.withMode('thresholds') ), + ]) + + tbStandardOptions.withOverrides([ + tbOverride.byName.new('Namespace') + + tbOverride.byName.withPropertiesFromOptions( + tbStandardOptions.withLinks( + tbPanelOptions.link.withTitle('Go To Namespace') + + tbPanelOptions.link.withType('dashboard') + + tbPanelOptions.link.withUrl( + '/d/%s/opencost-namespace?var-job=$job&var-namespace=${__data.fields.Namespace}' % $._config.openCostNamespaceDashboardUid + ) + + tbPanelOptions.link.withTargetBlank(true) + ) + ), ]), local openCostClusterSummaryRow = @@ -795,7 +919,7 @@ local pieQueryOptions = pieChartPanel.queryOptions; dashboard.new( 'OpenCost / Overview', ) + - dashboard.withDescription('A dashboard that monitors OpenCost and focuses on giving a overview for OpenCost. It is created using the [OpenCost-mixin](https://github.com/adinhodovic/opencost-mixin).') + + dashboard.withDescription('A dashboard that monitors OpenCost and focuses on giving a overview for OpenCost. 
It is created using the [opencost-mixin](https://github.com/adinhodovic/opencost-mixin).') + dashboard.withUid($._config.openCostOverviewDashboardUid) + dashboard.withTags($._config.tags) + dashboard.withTimezone('utc') + @@ -848,34 +972,43 @@ local pieQueryOptions = pieChartPanel.queryOptions; ], panelWidth=8, panelHeight=5, - startY=5 + startY=10 + ) + + grid.makeGrid( + [ + openCostTotalCostVarianceTimeSeriesPanel, + openCostResourceCostVarianceTimeSeriesPanel, + ], + panelWidth=12, + panelHeight=5, + startY=15 ) + [ openCostCloudResourcesRow + row.gridPos.withX(0) + - row.gridPos.withY(15) + + row.gridPos.withY(20) + row.gridPos.withW(24) + row.gridPos.withH(1), ] + [ openCostNodeTable + tablePanel.gridPos.withX(0) + - tablePanel.gridPos.withY(16) + + tablePanel.gridPos.withY(21) + tablePanel.gridPos.withW(16) + tablePanel.gridPos.withH(10), openCostPVTable + tablePanel.gridPos.withX(16) + - tablePanel.gridPos.withY(16) + + tablePanel.gridPos.withY(21) + tablePanel.gridPos.withW(8) + tablePanel.gridPos.withH(10), openCostNamespaceSummaryRow + row.gridPos.withX(0) + - row.gridPos.withY(17) + + row.gridPos.withY(31) + row.gridPos.withW(24) + row.gridPos.withH(1), openCostNamespaceTable + tablePanel.gridPos.withX(0) + - tablePanel.gridPos.withY(18) + + tablePanel.gridPos.withY(32) + tablePanel.gridPos.withW(24) + tablePanel.gridPos.withH(12), ] diff --git a/dashboards_out/opencost-mixin-namespace.json b/dashboards_out/opencost-mixin-namespace.json new file mode 100644 index 0000000..3a3177e --- /dev/null +++ b/dashboards_out/opencost-mixin-namespace.json @@ -0,0 +1,1106 @@ +{ + "__inputs": [ ], + "__requires": [ ], + "description": "A dashboard that monitors OpenCost and focuses on namespace costs. 
It is created using the [opencost-mixin](https://github.com/adinhodovic/opencost-mixin).", + "editable": true, + "links": [ + { + "tags": [ + "opencost", + "opencost-mixin" + ], + "targetBlank": true, + "title": "OpenCost", + "type": "dashboards" + } + ], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "title": " Summary", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 2, + "thresholds": { + "steps": [ + { + "color": "red", + "value": 0 + }, + { + "color": "green", + "value": 0.10000000000000001 + } + ] + }, + "unit": "currencyUSD" + } + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "graphMode": "none", + "percentChangeColorMode": "inverted", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "showPercentChange": true + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n sum(\n container_memory_allocation_bytes{job=~\"$job\", namespace=~\"$namespace\"}\n )\n by (namespace, instance)\n * on(instance) group_left() (\n node_ram_hourly_cost{job=~\"$job\"}\n / 1024 / 1024 / 1024 * 1\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=~\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n +\n sum(\n container_cpu_allocation{job=~\"$job\", namespace=~\"$namespace\"}\n )\n by (namespace,instance)\n * on(instance) group_left() (\n node_cpu_hourly_cost{job=~\"$job\"}\n * 1\n + on(node, instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=~\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n) by (namespace)\n" + } + ], + "title": "Hourly Cost", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- 
Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 2, + "thresholds": { + "steps": [ + { + "color": "red", + "value": 0 + }, + { + "color": "green", + "value": 0.10000000000000001 + } + ] + }, + "unit": "currencyUSD" + } + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 3, + "options": { + "graphMode": "none", + "percentChangeColorMode": "inverted", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "showPercentChange": true + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n sum(\n container_memory_allocation_bytes{job=~\"$job\", namespace=~\"$namespace\"}\n )\n by (namespace, instance)\n * on(instance) group_left() (\n node_ram_hourly_cost{job=~\"$job\"}\n / 1024 / 1024 / 1024 * 24\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=~\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n +\n sum(\n container_cpu_allocation{job=~\"$job\", namespace=~\"$namespace\"}\n )\n by (namespace,instance)\n * on(instance) group_left() (\n node_cpu_hourly_cost{job=~\"$job\"}\n * 24\n + on(node, instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=~\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n) by (namespace)\n" + } + ], + "title": "Daily Cost", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 2, + "thresholds": { + "steps": [ + { + "color": "red", + "value": 0 + }, + { + "color": "green", + "value": 0.10000000000000001 + } + ] + }, + "unit": "currencyUSD" + } + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 4, + "options": { + "graphMode": "none", + "percentChangeColorMode": "inverted", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "showPercentChange": true 
+ }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n sum(\n container_memory_allocation_bytes{job=~\"$job\", namespace=~\"$namespace\"}\n )\n by (namespace, instance)\n * on(instance) group_left() (\n node_ram_hourly_cost{job=~\"$job\"}\n / 1024 / 1024 / 1024 * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=~\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n +\n sum(\n container_cpu_allocation{job=~\"$job\", namespace=~\"$namespace\"}\n )\n by (namespace,instance)\n * on(instance) group_left() (\n node_cpu_hourly_cost{job=~\"$job\"}\n * 730\n + on(node, instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=~\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n) by (namespace)\n" + } + ], + "title": "Monthly Cost", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 2, + "thresholds": { + "steps": [ + { + "color": "red", + "value": 0 + }, + { + "color": "green", + "value": 0.10000000000000001 + } + ] + }, + "unit": "currencyUSD" + } + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 5, + "options": { + "graphMode": "none", + "percentChangeColorMode": "inverted", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "showPercentChange": true + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n sum(\n container_cpu_allocation{job=~\"$job\", namespace=~\"$namespace\"}\n )\n by (namespace,instance)\n * on(instance) group_left() (\n node_cpu_hourly_cost{job=~\"$job\"}\n * 730\n + on(node, instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=~\"$job\"}, \"instance_type\", \"$1\", 
\"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n)\n" + } + ], + "title": "Monthly CPU Cost", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 2, + "thresholds": { + "steps": [ + { + "color": "red", + "value": 0 + }, + { + "color": "green", + "value": 0.10000000000000001 + } + ] + }, + "unit": "currencyUSD" + } + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 6, + "options": { + "graphMode": "none", + "percentChangeColorMode": "inverted", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "showPercentChange": true + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n sum(\n container_memory_allocation_bytes{job=~\"$job\", namespace=~\"$namespace\"}\n )\n by (namespace, instance)\n * on(instance) group_left() (\n node_ram_hourly_cost{job=~\"$job\"}\n / 1024 / 1024 / 1024 * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=~\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n)\n" + } + ], + "title": "Monthly Ram Cost", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 2, + "thresholds": { + "steps": [ + { + "color": "red", + "value": 0 + }, + { + "color": "green", + "value": 0.10000000000000001 + } + ] + }, + "unit": "currencyUSD" + } + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 7, + "options": { + "graphMode": "none", + "percentChangeColorMode": "inverted", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "showPercentChange": true + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n sum(\n 
kube_persistentvolume_capacity_bytes{job=~\"$job\"}\n / 1024 / 1024 / 1024\n ) by (persistentvolume)\n *\n sum(\n pv_hourly_cost{job=~\"$job\"}\n ) by (persistentvolume)\n * on(persistentvolume) group_left(namespace) (\n label_replace(\n kube_persistentvolumeclaim_info{job=~\"$job\", namespace=~\"$namespace\"},\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\"\n )\n )\n) * 730\n" + } + ], + "title": "Monthly PV Cost", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "spanNulls": false + }, + "unit": "currencyUSD" + } + }, + "gridPos": { + "h": 5, + "w": 9, + "x": 0, + "y": 4 + }, + "id": 8, + "options": { + "legend": { + "showLegend": false + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n sum(\n container_memory_allocation_bytes{job=~\"$job\", namespace=~\"$namespace\"}\n )\n by (namespace, instance)\n * on(instance) group_left() (\n node_ram_hourly_cost{job=~\"$job\"}\n / 1024 / 1024 / 1024 * 24\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=~\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n +\n sum(\n container_cpu_allocation{job=~\"$job\", namespace=~\"$namespace\"}\n )\n by (namespace,instance)\n * on(instance) group_left() (\n node_cpu_hourly_cost{job=~\"$job\"}\n * 24\n + on(node, instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=~\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n) by (namespace)\n", + "interval": "1m", + "legendFormat": "Daily Cost" + } + ], + "title": "Daily Cost", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "spanNulls": false + }, + "unit": "currencyUSD" + } + }, + 
"gridPos": { + "h": 5, + "w": 9, + "x": 9, + "y": 4 + }, + "id": 9, + "options": { + "legend": { + "showLegend": false + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n sum(\n container_memory_allocation_bytes{job=~\"$job\", namespace=~\"$namespace\"}\n )\n by (namespace, instance)\n * on(instance) group_left() (\n node_ram_hourly_cost{job=~\"$job\"}\n / 1024 / 1024 / 1024 * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=~\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n +\n sum(\n container_cpu_allocation{job=~\"$job\", namespace=~\"$namespace\"}\n )\n by (namespace,instance)\n * on(instance) group_left() (\n node_cpu_hourly_cost{job=~\"$job\"}\n * 730\n + on(node, instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=~\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n) by (namespace)\n", + "interval": "1m", + "legendFormat": "Monthly Cost" + } + ], + "title": "Monthly Cost", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "currencyUSD" + } + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 4 + }, + "id": 10, + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "sortDesc": true, + "values": [ + "value", + "percent" + ] + }, + "pieType": "pie" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n sum(\n container_cpu_allocation{job=~\"$job\", namespace=~\"$namespace\"}\n )\n by (namespace,instance)\n * on(instance) group_left() (\n node_cpu_hourly_cost{job=~\"$job\"}\n * 730\n + on(node, instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=~\"$job\"}, 
\"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n)\n", + "instant": true, + "legendFormat": "CPU" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n sum(\n container_memory_allocation_bytes{job=~\"$job\", namespace=~\"$namespace\"}\n )\n by (namespace, instance)\n * on(instance) group_left() (\n node_ram_hourly_cost{job=~\"$job\"}\n / 1024 / 1024 / 1024 * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=~\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n)\n", + "instant": true, + "legendFormat": "RAM" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n sum(\n kube_persistentvolume_capacity_bytes{job=~\"$job\"}\n / 1024 / 1024 / 1024\n ) by (persistentvolume)\n *\n sum(\n pv_hourly_cost{job=~\"$job\"}\n ) by (persistentvolume)\n * on(persistentvolume) group_left(namespace) (\n label_replace(\n kube_persistentvolumeclaim_info{job=~\"$job\", namespace=~\"$namespace\"},\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\"\n )\n )\n) * 730\n", + "instant": true, + "legendFormat": "PV" + } + ], + "title": "Cost by Resource", + "type": "piechart" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 11, + "title": "Pod Summary", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "thresholds": { + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 5 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "currencyUSD" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Cost Difference (7d)" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "thresholds" + } + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } 
+ }, + { + "id": "unit", + "value": "percent" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cost Difference (30d)" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "thresholds" + } + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } + }, + { + "id": "unit", + "value": "percent" + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 18, + "x": 0, + "y": 10 + }, + "id": 12, + "options": { + "footer": { + "enablePagination": true + }, + "sortBy": [ + { + "desc": true, + "displayName": "Total Cost (Today)" + } + ] + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "topk(10,\n sum(\n sum(container_memory_allocation_bytes{job=\"$job\", namespace=~\"$namespace\"}) by (instance, pod)\n * on(instance) group_left() (\n node_ram_hourly_cost / 1024 / 1024 / 1024 * 730\n + on(node, instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n +\n sum(container_cpu_allocation{job=\"$job\", namespace=~\"$namespace\"}) by (instance, pod)\n * on(instance) group_left() (\n node_cpu_hourly_cost{job=\"$job\"} * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n ) by (pod)\n)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "topk(10,\n sum(\n sum(container_memory_allocation_bytes{job=\"$job\", namespace=~\"$namespace\"}) by (instance, pod)\n * on(instance) group_left() (\n node_ram_hourly_cost / 1024 / 1024 / 1024 * 730\n + on(node, instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n 
+\n sum(container_cpu_allocation{job=\"$job\", namespace=~\"$namespace\"}) by (instance, pod)\n * on(instance) group_left() (\n node_cpu_hourly_cost{job=\"$job\"} * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n ) by (pod)\n)\n\n/\ntopk(10,\n sum(\n sum(container_memory_allocation_bytes{job=\"$job\", namespace=~\"$namespace\"}) by (instance, pod)\n * on(instance) group_left() (\n node_ram_hourly_cost / 1024 / 1024 / 1024 * 730\n + on(node, instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n +\n sum(container_cpu_allocation{job=\"$job\", namespace=~\"$namespace\"}) by (instance, pod)\n * on(instance) group_left() (\n node_cpu_hourly_cost{job=\"$job\"} * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n ) by (pod)\n)\n\n* 100\n- 100\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "topk(10,\n sum(\n sum(container_memory_allocation_bytes{job=\"$job\", namespace=~\"$namespace\"}) by (instance, pod)\n * on(instance) group_left() (\n node_ram_hourly_cost / 1024 / 1024 / 1024 * 730\n + on(node, instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n +\n sum(container_cpu_allocation{job=\"$job\", namespace=~\"$namespace\"}) by (instance, pod)\n * on(instance) group_left() (\n node_cpu_hourly_cost{job=\"$job\"} * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", 
\"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n ) by (pod)\n)\n\n/\ntopk(10,\n sum(\n sum(container_memory_allocation_bytes{job=\"$job\", namespace=~\"$namespace\"}) by (instance, pod)\n * on(instance) group_left() (\n node_ram_hourly_cost / 1024 / 1024 / 1024 * 730\n + on(node, instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n +\n sum(container_cpu_allocation{job=\"$job\", namespace=~\"$namespace\"}) by (instance, pod)\n * on(instance) group_left() (\n node_cpu_hourly_cost{job=\"$job\"} * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n ) by (pod)\n)\n\n* 100\n- 100\n", + "format": "table", + "instant": true + } + ], + "title": "Pod Monthly Cost", + "transformations": [ + { + "id": "merge" + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "job": true + }, + "indexByName": { + "Value #A": 1, + "Value #B": 2, + "Value #C": 3, + "pod": 0 + }, + "renameByName": { + "Value #A": "Total Cost (Today)", + "Value #B": "Cost Difference (7d)", + "Value #C": "Cost Difference (30d)", + "pod": "Pod" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "currencyUSD" + } + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 18, + "y": 10 + }, + "id": 13, + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "sortDesc": true, + "values": [ + "value", + "percent" + ] + }, + "pieType": "pie" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "topk(10,\n sum(\n sum(container_memory_allocation_bytes{job=\"$job\", namespace=~\"$namespace\"}) by 
(instance, pod)\n * on(instance) group_left() (\n node_ram_hourly_cost / 1024 / 1024 / 1024 * 730\n + on(node, instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n +\n sum(container_cpu_allocation{job=\"$job\", namespace=~\"$namespace\"}) by (instance, pod)\n * on(instance) group_left() (\n node_cpu_hourly_cost{job=\"$job\"} * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n ) by (pod)\n)\n", + "instant": true, + "legendFormat": "{{ pod }}" + } + ], + "title": "Cost by Pod", + "type": "piechart" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 14, + "title": "Container Summary", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "thresholds": { + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 5 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "currencyUSD" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Cost Difference (7d)" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "thresholds" + } + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } + }, + { + "id": "unit", + "value": "percent" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cost Difference (30d)" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "thresholds" + } + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } + }, + { + "id": "unit", + "value": "percent" + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 18, + "x": 0, + "y": 21 + }, + "id": 15, + "options": { + "footer": { + "enablePagination": true + }, + 
"sortBy": [ + { + "desc": true, + "displayName": "Total Cost (Today)" + } + ] + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "topk(10,\n sum(\n sum(container_memory_allocation_bytes{job=\"$job\", namespace=~\"$namespace\"}) by (instance, container)\n * on(instance) group_left() (\n node_ram_hourly_cost / 1024 / 1024 / 1024 * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n +\n sum(container_cpu_allocation{job=\"$job\", namespace=~\"$namespace\"}) by (instance, container)\n * on(instance) group_left() (\n node_cpu_hourly_cost{job=\"$job\"} * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n ) by (container)\n)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "topk(10,\n sum(\n sum(container_memory_allocation_bytes{job=\"$job\", namespace=~\"$namespace\"}) by (instance, container)\n * on(instance) group_left() (\n node_ram_hourly_cost / 1024 / 1024 / 1024 * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n +\n sum(container_cpu_allocation{job=\"$job\", namespace=~\"$namespace\"}) by (instance, container)\n * on(instance) group_left() (\n node_cpu_hourly_cost{job=\"$job\"} * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n ) by (container)\n)\n\n/\ntopk(10,\n sum(\n sum(container_memory_allocation_bytes{job=\"$job\", 
namespace=~\"$namespace\"}) by (instance, container)\n * on(instance) group_left() (\n node_ram_hourly_cost / 1024 / 1024 / 1024 * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n +\n sum(container_cpu_allocation{job=\"$job\", namespace=~\"$namespace\"}) by (instance, container)\n * on(instance) group_left() (\n node_cpu_hourly_cost{job=\"$job\"} * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n ) by (container)\n)\n\n* 100\n- 100\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "topk(10,\n sum(\n sum(container_memory_allocation_bytes{job=\"$job\", namespace=~\"$namespace\"}) by (instance, container)\n * on(instance) group_left() (\n node_ram_hourly_cost / 1024 / 1024 / 1024 * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n +\n sum(container_cpu_allocation{job=\"$job\", namespace=~\"$namespace\"}) by (instance, container)\n * on(instance) group_left() (\n node_cpu_hourly_cost{job=\"$job\"} * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n ) by (container)\n)\n\n/\ntopk(10,\n sum(\n sum(container_memory_allocation_bytes{job=\"$job\", namespace=~\"$namespace\"}) by (instance, container)\n * on(instance) group_left() (\n node_ram_hourly_cost / 1024 / 1024 / 1024 * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", 
\"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n +\n sum(container_cpu_allocation{job=\"$job\", namespace=~\"$namespace\"}) by (instance, container)\n * on(instance) group_left() (\n node_cpu_hourly_cost{job=\"$job\"} * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n ) by (container)\n)\n\n* 100\n- 100\n", + "format": "table", + "instant": true + } + ], + "title": "Container Monthly Cost", + "transformations": [ + { + "id": "merge" + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "job": true + }, + "indexByName": { + "Value #A": 1, + "Value #B": 2, + "Value #C": 3, + "container": 0 + }, + "renameByName": { + "Value #A": "Total Cost (Today)", + "Value #B": "Cost Difference (7d)", + "Value #C": "Cost Difference (30d)", + "container": "Container" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "currencyUSD" + } + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 18, + "y": 21 + }, + "id": 16, + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "sortDesc": true, + "values": [ + "value", + "percent" + ] + }, + "pieType": "pie" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "topk(10,\n sum(\n sum(container_memory_allocation_bytes{job=\"$job\", namespace=~\"$namespace\"}) by (instance, container)\n * on(instance) group_left() (\n node_ram_hourly_cost / 1024 / 1024 / 1024 * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n +\n sum(container_cpu_allocation{job=\"$job\", namespace=~\"$namespace\"}) by 
(instance, container)\n * on(instance) group_left() (\n node_cpu_hourly_cost{job=\"$job\"} * 730\n + on(node,instance_type) group_left()\n label_replace\n (\n kube_node_labels{job=\"$job\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.*)\"\n ) * 0\n )\n ) by (container)\n)\n", + "instant": true, + "legendFormat": "{{ container }}" + } + ], + "title": "Cost by Container", + "type": "piechart" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 17, + "title": "PV Summary", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "decgbytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total Cost" + }, + "properties": [ + { + "id": "unit", + "value": "currencyUSD" + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 18, + "x": 0, + "y": 32 + }, + "id": 18, + "options": { + "footer": { + "enablePagination": true + }, + "sortBy": [ + { + "desc": true, + "displayName": "Total Cost" + } + ] + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n kube_persistentvolume_capacity_bytes{job=~\"$job\"}\n / 1024 / 1024 / 1024\n) by (persistentvolume)\n* on(persistentvolume) group_left(namespace) (\n label_replace(\n kube_persistentvolumeclaim_info{job=~\"$job\", namespace=~\"$namespace\"},\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\"\n )\n)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n sum(\n kube_persistentvolume_capacity_bytes{job=~\"$job\"}\n / 1024 / 1024 / 1024\n ) by (persistentvolume)\n *\n sum(\n pv_hourly_cost{job=~\"$job\"}\n ) by (persistentvolume)\n * on(persistentvolume) group_left(namespace) (\n label_replace(\n kube_persistentvolumeclaim_info{job=~\"$job\", namespace=~\"$namespace\"},\n 
\"persistentvolume\", \"$1\", \"volumename\", \"(.*)\"\n )\n )\n) by (persistentvolume) * 730\n", + "format": "table", + "instant": true + } + ], + "title": "Persistent Volumes Monthly Cost", + "transformations": [ + { + "id": "merge" + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "job": true, + "namespace": true + }, + "indexByName": { + "Value #A": 1, + "Value #B": 2, + "persistentvolume": 0 + }, + "renameByName": { + "Value #A": "Total GiB", + "Value #B": "Total Cost", + "persistentvolume": "Persistent Volume" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "currencyUSD" + } + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 18, + "y": 32 + }, + "id": 19, + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "sortDesc": true, + "values": [ + "value", + "percent" + ] + }, + "pieType": "pie" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n sum(\n kube_persistentvolume_capacity_bytes{job=~\"$job\"}\n / 1024 / 1024 / 1024\n ) by (persistentvolume)\n *\n sum(\n pv_hourly_cost{job=~\"$job\"}\n ) by (persistentvolume)\n * on(persistentvolume) group_left(namespace) (\n label_replace(\n kube_persistentvolumeclaim_info{job=~\"$job\", namespace=~\"$namespace\"},\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\"\n )\n )\n) by (persistentvolume) * 730\n", + "instant": true, + "legendFormat": "{{ persistentvolume }}" + } + ], + "title": "Cost by PV", + "type": "piechart" + } + ], + "schemaVersion": 39, + "tags": [ + "opencost", + "opencost-mixin" + ], + "templating": { + "list": [ + { + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "label": "Job", + "name": 
"job", + "query": "label_values(opencost_build_info, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "label": "Namespace", + "name": "namespace", + "query": "label_values(kube_namespace_labels, namespace)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-7d", + "to": "now" + }, + "timezone": "utc", + "title": "OpenCost / Namespace", + "uid": "opencost-mixin-namespace-jkwq" +} diff --git a/dashboards_out/opencost-overview.json b/dashboards_out/opencost-overview.json index 57c415e..70648f5 100644 --- a/dashboards_out/opencost-overview.json +++ b/dashboards_out/opencost-overview.json @@ -1,7 +1,7 @@ { "__inputs": [ ], "__requires": [ ], - "description": "A dashboard that monitors OpenCost and focuses on giving a overview for OpenCost. It is created using the [OpenCost-mixin](https://github.com/adinhodovic/opencost-mixin).", + "description": "A dashboard that monitors OpenCost and focuses on giving a overview for OpenCost. 
It is created using the [opencost-mixin](https://github.com/adinhodovic/opencost-mixin).", "editable": true, "links": [ { @@ -34,6 +34,7 @@ }, "fieldConfig": { "defaults": { + "decimals": 2, "thresholds": { "steps": [ { @@ -86,6 +87,7 @@ }, "fieldConfig": { "defaults": { + "decimals": 2, "thresholds": { "steps": [ { @@ -138,6 +140,7 @@ }, "fieldConfig": { "defaults": { + "decimals": 2, "thresholds": { "steps": [ { @@ -190,6 +193,7 @@ }, "fieldConfig": { "defaults": { + "decimals": 2, "thresholds": { "steps": [ { @@ -242,6 +246,7 @@ }, "fieldConfig": { "defaults": { + "decimals": 2, "thresholds": { "steps": [ { @@ -294,6 +299,7 @@ }, "fieldConfig": { "defaults": { + "decimals": 2, "thresholds": { "steps": [ { @@ -473,7 +479,7 @@ "h": 5, "w": 8, "x": 0, - "y": 5 + "y": 10 }, "id": 11, "options": { @@ -536,7 +542,7 @@ "h": 5, "w": 8, "x": 8, - "y": 5 + "y": 10 }, "id": 12, "options": { @@ -581,7 +587,7 @@ "h": 5, "w": 8, "x": 16, - "y": 5 + "y": 10 }, "id": 13, "options": { @@ -612,15 +618,130 @@ "title": "Cost by Instance Type", "type": "piechart" }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "spanNulls": false + }, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 14, + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "1 -\n(\n avg_over_time(\n sum(node_total_hourly_cost{job=\"$job\"}) [1d:1h]\n )\n /\n avg_over_time(\n sum(node_total_hourly_cost{job=\"$job\"}) [7d:1h]\n )\n)\n", + "interval": "10m", + "legendFormat": "Current hourly cost vs. 
7-day average" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "1 -\n(\n avg_over_time(\n sum(node_total_hourly_cost{job=\"$job\"}) [1d:1h]\n )\n /\n avg_over_time(\n sum(node_total_hourly_cost{job=\"$job\"}) [30d:1h]\n )\n)\n", + "interval": "10m", + "legendFormat": "Current hourly cost vs. 30-day average" + } + ], + "title": "Total Cost Variance", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "spanNulls": false + }, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 15, + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "1 -\n(\n avg_over_time(\n sum(\n sum(\n kube_node_status_capacity{\n job=~\"$job\",\n resource=\"cpu\",\n unit=\"core\"\n }\n ) by (node)\n *\n sum(\n node_cpu_hourly_cost{\n job=~\"$job\"\n }\n ) by (node)\n * 730\n)\n [1d:1h]\n )\n /\n avg_over_time(\n sum(\n sum(\n kube_node_status_capacity{\n job=~\"$job\",\n resource=\"cpu\",\n unit=\"core\"\n }\n ) by (node)\n *\n sum(\n node_cpu_hourly_cost{\n job=~\"$job\"\n }\n ) by (node)\n * 730\n)\n [30d:1h]\n )\n)\n", + "interval": "10m", + "legendFormat": "Current CPU hourly cost vs. 
30-day average" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "1 -\n(\n avg_over_time(\n sum(\n sum(\n kube_node_status_capacity{\n job=~\"$job\",\n resource=\"memory\",\n unit=\"byte\"\n }\n ) by (node)\n / 1024 / 1024 / 1024\n *\n sum(\n node_ram_hourly_cost{\n job=~\"$job\"\n }\n ) by (node)\n * 730\n)\n [1d:1h]\n )\n /\n avg_over_time(\n sum(\n sum(\n kube_node_status_capacity{\n job=~\"$job\",\n resource=\"memory\",\n unit=\"byte\"\n }\n ) by (node)\n / 1024 / 1024 / 1024\n *\n sum(\n node_ram_hourly_cost{\n job=~\"$job\"\n }\n ) by (node)\n * 730\n)\n [30d:1h]\n )\n)\n", + "interval": "10m", + "legendFormat": "Current RAM hourly cost vs. 30-day average" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "1 -\n(\n avg_over_time(\n (sum(\n sum(\n kube_persistentvolume_capacity_bytes{\n job=~\"$job\"\n }\n / 1024 / 1024 / 1024\n ) by (persistentvolume)\n *\n sum(\n pv_hourly_cost{\n job=~\"$job\"\n }\n ) by (persistentvolume)\n) * 730\n) [1d:1h]\n )\n /\n avg_over_time(\n (sum(\n sum(\n kube_persistentvolume_capacity_bytes{\n job=~\"$job\"\n }\n / 1024 / 1024 / 1024\n ) by (persistentvolume)\n *\n sum(\n pv_hourly_cost{\n job=~\"$job\"\n }\n ) by (persistentvolume)\n) * 730\n) [30d:1h]\n )\n)\n", + "interval": "10m", + "legendFormat": "Current PV hourly cost vs. 
30-day average" + } + ], + "title": "Resource Cost Variance", + "type": "timeseries" + }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 15 + "y": 20 }, - "id": 14, + "id": 16, "title": "Cloud Resources", "type": "row" }, @@ -638,9 +759,9 @@ "h": 10, "w": 16, "x": 0, - "y": 16 + "y": 21 }, - "id": 15, + "id": 17, "options": { "footer": { "enablePagination": true @@ -743,9 +864,9 @@ "h": 10, "w": 8, "x": 16, - "y": 16 + "y": 21 }, - "id": 16, + "id": 18, "options": { "footer": { "enablePagination": true @@ -811,9 +932,9 @@ "h": 1, "w": 24, "x": 0, - "y": 17 + "y": 31 }, - "id": 17, + "id": 19, "title": "Namespace Summary", "type": "row" }, @@ -846,48 +967,19 @@ { "matcher": { "id": "byName", - "options": "Cost Difference (7d)" + "options": "Namespace" }, "properties": [ { - "id": "color", - "value": { - "mode": "thresholds" - } - }, - { - "id": "custom.cellOptions", - "value": { - "type": "color-background" - } - }, - { - "id": "unit", - "value": "percent" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cost Difference (30d)" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "thresholds" - } - }, - { - "id": "custom.cellOptions", - "value": { - "type": "color-background" - } - }, - { - "id": "unit", - "value": "percent" + "id": "links", + "value": [ + { + "targetBlank": true, + "title": "Go To Namespace", + "type": "dashboard", + "url": "/d/opencost-mixin-namespace-jkwq/opencost-namespace?var-job=$job&var-namespace=${__data.fields.Namespace}" + } + ] } ] } @@ -897,9 +989,9 @@ "h": 12, "w": 24, "x": 0, - "y": 18 + "y": 32 }, - "id": 18, + "id": 20, "options": { "footer": { "enablePagination": true @@ -995,18 +1087,6 @@ "refresh": 2, "sort": 1, "type": "query" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "label": "Namespace", - "name": "namespace", - "query": "label_values(kube_namespace_labels, namespace)", - "refresh": 2, - "sort": 1, - "type": "query" } ] }, diff --git 
a/prometheus_alerts.yaml b/prometheus_alerts.yaml index 149bc78..d3d8959 100644 --- a/prometheus_alerts.yaml +++ b/prometheus_alerts.yaml @@ -37,3 +37,30 @@ "for": "30m" "labels": "severity": "warning" + - "alert": "OpenCostAnomalyDetected" + "annotations": + "dashboard_url": "https://grafana.com/d/opencost-mixin-kover-jkwq/opencost-overview" + "description": "A significant increase in cluster costs has been detected. The average hourly cost over the last 3 hours exceeds the 7-day average by more than 20%. This could indicate unexpected resource usage or cost-related changes in the cluster." + "summary": "OpenCost Cost Anomaly Detected" + "expr": | + 1 - + ( + avg_over_time( + sum( + node_total_hourly_cost{ + job=~"opencost" + } + ) [7d:1h] + ) + / + avg_over_time( + sum( + node_total_hourly_cost{ + job=~"opencost" + } + ) [3h:30m] + ) + ) > 0.20000000000000001 + "for": "10m" + "labels": + "severity": "warning" diff --git a/tests.yaml b/tests.yaml index e864fd6..3f9b652 100644 --- a/tests.yaml +++ b/tests.yaml @@ -34,3 +34,17 @@ tests: summary: "OpenCost Monthly Budget Exceeded" description: "The monthly budget for the cluster has been exceeded. Consider scaling down resources or increasing the budget." dashboard_url: "https://grafana.com/d/opencost-mixin-kover-jkwq/opencost-overview" + - interval: 1h + input_series: + - series: 'node_total_hourly_cost{job="opencost", node="test-node"}' + values: "1x3 2000x3" + alert_rule_test: + - eval_time: 6h + alertname: OpenCostAnomalyDetected + exp_alerts: + - exp_labels: + severity: warning + exp_annotations: + summary: "OpenCost Cost Anomaly Detected" + description: "A significant increase in cluster costs has been detected. The average hourly cost over the last 3 hours exceeds the 7-day average by more than 20%. This could indicate unexpected resource usage or cost-related changes in the cluster." + dashboard_url: "https://grafana.com/d/opencost-mixin-kover-jkwq/opencost-overview"