From 000ba787784a0d3a0b840b9384a08eea3614336b Mon Sep 17 00:00:00 2001 From: Alexi Doak <109488926+doakalexi@users.noreply.github.com> Date: Mon, 23 Jan 2023 13:27:39 -0500 Subject: [PATCH] [ResponseOps][flapping] change action behavior when flapping (#147810) Resolves https://github.com/elastic/kibana/issues/143445 ## Summary This pr changes the behavior of alerting actions respecting flapping. To recover a flapping alert the alert must be reported as recovered for 4 (this will be configurable eventually) consecutive runs before the framework makes the alert as recovered. Link to a spreadsheet that helps vizualize the changes: https://docs.google.com/spreadsheets/d/1amoUDFx-Sdxirnabd_E4OSnyur57sLhY6Ce79oDYgJA/edit#gid=0 Also Resolves https://github.com/elastic/kibana/issues/147205 to recover those alerts ### Checklist - [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios ### To verify - Create an alert and let it run until it gets into a flapping state (changes from active/recovered at least 4 times) - Once flapping, verify the following cases: - Verify that a recovered alert will be returned as an active notification 3 times before finally recovering - Verify that an alert that does not recover 3 times before an active alert is reported never reports as a recovered alert Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com> Co-authored-by: Ying Mao --- .../plugins/alerting/common/alert_instance.ts | 1 + .../alerting/server/alert/alert.test.ts | 44 +++++- x-pack/plugins/alerting/server/alert/alert.ts | 15 ++ .../legacy_alerts_client.test.ts | 28 +++- .../alerts_client/legacy_alerts_client.ts | 39 +++-- .../alerting/server/lib/flapping_utils.ts | 2 +- .../lib/get_alerts_for_notification.test.ts | 146 ++++++++++++++++++ .../server/lib/get_alerts_for_notification.ts | 69 +++++++++ x-pack/plugins/alerting/server/lib/index.ts | 2 + .../server/lib/trim_recovered_alerts.test.ts | 78 ++++++++++ .../server/lib/trim_recovered_alerts.ts | 76 +++++++++ .../alerting/server/task_runner/fixtures.ts | 1 + .../server/task_runner/task_runner.test.ts | 3 + .../metric_threshold_executor.test.ts | 2 + .../utils/create_lifecycle_executor.test.ts | 28 +++- .../server/utils/create_lifecycle_executor.ts | 26 +++- .../utils/get_alerts_for_notification.test.ts | 85 ++++++++++ .../utils/get_alerts_for_notification.ts | 36 +++++ .../get_updated_flapping_history.test.ts | 4 + .../spaces_only/tests/alerting/event_log.ts | 29 +++- .../tests/alerting/event_log_alerts.ts | 4 +- 21 files changed, 688 insertions(+), 30 deletions(-) create mode 100644 x-pack/plugins/alerting/server/lib/get_alerts_for_notification.test.ts create mode 100644 x-pack/plugins/alerting/server/lib/get_alerts_for_notification.ts create mode 100644 x-pack/plugins/alerting/server/lib/trim_recovered_alerts.test.ts create mode 100644 x-pack/plugins/alerting/server/lib/trim_recovered_alerts.ts create mode 100644 x-pack/plugins/rule_registry/server/utils/get_alerts_for_notification.test.ts create mode 100644 x-pack/plugins/rule_registry/server/utils/get_alerts_for_notification.ts diff --git a/x-pack/plugins/alerting/common/alert_instance.ts b/x-pack/plugins/alerting/common/alert_instance.ts index a72479f42b523..7aad3a05de6f2 100644 --- a/x-pack/plugins/alerting/common/alert_instance.ts +++ b/x-pack/plugins/alerting/common/alert_instance.ts @@ -36,6 +36,7 @@ const metaSchema = t.partial({ flappingHistory: t.array(t.boolean), // flapping flag that indicates whether the alert is flapping flapping: t.boolean, + pendingRecoveredCount: t.number, }); export type AlertInstanceMeta = t.TypeOf; diff --git a/x-pack/plugins/alerting/server/alert/alert.test.ts b/x-pack/plugins/alerting/server/alert/alert.test.ts index b668918e1a826..479c5a4a55412 100644 --- a/x-pack/plugins/alerting/server/alert/alert.test.ts +++ b/x-pack/plugins/alerting/server/alert/alert.test.ts @@ -414,11 +414,12 @@ describe('toJSON', () => { }, flappingHistory: [false, true], flapping: false, + pendingRecoveredCount: 2, }, } ); expect(JSON.stringify(alertInstance)).toEqual( - '{"state":{"foo":true},"meta":{"lastScheduledActions":{"date":"1970-01-01T00:00:00.000Z","group":"default"},"flappingHistory":[false,true],"flapping":false}}' + '{"state":{"foo":true},"meta":{"lastScheduledActions":{"date":"1970-01-01T00:00:00.000Z","group":"default"},"flappingHistory":[false,true],"flapping":false,"pendingRecoveredCount":2}}' ); }); }); @@ -433,6 +434,7 @@ describe('toRaw', () => { group: 'default', }, flappingHistory: [false, true, true], + pendingRecoveredCount: 2, }, }; const alertInstance = new Alert( @@ -529,3 +531,43 @@ describe('getFlapping', () => { expect(alert.getFlapping()).toEqual(true); }); }); + +describe('incrementPendingRecoveredCount', () => { + test('correctly increments pendingRecoveredCount', () => { + const alert = new Alert('1', { + meta: { pendingRecoveredCount: 3 }, + }); + alert.incrementPendingRecoveredCount(); + expect(alert.getPendingRecoveredCount()).toEqual(4); + }); + + test('correctly increments pendingRecoveredCount when it is not already defined', () => { + const alert = new Alert('1'); + alert.incrementPendingRecoveredCount(); + expect(alert.getPendingRecoveredCount()).toEqual(1); + }); +}); + +describe('getPendingRecoveredCount', () => { + test('returns pendingRecoveredCount', () => { + const alert = new Alert('1', { + meta: { pendingRecoveredCount: 3 }, + }); + expect(alert.getPendingRecoveredCount()).toEqual(3); + }); + + test('defines and returns pendingRecoveredCount when it is not already defined', () => { + const alert = new Alert('1'); + expect(alert.getPendingRecoveredCount()).toEqual(0); + }); +}); + +describe('resetPendingRecoveredCount', () => { + test('resets pendingRecoveredCount to 0', () => { + const alert = new Alert('1', { + meta: { pendingRecoveredCount: 3 }, + }); + alert.resetPendingRecoveredCount(); + expect(alert.getPendingRecoveredCount()).toEqual(0); + }); +}); diff --git a/x-pack/plugins/alerting/server/alert/alert.ts b/x-pack/plugins/alerting/server/alert/alert.ts index 7974f1906f65c..361d3d0c52258 100644 --- a/x-pack/plugins/alerting/server/alert/alert.ts +++ b/x-pack/plugins/alerting/server/alert/alert.ts @@ -225,4 +225,19 @@ export class Alert< getFlapping() { return this.meta.flapping || false; } + + incrementPendingRecoveredCount() { + if (!this.meta.pendingRecoveredCount) { + this.meta.pendingRecoveredCount = 0; + } + this.meta.pendingRecoveredCount++; + } + + getPendingRecoveredCount() { + return this.meta.pendingRecoveredCount || 0; + } + + resetPendingRecoveredCount() { + this.meta.pendingRecoveredCount = 0; + } } diff --git a/x-pack/plugins/alerting/server/alerts_client/legacy_alerts_client.test.ts b/x-pack/plugins/alerting/server/alerts_client/legacy_alerts_client.test.ts index e7e62ec848ea3..dddfe857cfd44 100644 --- a/x-pack/plugins/alerting/server/alerts_client/legacy_alerts_client.test.ts +++ b/x-pack/plugins/alerting/server/alerts_client/legacy_alerts_client.test.ts @@ -12,7 +12,7 @@ import { createAlertFactory, getPublicAlertFactory } from '../alert/create_alert import { Alert } from '../alert/alert'; import { alertingEventLoggerMock } from '../lib/alerting_event_logger/alerting_event_logger.mock'; import { ruleRunMetricsStoreMock } from '../lib/rule_run_metrics_store.mock'; -import { processAlerts, setFlapping } from '../lib'; +import { getAlertsForNotification, processAlerts, setFlapping } from '../lib'; import { logAlerts } from '../task_runner/log_alerts'; const scheduleActions = jest.fn(); @@ -61,6 +61,12 @@ jest.mock('../lib', () => { }; }); +jest.mock('../lib/get_alerts_for_notification', () => { + return { + getAlertsForNotification: jest.fn(), + }; +}); + jest.mock('../task_runner/log_alerts', () => ({ logAlerts: jest.fn() })); let logger: ReturnType; @@ -195,6 +201,15 @@ describe('Legacy Alerts Client', () => { currentRecoveredAlerts: {}, recoveredAlerts: {}, }); + (getAlertsForNotification as jest.Mock).mockReturnValue({ + newAlerts: {}, + activeAlerts: { + '1': new Alert('1', testAlert1), + '2': new Alert('2', testAlert2), + }, + currentRecoveredAlerts: {}, + recoveredAlerts: {}, + }); const alertsClient = new LegacyAlertsClient({ logger, maxAlerts: 1000, @@ -240,6 +255,17 @@ describe('Legacy Alerts Client', () => { {} ); + expect(getAlertsForNotification).toHaveBeenCalledWith( + 'default', + {}, + { + '1': new Alert('1', testAlert1), + '2': new Alert('2', testAlert2), + }, + {}, + {} + ); + expect(logAlerts).toHaveBeenCalledWith({ logger, alertingEventLogger, diff --git a/x-pack/plugins/alerting/server/alerts_client/legacy_alerts_client.ts b/x-pack/plugins/alerting/server/alerts_client/legacy_alerts_client.ts index c4fa1b8565ca3..4c32c1cbcc928 100644 --- a/x-pack/plugins/alerting/server/alerts_client/legacy_alerts_client.ts +++ b/x-pack/plugins/alerting/server/alerts_client/legacy_alerts_client.ts @@ -5,16 +5,22 @@ * 2.0. */ import { Logger } from '@kbn/core/server'; -import { cloneDeep } from 'lodash'; +import { cloneDeep, merge } from 'lodash'; import { Alert } from '../alert/alert'; import { AlertFactory, createAlertFactory, getPublicAlertFactory, } from '../alert/create_alert_factory'; -import { determineAlertsToReturn, processAlerts, setFlapping } from '../lib'; +import { + determineAlertsToReturn, + processAlerts, + setFlapping, + getAlertsForNotification, +} from '../lib'; import { AlertingEventLogger } from '../lib/alerting_event_logger/alerting_event_logger'; import { RuleRunMetricsStore } from '../lib/rule_run_metrics_store'; +import { trimRecoveredAlerts } from '../lib/trim_recovered_alerts'; import { UntypedNormalizedRuleType } from '../rule_type_registry'; import { logAlerts } from '../task_runner/log_alerts'; import { @@ -134,17 +140,32 @@ export class LegacyAlertsClient< processedAlertsRecovered ); - this.processedAlerts.new = processedAlertsNew; - this.processedAlerts.active = processedAlertsActive; - this.processedAlerts.recovered = processedAlertsRecovered; - this.processedAlerts.recoveredCurrent = processedAlertsRecoveredCurrent; + const { trimmedAlertsRecovered, earlyRecoveredAlerts } = trimRecoveredAlerts( + this.options.logger, + processedAlertsRecovered, + this.options.maxAlerts + ); + + const alerts = getAlertsForNotification( + this.options.ruleType.defaultActionGroupId, + processedAlertsNew, + processedAlertsActive, + trimmedAlertsRecovered, + processedAlertsRecoveredCurrent + ); + alerts.currentRecoveredAlerts = merge(alerts.currentRecoveredAlerts, earlyRecoveredAlerts); + + this.processedAlerts.new = alerts.newAlerts; + this.processedAlerts.active = alerts.activeAlerts; + this.processedAlerts.recovered = alerts.recoveredAlerts; + this.processedAlerts.recoveredCurrent = alerts.currentRecoveredAlerts; logAlerts({ logger: this.options.logger, alertingEventLogger: eventLogger, - newAlerts: processedAlertsNew, - activeAlerts: processedAlertsActive, - recoveredAlerts: processedAlertsRecoveredCurrent, + newAlerts: alerts.newAlerts, + activeAlerts: alerts.activeAlerts, + recoveredAlerts: alerts.currentRecoveredAlerts, ruleLogPrefix: ruleLabel, ruleRunMetricsStore, canSetRecoveryContext: this.options.ruleType.doesSetRecoveryContext ?? false, diff --git a/x-pack/plugins/alerting/server/lib/flapping_utils.ts b/x-pack/plugins/alerting/server/lib/flapping_utils.ts index 8427e02880844..30e7059b2bc8f 100644 --- a/x-pack/plugins/alerting/server/lib/flapping_utils.ts +++ b/x-pack/plugins/alerting/server/lib/flapping_utils.ts @@ -6,7 +6,7 @@ */ const MAX_CAPACITY = 20; -const MAX_FLAP_COUNT = 4; +export const MAX_FLAP_COUNT = 4; export function updateFlappingHistory(flappingHistory: boolean[], state: boolean) { const updatedFlappingHistory = flappingHistory.concat(state).slice(MAX_CAPACITY * -1); diff --git a/x-pack/plugins/alerting/server/lib/get_alerts_for_notification.test.ts b/x-pack/plugins/alerting/server/lib/get_alerts_for_notification.test.ts new file mode 100644 index 0000000000000..ecf792518ce8b --- /dev/null +++ b/x-pack/plugins/alerting/server/lib/get_alerts_for_notification.test.ts @@ -0,0 +1,146 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { getAlertsForNotification } from '.'; +import { Alert } from '../alert'; + +describe('getAlertsForNotification', () => { + test('should set pendingRecoveredCount to zero for all active alerts', () => { + const alert1 = new Alert('1', { meta: { flapping: true, pendingRecoveredCount: 3 } }); + const alert2 = new Alert('2', { meta: { flapping: false } }); + + const { newAlerts, activeAlerts } = getAlertsForNotification( + 'default', + { + '1': alert1, + }, + { + '1': alert1, + '2': alert2, + }, + {}, + {} + ); + expect(newAlerts).toMatchInlineSnapshot(` + Object { + "1": Object { + "meta": Object { + "flapping": true, + "flappingHistory": Array [], + "pendingRecoveredCount": 0, + }, + "state": Object {}, + }, + } + `); + expect(activeAlerts).toMatchInlineSnapshot(` + Object { + "1": Object { + "meta": Object { + "flapping": true, + "flappingHistory": Array [], + "pendingRecoveredCount": 0, + }, + "state": Object {}, + }, + "2": Object { + "meta": Object { + "flapping": false, + "flappingHistory": Array [], + "pendingRecoveredCount": 0, + }, + "state": Object {}, + }, + } + `); + }); + + test('should return flapping pending recovered alerts as active alerts', () => { + const alert1 = new Alert('1', { meta: { flapping: true, pendingRecoveredCount: 3 } }); + const alert2 = new Alert('2', { meta: { flapping: false } }); + const alert3 = new Alert('3', { meta: { flapping: true } }); + + const { newAlerts, activeAlerts, recoveredAlerts, currentRecoveredAlerts } = + getAlertsForNotification( + 'default', + {}, + {}, + { + '1': alert1, + '2': alert2, + '3': alert3, + }, + { + '1': alert1, + '2': alert2, + '3': alert3, + } + ); + + expect(newAlerts).toMatchInlineSnapshot(`Object {}`); + expect(activeAlerts).toMatchInlineSnapshot(` + Object { + "3": Object { + "meta": Object { + "flapping": true, + "flappingHistory": Array [], + "pendingRecoveredCount": 1, + }, + "state": Object {}, + }, + } + `); + expect(Object.values(activeAlerts).map((a) => a.getScheduledActionOptions())) + .toMatchInlineSnapshot(` + Array [ + Object { + "actionGroup": "default", + "context": Object {}, + "state": Object {}, + }, + ] + `); + expect(recoveredAlerts).toMatchInlineSnapshot(` + Object { + "1": Object { + "meta": Object { + "flapping": true, + "flappingHistory": Array [], + "pendingRecoveredCount": 0, + }, + "state": Object {}, + }, + "2": Object { + "meta": Object { + "flapping": false, + "flappingHistory": Array [], + }, + "state": Object {}, + }, + } + `); + expect(currentRecoveredAlerts).toMatchInlineSnapshot(` + Object { + "1": Object { + "meta": Object { + "flapping": true, + "flappingHistory": Array [], + "pendingRecoveredCount": 0, + }, + "state": Object {}, + }, + "2": Object { + "meta": Object { + "flapping": false, + "flappingHistory": Array [], + }, + "state": Object {}, + }, + } + `); + }); +}); diff --git a/x-pack/plugins/alerting/server/lib/get_alerts_for_notification.ts b/x-pack/plugins/alerting/server/lib/get_alerts_for_notification.ts new file mode 100644 index 0000000000000..bbfb65ad77b7d --- /dev/null +++ b/x-pack/plugins/alerting/server/lib/get_alerts_for_notification.ts @@ -0,0 +1,69 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { keys } from 'lodash'; +import { Alert } from '../alert'; +import { AlertInstanceState, AlertInstanceContext } from '../types'; +import { MAX_FLAP_COUNT } from './flapping_utils'; + +export function getAlertsForNotification< + State extends AlertInstanceState, + Context extends AlertInstanceContext, + ActionGroupIds extends string, + RecoveryActionGroupId extends string +>( + actionGroupId: string, + newAlerts: Record> = {}, + activeAlerts: Record> = {}, + recoveredAlerts: Record> = {}, + currentRecoveredAlerts: Record> = {} +) { + for (const id of keys(activeAlerts)) { + const alert = activeAlerts[id]; + alert.resetPendingRecoveredCount(); + } + + for (const id of keys(currentRecoveredAlerts)) { + const alert = recoveredAlerts[id]; + const flapping = alert.getFlapping(); + if (flapping) { + alert.incrementPendingRecoveredCount(); + + if (alert.getPendingRecoveredCount() < MAX_FLAP_COUNT) { + // keep the context and previous actionGroupId if available + const context = alert.getContext(); + const lastActionGroupId = alert.getLastScheduledActions()?.group; + + const newAlert = new Alert(id, alert.toRaw()); + // unset the end time in the alert state + const state = newAlert.getState(); + delete state.end; + newAlert.replaceState(state); + + // schedule actions for the new active alert + newAlert.scheduleActions( + (lastActionGroupId ? lastActionGroupId : actionGroupId) as ActionGroupIds, + context + ); + activeAlerts[id] = newAlert; + + // remove from recovered alerts + delete recoveredAlerts[id]; + delete currentRecoveredAlerts[id]; + } else { + alert.resetPendingRecoveredCount(); + } + } + } + + return { + newAlerts, + activeAlerts, + recoveredAlerts, + currentRecoveredAlerts, + }; +} diff --git a/x-pack/plugins/alerting/server/lib/index.ts b/x-pack/plugins/alerting/server/lib/index.ts index 8b1416df007fa..8f8dccadaaa77 100644 --- a/x-pack/plugins/alerting/server/lib/index.ts +++ b/x-pack/plugins/alerting/server/lib/index.ts @@ -41,3 +41,5 @@ export * from './snooze'; export { setFlapping } from './set_flapping'; export { determineAlertsToReturn } from './determine_alerts_to_return'; export { updateFlappingHistory, isFlapping } from './flapping_utils'; +export { getAlertsForNotification } from './get_alerts_for_notification'; +export { trimRecoveredAlerts } from './trim_recovered_alerts'; diff --git a/x-pack/plugins/alerting/server/lib/trim_recovered_alerts.test.ts b/x-pack/plugins/alerting/server/lib/trim_recovered_alerts.test.ts new file mode 100644 index 0000000000000..3c42a5664477c --- /dev/null +++ b/x-pack/plugins/alerting/server/lib/trim_recovered_alerts.test.ts @@ -0,0 +1,78 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { loggingSystemMock } from '@kbn/core-logging-server-mocks'; +import { Alert } from '../alert'; +import { getEarlyRecoveredAlerts, trimRecoveredAlerts } from './trim_recovered_alerts'; + +describe('trimRecoveredAlerts', () => { + const logger = loggingSystemMock.createLogger(); + + const alert1 = new Alert('1', { meta: { flappingHistory: [true, true, true, true] } }); + const alert2 = new Alert('2', { meta: { flappingHistory: new Array(20).fill(false) } }); + const alert3 = new Alert('3', { meta: { flappingHistory: [true, true] } }); + const alert4 = { + key: '4', + flappingHistory: [true, true, true, true], + }; + const alert5 = { + key: '5', + flappingHistory: new Array(20).fill(false), + }; + const alert6 = { + key: '6', + flappingHistory: [true, true], + }; + + test('should remove longest recovered alerts', () => { + const recoveredAlerts = { + '1': alert1, + '2': alert2, + '3': alert3, + }; + + const trimmedAlerts = trimRecoveredAlerts(logger, recoveredAlerts, 2); + expect(trimmedAlerts).toEqual({ + trimmedAlertsRecovered: { 1: alert1, 3: alert3 }, + earlyRecoveredAlerts: { + 2: new Alert('2', { + meta: { flappingHistory: new Array(20).fill(false), flapping: false }, + }), + }, + }); + }); + + test('should not remove alerts if the num of recovered alerts is not at the limit', () => { + const recoveredAlerts = { + '1': alert1, + '2': alert2, + '3': alert3, + }; + + const trimmedAlerts = trimRecoveredAlerts(logger, recoveredAlerts, 3); + expect(trimmedAlerts).toEqual({ + trimmedAlertsRecovered: recoveredAlerts, + earlyRecoveredAlerts: {}, + }); + }); + + test('getEarlyRecoveredAlerts should return longest recovered alerts', () => { + const recoveredAlerts = [alert4, alert5, alert6]; + const trimmedAlerts = getEarlyRecoveredAlerts(logger, recoveredAlerts, 2); + expect(trimmedAlerts).toEqual([alert5]); + + expect(logger.warn).toBeCalledWith( + 'Recovered alerts have exceeded the max alert limit of 2 : dropping 1 alert.' + ); + }); + + test('getEarlyRecoveredAlerts should not return alerts if the num of recovered alerts is not at the limit', () => { + const recoveredAlerts = [alert4, alert5]; + const trimmedAlerts = getEarlyRecoveredAlerts(logger, recoveredAlerts, 2); + expect(trimmedAlerts).toEqual([]); + }); +}); diff --git a/x-pack/plugins/alerting/server/lib/trim_recovered_alerts.ts b/x-pack/plugins/alerting/server/lib/trim_recovered_alerts.ts new file mode 100644 index 0000000000000..779bf0023a3c3 --- /dev/null +++ b/x-pack/plugins/alerting/server/lib/trim_recovered_alerts.ts @@ -0,0 +1,76 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { Logger } from '@kbn/logging'; +import { map } from 'lodash'; +import { Alert } from '../alert'; +import { AlertInstanceState, AlertInstanceContext } from '../types'; + +interface TrimmedRecoveredAlertsResult< + State extends AlertInstanceState, + Context extends AlertInstanceContext, + RecoveryActionGroupIds extends string +> { + trimmedAlertsRecovered: Record>; + earlyRecoveredAlerts: Record>; +} + +export interface TrimRecoveredOpts { + key: string; + flappingHistory: boolean[]; +} + +export function trimRecoveredAlerts< + State extends AlertInstanceState, + Context extends AlertInstanceContext, + RecoveryActionGroupIds extends string +>( + logger: Logger, + recoveredAlerts: Record> = {}, + maxAlerts: number +): TrimmedRecoveredAlertsResult { + const alerts = map(recoveredAlerts, (value, key) => { + return { + key, + flappingHistory: value.getFlappingHistory() || [], + }; + }); + const earlyRecoveredAlertOpts = getEarlyRecoveredAlerts(logger, alerts, maxAlerts); + const earlyRecoveredAlerts: Record> = {}; + earlyRecoveredAlertOpts.forEach((opt) => { + const alert = recoveredAlerts[opt.key]; + alert.setFlapping(false); + earlyRecoveredAlerts[opt.key] = recoveredAlerts[opt.key]; + + delete recoveredAlerts[opt.key]; + }); + return { + trimmedAlertsRecovered: recoveredAlerts, + earlyRecoveredAlerts, + }; +} + +export function getEarlyRecoveredAlerts( + logger: Logger, + recoveredAlerts: TrimRecoveredOpts[], + maxAlerts: number +) { + let earlyRecoveredAlerts: TrimRecoveredOpts[] = []; + if (recoveredAlerts.length > maxAlerts) { + recoveredAlerts.sort((a, b) => { + return a.flappingHistory.length - b.flappingHistory.length; + }); + + earlyRecoveredAlerts = recoveredAlerts.slice(maxAlerts); + logger.warn( + `Recovered alerts have exceeded the max alert limit of ${maxAlerts} : dropping ${ + earlyRecoveredAlerts.length + } ${earlyRecoveredAlerts.length > 1 ? 'alerts' : 'alert'}.` + ); + } + return earlyRecoveredAlerts; +} diff --git a/x-pack/plugins/alerting/server/task_runner/fixtures.ts b/x-pack/plugins/alerting/server/task_runner/fixtures.ts index a92b4f6850e4a..7784247ca75f3 100644 --- a/x-pack/plugins/alerting/server/task_runner/fixtures.ts +++ b/x-pack/plugins/alerting/server/task_runner/fixtures.ts @@ -347,6 +347,7 @@ export const generateAlertInstance = ( }, flappingHistory, flapping: false, + pendingRecoveredCount: 0, }, state: { bar: false, diff --git a/x-pack/plugins/alerting/server/task_runner/task_runner.test.ts b/x-pack/plugins/alerting/server/task_runner/task_runner.test.ts index c0dcd9c74135b..e27bb167d45e8 100644 --- a/x-pack/plugins/alerting/server/task_runner/task_runner.test.ts +++ b/x-pack/plugins/alerting/server/task_runner/task_runner.test.ts @@ -2619,6 +2619,7 @@ describe('Task Runner', () => { }, flappingHistory: [true], flapping: false, + pendingRecoveredCount: 0, }, state: { duration: '0', @@ -2784,6 +2785,7 @@ describe('Task Runner', () => { }, flappingHistory: [true], flapping: false, + pendingRecoveredCount: 0, }, state: { duration: '0', @@ -2798,6 +2800,7 @@ describe('Task Runner', () => { }, flappingHistory: [true], flapping: false, + pendingRecoveredCount: 0, }, state: { duration: '0', diff --git a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts index 9a6daf86be040..409dede158515 100644 --- a/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts +++ b/x-pack/plugins/infra/server/lib/alerting/metric_threshold/metric_threshold_executor.test.ts @@ -81,6 +81,7 @@ const mockOptions = { started: '2020-01-01T12:00:00.000Z', flappingHistory: [], flapping: false, + pendingRecoveredCount: 0, }, TEST_ALERT_1: { alertId: 'TEST_ALERT_1', @@ -88,6 +89,7 @@ const mockOptions = { started: '2020-01-02T12:00:00.000Z', flappingHistory: [], flapping: false, + pendingRecoveredCount: 0, }, }, trackedAlertsRecovered: {}, diff --git a/x-pack/plugins/rule_registry/server/utils/create_lifecycle_executor.test.ts b/x-pack/plugins/rule_registry/server/utils/create_lifecycle_executor.test.ts index 8e847b4c376fc..2902533c145f1 100644 --- a/x-pack/plugins/rule_registry/server/utils/create_lifecycle_executor.test.ts +++ b/x-pack/plugins/rule_registry/server/utils/create_lifecycle_executor.test.ts @@ -197,6 +197,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-01T12:00:00.000Z', flappingHistory: [], flapping: false, + pendingRecoveredCount: 0, }, TEST_ALERT_1: { alertId: 'TEST_ALERT_1', @@ -204,6 +205,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-02T12:00:00.000Z', flappingHistory: [], flapping: false, + pendingRecoveredCount: 0, }, }, trackedAlertsRecovered: {}, @@ -318,6 +320,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-01T12:00:00.000Z', flappingHistory: [], flapping: false, + pendingRecoveredCount: 0, }, TEST_ALERT_1: { alertId: 'TEST_ALERT_1', @@ -325,6 +328,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-02T12:00:00.000Z', flappingHistory: [], flapping: false, + pendingRecoveredCount: 0, }, }, trackedAlertsRecovered: {}, @@ -550,6 +554,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-01T12:00:00.000Z', flappingHistory: [], flapping: false, + pendingRecoveredCount: 0, }, TEST_ALERT_1: { alertId: 'TEST_ALERT_1', @@ -557,6 +562,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-02T12:00:00.000Z', flappingHistory: [], flapping: false, + pendingRecoveredCount: 0, }, }, trackedAlertsRecovered: {}, @@ -660,6 +666,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-01T12:00:00.000Z', flappingHistory: [], flapping: false, + pendingRecoveredCount: 0, }, TEST_ALERT_1: { alertId: 'TEST_ALERT_1', @@ -667,6 +674,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-02T12:00:00.000Z', flappingHistory: [], flapping: false, + pendingRecoveredCount: 0, }, }, trackedAlerts: {}, @@ -765,6 +773,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-01T12:00:00.000Z', flappingHistory: [], flapping: false, + pendingRecoveredCount: 0, }, TEST_ALERT_1: { alertId: 'TEST_ALERT_1', @@ -772,6 +781,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-02T12:00:00.000Z', flappingHistory: [], flapping: false, + pendingRecoveredCount: 0, }, }, trackedAlertsRecovered: {}, @@ -871,6 +881,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-02T12:00:00.000Z', flappingHistory: [], flapping: false, + pendingRecoveredCount: 0, }, }, trackedAlertsRecovered: { @@ -880,6 +891,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-01T12:00:00.000Z', flappingHistory: [], flapping: false, + pendingRecoveredCount: 0, }, }, }, @@ -1016,6 +1028,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-01T12:00:00.000Z', flappingHistory: flapping, flapping: false, + pendingRecoveredCount: 0, }, TEST_ALERT_1: { alertId: 'TEST_ALERT_1', @@ -1023,6 +1036,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-02T12:00:00.000Z', flappingHistory: [false, false], flapping: false, + pendingRecoveredCount: 0, }, TEST_ALERT_2: { alertId: 'TEST_ALERT_2', @@ -1030,6 +1044,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-01T12:00:00.000Z', flappingHistory: flapping, flapping: true, + pendingRecoveredCount: 0, }, TEST_ALERT_3: { alertId: 'TEST_ALERT_3', @@ -1037,6 +1052,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-02T12:00:00.000Z', flappingHistory: [false, false], flapping: true, + pendingRecoveredCount: 0, }, }, trackedAlertsRecovered: {}, @@ -1179,6 +1195,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-01T12:00:00.000Z', flappingHistory: [true, true, true, true], flapping: false, + pendingRecoveredCount: 0, }, TEST_ALERT_1: { alertId: 'TEST_ALERT_1', @@ -1186,6 +1203,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-02T12:00:00.000Z', flappingHistory: notFlapping, flapping: false, + pendingRecoveredCount: 0, }, TEST_ALERT_2: { alertId: 'TEST_ALERT_2', @@ -1193,6 +1211,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-02T12:00:00.000Z', flappingHistory: [true, true], flapping: true, + pendingRecoveredCount: 0, }, TEST_ALERT_3: { alertId: 'TEST_ALERT_3', @@ -1200,6 +1219,7 @@ describe('createLifecycleExecutor', () => { started: '2020-01-02T12:00:00.000Z', flappingHistory: notFlapping, flapping: false, + pendingRecoveredCount: 0, }, }, trackedAlertsRecovered: {}, @@ -1215,8 +1235,8 @@ describe('createLifecycleExecutor', () => { { index: { _id: 'TEST_ALERT_0_UUID' } }, expect.objectContaining({ [ALERT_INSTANCE_ID]: 'TEST_ALERT_0', - [ALERT_STATUS]: ALERT_STATUS_RECOVERED, - [EVENT_ACTION]: 'close', + [ALERT_STATUS]: ALERT_STATUS_ACTIVE, + [EVENT_ACTION]: 'active', [EVENT_KIND]: 'signal', [ALERT_FLAPPING]: true, }), @@ -1231,8 +1251,8 @@ describe('createLifecycleExecutor', () => { { index: { _id: 'TEST_ALERT_2_UUID' } }, expect.objectContaining({ [ALERT_INSTANCE_ID]: 'TEST_ALERT_2', - [ALERT_STATUS]: ALERT_STATUS_RECOVERED, - [EVENT_ACTION]: 'close', + [ALERT_STATUS]: ALERT_STATUS_ACTIVE, + [EVENT_ACTION]: 'active', [EVENT_KIND]: 'signal', [ALERT_FLAPPING]: true, }), diff --git a/x-pack/plugins/rule_registry/server/utils/create_lifecycle_executor.ts b/x-pack/plugins/rule_registry/server/utils/create_lifecycle_executor.ts index 6dcafd8b2bb2c..7e16fcdd8cdf3 100644 --- a/x-pack/plugins/rule_registry/server/utils/create_lifecycle_executor.ts +++ b/x-pack/plugins/rule_registry/server/utils/create_lifecycle_executor.ts @@ -47,6 +47,7 @@ import { fetchExistingAlerts } from './fetch_existing_alerts'; import { getCommonAlertFields } from './get_common_alert_fields'; import { getUpdatedFlappingHistory } from './get_updated_flapping_history'; import { fetchAlertByAlertUUID } from './fetch_alert_by_uuid'; +import { getAlertsForNotification } from './get_alerts_for_notification'; type ImplicitTechnicalFieldName = CommonAlertFieldNameLatest | CommonAlertIdFieldNameLatest; @@ -104,6 +105,7 @@ const trackedAlertStateRt = rt.type({ flappingHistory: rt.array(rt.boolean), // flapping flag that indicates whether the alert is flapping flapping: rt.boolean, + pendingRecoveredCount: rt.number, }); export type TrackedLifecycleAlertState = rt.TypeOf; @@ -276,6 +278,7 @@ export const createLifecycleExecutor = alertUuid, started, flapping: isCurrentlyFlapping, + pendingRecoveredCount, } = !isNew ? state.trackedAlerts[alertId] : { @@ -284,6 +287,7 @@ export const createLifecycleExecutor = flapping: state.trackedAlertsRecovered[alertId] ? state.trackedAlertsRecovered[alertId].flapping : false, + pendingRecoveredCount: 0, }; const flapping = isFlapping(flappingHistory, isCurrentlyFlapping); @@ -317,13 +321,17 @@ export const createLifecycleExecutor = event, flappingHistory, flapping, + pendingRecoveredCount, }; }); const trackedEventsToIndex = makeEventsDataMapFor(trackedAlertIds); const newEventsToIndex = makeEventsDataMapFor(newAlertIds); const trackedRecoveredEventsToIndex = makeEventsDataMapFor(trackedAlertRecoveredIds); - const allEventsToIndex = [...trackedEventsToIndex, ...newEventsToIndex]; + const allEventsToIndex = [ + ...getAlertsForNotification(trackedEventsToIndex), + ...newEventsToIndex, + ]; // Only write alerts if: // - writing is enabled @@ -354,11 +362,14 @@ export const createLifecycleExecutor = const nextTrackedAlerts = Object.fromEntries( allEventsToIndex .filter(({ event }) => event[ALERT_STATUS] !== ALERT_STATUS_RECOVERED) - .map(({ event, flappingHistory, flapping }) => { + .map(({ event, flappingHistory, flapping, pendingRecoveredCount }) => { const alertId = event[ALERT_INSTANCE_ID]!; const alertUuid = event[ALERT_UUID]!; const started = new Date(event[ALERT_START]!).toISOString(); - return [alertId, { alertId, alertUuid, started, flappingHistory, flapping }]; + return [ + alertId, + { alertId, alertUuid, started, flappingHistory, flapping, pendingRecoveredCount }, + ]; }) ); @@ -370,13 +381,16 @@ export const createLifecycleExecutor = // this is a space saving effort that will stop tracking a recovered alert if it wasn't flapping and doesn't have state changes // in the last max capcity number of executions event[ALERT_STATUS] === ALERT_STATUS_RECOVERED && - (flapping || flappingHistory.filter((f) => f).length > 0) + (flapping || flappingHistory.filter((f: boolean) => f).length > 0) ) - .map(({ event, flappingHistory, flapping }) => { + .map(({ event, flappingHistory, flapping, pendingRecoveredCount }) => { const alertId = event[ALERT_INSTANCE_ID]!; const alertUuid = event[ALERT_UUID]!; const started = new Date(event[ALERT_START]!).toISOString(); - return [alertId, { alertId, alertUuid, started, flappingHistory, flapping }]; + return [ + alertId, + { alertId, alertUuid, started, flappingHistory, flapping, pendingRecoveredCount }, + ]; }) ); diff --git a/x-pack/plugins/rule_registry/server/utils/get_alerts_for_notification.test.ts b/x-pack/plugins/rule_registry/server/utils/get_alerts_for_notification.test.ts new file mode 100644 index 0000000000000..08a6c90eda022 --- /dev/null +++ b/x-pack/plugins/rule_registry/server/utils/get_alerts_for_notification.test.ts @@ -0,0 +1,85 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { ALERT_STATUS_ACTIVE, ALERT_STATUS_RECOVERED } from '@kbn/rule-data-utils'; +import { getAlertsForNotification } from './get_alerts_for_notification'; + +describe('getAlertsForNotification', () => { + const alert1 = { + event: { + 'kibana.alert.status': ALERT_STATUS_RECOVERED, + }, + flapping: true, + pendingRecoveredCount: 3, + }; + const alert2 = { + event: { + 'kibana.alert.status': ALERT_STATUS_RECOVERED, + }, + flapping: false, + }; + const alert3 = { + event: { + 'kibana.alert.status': ALERT_STATUS_RECOVERED, + }, + flapping: true, + }; + const alert4 = { + event: { + 'kibana.alert.status': ALERT_STATUS_ACTIVE, + }, + pendingRecoveredCount: 4, + flappingHistory: [true, true], + }; + + test('should set pendingRecoveredCount to zero for all active alerts', () => { + const trackedEvents = [alert4]; + expect(getAlertsForNotification(trackedEvents)).toMatchInlineSnapshot(` + Array [ + Object { + "event": Object { + "kibana.alert.status": "active", + }, + "flappingHistory": Array [ + true, + true, + ], + "pendingRecoveredCount": 0, + }, + ] + `); + }); + + test('should not remove alerts if the num of recovered alerts is not at the limit', () => { + const trackedEvents = [alert1, alert2, alert3]; + expect(getAlertsForNotification(trackedEvents)).toMatchInlineSnapshot(` + Array [ + Object { + "event": Object { + "kibana.alert.status": "recovered", + }, + "flapping": true, + "pendingRecoveredCount": 0, + }, + Object { + "event": Object { + "kibana.alert.status": "recovered", + }, + "flapping": false, + }, + Object { + "event": Object { + "event.action": "active", + "kibana.alert.status": "active", + }, + "flapping": true, + "pendingRecoveredCount": 1, + }, + ] + `); + }); +}); diff --git a/x-pack/plugins/rule_registry/server/utils/get_alerts_for_notification.ts b/x-pack/plugins/rule_registry/server/utils/get_alerts_for_notification.ts new file mode 100644 index 0000000000000..75d07642c5e53 --- /dev/null +++ b/x-pack/plugins/rule_registry/server/utils/get_alerts_for_notification.ts @@ -0,0 +1,36 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { MAX_FLAP_COUNT } from '@kbn/alerting-plugin/server/lib/flapping_utils'; +import { + ALERT_END, + ALERT_STATUS, + ALERT_STATUS_ACTIVE, + ALERT_STATUS_RECOVERED, + EVENT_ACTION, +} from '@kbn/rule-data-utils'; + +export function getAlertsForNotification(trackedEventsToIndex: any[]) { + return trackedEventsToIndex.map((trackedEvent) => { + if (trackedEvent.event[ALERT_STATUS] === ALERT_STATUS_ACTIVE) { + trackedEvent.pendingRecoveredCount = 0; + } else if (trackedEvent.event[ALERT_STATUS] === ALERT_STATUS_RECOVERED) { + if (trackedEvent.flapping) { + const count = trackedEvent.pendingRecoveredCount || 0; + trackedEvent.pendingRecoveredCount = count + 1; + if (trackedEvent.pendingRecoveredCount < MAX_FLAP_COUNT) { + trackedEvent.event[ALERT_STATUS] = ALERT_STATUS_ACTIVE; + trackedEvent.event[EVENT_ACTION] = 'active'; + delete trackedEvent.event[ALERT_END]; + } else { + trackedEvent.pendingRecoveredCount = 0; + } + } + } + return trackedEvent; + }); +} diff --git a/x-pack/plugins/rule_registry/server/utils/get_updated_flapping_history.test.ts b/x-pack/plugins/rule_registry/server/utils/get_updated_flapping_history.test.ts index af5eb3e3698fe..2194d37360f14 100644 --- a/x-pack/plugins/rule_registry/server/utils/get_updated_flapping_history.test.ts +++ b/x-pack/plugins/rule_registry/server/utils/get_updated_flapping_history.test.ts @@ -35,6 +35,7 @@ describe('getUpdatedFlappingHistory', () => { started: '2020-01-01T12:00:00.000Z', flappingHistory: [], flapping: false, + pendingRecoveredCount: 0, }, }, trackedAlertsRecovered: {}, @@ -57,6 +58,7 @@ describe('getUpdatedFlappingHistory', () => { started: '2020-01-01T12:00:00.000Z', flappingHistory: [], flapping: false, + pendingRecoveredCount: 0, }, }, trackedAlerts: {}, @@ -81,6 +83,7 @@ describe('getUpdatedFlappingHistory', () => { started: '2020-01-01T12:00:00.000Z', flappingHistory: [], flapping: false, + pendingRecoveredCount: 0, }, }, trackedAlertsRecovered: {}, @@ -106,6 +109,7 @@ describe('getUpdatedFlappingHistory', () => { started: '2020-01-01T12:00:00.000Z', flappingHistory: [], flapping: false, + pendingRecoveredCount: 0, }, }, }; diff --git a/x-pack/test/alerting_api_integration/spaces_only/tests/alerting/event_log.ts b/x-pack/test/alerting_api_integration/spaces_only/tests/alerting/event_log.ts index 6f1c816feeffd..73968703bf9bb 100644 --- a/x-pack/test/alerting_api_integration/spaces_only/tests/alerting/event_log.ts +++ b/x-pack/test/alerting_api_integration/spaces_only/tests/alerting/event_log.ts @@ -581,9 +581,9 @@ export default function eventLogTests({ getService }: FtrProviderContext) { // make sure the counts of the # of events per type are as expected ['execute-start', { gte: 25 }], ['execute', { gte: 25 }], - ['execute-action', { equal: 23 }], - ['new-instance', { equal: 3 }], - ['active-instance', { gte: 23 }], + ['execute-action', { equal: 25 }], + ['new-instance', { equal: 2 }], + ['active-instance', { gte: 25 }], ['recovered-instance', { equal: 2 }], ]), }); @@ -596,7 +596,9 @@ export default function eventLogTests({ getService }: FtrProviderContext) { event?.event?.action === 'recovered-instance' ) .map((event) => event?.kibana?.alert?.flapping); - const result = [false, false, false].concat(new Array(21).fill(true)).concat([false]); + const result = [false, false, false] + .concat(new Array(20).fill(true)) + .concat([false, false, false, false]); expect(flapping).to.eql(result); }); @@ -655,9 +657,9 @@ export default function eventLogTests({ getService }: FtrProviderContext) { // make sure the counts of the # of events per type are as expected ['execute-start', { gte: 20 }], ['execute', { gte: 20 }], - ['execute-action', { equal: 3 }], + ['execute-action', { equal: 9 }], ['new-instance', { equal: 3 }], - ['active-instance', { gte: 3 }], + ['active-instance', { gte: 9 }], ['recovered-instance', { equal: 3 }], ]), }); @@ -670,7 +672,20 @@ export default function eventLogTests({ getService }: FtrProviderContext) { event?.event?.action === 'recovered-instance' ) .map((event) => event?.kibana?.alert?.flapping); - expect(flapping).to.eql([false, false, false, true, false, false]); + expect(flapping).to.eql([ + false, + false, + false, + true, + true, + true, + true, + true, + true, + true, + true, + true, + ]); }); }); } diff --git a/x-pack/test/alerting_api_integration/spaces_only/tests/alerting/event_log_alerts.ts b/x-pack/test/alerting_api_integration/spaces_only/tests/alerting/event_log_alerts.ts index 86754ee31d14f..e5d69507e8c6b 100644 --- a/x-pack/test/alerting_api_integration/spaces_only/tests/alerting/event_log_alerts.ts +++ b/x-pack/test/alerting_api_integration/spaces_only/tests/alerting/event_log_alerts.ts @@ -132,7 +132,9 @@ export default function eventLogAlertTests({ getService }: FtrProviderContext) { break; } } - expect(flapping).to.eql(new Array(instanceEvents.length - 1).fill(false).concat(true)); + expect(flapping).to.eql( + new Array(instanceEvents.length - 4).fill(false).concat([true, true, true, true]) + ); }); }); }