Skip to content

Commit

Permalink
[ResponseOps][flapping] change action behavior when flapping (#147810)
Browse files Browse the repository at this point in the history
Resolves #143445

## Summary

This pr changes the behavior of alerting actions respecting flapping. 
To recover a flapping alert the alert must be reported as recovered for
4 (this will be configurable eventually) consecutive runs before the
framework makes the alert as recovered.

Link to a spreadsheet that helps vizualize the changes:
https://docs.google.com/spreadsheets/d/1amoUDFx-Sdxirnabd_E4OSnyur57sLhY6Ce79oDYgJA/edit#gid=0

Also Resolves #147205 to recover
those alerts


### Checklist
- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios


### To verify
- Create an alert and let it run until it gets into a flapping state
(changes from active/recovered at least 4 times)
- Once flapping, verify the following cases:
- Verify that a recovered alert will be returned as an active
notification 3 times before finally recovering
- Verify that an alert that does not recover 3 times before an active
alert is reported never reports as a recovered alert

Co-authored-by: kibanamachine <[email protected]>
Co-authored-by: Ying Mao <[email protected]>
  • Loading branch information
3 people authored Jan 23, 2023
1 parent a78fece commit 000ba78
Show file tree
Hide file tree
Showing 21 changed files with 688 additions and 30 deletions.
1 change: 1 addition & 0 deletions x-pack/plugins/alerting/common/alert_instance.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ const metaSchema = t.partial({
flappingHistory: t.array(t.boolean),
// flapping flag that indicates whether the alert is flapping
flapping: t.boolean,
pendingRecoveredCount: t.number,
});
export type AlertInstanceMeta = t.TypeOf<typeof metaSchema>;

Expand Down
44 changes: 43 additions & 1 deletion x-pack/plugins/alerting/server/alert/alert.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -414,11 +414,12 @@ describe('toJSON', () => {
},
flappingHistory: [false, true],
flapping: false,
pendingRecoveredCount: 2,
},
}
);
expect(JSON.stringify(alertInstance)).toEqual(
'{"state":{"foo":true},"meta":{"lastScheduledActions":{"date":"1970-01-01T00:00:00.000Z","group":"default"},"flappingHistory":[false,true],"flapping":false}}'
'{"state":{"foo":true},"meta":{"lastScheduledActions":{"date":"1970-01-01T00:00:00.000Z","group":"default"},"flappingHistory":[false,true],"flapping":false,"pendingRecoveredCount":2}}'
);
});
});
Expand All @@ -433,6 +434,7 @@ describe('toRaw', () => {
group: 'default',
},
flappingHistory: [false, true, true],
pendingRecoveredCount: 2,
},
};
const alertInstance = new Alert<AlertInstanceState, AlertInstanceContext, DefaultActionGroupId>(
Expand Down Expand Up @@ -529,3 +531,43 @@ describe('getFlapping', () => {
expect(alert.getFlapping()).toEqual(true);
});
});

describe('incrementPendingRecoveredCount', () => {
test('correctly increments pendingRecoveredCount', () => {
const alert = new Alert<AlertInstanceState, AlertInstanceContext, DefaultActionGroupId>('1', {
meta: { pendingRecoveredCount: 3 },
});
alert.incrementPendingRecoveredCount();
expect(alert.getPendingRecoveredCount()).toEqual(4);
});

test('correctly increments pendingRecoveredCount when it is not already defined', () => {
const alert = new Alert<AlertInstanceState, AlertInstanceContext, DefaultActionGroupId>('1');
alert.incrementPendingRecoveredCount();
expect(alert.getPendingRecoveredCount()).toEqual(1);
});
});

describe('getPendingRecoveredCount', () => {
test('returns pendingRecoveredCount', () => {
const alert = new Alert<AlertInstanceState, AlertInstanceContext, DefaultActionGroupId>('1', {
meta: { pendingRecoveredCount: 3 },
});
expect(alert.getPendingRecoveredCount()).toEqual(3);
});

test('defines and returns pendingRecoveredCount when it is not already defined', () => {
const alert = new Alert<AlertInstanceState, AlertInstanceContext, DefaultActionGroupId>('1');
expect(alert.getPendingRecoveredCount()).toEqual(0);
});
});

describe('resetPendingRecoveredCount', () => {
test('resets pendingRecoveredCount to 0', () => {
const alert = new Alert<AlertInstanceState, AlertInstanceContext, DefaultActionGroupId>('1', {
meta: { pendingRecoveredCount: 3 },
});
alert.resetPendingRecoveredCount();
expect(alert.getPendingRecoveredCount()).toEqual(0);
});
});
15 changes: 15 additions & 0 deletions x-pack/plugins/alerting/server/alert/alert.ts
Original file line number Diff line number Diff line change
Expand Up @@ -225,4 +225,19 @@ export class Alert<
getFlapping() {
return this.meta.flapping || false;
}

incrementPendingRecoveredCount() {
if (!this.meta.pendingRecoveredCount) {
this.meta.pendingRecoveredCount = 0;
}
this.meta.pendingRecoveredCount++;
}

getPendingRecoveredCount() {
return this.meta.pendingRecoveredCount || 0;
}

resetPendingRecoveredCount() {
this.meta.pendingRecoveredCount = 0;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import { createAlertFactory, getPublicAlertFactory } from '../alert/create_alert
import { Alert } from '../alert/alert';
import { alertingEventLoggerMock } from '../lib/alerting_event_logger/alerting_event_logger.mock';
import { ruleRunMetricsStoreMock } from '../lib/rule_run_metrics_store.mock';
import { processAlerts, setFlapping } from '../lib';
import { getAlertsForNotification, processAlerts, setFlapping } from '../lib';
import { logAlerts } from '../task_runner/log_alerts';

const scheduleActions = jest.fn();
Expand Down Expand Up @@ -61,6 +61,12 @@ jest.mock('../lib', () => {
};
});

jest.mock('../lib/get_alerts_for_notification', () => {
return {
getAlertsForNotification: jest.fn(),
};
});

jest.mock('../task_runner/log_alerts', () => ({ logAlerts: jest.fn() }));

let logger: ReturnType<typeof loggingSystemMock['createLogger']>;
Expand Down Expand Up @@ -195,6 +201,15 @@ describe('Legacy Alerts Client', () => {
currentRecoveredAlerts: {},
recoveredAlerts: {},
});
(getAlertsForNotification as jest.Mock).mockReturnValue({
newAlerts: {},
activeAlerts: {
'1': new Alert<AlertInstanceContext, AlertInstanceContext>('1', testAlert1),
'2': new Alert<AlertInstanceContext, AlertInstanceContext>('2', testAlert2),
},
currentRecoveredAlerts: {},
recoveredAlerts: {},
});
const alertsClient = new LegacyAlertsClient({
logger,
maxAlerts: 1000,
Expand Down Expand Up @@ -240,6 +255,17 @@ describe('Legacy Alerts Client', () => {
{}
);

expect(getAlertsForNotification).toHaveBeenCalledWith(
'default',
{},
{
'1': new Alert<AlertInstanceContext, AlertInstanceContext>('1', testAlert1),
'2': new Alert<AlertInstanceContext, AlertInstanceContext>('2', testAlert2),
},
{},
{}
);

expect(logAlerts).toHaveBeenCalledWith({
logger,
alertingEventLogger,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,22 @@
* 2.0.
*/
import { Logger } from '@kbn/core/server';
import { cloneDeep } from 'lodash';
import { cloneDeep, merge } from 'lodash';
import { Alert } from '../alert/alert';
import {
AlertFactory,
createAlertFactory,
getPublicAlertFactory,
} from '../alert/create_alert_factory';
import { determineAlertsToReturn, processAlerts, setFlapping } from '../lib';
import {
determineAlertsToReturn,
processAlerts,
setFlapping,
getAlertsForNotification,
} from '../lib';
import { AlertingEventLogger } from '../lib/alerting_event_logger/alerting_event_logger';
import { RuleRunMetricsStore } from '../lib/rule_run_metrics_store';
import { trimRecoveredAlerts } from '../lib/trim_recovered_alerts';
import { UntypedNormalizedRuleType } from '../rule_type_registry';
import { logAlerts } from '../task_runner/log_alerts';
import {
Expand Down Expand Up @@ -134,17 +140,32 @@ export class LegacyAlertsClient<
processedAlertsRecovered
);

this.processedAlerts.new = processedAlertsNew;
this.processedAlerts.active = processedAlertsActive;
this.processedAlerts.recovered = processedAlertsRecovered;
this.processedAlerts.recoveredCurrent = processedAlertsRecoveredCurrent;
const { trimmedAlertsRecovered, earlyRecoveredAlerts } = trimRecoveredAlerts(
this.options.logger,
processedAlertsRecovered,
this.options.maxAlerts
);

const alerts = getAlertsForNotification<State, Context, ActionGroupIds, RecoveryActionGroupId>(
this.options.ruleType.defaultActionGroupId,
processedAlertsNew,
processedAlertsActive,
trimmedAlertsRecovered,
processedAlertsRecoveredCurrent
);
alerts.currentRecoveredAlerts = merge(alerts.currentRecoveredAlerts, earlyRecoveredAlerts);

this.processedAlerts.new = alerts.newAlerts;
this.processedAlerts.active = alerts.activeAlerts;
this.processedAlerts.recovered = alerts.recoveredAlerts;
this.processedAlerts.recoveredCurrent = alerts.currentRecoveredAlerts;

logAlerts({
logger: this.options.logger,
alertingEventLogger: eventLogger,
newAlerts: processedAlertsNew,
activeAlerts: processedAlertsActive,
recoveredAlerts: processedAlertsRecoveredCurrent,
newAlerts: alerts.newAlerts,
activeAlerts: alerts.activeAlerts,
recoveredAlerts: alerts.currentRecoveredAlerts,
ruleLogPrefix: ruleLabel,
ruleRunMetricsStore,
canSetRecoveryContext: this.options.ruleType.doesSetRecoveryContext ?? false,
Expand Down
2 changes: 1 addition & 1 deletion x-pack/plugins/alerting/server/lib/flapping_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
*/

const MAX_CAPACITY = 20;
const MAX_FLAP_COUNT = 4;
export const MAX_FLAP_COUNT = 4;

export function updateFlappingHistory(flappingHistory: boolean[], state: boolean) {
const updatedFlappingHistory = flappingHistory.concat(state).slice(MAX_CAPACITY * -1);
Expand Down
146 changes: 146 additions & 0 deletions x-pack/plugins/alerting/server/lib/get_alerts_for_notification.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { getAlertsForNotification } from '.';
import { Alert } from '../alert';

describe('getAlertsForNotification', () => {
test('should set pendingRecoveredCount to zero for all active alerts', () => {
const alert1 = new Alert('1', { meta: { flapping: true, pendingRecoveredCount: 3 } });
const alert2 = new Alert('2', { meta: { flapping: false } });

const { newAlerts, activeAlerts } = getAlertsForNotification(
'default',
{
'1': alert1,
},
{
'1': alert1,
'2': alert2,
},
{},
{}
);
expect(newAlerts).toMatchInlineSnapshot(`
Object {
"1": Object {
"meta": Object {
"flapping": true,
"flappingHistory": Array [],
"pendingRecoveredCount": 0,
},
"state": Object {},
},
}
`);
expect(activeAlerts).toMatchInlineSnapshot(`
Object {
"1": Object {
"meta": Object {
"flapping": true,
"flappingHistory": Array [],
"pendingRecoveredCount": 0,
},
"state": Object {},
},
"2": Object {
"meta": Object {
"flapping": false,
"flappingHistory": Array [],
"pendingRecoveredCount": 0,
},
"state": Object {},
},
}
`);
});

test('should return flapping pending recovered alerts as active alerts', () => {
const alert1 = new Alert('1', { meta: { flapping: true, pendingRecoveredCount: 3 } });
const alert2 = new Alert('2', { meta: { flapping: false } });
const alert3 = new Alert('3', { meta: { flapping: true } });

const { newAlerts, activeAlerts, recoveredAlerts, currentRecoveredAlerts } =
getAlertsForNotification(
'default',
{},
{},
{
'1': alert1,
'2': alert2,
'3': alert3,
},
{
'1': alert1,
'2': alert2,
'3': alert3,
}
);

expect(newAlerts).toMatchInlineSnapshot(`Object {}`);
expect(activeAlerts).toMatchInlineSnapshot(`
Object {
"3": Object {
"meta": Object {
"flapping": true,
"flappingHistory": Array [],
"pendingRecoveredCount": 1,
},
"state": Object {},
},
}
`);
expect(Object.values(activeAlerts).map((a) => a.getScheduledActionOptions()))
.toMatchInlineSnapshot(`
Array [
Object {
"actionGroup": "default",
"context": Object {},
"state": Object {},
},
]
`);
expect(recoveredAlerts).toMatchInlineSnapshot(`
Object {
"1": Object {
"meta": Object {
"flapping": true,
"flappingHistory": Array [],
"pendingRecoveredCount": 0,
},
"state": Object {},
},
"2": Object {
"meta": Object {
"flapping": false,
"flappingHistory": Array [],
},
"state": Object {},
},
}
`);
expect(currentRecoveredAlerts).toMatchInlineSnapshot(`
Object {
"1": Object {
"meta": Object {
"flapping": true,
"flappingHistory": Array [],
"pendingRecoveredCount": 0,
},
"state": Object {},
},
"2": Object {
"meta": Object {
"flapping": false,
"flappingHistory": Array [],
},
"state": Object {},
},
}
`);
});
});
Loading

0 comments on commit 000ba78

Please sign in to comment.