Skip to content

Commit

Permalink
Merge pull request #77 from Financial-Times/keen-health
Browse files Browse the repository at this point in the history
Keen health checks
  • Loading branch information
adgad authored Jan 30, 2018
2 parents 13d073d + c836bad commit 06c0a17
Show file tree
Hide file tree
Showing 9 changed files with 271 additions and 3 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
/node_modules/

/npm-debug.log
package-lock.json
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ node_modules/@financial-times/n-gage/index.mk:
.PHONY: test

test-unit:
FT_GRAPHITE_KEY=123 HEROKU_AUTH_TOKEN=token mocha
KEEN_READ_KEY=123 KEEN_PROJECT_ID=abc FT_GRAPHITE_KEY=123 HEROKU_AUTH_TOKEN=token mocha

test-int:
mocha int-tests/ -r loadvars.js
Expand Down
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ To Add more health checks create a new file in the `config` directory. It shoul
* name, severity, businessImpact, technicalSummary and panicGuide are all required. See the [specification](https://docs.google.com/document/edit?id=1ftlkDj1SUXvKvKJGvoMoF1GnSUInCNPnNGomqTpJaFk) for details
* interval: time between checks in milliseconds or any string compatible with [ms](https://www.npmjs.com/package/ms) [default: 1minute]
* type: The type of check (see below)
* officeHoursOnly: [default: false] For queries that will probably fail out of hours (e.g. Internet Explorer usage, B2B stuff), set this to true and the check will always report as passing on weekends and outside office hours (09:00–17:59, Monday to Friday, in the server's local time). Use sparingly.

## Healthcheck types and options

Expand Down Expand Up @@ -91,3 +92,17 @@ _Note: this assumes that `AWS_ACCESS_KEY` & `AWS_SECRET_ACCESS_KEY` are implictl

* cloudWatchRegion = [default 'eu-west-1'] AWS region the metrics are stored
* cloudWatchAlarmName = [required] Name of the CloudWatch alarm to check

### keenThreshold
Checks whether the result of a Keen query for a metric has crossed a threshold.

_Note: this assumes that `KEEN_READ_KEY` & `KEEN_PROJECT_ID` are implicitly available as environment variables on process.env_

* query: [required] Query to run to get a count, in the format of [keen-query](https://github.com/Financial-Times/keen-query).
* threshold: [required] Value to check the metric against
* timeframe: [default: 'this_60_minutes'] timeframe to run keen query against.
* direction: [default: 'below'] Direction on which to trigger the healthcheck;
- 'above' = alert if value goes above the threshold
- 'below' = alert if value goes below the threshold

_Warning_: Keen sometimes has a lag before ingesting, particularly during high traffic periods. It's recommended to have a minimum timeframe of 60 minutes, if not more.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
"@financial-times/n-raven": "^2.1.0",
"aws-sdk": "^2.6.10",
"fetchres": "^1.5.1",
"keen-query": "^3.2.7",
"moment": "^2.15.1",
"ms": "^2.0.0",
"node-fetch": "^1.5.1"
Expand Down
14 changes: 13 additions & 1 deletion src/checks/check.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ const ms = require('ms');
const logger = require('@financial-times/n-logger').default;
const raven = require('@financial-times/n-raven');

/**
 * Reports whether the current moment, in the process's local time, falls
 * within office hours: Monday to Friday, from 09:00 up to (but not including)
 * 18:00.
 *
 * @returns {boolean} true during office hours, false at weekends or out of hours
 */
const isOfficeHoursNow = () => {
	const now = new Date();
	const weekday = now.getDay();

	// getDay() numbers Sunday as 0 and Saturday as 6
	if (weekday === 0 || weekday === 6) {
		return false;
	}

	const currentHour = now.getHours();
	return currentHour > 8 && currentHour < 18;
};

class Check {

constructor (opts) {
Expand All @@ -25,6 +32,7 @@ an init method returning a Promise`)
this.severity = opts.severity;
this.businessImpact = opts.businessImpact;
this.technicalSummary = opts.technicalSummary;
this.officeHoursOnly = opts.officeHoursOnly;
this.interval = typeof opts.interval === 'string' ? ms(opts.interval) : (opts.interval || 60000);
this.panicGuide = opts.panicGuide;
this.status = status.PENDING;
Expand Down Expand Up @@ -72,7 +80,11 @@ an init method returning a Promise`)
// in child healthcheck classes
checkOutput: this.status === status.ERRORED ? 'Healthcheck failed to execute' : this.checkOutput
};
if (this.lastUpdated) {

if (this.officeHoursOnly && !isOfficeHoursNow()) {
output.ok = true;
output.checkOutput = 'This check is not set to run outside of office hours';
} else if (this.lastUpdated) {
output.lastUpdated = this.lastUpdated.toISOString();
let shouldHaveRun = Date.now() - (this.interval + 1000);
if(this.lastUpdated.getTime() < shouldHaveRun){
Expand Down
3 changes: 2 additions & 1 deletion src/checks/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ module.exports = {
graphiteThreshold: require('./graphiteThreshold.check'),
graphiteWorking: require('./graphiteWorking.check'),
cloudWatchAlarm: require('./cloudWatchAlarm.check'),
cloudWatchThreshold: require('./cloudWatchThreshold.check')
cloudWatchThreshold: require('./cloudWatchThreshold.check'),
keenThreshold: require('./keenThreshold.check')
};
75 changes: 75 additions & 0 deletions src/checks/keenThreshold.check.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
'use strict';

const logger = require('@financial-times/n-logger').default;
const status = require('./status');
const Check = require('./check');
const KeenQuery = require('keen-query');
const ms = require('ms');

const logEventPrefix = 'KEEN_THRESHOLD_CHECK';

/**
 * Health check that runs a keen-query query and compares the numeric result
 * against a threshold, failing when the value is above or below it
 * (depending on `direction`).
 *
 * Requires the KEEN_PROJECT_ID and KEEN_READ_KEY environment variables.
 */
class KeenThresholdCheck extends Check {

	/**
	 * @param {Object} options - check configuration; also passed to the base Check
	 * @param {string} options.query - [required] query in keen-query format
	 * @param {number} options.threshold - value the result is compared against
	 * @param {string} [options.direction='below'] - 'above' fails when the result
	 *   exceeds the threshold; any other value fails when it drops below it
	 * @param {string} [options.timeframe='this_60_minutes'] - relative timeframe
	 *   handed to the query's relTime()
	 * @param {number|string} [options.interval] - time between checks, in
	 *   milliseconds or as an `ms`-compatible string; defaults to 10 minutes
	 * @throws {Error} if the Keen environment variables or the query are missing
	 */
	constructor (options) {
		super(options);
		this.threshold = options.threshold;
		this.direction = options.direction || 'below';

		this.timeframe = options.timeframe || 'this_60_minutes';

		this.keenProjectId = process.env.KEEN_PROJECT_ID;
		this.keenReadKey = process.env.KEEN_READ_KEY;
		if (!(this.keenProjectId && this.keenReadKey)) {
			throw new Error('You must set KEEN_PROJECT_ID and KEEN_READ_KEY environment variables');
		}

		KeenQuery.setConfig({
			KEEN_PROJECT_ID: this.keenProjectId,
			KEEN_READ_KEY: this.keenReadKey,
			KEEN_HOST: 'https://keen-proxy.ft.com/3.0'
		});

		if (!options.query) {
			throw new Error(`You must pass in a query for the "${options.name}" check - e.g., "page:view->filter(context.app=article)->count()"`);
		}

		this.query = options.query;

		// Default to a 10 minute interval for keen checks so we don't overwhelm it.
		// String intervals (e.g. '5m') are converted with ms(), as the base class
		// does; assigning options.interval directly would leave a raw string here.
		this.interval = typeof options.interval === 'string'
			? ms(options.interval)
			: (options.interval || 10 * 60 * 1000);

		this.checkOutput = 'Keen threshold check has not yet run';
	}

	/**
	 * Runs the query (excluding staff, FT-office and robot traffic) over the
	 * configured timeframe and updates `status` and `checkOutput`.
	 *
	 * Only the second column of the first result row is read. If the query
	 * rejects, or the result cannot be read as a number, the check is marked
	 * as FAILED. If the response carries no `rows`, the status is left as it was.
	 *
	 * @returns {Promise} settles once the status has been updated; never rejects
	 */
	tick () {
		return KeenQuery.build(this.query)
			.filter('user.subscriptions.isStaff!=true')
			.filter('user.geo.isFinancialTimesOffice!=true')
			.filter('device.isRobot!=true')
			.relTime(this.timeframe)
			.print()
			.then(result => {
				if (!(result && result.rows)) {
					return;
				}

				const data = Number(result.rows[0][1]);

				if (Number.isNaN(data)) {
					// A value we cannot compare must not be reported as healthy
					this.status = status.FAILED;
					this.checkOutput = `Keen threshold check got a non-numeric result for\n${this.query}\n`;
					return;
				}

				// Compare the number directly: a result of 0 is a real value and
				// must be able to trip a 'below' threshold.
				const failed = this.direction === 'above' ?
					data > this.threshold :
					data < this.threshold;

				this.status = failed ? status.FAILED : status.PASSED;
				this.checkOutput = `Got ${data} ${this.timeframe.split('_').join(' ').replace('this', 'in the last')}, expected not to be ${this.direction} the threshold of ${this.threshold}\n${this.query}\n`;
			})
			.catch(err => {
				logger.error({ event: `${logEventPrefix}_ERROR`, url: this.query }, err);
				this.status = status.FAILED;
				this.checkOutput = 'Keen threshold check failed to fetch data: ' + err.message;
			});
	}

}

module.exports = KeenThresholdCheck;
17 changes: 17 additions & 0 deletions test/fixtures/config/keenThresholdFixture.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
'use strict';
// Fixture configuration for the keenThreshold check tests. It holds a single
// check with no `direction` or `timeframe`, so the check's own defaults apply
// unless a test overrides them.
module.exports = {
name: 'keen',
// NOTE(review): this key is `descriptions` (plural) - confirm whether `description` was intended
descriptions : '',
checks : [
{
type: 'keenThreshold',
query: 'page:view->count()',
name: 'Some keen value is above some threshold',
severity: 2,
threshold: 4,
businessImpact: 'catastrophic',
technicalSummary: 'god knows',
panicGuide: 'Don\'t Panic'
}
]
};
146 changes: 146 additions & 0 deletions test/keenThreshold.check.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
'use strict';

const expect = require('chai').expect;
const fixture = require('./fixtures/config/keenThresholdFixture').checks[0];
const proxyquire = require('proxyquire').noCallThru().noPreserveCache();
const sinon = require('sinon');

/**
 * Builds a check configuration from the shared fixture, with any supplied
 * properties taking precedence. The fixture itself is never modified.
 *
 * @param {Object} [conf] - properties to override on the fixture
 * @returns {Object} a new configuration object
 */
function getCheckConfig (conf) {
	const overrides = conf || {};
	return Object.assign({}, fixture, overrides);
}

let mockKeenQuery;
let Check;


/**
 * Replaces the module-level `mockKeenQuery` with a fresh keen-query stub and
 * loads the check module against it via proxyquire, storing the result in
 * `Check`.
 *
 * `build`, `filter` and `relTime` are chainable stubs, and `print` returns a
 * promise resolved with `results`, so the chain the check builds ends in the
 * supplied data.
 *
 * @param {Object} [results] - value the stubbed query resolves with
 */
function mockKeen (results) {
	const chainable = () => sinon.stub().returnsThis();

	mockKeenQuery = {
		setConfig: sinon.stub(),
		build: chainable(),
		filter: chainable(),
		relTime: chainable(),
		print: sinon.stub().returns(Promise.resolve(results))
	};

	Check = proxyquire('../src/checks/keenThreshold.check', {'keen-query': mockKeenQuery});
}

// Specs for the keenThreshold check, run against the stubbed keen-query
// installed by mockKeen(). Each spec starts the check and asserts inside a
// zero-delay timer, which is expected to fire after the already-resolved
// mocked query promise has been handled.
describe('Keen Threshold Check', function () {

	let check;

	afterEach(function () {
		check.stop();
	});

	// The fixture sets no `direction`, so these specs cover the default ('below')
	context('Default direction', function () {

		it('Should be healthy if result is above the threshold', function (done) {
			mockKeen({
				rows: [
					['something', 100]
				]
			});
			check = new Check(getCheckConfig({
				threshold: 11
			}));
			check.start();
			setTimeout(() => {
				expect(mockKeenQuery.build.firstCall.args[0]).to.contain('page:view->count()');
				expect(mockKeenQuery.relTime.firstCall.args[0]).to.contain('this_60_minutes');
				expect(check.getStatus().ok).to.be.true;
				done();
			});
		});

		it('should be unhealthy if result is below the threshold', done => {
			mockKeen({
				rows: [
					['something', 10]
				]
			});
			check = new Check(getCheckConfig({
				threshold: 11
			}));
			check.start();
			setTimeout(() => {
				expect(check.getStatus().ok).to.be.false;
				done();
			});
		});

	});

	context('Upper threshold enforced', function () {

		it('Should be healthy if result is below upper threshold', function (done) {
			mockKeen({
				rows: [
					['something', 10]
				]
			});
			check = new Check(getCheckConfig({
				threshold: 11,
				direction: 'above'
			}));
			check.start();
			setTimeout(() => {
				expect(check.getStatus().ok).to.be.true;
				done();
			});
		});

		it('Should be healthy if result is equal to upper threshold', function (done) {
			mockKeen({
				rows: [
					['something', 10]
				]
			});
			check = new Check(getCheckConfig({
				threshold: 10,
				direction: 'above'
			}));
			check.start();
			setTimeout(() => {
				expect(check.getStatus().ok).to.be.true;
				done();
			});
		});

		it('should be unhealthy if result is above upper threshold', done => {
			mockKeen({
				rows: [
					['something', 100]
				]
			});
			check = new Check(getCheckConfig({
				threshold: 11,
				direction: 'above'
			}));
			check.start();
			setTimeout(() => {
				expect(check.getStatus().ok).to.be.false;
				done();
			});
		});

	});

	context('Lower threshold enforced', function () {

		it('Should be healthy if result is above lower threshold', function (done) {
			mockKeen({
				rows: [
					['something', 10]
				]
			});
			check = new Check(getCheckConfig({
				threshold: 5,
				direction: 'below'
			}));
			check.start();
			setTimeout(() => {
				expect(check.getStatus().ok).to.be.true;
				done();
			});
		});

		it('Should be healthy if result is equal to lower threshold', function (done) {
			mockKeen({
				rows: [
					['something', 10]
				]
			});
			check = new Check(getCheckConfig({
				threshold: 10,
				direction: 'below'
			}));
			check.start();
			setTimeout(() => {
				expect(check.getStatus().ok).to.be.true;
				done();
			});
		});

		it('should be unhealthy if result is below lower threshold', done => {
			mockKeen({
				rows: [
					['something', 5]
				]
			});
			check = new Check(getCheckConfig({
				threshold: 10,
				direction: 'below'
			}));
			check.start();
			setTimeout(() => {
				expect(check.getStatus().ok).to.be.false;
				done();
			});
		});

	});

	it('Should be possible to configure sample period', function (done) {
		mockKeen();
		check = new Check(getCheckConfig({
			timeframe: 'this_2_days'
		}));
		check.start();
		setTimeout(() => {
			expect(mockKeenQuery.build.firstCall.args[0]).to.contain('page:view->count()');
			expect(mockKeenQuery.relTime.firstCall.args[0]).to.contain('this_2_days');
			done();
		});
	});

});

0 comments on commit 06c0a17

Please sign in to comment.