Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide generalized aggregation #77

Merged
merged 1 commit into from
Jan 25, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ For details on how each kind of chart is rendered, take a look at [`charts.js`](
| `series ` | array of strings | only include these data series and drop all others (referenced by TSV table headings) |
| `visibleSeries ` | array of strings | only show the listed data series and hide all others initially (referenced by TSV table headings) |
| `sliceData ` | array `[t0, t1]` | slice the data from the TSV file as if `data.slice(t0, t1)` was called |
| `aggregate ` | weekly | if set to `weekly`, aggregate the data by week by computing the sum of the values within each week |
| `aggregate ` | dictionary (see below) | defines how data should be aggregated (default: `undefined`, which leaves the data untouched) |
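| `aggregate.period` | `week`, `month` | specifies the period over which the data shall be aggregated; weeks start on Monday (UTC), and incomplete periods at the beginning and the end of the data are dropped |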
| `aggregate.method` | `sum`, `mean`, `min`, `max`, `first`, `last`, `median` | specifies the aggregation method; `first` and `last` select the chronologically first or last data point present in each period, respectively |
| `showRawDataLink` | `true`, `false` | show the link to download the chart’s raw data (default: `true`) |

##### List Charts
Expand Down
146 changes: 108 additions & 38 deletions docs/assets/js/charts.js
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,111 @@ function createSpinner(canvas)
};
}

// Aggregates time series rows into one row per complete period.
//
// Parameters:
//   data              - array of row objects; every row has a 'date' entry (ISO date string or Date) and any
//                       number of further entries that are aggregated key by key
//   aggregationConfig - object with the entries
//                         'period': 'week' or 'month'
//                         'method': 'sum', 'mean', 'median', 'first', 'last', 'min', or 'max'
//
// Returns a new array with one row per complete period, ordered chronologically. Each row carries the start of
// its period as 'date' (a Date object), followed by the aggregated value for every other key of the
// chronologically first input row. Periods without any data points get undefined values.
// An empty input yields an empty array. The input array and its rows are left untouched.
//
// Throws a string if data is not an array or if the period or the method is unknown.
function aggregateTimeData(data, aggregationConfig)
{
    if (!(data instanceof Array))
        throw 'expected data array as input';

    // Always hand back an array so that callers can treat the result uniformly
    if (data.length < 1)
        return [];

    let period;

    switch (aggregationConfig['period'])
    {
        case 'week':
            // Weeks run from Monday to Sunday (in UTC)
            period = d3.utcMonday;
            break;
        case 'month':
            period = d3.utcMonth;
            break;
        default:
            throw 'unknown aggregation period "' + aggregationConfig['period'] + '"';
    }

    // Resolve the method up front, so that a misconfiguration is reported even if the data happens to contain
    // no complete period or a period without data points
    let reduce;

    switch (aggregationConfig['method'])
    {
        case 'sum':
            reduce = d3.sum;
            break;
        case 'mean':
            reduce = d3.mean;
            break;
        case 'median':
            reduce = d3.median;
            break;
        case 'first':
            // Rows are sorted chronologically below, so the first row is the earliest one
            reduce = (rows, accessor) => accessor(rows[0]);
            break;
        case 'last':
            reduce = (rows, accessor) => accessor(rows[rows.length - 1]);
            break;
        case 'min':
            reduce = d3.min;
            break;
        case 'max':
            reduce = d3.max;
            break;
        default:
            throw 'unknown aggregation method "' + aggregationConfig['method'] + '"';
    }

    // Work on shallow copies with the date strings turned into proper date objects, so that the caller's
    // rows are neither modified nor reordered
    const rows = data.map(row => Object.assign({}, row, {'date': d3.isoParse(row['date'])}));

    // Sort data, just in case it isn't already
    rows.sort((row1, row2) => row1['date'] - row2['date']);

    const dateStart = rows[0]['date'];
    // Ranges are exclusive, so add one more day to include the last date (that is, the whole last day)
    const dateEnd = d3.utcDay.offset(rows[rows.length - 1]['date'], 1);

    // Don't use incomplete periods at the beginning and the end of the data
    const t0 = period.ceil(dateStart);
    // In d3, ranges include the start value but exclude the end value.
    // We want to include the last period as well, so add one more period
    const t1 = period.offset(period.floor(dateEnd), 1);
    const periods = period.range(t0, t1);

    // All keys except the date itself are aggregated; the key set is the same for every period
    const keys = Object.keys(rows[0]).filter(key => key !== 'date');

    const aggregatedData = [];

    // Consecutive boundaries delimit one period each, hence one iteration less than there are boundaries
    for (let i = 0; i < periods.length - 1; i++)
    {
        const periodStart = periods[i];
        const periodEnd = periods[i + 1];

        // Note that this assumes complete data in the period.
        // Should data points be missing, aggregation methods such as the sum will lead to results that can't be
        // compared to periods with complete data.
        // Hence, the maintainers of the data need to ensure that the input is well-formed
        const rowsInPeriod = rows.filter(row => row['date'] >= periodStart && row['date'] < periodEnd);

        const aggregatedRow = {'date': periodStart};

        for (const key of keys)
            aggregatedRow[key] = rowsInPeriod.length === 0
                ? undefined
                : reduce(rowsInPeriod, row => row[key]);

        aggregatedData.push(aggregatedRow);
    }

    return aggregatedData;
}

function createHistoryChart(canvas)
{
const url = $(canvas).data('url');
Expand Down Expand Up @@ -169,47 +274,12 @@ function createHistoryChart(canvas)

const context = canvas.getContext('2d');

if (readConfig($(canvas), 'aggregate') == 'weekly')
{
let aggregatedData = Array();
data.sort(
function(row1, row2)
{
let date1 = new Date(row1['date']);
let date2 = new Date(row2['date']);
return date1 - date2;
});

let currentRow = Object();

for (let i = 0; i < data.length; i++)
{
if (i % 7 == 0)
$.each(Object.keys(data[i]).slice(1),
function(keyID, key)
{
currentRow[key] = 0;
});

currentRow['date'] = data[i]['date'];

$.each(Object.keys(data[i]).slice(1),
function(keyID, key)
{
currentRow[key] += data[i][key];
});

if (i % 7 == 6)
// Store a copy of the aggregated data
aggregatedData.push($.extend({}, currentRow));
}

data = aggregatedData;
}

if (hasConfig($(canvas), 'sliceData'))
data = data.slice(readConfig($(canvas), 'sliceData')[0], readConfig($(canvas), 'sliceData')[1]);

if (hasConfig($(canvas), 'aggregate'))
data = aggregateTimeData(data, $(canvas).data('config').aggregate);

const originalDataSeries = Object.keys(data[0]).slice(1);

const dataSeries = hasConfig($(canvas), 'series')
Expand Down
5 changes: 4 additions & 1 deletion docs/pr-total.html
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@ <h3>Pull Requests (Total, by Week)</h3>
"visibleSeries": [
"merged"
],
"aggregate": "weekly"
"aggregate": {
"period": "week",
"method": "sum"
}
}'></canvas>
<div class="info-box">
<p>
Expand Down
1 change: 1 addition & 0 deletions docs/pr-usage.html
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ <h3>Pull Request Usage</h3>
<canvas
data-url="{{ site.dataURL }}/pull-request-usage.tsv"
data-type="history"
data-config='{"aggregate": {"period": "month", "method": "first"}}'
></canvas>
<div class="info-box">
<p>
Expand Down
3 changes: 3 additions & 0 deletions docs/spec/.eslintrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,8 @@
"node": true,
"jasmine": true,
"jquery": true
},
"globals": {
"d3": false
}
}
138 changes: 137 additions & 1 deletion docs/spec/charts.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
/* global createChordChart, createHistoryChart, createList, createTable, createSpinner */
/* global
aggregateTimeData,
createChordChart,
createHistoryChart,
createList,
createTable,
createSpinner,
*/

describe('global charts.js', function()
{
Expand Down Expand Up @@ -62,4 +69,133 @@ describe('global charts.js', function()
});
});
});
describe('aggregation for time series', function()
{
    // Generates one row per day from startDate to endDate (both inclusive, given as ISO date strings).
    // Each row consists of the day's date and the value that generator returns for the zero-based day index
    function generateData(startDate, endDate, generator)
    {
        // d3 ranges exclude the end value, so extend the range by one day to include endDate
        const dates = d3.utcDay.range(d3.isoParse(startDate), d3.utcDay.offset(d3.isoParse(endDate), 1));
        const data = [];

        for (let i = 0; i < dates.length; i++)
            data.push({'date': dates[i], 'value': generator(i)});

        return data;
    }

    // Integer range generator: maps index i to start + i, wrapped around at modulo if a modulo is given
    function integerRangeGenerator(start, modulo)
    {
        if (modulo)
            return (i => (start + i) % modulo);

        return (i => start + i);
    }

    const dateToString = d3.utcFormat('%Y-%m-%d');

    it('should aggregate over weeks correctly', function()
    {
        const aggregationConfig = {'period': 'week', 'method': 'max'};
        const generator = integerRangeGenerator(0, 28);
        // 2018-01-01 is a Monday, and 2018-09-30 is a Sunday
        const data = generateData('2018-01-01', '2018-09-30', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        // Compare the length with a matcher; assigning to length would assert nothing and alter the array
        expect(aggregatedData.length).toEqual(39);
        expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-01');
        expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-08');
        expect(dateToString(aggregatedData[2]['date'])).toEqual('2018-01-15');
        expect(dateToString(aggregatedData[37]['date'])).toEqual('2018-09-17');
        expect(dateToString(aggregatedData[38]['date'])).toEqual('2018-09-24');
        expect(aggregatedData[0]['value']).toEqual(6);
        expect(aggregatedData[1]['value']).toEqual(13);
        expect(aggregatedData[2]['value']).toEqual(20);
        expect(aggregatedData[4]['value']).toEqual(6);
        expect(aggregatedData[5]['value']).toEqual(13);
        expect(aggregatedData[36]['value']).toEqual(6);
        expect(aggregatedData[37]['value']).toEqual(13);
        expect(aggregatedData[38]['value']).toEqual(20);
    });

    it('should not have off-by-one errors (1)', function()
    {
        const aggregationConfig = {'period': 'week', 'method': 'max'};
        const generator = integerRangeGenerator(27, 28);
        // 2017-12-31 is a Sunday, and 2018-10-01 is a Monday
        // Both lie in incomplete weeks, which must not show up in the result
        const data = generateData('2017-12-31', '2018-10-01', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        expect(aggregatedData.length).toEqual(39);
        expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-01');
        expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-08');
        expect(dateToString(aggregatedData[2]['date'])).toEqual('2018-01-15');
        expect(dateToString(aggregatedData[37]['date'])).toEqual('2018-09-17');
        expect(dateToString(aggregatedData[38]['date'])).toEqual('2018-09-24');
        expect(aggregatedData[0]['value']).toEqual(6);
        expect(aggregatedData[1]['value']).toEqual(13);
        expect(aggregatedData[2]['value']).toEqual(20);
        expect(aggregatedData[4]['value']).toEqual(6);
        expect(aggregatedData[5]['value']).toEqual(13);
        expect(aggregatedData[36]['value']).toEqual(6);
        expect(aggregatedData[37]['value']).toEqual(13);
        expect(aggregatedData[38]['value']).toEqual(20);
    });

    it('should not have off-by-one errors (2)', function()
    {
        const aggregationConfig = {'period': 'week', 'method': 'max'};
        const generator = integerRangeGenerator(1, 28);
        // 2018-01-02 is a Tuesday, and 2018-09-29 is a Saturday
        // The first and the last week are both incomplete and must be dropped
        const data = generateData('2018-01-02', '2018-09-29', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        expect(aggregatedData.length).toEqual(37);
        expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-08');
        expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-01-15');
        expect(dateToString(aggregatedData[35]['date'])).toEqual('2018-09-10');
        expect(dateToString(aggregatedData[36]['date'])).toEqual('2018-09-17');
        expect(aggregatedData[0]['value']).toEqual(13);
        expect(aggregatedData[1]['value']).toEqual(20);
        expect(aggregatedData[3]['value']).toEqual(6);
        expect(aggregatedData[4]['value']).toEqual(13);
        expect(aggregatedData[35]['value']).toEqual(6);
        expect(aggregatedData[36]['value']).toEqual(13);
    });

    it('should aggregate sums correctly', function()
    {
        const aggregationConfig = {'period': 'week', 'method': 'sum'};
        const generator = integerRangeGenerator(0, 10);
        // 2018-01-01 is a Monday, and 2018-09-30 is a Sunday
        const data = generateData('2018-01-01', '2018-09-30', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        expect(aggregatedData.length).toEqual(39);
        expect(aggregatedData[0]['value']).toEqual(21);
        expect(aggregatedData[1]['value']).toEqual(30);
        expect(aggregatedData[2]['value']).toEqual(39);
        expect(aggregatedData[36]['value']).toEqual(35);
        expect(aggregatedData[37]['value']).toEqual(24);
        expect(aggregatedData[38]['value']).toEqual(33);
    });

    it('should aggregate over months correctly', function()
    {
        const aggregationConfig = {'period': 'month', 'method': 'first'};
        const generator = integerRangeGenerator(9, 10);
        // The data starts one day before and ends one day after the year 2018,
        // so exactly the twelve months of 2018 are complete
        const data = generateData('2017-12-31', '2019-01-01', generator);
        const aggregatedData = aggregateTimeData(data, aggregationConfig);

        expect(aggregatedData.length).toEqual(12);
        expect(dateToString(aggregatedData[0]['date'])).toEqual('2018-01-01');
        expect(dateToString(aggregatedData[1]['date'])).toEqual('2018-02-01');
        expect(dateToString(aggregatedData[10]['date'])).toEqual('2018-11-01');
        expect(dateToString(aggregatedData[11]['date'])).toEqual('2018-12-01');
        expect(aggregatedData[0]['value']).toEqual(0);
        expect(aggregatedData[1]['value']).toEqual(1);
        expect(aggregatedData[10]['value']).toEqual(4);
        expect(aggregatedData[11]['value']).toEqual(4);
    });
});
});