Skip to content

Commit

Permalink
feat(queries processing): optimize high loaded queries with $nin clause
Browse files Browse the repository at this point in the history
Closes #95
  • Loading branch information
buchslava committed Sep 25, 2018
1 parent 324e2e6 commit cf3405e
Show file tree
Hide file tree
Showing 11 changed files with 47 additions and 19 deletions.
2 changes: 1 addition & 1 deletion dist/test-cases-concepts.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/test-cases-concepts.js.map

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/test-cases-entities.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/test-cases-entities.js.map

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/vizabi-ddfcsv-reader.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/vizabi-ddfcsv-reader.js.map

Large diffs are not rendered by default.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
"@types/core-js": "2.5.0",
"@types/node": "10.5.7",
"aws-sdk": "2.308.0",
"ddf-query-validator": "1.0.4",
"ddf-query-validator": "1.1.0",
"fetch-polyfill": "0.8.2",
"lodash.clonedeep": "4.5.0",
"lodash.compact": "3.0.1",
Expand Down
21 changes: 19 additions & 2 deletions src/resource-selection-optimizer/in-clause-under-conjunction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ const Papa = require('papaparse');
const WHERE_KEYWORD = 'where';
const JOIN_KEYWORD = 'join';
const KEY_IN = '$in';
const KEY_NIN = '$nin';
const KEY_AND = '$and';

// Collects the clause's property values via `values` and returns the first one via `head`
// (i.e. the condition object stored under the clause's first key, such as `{ $in: [...] }`).
const getFirstConditionClause = clause => {
  const conditions = values(clause);

  return head(conditions);
};
Expand Down Expand Up @@ -165,6 +166,17 @@ export class InClauseUnderConjunction implements IResourceSelectionOptimizer {
}

private getFilesGroupsQueryClause(): InClauseUnderConjunction {
// Expands a `$nin` condition into an explicit list of entity values: returns every key of
// `this.flow.entityValueToDomainHash` that is NOT present in `entityValuesToExclude`.
// The order of the result follows the iteration order of `entityValueToDomainHash.keys()`.
// A missing (undefined/null) exclusion list excludes nothing, so all keys are returned.
// NOTE(review): assumes `entityValuesToExclude` is an array as its type states — confirm callers.
const getEntitiesExcept = (entityValuesToExclude: string[]): string[] => {
  // Build the lookup once: Set membership is O(1), whereas scanning the exclusion array for
  // every entity key is O(excluded * entities), which hurts on large datasets.
  const excluded = new Set<string>(entityValuesToExclude);
  const result: string[] = [];

  for (const entityKey of this.flow.entityValueToDomainHash.keys()) {
    if (!excluded.has(entityKey)) {
      result.push(entityKey);
    }
  }

  return result;
};
const filesGroupsByClause = new Map();

for (const clause of this.flow.processableClauses) {
Expand All @@ -173,7 +185,8 @@ export class InClauseUnderConjunction implements IResourceSelectionOptimizer {
datapoints: new Set(),
concepts: new Set()
};
const entityValuesFromClause = getFirstConditionClause(clause).$in;
const firstConditionClause = getFirstConditionClause(clause);
const entityValuesFromClause = firstConditionClause[KEY_IN] || getEntitiesExcept(firstConditionClause[KEY_NIN]);

for (const entityValueFromClause of entityValuesFromClause) {
filesGroupByClause.entities.add(this.flow.entityValueToFileHash.get(entityValueFromClause));
Expand Down Expand Up @@ -222,6 +235,10 @@ export class InClauseUnderConjunction implements IResourceSelectionOptimizer {
}
}

if (!this.flow.filesGroupsByClause.get(appropriateClauseKey)) {
return [];
}

return [
...Array.from(this.flow.filesGroupsByClause.get(appropriateClauseKey).concepts),
...Array.from(this.flow.filesGroupsByClause.get(appropriateClauseKey).entities),
Expand All @@ -241,7 +258,7 @@ export class InClauseUnderConjunction implements IResourceSelectionOptimizer {
// foo: { '$in': ['bar', 'baz'] } will NOT be processed
const conditionKey = head(keys(clause[key]));

if (conditionKey === KEY_IN) {
if (conditionKey === KEY_IN || conditionKey === KEY_NIN) {
result.push(clause);
}
}
Expand Down
11 changes: 4 additions & 7 deletions test/hl.spec.ts → test/high-load.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,19 @@ import { getDDFCsvReaderObject } from '../src/index';
const expect = chai.expect;
const sandbox = sinon.createSandbox();

describe('HL', () => {
describe('High load queries', () => {
afterEach(() => {
sandbox.restore();
});

describe('optimization', () => {
xit(`---------------------`, async () => {
describe('on population dataset', () => {
it(`query with $nin clause should be processed correctly`, async () => {
const reader = getDDFCsvReaderObject();

reader.init({
path: './test/fixtures/ddf--gapminder--population.big/master-HEAD'
});

// console.time('foo');

const result = await reader.read({
language: 'en',
from: 'datapoints',
Expand Down Expand Up @@ -89,8 +87,7 @@ describe('HL', () => {
]
});

// console.log(result.length);
// console.timeEnd('foo');
expect(result.length).to.be.equal(14300);
});
});
});

0 comments on commit cf3405e

Please sign in to comment.