Skip to content

Commit

Permalink
Add nested field support to KQL (#47070)
Browse files Browse the repository at this point in the history
This PR adds a new syntax to KQL for querying nested fields.

Nested fields can be queried in two different ways:

Parts of the query may only match a single nested doc (bool inside nested). This is what most people want when querying on a nested field.
Parts of the query may match different nested docs (nested inside bool). This is how a regular object field works but nested fields can be queried in the same way. Although generally less useful, there are occasions where one might want to query a nested field in this way.
The new KQL syntax supports both.
  • Loading branch information
Bargs authored Oct 30, 2019
1 parent 636fa27 commit 1dcec9d
Show file tree
Hide file tree
Showing 66 changed files with 1,954 additions and 476 deletions.
84 changes: 84 additions & 0 deletions docs/discover/kuery.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,87 @@ set these terms will be matched against all fields. For example, a query for `re
in the response field, but a query for just `200` will search for 200 across all fields in your index.
============

===== Nested Field Support

KQL supports querying on {ref}/nested.html[nested fields] through a special syntax. You can query nested fields in subtly different
ways, depending on the results you want, so crafting nested queries requires extra thought.

One main consideration is how to match parts of the nested query to the individual nested documents.
There are two main approaches to take:

* *Parts of the query may only match a single nested document.* This is what most users want when querying on a nested field.
* *Parts of the query can match different nested documents.* This is how a regular object field works.
Although generally less useful, there might be occasions where you want to query a nested field in this way.

Let's take a look at the first approach. In the following document, `items` is a nested field:

[source,json]
----------------------------------
{
"grocery_name": "Elastic Eats",
"items": [
{
"name": "banana",
"stock": "12",
"category": "fruit"
},
{
"name": "peach",
"stock": "10",
"category": "fruit"
},
{
"name": "carrot",
"stock": "9",
"category": "vegetable"
},
{
"name": "broccoli",
"stock": "5",
"category": "vegetable"
}
]
}
----------------------------------

To find stores that have more than 10 bananas in stock, you would write a query like this:

`items:{ name:banana and stock > 10 }`

`items` is the "nested path". Everything inside the curly braces (the "nested group") must match a single document.
For example, `items:{ name:banana and stock:9 }` does not match because there isn't a single nested document that
matches the entire query in the nested group.

What if you want to find a store with more than 10 bananas that *also* stocks vegetables? This is the second way of querying a nested field, and you can do it like this:

`items:{ name:banana and stock > 10 } and items:{ category:vegetable }`

The first nested group (`name:banana and stock > 10`) must still match a single document, but the `category:vegetables`
subquery can match a different nested document because it is in a separate group.

KQL's syntax also supports nested fields inside of other nested fields—you simply have to specify the full path. Suppose you
have a document where `level1` and `level2` are both nested fields:

[source,json]
----------------------------------
{
"level1": [
{
"level2": [
{
"prop1": "foo",
"prop2": "bar"
},
{
"prop1": "baz",
"prop2": "qux"
}
]
}
]
}
----------------------------------

You can match on a single nested document by specifying the full path:

`level1.level2:{ prop1:foo and prop2:bar }`
25 changes: 23 additions & 2 deletions packages/kbn-es-query/src/__fixtures__/index_pattern_response.json
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,7 @@
"searchable": true,
"aggregatable": true,
"readFromDocValues": true,
"parent": "machine.os",
"subType": "multi"
"subType": { "multi": { "parent": "machine.os" } }
},
{
"name": "geo.src",
Expand Down Expand Up @@ -277,6 +276,28 @@
"searchable": true,
"aggregatable": true,
"readFromDocValues": false
},
{
"name": "nestedField.child",
"type": "string",
"esTypes": ["text"],
"count": 0,
"scripted": false,
"searchable": true,
"aggregatable": false,
"readFromDocValues": false,
"subType": { "nested": { "path": "nestedField" } }
},
{
"name": "nestedField.nestedChild.doublyNestedChild",
"type": "string",
"esTypes": ["text"],
"count": 0,
"scripted": false,
"searchable": true,
"aggregatable": false,
"readFromDocValues": false,
"subType": { "nested": { "path": "nestedField.nestedChild" } }
}
]
}
62 changes: 62 additions & 0 deletions packages/kbn-es-query/src/kuery/ast/__tests__/ast.js
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,68 @@ describe('kuery AST API', function () {
expect(actual).to.eql(expected);
});

it('should support nested queries indicated by curly braces', () => {
const expected = nodeTypes.function.buildNode(
'nested',
'nestedField',
nodeTypes.function.buildNode('is', 'childOfNested', 'foo')
);
const actual = ast.fromKueryExpression('nestedField:{ childOfNested: foo }');
expect(actual).to.eql(expected);
});

it('should support nested subqueries and subqueries inside nested queries', () => {
const expected = nodeTypes.function.buildNode(
'and',
[
nodeTypes.function.buildNode('is', 'response', '200'),
nodeTypes.function.buildNode(
'nested',
'nestedField',
nodeTypes.function.buildNode('or', [
nodeTypes.function.buildNode('is', 'childOfNested', 'foo'),
nodeTypes.function.buildNode('is', 'childOfNested', 'bar'),
])
)]);
const actual = ast.fromKueryExpression('response:200 and nestedField:{ childOfNested:foo or childOfNested:bar }');
expect(actual).to.eql(expected);
});

it('should support nested sub-queries inside paren groups', () => {
const expected = nodeTypes.function.buildNode(
'and',
[
nodeTypes.function.buildNode('is', 'response', '200'),
nodeTypes.function.buildNode('or', [
nodeTypes.function.buildNode(
'nested',
'nestedField',
nodeTypes.function.buildNode('is', 'childOfNested', 'foo')
),
nodeTypes.function.buildNode(
'nested',
'nestedField',
nodeTypes.function.buildNode('is', 'childOfNested', 'bar')
),
])
]);
const actual = ast.fromKueryExpression('response:200 and ( nestedField:{ childOfNested:foo } or nestedField:{ childOfNested:bar } )');
expect(actual).to.eql(expected);
});

it('should support nested groups inside other nested groups', () => {
const expected = nodeTypes.function.buildNode(
'nested',
'nestedField',
nodeTypes.function.buildNode(
'nested',
'nestedChild',
nodeTypes.function.buildNode('is', 'doublyNestedChild', 'foo')
)
);
const actual = ast.fromKueryExpression('nestedField:{ nestedChild:{ doublyNestedChild:foo } }');
expect(actual).to.eql(expected);
});
});

describe('fromLiteralExpression', function () {
Expand Down
4 changes: 2 additions & 2 deletions packages/kbn-es-query/src/kuery/ast/ast.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@ function fromExpression(expression, parseOptions = {}, parse = parseKuery) {
* IndexPattern isn't required, but if you pass one in, we can be more intelligent
* about how we craft the queries (e.g. scripted fields)
*/
export function toElasticsearchQuery(node, indexPattern, config = {}) {
export function toElasticsearchQuery(node, indexPattern, config = {}, context = {}) {
if (!node || !node.type || !nodeTypes[node.type]) {
return toElasticsearchQuery(nodeTypes.function.buildNode('and', []));
}

return nodeTypes[node.type].toElasticsearchQuery(node, indexPattern, config);
return nodeTypes[node.type].toElasticsearchQuery(node, indexPattern, config, context);
}

export function doesKueryExpressionHaveLuceneSyntaxError(expression) {
Expand Down
Loading

0 comments on commit 1dcec9d

Please sign in to comment.