Skip to content

Commit

Permalink
Extract hits either as number or structured object
Browse files Browse the repository at this point in the history
With this commit we add the ability to extract metadata about the hit
count of a search request regardless of whether Elasticsearch returns it
as a number (as in versions before 7.0) or as a structured object (as of
7.0). We also introduce a new metadata field `hits_relation` which
indicates whether the hit count reported by Elasticsearch is accurate or
only a lower bound.

Relates elastic/elasticsearch#35849
Relates #612
Closes #611
  • Loading branch information
danielmitterdorfer authored Dec 3, 2018
1 parent ddc1e7a commit abf1623
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 2 deletions.
12 changes: 11 additions & 1 deletion esrally/driver/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,7 @@ class Query(Runner):
Always 1 for normal queries and the number of retrieved pages for scroll queries.
* ``unit``: The unit in which to interpret ``weight``. Always "ops".
* ``hits``: Total number of hits for this operation.
* ``hits_relation``: Whether ``hits`` is accurate (``eq``) or a lower bound of the actual hit count (``gte``).
* ``timed_out``: Whether the search has timed out. For scroll queries, this flag is ``True`` if the flag was ``True`` for any of the
queries issued.
Expand Down Expand Up @@ -580,10 +581,17 @@ def request_body_query(self, es, params):
body=mandatory(params, "body", self),
**request_params)
hits = r["hits"]["total"]
if isinstance(hits, dict):
hits_total = hits["value"]
hits_relation = hits["relation"]
else:
hits_total = hits
hits_relation = "eq"
return {
"weight": 1,
"unit": "ops",
"hits": hits,
"hits": hits_total,
"hits_relation": hits_relation,
"timed_out": r["timed_out"],
"took": r["took"]
}
Expand Down Expand Up @@ -633,6 +641,8 @@ def scroll_query(self, es, params):
"weight": retrieved_pages,
"pages": retrieved_pages,
"hits": hits,
# as Rally determines the number of hits in a scroll, the result is always accurate.
"hits_relation": "eq",
"unit": "pages",
"timed_out": timed_out,
"took": took
Expand Down
55 changes: 54 additions & 1 deletion tests/driver/runner_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,48 @@ def test_mixed_bulk_with_detailed_stats(self, es):
class QueryRunnerTests(TestCase):
@mock.patch("elasticsearch.Elasticsearch")
def test_query_match_only_request_body_defined(self, es):
    """A structured ``hits.total`` object is reported as ``hits`` plus ``hits_relation``."""
    # Canned search response whose hit count is an object carrying
    # ``value`` and ``relation`` instead of a plain number.
    structured_total = {"value": 1, "relation": "gte"}
    matched_docs = [{"some-doc-1"}, {"some-doc-2"}]
    es.search.return_value = {
        "timed_out": False,
        "took": 5,
        "hits": {"total": structured_total, "hits": matched_docs},
    }

    # Only the request body is supplied; no other parameters are set.
    match_all_request = {"body": {"query": {"match_all": {}}}}

    search_runner = runner.Query()
    with search_runner:
        outcome = search_runner(es, match_all_request)

    self.assertEqual(1, outcome["weight"])
    self.assertEqual("ops", outcome["unit"])
    # Both parts of the structured total must come back unchanged.
    self.assertEqual(1, outcome["hits"])
    self.assertEqual("gte", outcome["hits_relation"])
    self.assertFalse(outcome["timed_out"])
    self.assertEqual(5, outcome["took"])
    self.assertFalse("error-type" in outcome)

@mock.patch("elasticsearch.Elasticsearch")
def test_query_hits_total_as_number(self, es):
es.search.return_value = {
"timed_out": False,
"took": 5,
Expand Down Expand Up @@ -587,6 +629,7 @@ def test_query_match_only_request_body_defined(self, es):
self.assertEqual(1, result["weight"])
self.assertEqual("ops", result["unit"])
self.assertEqual(2, result["hits"])
self.assertEqual("eq", result["hits_relation"])
self.assertFalse(result["timed_out"])
self.assertEqual(5, result["took"])
self.assertFalse("error-type" in result)
Expand All @@ -597,7 +640,10 @@ def test_query_match_all(self, es):
"timed_out": False,
"took": 5,
"hits": {
"total": 2,
"total": {
"value": 2,
"relation": "eq"
},
"hits": [
{
"some-doc-1"
Expand Down Expand Up @@ -628,6 +674,7 @@ def test_query_match_all(self, es):
self.assertEqual(1, result["weight"])
self.assertEqual("ops", result["unit"])
self.assertEqual(2, result["hits"])
self.assertEqual("eq", result["hits_relation"])
self.assertFalse(result["timed_out"])
self.assertEqual(5, result["took"])
self.assertFalse("error-type" in result)
Expand Down Expand Up @@ -678,6 +725,7 @@ def test_scroll_query_only_one_page(self, es):
self.assertEqual(1, results["weight"])
self.assertEqual(1, results["pages"])
self.assertEqual(2, results["hits"])
self.assertEqual("eq", results["hits_relation"])
self.assertEqual(4, results["took"])
self.assertEqual("pages", results["unit"])
self.assertFalse(results["timed_out"])
Expand Down Expand Up @@ -726,6 +774,7 @@ def test_scroll_query_only_one_page_only_request_body_defined(self, es):
self.assertEqual(1, results["weight"])
self.assertEqual(1, results["pages"])
self.assertEqual(2, results["hits"])
self.assertEqual("eq", results["hits_relation"])
self.assertEqual(4, results["took"])
self.assertEqual("pages", results["unit"])
self.assertFalse(results["timed_out"])
Expand Down Expand Up @@ -790,6 +839,7 @@ def test_scroll_query_with_explicit_number_of_pages(self, es):
self.assertEqual(2, results["weight"])
self.assertEqual(2, results["pages"])
self.assertEqual(3, results["hits"])
self.assertEqual("eq", results["hits_relation"])
self.assertEqual(79, results["took"])
self.assertEqual("pages", results["unit"])
self.assertTrue(results["timed_out"])
Expand Down Expand Up @@ -847,6 +897,7 @@ def test_scroll_query_early_termination(self, es):
self.assertEqual(2, results["weight"])
self.assertEqual(2, results["pages"])
self.assertEqual(1, results["hits"])
self.assertEqual("eq", results["hits_relation"])
self.assertEqual("pages", results["unit"])
self.assertEqual(55, results["took"])
self.assertFalse("error-type" in results)
Expand Down Expand Up @@ -902,6 +953,7 @@ def test_scroll_query_cannot_clear_scroll(self, es):
self.assertEqual(2, results["weight"])
self.assertEqual(2, results["pages"])
self.assertEqual(1, results["hits"])
self.assertEqual("eq", results["hits_relation"])
self.assertEqual("pages", results["unit"])
self.assertEqual(55, results["took"])
self.assertFalse("error-type" in results)
Expand Down Expand Up @@ -967,6 +1019,7 @@ def test_scroll_query_request_all_pages(self, es):
self.assertEqual(2, results["weight"])
self.assertEqual(2, results["pages"])
self.assertEqual(4, results["hits"])
self.assertEqual("eq", results["hits_relation"])
self.assertEqual(900, results["took"])
self.assertEqual("pages", results["unit"])
self.assertFalse(results["timed_out"])
Expand Down

0 comments on commit abf1623

Please sign in to comment.