From 85e502b008e8312841ba9c825f0478b868844ea6 Mon Sep 17 00:00:00 2001 From: Christian Dahlqvist Date: Mon, 19 Jun 2017 11:21:26 +0100 Subject: [PATCH 1/5] Added additional metrics for bulk requests. --- esrally/driver/runner.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/esrally/driver/runner.py b/esrally/driver/runner.py index d386a7705..1105e290a 100644 --- a/esrally/driver/runner.py +++ b/esrally/driver/runner.py @@ -114,6 +114,8 @@ def __call__(self, es, params): * ``index``: name of the affected index. May be `None` if it could not be derived. * ``bulk-size``: bulk size, e.g. 5.000. + * ``bulk-request-size-bytes``: size of the full bulk request in bytes + * ``total-document-size-bytes``: size of all documents contained in the bulk request in bytes * ``weight``: operation-agnostic representation of the bulk size (used internally by Rally for throughput calculation). * ``unit``: The unit in which to interpret ``bulk-size`` and ``weight``. Always "docs". * ``success``: A boolean indicating whether the bulk request has succeeded. 
@@ -139,6 +141,8 @@ def __call__(self, es, params): "weight": 5000, "unit": "docs", "bulk-size": 5000, + "bulk-request-size-bytes": 2250000, + "total-document-size-bytes": 2000000, "success": True, "success-count": 5000, "error-count": 0 @@ -151,6 +155,8 @@ def __call__(self, es, params): "weight": 5000, "unit": "docs", "bulk-size": 5000, + "bulk-request-size-bytes": 2250000, + "total-document-size-bytes": 2000000, "success": False, "success-count": 4000, "error-count": 1000 @@ -164,6 +170,8 @@ def __call__(self, es, params): "weight": 5000, "unit": "docs", "bulk-size": 5000, + "bulk-request-size-bytes": 2250000, + "total-document-size-bytes": 2000000, "success": True, "success-count": 5000, "error-count": 0, @@ -193,6 +201,8 @@ def __call__(self, es, params): "weight": 5000, "unit": "docs", "bulk-size": 5000, + "bulk-request-size-bytes": 2250000, + "total-document-size-bytes": 2000000, "success": False, "success-count": 4000, "error-count": 1000, @@ -245,6 +255,18 @@ def __call__(self, es, params): raise exceptions.DataError( "Bulk parameter source did not provide a 'bulk-size' parameter. 
Please add it to your parameter source.") + bulk_request_size_bytes = 0 + total_document_size_bytes = 0 + + for i in range(len(params["body"])): + if params["action_metadata_present"]: + if i % 2 == 1: + total_document_size_bytes += len(params["body"][i]) + else: + total_document_size_bytes += len(params["body"][i]) + + bulk_request_size_bytes += len(params["body"][i]) + if with_action_metadata: # only half of the lines are documents response = es.bulk(body=params["body"], params=bulk_params) @@ -258,6 +280,8 @@ def __call__(self, es, params): "weight": bulk_size, "unit": "docs", "bulk-size": bulk_size, + "bulk-request-size-bytes": bulk_request_size_bytes, + "total-document-size-bytes": total_document_size_bytes } meta_data.update(stats) return meta_data From b66c5dc3de49d3f3f4e4eb147a42cc78ad953f33 Mon Sep 17 00:00:00 2001 From: Christian Dahlqvist Date: Wed, 21 Jun 2017 15:08:05 +0100 Subject: [PATCH 2/5] Moved bulk size stats to detailed_stats --- esrally/driver/runner.py | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/esrally/driver/runner.py b/esrally/driver/runner.py index 1105e290a..e950a097f 100644 --- a/esrally/driver/runner.py +++ b/esrally/driver/runner.py @@ -255,41 +255,41 @@ def __call__(self, es, params): raise exceptions.DataError( "Bulk parameter source did not provide a 'bulk-size' parameter. 
Please add it to your parameter source.") - bulk_request_size_bytes = 0 - total_document_size_bytes = 0 - - for i in range(len(params["body"])): - if params["action_metadata_present"]: - if i % 2 == 1: - total_document_size_bytes += len(params["body"][i]) - else: - total_document_size_bytes += len(params["body"][i]) - - bulk_request_size_bytes += len(params["body"][i]) - if with_action_metadata: # only half of the lines are documents response = es.bulk(body=params["body"], params=bulk_params) else: response = es.bulk(body=params["body"], index=index, doc_type=params["type"], params=bulk_params) - stats = self.detailed_stats(bulk_size, response) if detailed_results else self.simple_stats(bulk_size, response) + stats = self.detailed_stats(params, bulk_size, response) if detailed_results else self.simple_stats(bulk_size, response) meta_data = { "index": str(index) if index else None, "weight": bulk_size, "unit": "docs", - "bulk-size": bulk_size, - "bulk-request-size-bytes": bulk_request_size_bytes, - "total-document-size-bytes": total_document_size_bytes + "bulk-size": bulk_size } meta_data.update(stats) return meta_data - def detailed_stats(self, bulk_size, response): + def detailed_stats(self, params, bulk_size, response): ops = {} shards_histogram = OrderedDict() bulk_error_count = 0 + bulk_request_size_bytes = 0 + total_document_size_bytes = 0 + + for line_number, data in enumerate(params["body"]): + + line_size = len(data.encode('utf-8')) + if params["action_metadata_present"]: + if i % 2 == 1: + total_document_size_bytes += line_size + else: + total_document_size_bytes += line_size + + bulk_request_size_bytes += line_size + for idx, item in enumerate(response["items"]): # there is only one (top-level) item op, data = next(iter(item.items())) @@ -315,7 +315,9 @@ def detailed_stats(self, bulk_size, response): "success-count": bulk_size - bulk_error_count, "error-count": bulk_error_count, "ops": ops, - "shards_histogram": list(shards_histogram.values()) + 
"shards_histogram": list(shards_histogram.values()), + "bulk-request-size-bytes": bulk_request_size_bytes, + "total-document-size-bytes": total_document_size_bytes } def simple_stats(self, bulk_size, response): From 0e57e0f8563bcdc642caa6a961eea7469a30ddba Mon Sep 17 00:00:00 2001 From: Christian Dahlqvist Date: Wed, 21 Jun 2017 15:46:20 +0100 Subject: [PATCH 3/5] Fixed variable naming issue --- esrally/driver/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esrally/driver/runner.py b/esrally/driver/runner.py index e950a097f..0013bd938 100644 --- a/esrally/driver/runner.py +++ b/esrally/driver/runner.py @@ -283,7 +283,7 @@ def detailed_stats(self, params, bulk_size, response): line_size = len(data.encode('utf-8')) if params["action_metadata_present"]: - if i % 2 == 1: + if line_number % 2 == 1: total_document_size_bytes += line_size else: total_document_size_bytes += line_size From fa576382ac4124dc86d28b9ecbe9384f7a31c0b7 Mon Sep 17 00:00:00 2001 From: Christian Dahlqvist Date: Wed, 21 Jun 2017 15:54:22 +0100 Subject: [PATCH 4/5] Corrected documentation --- esrally/driver/runner.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/esrally/driver/runner.py b/esrally/driver/runner.py index 0013bd938..4fa147fed 100644 --- a/esrally/driver/runner.py +++ b/esrally/driver/runner.py @@ -130,7 +130,8 @@ def __call__(self, es, params): * ``shards_histogram``: An array of hashes where each hash has two keys: ``item-count`` contains the number of items to which a shard distribution applies and ``shards`` contains another hash with the actual distribution of ``total``, ``successful`` and ``failed`` shards (see examples below). - + * ``bulk-request-size-bytes``: Total size of the bulk request body in bytes. + * ``total-document-size-bytes``: Total size of all documents within the bulk request body in bytes. 
Here are a few examples: @@ -141,8 +142,6 @@ def __call__(self, es, params): "weight": 5000, "unit": "docs", "bulk-size": 5000, - "bulk-request-size-bytes": 2250000, - "total-document-size-bytes": 2000000, "success": True, "success-count": 5000, "error-count": 0 @@ -155,8 +154,6 @@ def __call__(self, es, params): "weight": 5000, "unit": "docs", "bulk-size": 5000, - "bulk-request-size-bytes": 2250000, - "total-document-size-bytes": 2000000, "success": False, "success-count": 4000, "error-count": 1000 From 25db1bb66985b48e87c0e0645bf85815ca9f7b5e Mon Sep 17 00:00:00 2001 From: Christian Dahlqvist Date: Thu, 29 Jun 2017 10:19:17 +0100 Subject: [PATCH 5/5] Updated detailed bulk index test with size stats --- tests/driver/runner_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/driver/runner_test.py b/tests/driver/runner_test.py index 8aba3e024..8c9fca253 100644 --- a/tests/driver/runner_test.py +++ b/tests/driver/runner_test.py @@ -457,6 +457,8 @@ def test_mixed_bulk_with_detailed_stats(self, es): } } ], result["shards_histogram"]) + self.assertEqual(158, result["bulk-request-size-bytes"]) + self.assertEqual(62, result["total-document-size-bytes"]) es.bulk.assert_called_with(body=bulk_params["body"], params={})