diff --git a/docs/track.rst b/docs/track.rst index bb339b500..056bc3fbb 100644 --- a/docs/track.rst +++ b/docs/track.rst @@ -83,9 +83,9 @@ Each type consists of the following properties: * ``name`` (mandatory): Name of the type. * ``mapping`` (mandatory): File name of the corresponding mapping file. * ``documents`` (optional): File name of the corresponding documents that should be indexed. If you are using parent-child, specify the number of parent documents. This file has to be compressed either as ``.zip``, ``.bz2``, ``.gz``, ``.tar``, ``.tar.gz``, ``.tgz`` or ``.tar.bz2`` and must contain exactly one JSON file with the same name. The preferred file extension for our official tracks is .bz2. -* ``document-count`` (optional): Number of documents in the documents file. This number will be used to verify that all documents have been indexed successfully. -* ``compressed-bytes`` (optional): The size in bytes of the compressed document file. This number is used to show users how much data will be downloaded by Rally and also to check whether the download is complete. -* ``uncompressed-bytes`` (optional): The size in bytes of the documents file after decompression. +* ``document-count`` (mandatory if ``documents`` is set): Number of documents in the documents file. This number is used by Rally to determine which client indexes which part of the document corpus (each of the N clients gets one N-th of the document corpus). +* ``compressed-bytes`` (optional but recommended if ``documents`` is set): The size in bytes of the compressed document file. This number is used to show users how much data will be downloaded by Rally and also to check whether the download is complete. +* ``uncompressed-bytes`` (optional but recommended if ``documents`` is set): The size in bytes of the documents file after decompression. This number is used by Rally to show users how much disk space the decompressed file will need and to check that the whole file could be decompressed successfully. Example:: diff --git a/esrally/track/loader.py b/esrally/track/loader.py index b0d6821b9..cc40869b1 100644 --- a/esrally/track/loader.py +++ b/esrally/track/loader.py @@ -549,18 +549,23 @@ def _create_type(self, type_spec, mapping_dir, data_dir): if compressed_docs: document_archive = "%s/%s" % (data_dir, compressed_docs) document_file = "%s/%s" % (data_dir, io.splitext(compressed_docs)[0]) + number_of_documents = self._r(type_spec, "document-count") + compressed_bytes = self._r(type_spec, "compressed-bytes", mandatory=False) + uncompressed_bytes = self._r(type_spec, "uncompressed-bytes", mandatory=False) else: document_archive = None document_file = None + number_of_documents = 0 + compressed_bytes = 0 + uncompressed_bytes = 0 return track.Type(name=self._r(type_spec, "name"), mapping_file="%s/%s" % (mapping_dir, self._r(type_spec, "mapping")), document_file=document_file, document_archive=document_archive, - number_of_documents=self._r(type_spec, "document-count", mandatory=False, default_value=0), - compressed_size_in_bytes=self._r(type_spec, "compressed-bytes", mandatory=False), - uncompressed_size_in_bytes=self._r(type_spec, "uncompressed-bytes", mandatory=False) - ) + number_of_documents=number_of_documents, + compressed_size_in_bytes=compressed_bytes, + uncompressed_size_in_bytes=uncompressed_bytes) def _create_challenges(self, track_spec): ops = self.parse_operations(self._r(track_spec, "operations")) diff --git a/esrally/track/track.py b/esrally/track/track.py index 76670c08f..59f71c866 100644 --- a/esrally/track/track.py +++ b/esrally/track/track.py @@ -112,9 +112,9 @@ def __init__(self, name, mapping_file, document_file=None, document_archive=None :param number_of_documents: The number of documents in the benchmark document. Needed for proper progress reporting. Only needed if a document_archive is given. :param compressed_size_in_bytes: The compressed size in bytes of the benchmark document. Needed for verification of the download and - user reporting. Only needed if a document_archive is given. - :param uncompressed_size_in_bytes: The size in bytes of the benchmark document after decompressing it. Only needed if a - document_archive is given. + user reporting. Only useful if a document_archive is given (optional but recommended to be set). + :param uncompressed_size_in_bytes: The size in bytes of the benchmark document after decompressing it. Only useful if a + document_archive is given (optional but recommended to be set). """ self.name = name self.mapping_file = mapping_file @@ -126,9 +126,7 @@ def __init__(self, name, mapping_file, document_file=None, document_archive=None def has_valid_document_data(self): return self.document_file is not None and \ - self.number_of_documents > 0 and \ - self.compressed_size_in_bytes > 0 and \ - self.uncompressed_size_in_bytes > 0 + self.number_of_documents > 0 def __str__(self, *args, **kwargs): return self.name diff --git a/tests/track/loader_test.py b/tests/track/loader_test.py index 5270d5104..257eff3d1 100644 --- a/tests/track/loader_test.py +++ b/tests/track/loader_test.py @@ -293,6 +293,20 @@ def test_can_read_track_info(self): self.assertEqual("longer description of this track for unit test", resulting_track.description) self.assertEqual("https://localhost/data", resulting_track.source_root_url) + def test_document_count_mandatory_if_file_present(self): + track_specification = { + "short-description": "short description for unit test", + "description": "longer description of this track for unit test", + "data-url": "https://localhost/data", + "indices": [{"name": "test-index", "types": [{"name": "docs", "documents": "documents.json.bz2"}]}], + "operations": [], + "challenges": [] + } + reader = loader.TrackSpecificationReader() + with self.assertRaises(loader.TrackSyntaxError) as ctx: + reader("unittest", track_specification, "/mappings", "/data") + self.assertEqual("Track 'unittest' is invalid. Mandatory element 'document-count' is missing.", ctx.exception.args[0]) + def test_parse_with_mixed_warmup_iterations_and_measurement(self): track_specification = { "short-description": "short description for unit test",