Skip to content

Commit

Permalink
Improve handling of documents archive and related settings
Browse files Browse the repository at this point in the history
Closes #264
  • Loading branch information
danielmitterdorfer committed May 24, 2017
1 parent 93ba94f commit 626a7dd
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 13 deletions.
6 changes: 3 additions & 3 deletions docs/track.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,9 @@ Each type consists of the following properties:
* ``name`` (mandatory): Name of the type.
* ``mapping`` (mandatory): File name of the corresponding mapping file.
* ``documents`` (optional): File name of the corresponding documents that should be indexed. If you are using parent-child, specify the number of parent documents. This file has to be compressed either as ``.zip``, ``.bz2``, ``.gz``, ``.tar``, ``.tar.gz``, ``.tgz`` or ``.tar.bz2`` and must contain exactly one JSON file with the same name. The preferred file extension for our official tracks is ``.bz2``.
* ``document-count`` (optional): Number of documents in the documents file. This number will be used to verify that all documents have been indexed successfully.
* ``compressed-bytes`` (optional): The size in bytes of the compressed document file. This number is used to show users how much data will be downloaded by Rally and also to check whether the download is complete.
* ``uncompressed-bytes`` (optional): The size in bytes of the documents file after decompression.
* ``document-count`` (mandatory if ``documents`` is set): Number of documents in the documents file. This number is used by Rally to determine which client indexes which part of the document corpus (each of the N clients gets one N-th of the document corpus).
* ``compressed-bytes`` (optional but recommended if ``documents`` is set): The size in bytes of the compressed document file. This number is used to show users how much data will be downloaded by Rally and also to check whether the download is complete.
* ``uncompressed-bytes`` (optional but recommended if ``documents`` is set): The size in bytes of the documents file after decompression. This number is used by Rally to show users how much disk space the decompressed file will need and to check that the whole file could be decompressed successfully.

Example::

Expand Down
13 changes: 9 additions & 4 deletions esrally/track/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,18 +549,23 @@ def _create_type(self, type_spec, mapping_dir, data_dir):
if compressed_docs:
document_archive = "%s/%s" % (data_dir, compressed_docs)
document_file = "%s/%s" % (data_dir, io.splitext(compressed_docs)[0])
number_of_documents = self._r(type_spec, "document-count")
compressed_bytes = self._r(type_spec, "compressed-bytes", mandatory=False)
uncompressed_bytes = self._r(type_spec, "uncompressed-bytes", mandatory=False)
else:
document_archive = None
document_file = None
number_of_documents = 0
compressed_bytes = 0
uncompressed_bytes = 0

return track.Type(name=self._r(type_spec, "name"),
mapping_file="%s/%s" % (mapping_dir, self._r(type_spec, "mapping")),
document_file=document_file,
document_archive=document_archive,
number_of_documents=self._r(type_spec, "document-count", mandatory=False, default_value=0),
compressed_size_in_bytes=self._r(type_spec, "compressed-bytes", mandatory=False),
uncompressed_size_in_bytes=self._r(type_spec, "uncompressed-bytes", mandatory=False)
)
number_of_documents=number_of_documents,
compressed_size_in_bytes=compressed_bytes,
uncompressed_size_in_bytes=uncompressed_bytes)

def _create_challenges(self, track_spec):
ops = self.parse_operations(self._r(track_spec, "operations"))
Expand Down
10 changes: 4 additions & 6 deletions esrally/track/track.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,9 @@ def __init__(self, name, mapping_file, document_file=None, document_archive=None
:param number_of_documents: The number of documents in the benchmark document. Needed for proper progress reporting. Only needed if
a document_archive is given.
:param compressed_size_in_bytes: The compressed size in bytes of the benchmark document. Needed for verification of the download and
user reporting. Only needed if a document_archive is given.
:param uncompressed_size_in_bytes: The size in bytes of the benchmark document after decompressing it. Only needed if a
document_archive is given.
user reporting. Only useful if a document_archive is given (optional but recommended to be set).
:param uncompressed_size_in_bytes: The size in bytes of the benchmark document after decompressing it. Only useful if a
document_archive is given (optional but recommended to be set).
"""
self.name = name
self.mapping_file = mapping_file
Expand All @@ -126,9 +126,7 @@ def __init__(self, name, mapping_file, document_file=None, document_archive=None

# Reports whether this type carries usable document data: a document file must be set and the
# document count must be positive.
# NOTE(review): this is a rendered diff hunk, so removed and added lines appear together. The
# "4 additions & 6 deletions" summary for this file suggests the three-line condition below
# (count and both sizes > 0) was replaced by the single trailing count check - confirm in the repo.
def has_valid_document_data(self):
return self.document_file is not None and \
self.number_of_documents > 0 and \
self.compressed_size_in_bytes > 0 and \
self.uncompressed_size_in_bytes > 0
self.number_of_documents > 0

def __str__(self, *args, **kwargs):
    """Return the ``name`` attribute as the string form; extra arguments are accepted and ignored."""
    label = self.name
    return label
Expand Down
14 changes: 14 additions & 0 deletions tests/track/loader_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,20 @@ def test_can_read_track_info(self):
self.assertEqual("longer description of this track for unit test", resulting_track.description)
self.assertEqual("https://localhost/data", resulting_track.source_root_url)

def test_document_count_mandatory_if_file_present(self):
    # The only type in this specification names a documents archive but gives no
    # "document-count"; the reader is expected to reject it.
    doc_type = {"name": "docs", "documents": "documents.json.bz2"}
    index = {"name": "test-index", "types": [doc_type]}
    spec = {
        "short-description": "short description for unit test",
        "description": "longer description of this track for unit test",
        "data-url": "https://localhost/data",
        "indices": [index],
        "operations": [],
        "challenges": [],
    }
    read_track = loader.TrackSpecificationReader()
    with self.assertRaises(loader.TrackSyntaxError) as ctx:
        read_track("unittest", spec, "/mappings", "/data")
    # Checked after the ``with`` block: statements following the raising call inside it never run.
    message = ctx.exception.args[0]
    self.assertEqual("Track 'unittest' is invalid. Mandatory element 'document-count' is missing.", message)

def test_parse_with_mixed_warmup_iterations_and_measurement(self):
track_specification = {
"short-description": "short description for unit test",
Expand Down

0 comments on commit 626a7dd

Please sign in to comment.