From 251089673b17fe99acd457a0d5f4909051f31ec0 Mon Sep 17 00:00:00 2001 From: Dale McDiarmid Date: Thu, 15 Oct 2020 15:40:26 +0100 Subject: [PATCH 1/9] Add support for datastreams --- esrally/driver/runner.py | 45 ++++++++++++ esrally/resources/track-schema.json | 24 ++++++- esrally/track/loader.py | 9 ++- esrally/track/params.py | 107 +++++++++++++++++++++++++--- esrally/track/track.py | 70 +++++++++++++++--- 5 files changed, 233 insertions(+), 22 deletions(-) diff --git a/esrally/driver/runner.py b/esrally/driver/runner.py index 0a9060974..28bc0919d 100644 --- a/esrally/driver/runner.py +++ b/esrally/driver/runner.py @@ -50,6 +50,8 @@ def register_default_runners(): register_runner(track.OperationType.Refresh.name, Retry(Refresh()), async_runner=True) register_runner(track.OperationType.CreateIndex.name, Retry(CreateIndex()), async_runner=True) register_runner(track.OperationType.DeleteIndex.name, Retry(DeleteIndex()), async_runner=True) + register_runner(track.OperationType.CreateDataStream.name, Retry(CreateDataStream()), async_runner=True) + register_runner(track.OperationType.DeleteDataStream.name, Retry(DeleteDataStream()), async_runner=True) register_runner(track.OperationType.CreateIndexTemplate.name, Retry(CreateIndexTemplate()), async_runner=True) register_runner(track.OperationType.DeleteIndexTemplate.name, Retry(DeleteIndexTemplate()), async_runner=True) register_runner(track.OperationType.ShrinkIndex.name, Retry(ShrinkIndex()), async_runner=True) @@ -1031,6 +1033,22 @@ def __repr__(self, *args, **kwargs): return "create-index" +class CreateDataStream(Runner): + """ + Execute the `create data stream API `_. + """ + + async def __call__(self, es, params): + data_streams = mandatory(params, "data-streams", self) + request_params = params.get("request-params", {}) + for data_stream in data_streams: + await es.indices.create_data_stream(data_stream, params=request_params) + return len(data_streams), "ops" + + def __repr__(self, *args, **kwargs): + return "create-data-stream" + + class DeleteIndex(Runner): """ Execute the `delete index API `_. @@ -1058,6 +1076,33 @@ def __repr__(self, *args, **kwargs): return "delete-index" +class DeleteDataStream(Runner): + """ + Execute the `delete data stream API `_. + """ + + async def __call__(self, es, params): + ops = 0 + + data_streams = mandatory(params, "data-streams", self) + only_if_exists = params.get("only-if-exists", False) + request_params = params.get("request-params", {}) + + for data_stream in data_streams: + if not only_if_exists: + await es.indices.delete_data_stream(data_stream, ignore=[404], params=request_params) + ops += 1 + elif only_if_exists and await es.indices.exists(index=data_stream): + self.logger.info("Data stream [%s] already exists. Deleting it.", data_stream) + await es.indices.delete_data_stream(data_stream, ignore=[404]) + ops += 1 + + return ops, "ops" + + def __repr__(self, *args, **kwargs): + return "delete-data-stream" + + class CreateIndexTemplate(Runner): """ Execute the `PUT index template API `_. diff --git a/esrally/resources/track-schema.json b/esrally/resources/track-schema.json index b22be6782..ce016a649 100644 --- a/esrally/resources/track-schema.json +++ b/esrally/resources/track-schema.json @@ -262,6 +262,24 @@ ] } }, + "data-streams": { + "type": "array", + "minItems": 1, + "uniqueItems": true, + "items": { + "title": "Index", + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name of the data stream to create." 
+ } + }, + "required": [ + "name" + ] + } + }, "corpora": { "type": "array", "minItems": 1, @@ -291,6 +309,10 @@ "type": "string", "description": "The name of the associated index (if any)." }, + "target-data-stream": { + "type": "string", + "description": "The name of the associated data stream (if any)." + }, "target-type": { "type": "string", "description": "The name of the associated document type (if any)." @@ -469,4 +491,4 @@ "$ref": "#/definitions/schedule" } } -} \ No newline at end of file +} diff --git a/esrally/track/loader.py b/esrally/track/loader.py index 105660377..074385171 100644 --- a/esrally/track/loader.py +++ b/esrally/track/loader.py @@ -987,13 +987,15 @@ def __call__(self, track_name, track_specification, mapping_dir): meta_data = self._r(track_specification, "meta", mandatory=False) indices = [self._create_index(idx, mapping_dir) for idx in self._r(track_specification, "indices", mandatory=False, default_value=[])] + data_streams = [self._create_data_stream(idx) + for idx in self._r(track_specification, "data-streams", mandatory=False, default_value=[])] templates = [self._create_index_template(tpl, mapping_dir) for tpl in self._r(track_specification, "templates", mandatory=False, default_value=[])] corpora = self._create_corpora(self._r(track_specification, "corpora", mandatory=False, default_value=[]), indices) challenges = self._create_challenges(track_specification) # at this point, *all* track params must have been referenced in the templates - return track.Track(name=self.name, meta_data=meta_data, description=description, challenges=challenges, indices=indices, - templates=templates, corpora=corpora) + return track.Track(name=self.name, meta_data=meta_data, description=description, challenges=challenges, + indices=indices, data_streams=data_streams, templates=templates, corpora=corpora) def _error(self, msg): raise TrackSyntaxError("Track '%s' is invalid. %s" % (self.name, msg)) @@ -1031,6 +1033,9 @@ def _create_index(self, index_spec, mapping_dir): return track.Index(name=index_name, body=body, types=self._r(index_spec, "types", mandatory=False, default_value=[])) + def _create_data_stream(self, data_stream_spec): + return track.DataStream(name=self._r(data_stream_spec, "name")) + def _create_index_template(self, tpl_spec, mapping_dir): name = self._r(tpl_spec, "name") template_file = self._r(tpl_spec, "template") diff --git a/esrally/track/params.py b/esrally/track/params.py index 885943542..bfad41b8f 100644 --- a/esrally/track/params.py +++ b/esrally/track/params.py @@ -202,6 +202,70 @@ def params(self): return p +class CreateDataStreamParamSource(ParamSource): + def __init__(self, track, params, **kwargs): + super().__init__(track, params, **kwargs) + self.request_params = params.get("request-params", {}) + self.data_stream_definitions = [] + if track.data_streams: + filter_ds = params.get("data-stream") + if isinstance(filter_ds, str): + filter_ds = [filter_ds] + for ds in track.data_streams: + if not filter_ds or ds.name in filter_ds: + self.data_stream_definitions.append(ds.name) + else: + try: + idx = params["data-stream"] + if isinstance(idx, str): + idx = [idx] + for i in idx: + self.data_stream_definitions.append(i) + except KeyError: + raise exceptions.InvalidSyntax("Please set the property 'data-stream' for the create-data-stream operation") + + def params(self): + p = {} + # ensure we pass all parameters... 
+ p.update(self._params) + p.update({ + "data_streams": self.data_stream_definitions, + "request-params": self.request_params + }) + return p + + +class DeleteDataStreamParamSource(ParamSource): + def __init__(self, track, params, **kwargs): + super().__init__(track, params, **kwargs) + self.request_params = params.get("request-params", {}) + self.only_if_exists = params.get("only-if-exists", True) + + self.data_stream_definitions = [] + target_data_stream = params.get("data-stream") + if target_data_stream: + if isinstance(target_data_stream, str): + target_data_stream = [target_data_stream] + for ds in target_data_stream: + self.data_stream_definitions.append(ds) + elif track.data_streams: + for ds in track.data_streams: + self.data_stream_definitions.append(ds.name) + else: + raise exceptions.InvalidSyntax("delete-data-stream operation targets no data stream") + + def params(self): + p = {} + # ensure we pass all parameters... + p.update(self._params) + p.update({ + "data_streams": self.data_stream_definitions, + "request-params": self.request_params, + "only-if-exists": self.only_if_exists + }) + return p + + class DeleteIndexParamSource(ParamSource): def __init__(self, track, params, **kwargs): super().__init__(track, params, **kwargs) @@ -344,10 +408,14 @@ def __init__(self, track, params, **kwargs): super().__init__(track, params, **kwargs) if len(track.indices) == 1: default_index = track.indices[0].name + elif len(track.data_streams) == 1: + default_index = track.data_streams[0].name else: default_index = None - index_name = params.get("index", default_index) + index_name = params.get("index") + if not index_name: + index_name = params.get("data-stream", default_index) type_name = params.get("type") request_cache = params.get("cache", None) query_body = params.get("body", None) @@ -614,7 +682,8 @@ class ForceMergeParamSource(ParamSource): def __init__(self, track, params, **kwargs): super().__init__(track, params, **kwargs) if len(track.indices) > 0: - default_index = ','.join(map(str, track.indices)) + # force merge data streams and indices + default_index = ','.join(map(str, track.indices + track.data_streams)) else: default_index = "_all" @@ -679,14 +748,21 @@ def chain(*iterables): def create_default_reader(docs, offset, num_lines, num_docs, batch_size, bulk_size, id_conflicts, conflict_probability, on_conflict, recency): source = Slice(io.MmapSource, offset, num_lines) + target = None + use_create = False + if docs.target_index: + target = docs.target_index + elif docs.target_data_stream: + target = docs.target_data_stream + use_create = True if docs.includes_action_and_meta_data: - return SourceOnlyIndexDataReader(docs.document_file, batch_size, bulk_size, source, docs.target_index, docs.target_type) + return SourceOnlyIndexDataReader(docs.document_file, batch_size, bulk_size, source, target, docs.target_type) else: - am_handler = GenerateActionMetaData(docs.target_index, docs.target_type, + am_handler = GenerateActionMetaData(target, docs.target_type, build_conflicting_ids(id_conflicts, num_docs, offset), conflict_probability, - on_conflict, recency) - return MetadataIndexDataReader(docs.document_file, batch_size, bulk_size, source, am_handler, docs.target_index, docs.target_type) + on_conflict, recency, use_create=use_create) + return MetadataIndexDataReader(docs.document_file, batch_size, bulk_size, source, am_handler, target, docs.target_type) def create_readers(num_clients, start_client_index, end_client_index, corpora, batch_size, bulk_size, id_conflicts, @@ -698,9 
+774,14 @@ def create_readers(num_clients, start_client_index, end_client_index, corpora, b offset, num_docs, num_lines = bounds(docs.number_of_documents, start_client_index, end_client_index, num_clients, docs.includes_action_and_meta_data) if num_docs > 0: - logger.info("Task-relative clients at index [%d-%d] will bulk index [%d] docs starting from line offset [%d] for [%s/%s] " + target = "/" + if docs.target_index: + target = f"{docs.target_index}/{docs.target_type}" + elif docs.target_data_stream: + target = docs.target_data_stream + logger.info("Task-relative clients at index [%d-%d] will bulk index [%d] docs starting from line offset [%d] for [%s] " "from corpus [%s].", start_client_index, end_client_index, num_docs, offset, - docs.target_index, docs.target_type, corpus.name) + target, corpus.name) readers.append(create_reader(docs, offset, num_lines, num_docs, batch_size, bulk_size, id_conflicts, conflict_probability, on_conflict, recency)) else: @@ -790,8 +871,8 @@ def bulk_data_based(num_clients, start_client_index, end_client_index, corpora, class GenerateActionMetaData: RECENCY_SLOPE = 30 - def __init__(self, index_name, type_name, conflicting_ids=None, conflict_probability=None, on_conflict=None, - recency=None, rand=random.random, randint=random.randint, randexp=random.expovariate): + def __init__(self, index_name, type_name, conflicting_ids=None, conflict_probability=None, on_conflict=None, recency=None, + rand=random.random, randint=random.randint, randexp=random.expovariate, use_create=False): if type_name: self.meta_data_index_with_id = '{"index": {"_index": "%s", "_type": "%s", "_id": "%s"}}\n' % \ (index_name, type_name, "%s") @@ -802,9 +883,11 @@ def __init__(self, index_name, type_name, conflicting_ids=None, conflict_probabi self.meta_data_index_with_id = '{"index": {"_index": "%s", "_id": "%s"}}\n' % (index_name, "%s") self.meta_data_update_with_id = '{"update": {"_index": "%s", "_id": "%s"}}\n' % (index_name, "%s") self.meta_data_index_no_id = '{"index": {"_index": "%s"}}\n' % index_name + self.meta_data_create_no_id = '{"create": {"_index": "%s"}}\n' % index_name self.conflicting_ids = conflicting_ids self.on_conflict = on_conflict + self.use_create = use_create # random() produces numbers between 0 and 1 and the user denotes the probability in percentage between 0 and 100 self.conflict_probability = conflict_probability / 100.0 if conflict_probability is not None else 0 self.recency = recency if recency is not None else 0 @@ -857,6 +940,8 @@ def __next__(self): else: raise exceptions.RallyAssertionError("Unknown action [{}]".format(action)) else: + if self.use_create: + return "create", self.meta_data_create_no_id return "index", self.meta_data_index_no_id @@ -1020,6 +1105,8 @@ def read_bulk(self): register_param_source_for_operation(track.OperationType.Search, SearchParamSource) register_param_source_for_operation(track.OperationType.CreateIndex, CreateIndexParamSource) register_param_source_for_operation(track.OperationType.DeleteIndex, DeleteIndexParamSource) +register_param_source_for_operation(track.OperationType.CreateDataStream, CreateDataStreamParamSource) +register_param_source_for_operation(track.OperationType.DeleteDataStream, DeleteDataStreamParamSource) register_param_source_for_operation(track.OperationType.CreateIndexTemplate, CreateIndexTemplateParamSource) register_param_source_for_operation(track.OperationType.DeleteIndexTemplate, DeleteIndexTemplateParamSource) register_param_source_for_operation(track.OperationType.Sleep, SleepParamSource) diff 
--git a/esrally/track/track.py b/esrally/track/track.py index 52c0660ed..00f1165ec 100644 --- a/esrally/track/track.py +++ b/esrally/track/track.py @@ -68,6 +68,46 @@ def __eq__(self, other): return self.name == other.name +class DataStream: + """ + Defines a data stream in Elasticsearch. + """ + + def __init__(self, name): + """ + + Creates a new data stream. + + :param name: The data stream name. Mandatory. + """ + self.name = name + + def matches(self, pattern): + if pattern is None: + return True + elif pattern in ["_all", "*"]: + return True + elif self.name == pattern: + return True + else: + return False + + def __str__(self): + return self.name + + def __repr__(self): + r = [] + for prop, value in vars(self).items(): + r.append("%s = [%s]" % (prop, repr(value))) + return ", ".join(r) + + def __hash__(self): + return hash(self.name) + + def __eq__(self, other): + return self.name == other.name + + class IndexTemplate: """ Defines an index template in Elasticsearch. @@ -108,7 +148,7 @@ class Documents: SOURCE_FORMAT_BULK = "bulk" def __init__(self, source_format, document_file=None, document_archive=None, base_url=None, includes_action_and_meta_data=False, - number_of_documents=0, compressed_size_in_bytes=0, uncompressed_size_in_bytes=0, target_index=None, target_type=None): + number_of_documents=0, compressed_size_in_bytes=0, uncompressed_size_in_bytes=0, target_index=None, target_data_stream=None, target_type=None): """ :param source_format: The format of these documents. Mandatory. @@ -126,6 +166,7 @@ def __init__(self, source_format, document_file=None, document_archive=None, bas :param uncompressed_size_in_bytes: The size in bytes of the benchmark document after decompressing it. Only useful if a document_archive is given (optional but recommended to be set). :param target_index: The index to target for bulk operations. May be ``None`` if ``includes_action_and_meta_data`` is ``False``. + :param target_data_stream: The datastream to target for bulk operations. Maybe be ``None`` if ``includes_action_and_meta_data`` is ``False``. :param target_type: The document type to target for bulk operations. May be ``None`` if ``includes_action_and_meta_data`` is ``False``. 
""" @@ -139,6 +180,7 @@ def __init__(self, source_format, document_file=None, document_archive=None, bas self.compressed_size_in_bytes = compressed_size_in_bytes self.uncompressed_size_in_bytes = uncompressed_size_in_bytes self.target_index = target_index + self.target_data_stream = target_data_stream self.target_type = target_type def has_compressed_corpus(self): @@ -170,16 +212,16 @@ def __repr__(self): def __hash__(self): return hash(self.source_format) ^ hash(self.document_file) ^ hash(self.document_archive) ^ hash(self.base_url) ^ \ hash(self.includes_action_and_meta_data) ^ hash(self.number_of_documents) ^ hash(self.compressed_size_in_bytes) ^ \ - hash(self.uncompressed_size_in_bytes) ^ hash(self.target_index) ^ hash(self.target_type) + hash(self.uncompressed_size_in_bytes) ^ hash(self.target_index) ^ hash(self.target_data_stream) ^ hash(self.target_type) def __eq__(self, othr): return (isinstance(othr, type(self)) and (self.source_format, self.document_file, self.document_archive, self.base_url, self.includes_action_and_meta_data, self.number_of_documents, self.compressed_size_in_bytes, self.uncompressed_size_in_bytes, - self.target_type, self.target_type) == + self.target_type, self.target_data_stream, self.target_type) == (othr.source_format, othr.document_file, othr.document_archive, othr.base_url, othr.includes_action_and_meta_data, othr.number_of_documents, othr.compressed_size_in_bytes, othr.uncompressed_size_in_bytes, - othr.target_type, othr.target_type)) + othr.target_type, othr.target_data_stream, othr.target_type)) class DocumentCorpus: @@ -219,7 +261,7 @@ def uncompressed_size_in_bytes(self, source_format): return None return num - def filter(self, source_format=None, target_indices=None): + def filter(self, source_format=None, target_indices=None, target_data_streams=None): filtered = [] for d in self.documents: # skip if source format or target index does not match @@ -227,6 +269,8 @@ def filter(self, source_format=None, target_indices=None): continue if target_indices and d.target_index not in target_indices: continue + if target_data_streams and d.target_data_stream not in target_data_streams: + continue filtered.append(d) return DocumentCorpus(self.name, filtered) @@ -262,7 +306,7 @@ class Track: A track defines the data set that is used. It corresponds loosely to a use case (e.g. logging, event processing, analytics, ...) """ - def __init__(self, name, description=None, meta_data=None, challenges=None, indices=None, templates=None, corpora=None, + def __init__(self, name, description=None, meta_data=None, challenges=None, indices=None, data_streams=None, templates=None, corpora=None, has_plugins=False): """ @@ -274,6 +318,7 @@ def __init__(self, name, description=None, meta_data=None, challenges=None, indi :param challenges: A list of one or more challenges to use. Precondition: If the list is non-empty it contains exactly one element with its ``default`` property set to ``True``. :param indices: A list of indices for this track. May be None. + :param data_streams: A list of data streams for this track. May be None. :param templates: A list of index templates for this track. May be None. :param corpora: A list of document corpus definitions for this track. May be None. :param has_plugins: True iff the track also defines plugins (e.g. custom runners or parameter sources). 
@@ -283,6 +328,7 @@ def __init__(self, name, description=None, meta_data=None, challenges=None, indi self.description = description if description is not None else "" self.challenges = challenges if challenges else [] self.indices = indices if indices else [] + self.data_streams = data_streams if data_streams else [] self.corpora = corpora if corpora else [] self.templates = templates if templates else [] self.has_plugins = has_plugins @@ -354,12 +400,12 @@ def __repr__(self): def __hash__(self): return hash(self.name) ^ hash(self.meta_data) ^ hash(self.description) ^ hash(self.challenges) ^ \ - hash(self.indices) ^ hash(self.templates) ^ hash(self.corpora) + hash(self.indices) ^ hash(self.data_streams) ^ hash(self.templates) ^ hash(self.corpora) def __eq__(self, othr): return (isinstance(othr, type(self)) and - (self.name, self.meta_data, self.description, self.challenges, self.indices, self.templates, self.corpora) == - (othr.name, othr.meta_data, othr.description, othr.challenges, othr.indices, othr.templates, othr.corpora)) + (self.name, self.meta_data, self.description, self.challenges, self.indices, self.data_streams, self.templates, self.corpora) == + (othr.name, othr.meta_data, othr.description, othr.challenges, othr.indices, othr.data_streams, othr.templates, othr.corpora)) class Challenge: @@ -449,6 +495,8 @@ class OperationType(Enum): StartTransform = 1025 WaitForTransform = 1026 DeleteTransform = 1027 + CreateDataStream = 1028 + DeleteDataStream = 1029 @property def admin_op(self): @@ -526,6 +574,10 @@ def from_hyphenated_string(cls, v): return OperationType.WaitForTransform elif v == "delete-transform": return OperationType.DeleteTransform + elif v == "create-data-stream": + return OperationType.CreateDataStream + elif v == "delete-data-stream": + return OperationType.DeleteDataStream else: raise KeyError("No enum value for [%s]" % v) From 2dd70b60900fb6efd44575e4e6dbcdc7e33d3d3b Mon Sep 17 00:00:00 2001 From: Dale McDiarmid Date: Thu, 15 Oct 2020 18:07:55 +0100 Subject: [PATCH 2/9] Loader tests --- esrally/track/loader.py | 27 +++++++-- esrally/track/params.py | 4 +- tests/track/loader_test.py | 117 ++++++++++++++++++++++++++++++++++++- 3 files changed, 141 insertions(+), 7 deletions(-) diff --git a/esrally/track/loader.py b/esrally/track/loader.py index 074385171..73a7ccda0 100644 --- a/esrally/track/loader.py +++ b/esrally/track/loader.py @@ -991,7 +991,7 @@ def __call__(self, track_name, track_specification, mapping_dir): for idx in self._r(track_specification, "data-streams", mandatory=False, default_value=[])] templates = [self._create_index_template(tpl, mapping_dir) for tpl in self._r(track_specification, "templates", mandatory=False, default_value=[])] - corpora = self._create_corpora(self._r(track_specification, "corpora", mandatory=False, default_value=[]), indices) + corpora = self._create_corpora(self._r(track_specification, "corpora", mandatory=False, default_value=[]), indices, data_streams) challenges = self._create_challenges(track_specification) # at this point, *all* track params must have been referenced in the templates return track.Track(name=self.name, meta_data=meta_data, description=description, challenges=challenges, @@ -1061,7 +1061,7 @@ def _load_template(self, contents, description): self.logger.exception("Could not load file template for %s.", description) raise TrackSyntaxError("Could not load file template for '%s'" % description, str(e)) - def _create_corpora(self, corpora_specs, indices): + def _create_corpora(self, corpora_specs, indices, 
data_streams): document_corpora = [] known_corpora_names = set() for corpus_spec in corpora_specs: @@ -1082,6 +1082,11 @@ def _create_corpora(self, corpora_specs, indices): else: corpus_target_idx = self._r(corpus_spec, "target-index", mandatory=False) + if len(data_streams) == 1: + corpus_target_ds = self._r(corpus_spec, "target-data-stream", mandatory=False, default_value=data_streams[0].name) + else: + corpus_target_ds = self._r(corpus_spec, "target-data-stream", mandatory=False) + if len(indices) == 1 and len(indices[0].types) == 1: corpus_target_type = self._r(corpus_spec, "target-type", mandatory=False, default_value=indices[0].types[0]) else: @@ -1110,10 +1115,21 @@ def _create_corpora(self, corpora_specs, indices): target_type = None else: # we need an index if no meta-data are present. - target_idx = self._r(doc_spec, "target-index", mandatory=corpus_target_idx is None, - default_value=corpus_target_idx, error_ctx=docs) + target_idx = self._r(doc_spec, "target-index", + mandatory=corpus_target_idx is None and corpus_target_ds is None, + error_ctx=docs) target_type = self._r(doc_spec, "target-type", mandatory=False, default_value=corpus_target_type, error_ctx=docs) + # not mandatory as we defn have corpus_target_idx or corpus_target_ds here + target_ds = self._r(doc_spec, "target-data-stream", mandatory=False, + error_ctx=docs) + # here we choose to use either an index or data streams. If either are explicitly specified + # (index takes precedence) this is preferred over any defaults + if not target_idx and not target_ds: + if corpus_target_idx: + target_idx = corpus_target_idx + else: + target_ds = corpus_target_ds docs = track.Documents(source_format=source_format, document_file=document_file, @@ -1123,7 +1139,8 @@ def _create_corpora(self, corpora_specs, indices): number_of_documents=num_docs, compressed_size_in_bytes=compressed_bytes, uncompressed_size_in_bytes=uncompressed_bytes, - target_index=target_idx, target_type=target_type) + target_index=target_idx, target_type=target_type, + target_data_stream=target_ds) corpus.documents.append(docs) else: self._error("Unknown source-format [%s] in document corpus [%s]." 
% (source_format, name)) diff --git a/esrally/track/params.py b/esrally/track/params.py index bfad41b8f..143a4ba79 100644 --- a/esrally/track/params.py +++ b/esrally/track/params.py @@ -585,7 +585,9 @@ def used_corpora(self, t, params): for corpus in t.corpora: if corpus.name in corpora_names: - filtered_corpus = corpus.filter(source_format=track.Documents.SOURCE_FORMAT_BULK, target_indices=params.get("indices")) + filtered_corpus = corpus.filter(source_format=track.Documents.SOURCE_FORMAT_BULK, + target_indices=params.get("indices"), + target_data_streams=params.get("data-streams")) if filtered_corpus.number_of_documents(source_format=track.Documents.SOURCE_FORMAT_BULK) > 0: corpora.append(filtered_corpus) diff --git a/tests/track/loader_test.py b/tests/track/loader_test.py index 1b59bf025..c3213a231 100644 --- a/tests/track/loader_test.py +++ b/tests/track/loader_test.py @@ -1453,6 +1453,7 @@ def test_can_read_track_info(self): track_specification = { "description": "description for unit test", "indices": [{"name": "test-index", "types": ["test-type"]}], + "data-streams": [], "corpora": [], "operations": [], "challenges": [] @@ -1492,6 +1493,11 @@ def test_parse_with_mixed_warmup_iterations_and_measurement(self, mocked_params_ "types": [ "docs" ] } ], + "data-streams": [ + { + "name": "test-data-stream" + } + ], "corpora": [ { "name": "test", @@ -1826,6 +1832,11 @@ def test_parse_valid_track_specification(self): "types": ["main", "secondary"] } ], + "data-streams": [ + { + "name": "historical-data-stream" + } + ], "corpora": [ { "name": "test", @@ -1845,6 +1856,13 @@ def test_parse_valid_track_specification(self): "document-count": 20, "compressed-bytes": 200, "uncompressed-bytes": 20000 + }, + { + "source-file": "documents-main.json.bz2", + "document-count": 10, + "compressed-bytes": 100, + "uncompressed-bytes": 10000, + "target-data-stream": "historical-data-stream" } ] } @@ -1929,10 +1947,12 @@ def test_parse_valid_track_specification(self): self.assertEqual(2, len(resulting_track.indices[0].types)) self.assertEqual("main", resulting_track.indices[0].types[0]) self.assertEqual("secondary", resulting_track.indices[0].types[1]) + # data streams + self.assertEqual("historical-data-stream", resulting_track.data_streams[0].name) # corpora self.assertEqual(1, len(resulting_track.corpora)) self.assertEqual("test", resulting_track.corpora[0].name) - self.assertEqual(2, len(resulting_track.corpora[0].documents)) + self.assertEqual(3, len(resulting_track.corpora[0].documents)) docs_primary = resulting_track.corpora[0].documents[0] self.assertEqual(track.Documents.SOURCE_FORMAT_BULK, docs_primary.source_format) @@ -1959,6 +1979,18 @@ def test_parse_valid_track_specification(self): self.assertIsNone(docs_secondary.target_index) self.assertIsNone(docs_secondary.target_type) + docs_tertiary = resulting_track.corpora[0].documents[2] + self.assertEqual(track.Documents.SOURCE_FORMAT_BULK, docs_tertiary.source_format) + self.assertEqual("documents-main.json", docs_tertiary.document_file) + self.assertEqual("documents-main.json.bz2", docs_tertiary.document_archive) + self.assertEqual("https://localhost/data", docs_tertiary.base_url) + self.assertFalse(docs_tertiary.includes_action_and_meta_data) + self.assertEqual(10, docs_tertiary.number_of_documents) + self.assertEqual(100, docs_tertiary.compressed_size_in_bytes) + self.assertIsNone(docs_tertiary.target_index) + self.assertIsNone(docs_tertiary.target_type) + self.assertEqual("historical-data-stream", docs_tertiary.target_data_stream) + # challenges 
self.assertEqual(1, len(resulting_track.challenges)) self.assertEqual("default-challenge", resulting_track.challenges[0].name) @@ -1979,6 +2011,11 @@ def test_parse_valid_without_types(self, mocked_param_checker): # no type information here } ], + "data-streams": [ + { + "name": "historical-data-stream" + } + ], "corpora": [ { "name": "test", @@ -2021,12 +2058,15 @@ def test_parse_valid_without_types(self, mocked_param_checker): # indices self.assertEqual(1, len(resulting_track.indices)) self.assertEqual("index-historical", resulting_track.indices[0].name) + self.assertEqual("historical-data-stream", resulting_track.data_streams[0].name) self.assertDictEqual({ "settings": { "number_of_shards": 3 } }, resulting_track.indices[0].body) self.assertEqual(0, len(resulting_track.indices[0].types)) + # data streams + self.assertEqual(1, len(resulting_track.data_streams)) # corpora self.assertEqual(1, len(resulting_track.corpora)) self.assertEqual("test", resulting_track.corpora[0].name) @@ -2043,6 +2083,81 @@ def test_parse_valid_without_types(self, mocked_param_checker): self.assertEqual(10000, docs_primary.uncompressed_size_in_bytes) self.assertEqual("index-historical", docs_primary.target_index) self.assertIsNone(docs_primary.target_type) + self.assertIsNone(docs_primary.target_data_stream) + + # challenges + self.assertEqual(1, len(resulting_track.challenges)) + + @mock.patch("esrally.track.loader.register_all_params_in_track") + def test_parse_valid_without_indices(self, mocked_param_checker): + track_specification = { + "description": "description for unit test", + "data-streams": [ + { + "name": "historical-data-stream" + } + ], + "corpora": [ + { + "name": "test", + "base-url": "https://localhost/data", + "documents": [ + { + "source-file": "documents-main.json.bz2", + "document-count": 10, + "compressed-bytes": 100, + "uncompressed-bytes": 10000, + }, + ] + } + ], + "schedule": [ + { + "clients": 8, + "operation": { + "name": "index-append", + "operation-type": "bulk", + "bulk-size": 5000 + } + } + ] + } + reader = loader.TrackSpecificationReader( + track_params={"number_of_shards": 3}, + source=io.DictStringFileSourceFactory({ + "/mappings/body.json": [""" + { + "settings": { + "number_of_shards": {{ number_of_shards }} + } + } + """] + })) + resulting_track = reader("unittest", track_specification, "/mappings") + self.assertEqual("unittest", resulting_track.name) + self.assertEqual("description for unit test", resulting_track.description) + # indices + self.assertEqual(0, len(resulting_track.indices)) + # data streams + self.assertEqual(1, len(resulting_track.data_streams)) + self.assertEqual("historical-data-stream", resulting_track.data_streams[0].name) + # corpora + self.assertEqual(1, len(resulting_track.corpora)) + self.assertEqual("test", resulting_track.corpora[0].name) + self.assertEqual(1, len(resulting_track.corpora[0].documents)) + + docs_primary = resulting_track.corpora[0].documents[0] + self.assertEqual(track.Documents.SOURCE_FORMAT_BULK, docs_primary.source_format) + self.assertEqual("documents-main.json", docs_primary.document_file) + self.assertEqual("documents-main.json.bz2", docs_primary.document_archive) + self.assertEqual("https://localhost/data", docs_primary.base_url) + self.assertFalse(docs_primary.includes_action_and_meta_data) + self.assertEqual(10, docs_primary.number_of_documents) + self.assertEqual(100, docs_primary.compressed_size_in_bytes) + self.assertEqual(10000, docs_primary.uncompressed_size_in_bytes) + self.assertEqual("historical-data-stream", 
docs_primary.target_data_stream) + self.assertIsNone(docs_primary.target_type) + self.assertIsNone(docs_primary.target_index) # challenges self.assertEqual(1, len(resulting_track.challenges)) From 74f10b4a3bc2b6760dafb6222982cb5bb3118a8c Mon Sep 17 00:00:00 2001 From: Dale McDiarmid Date: Thu, 15 Oct 2020 18:51:48 +0100 Subject: [PATCH 3/9] Track and runner tests --- esrally/driver/runner.py | 2 +- tests/driver/runner_test.py | 93 +++++++++++++++++++++++++++++++++++++ tests/track/track_test.py | 31 +++++++++++++ 3 files changed, 125 insertions(+), 1 deletion(-) diff --git a/esrally/driver/runner.py b/esrally/driver/runner.py index 28bc0919d..9cf8ad7b0 100644 --- a/esrally/driver/runner.py +++ b/esrally/driver/runner.py @@ -1094,7 +1094,7 @@ async def __call__(self, es, params): ops += 1 elif only_if_exists and await es.indices.exists(index=data_stream): self.logger.info("Data stream [%s] already exists. Deleting it.", data_stream) - await es.indices.delete_data_stream(data_stream, ignore=[404]) + await es.indices.delete_data_stream(data_stream, params=request_params) ops += 1 return ops, "ops" diff --git a/tests/driver/runner_test.py b/tests/driver/runner_test.py index f34e85ed6..a1fca526c 100644 --- a/tests/driver/runner_test.py +++ b/tests/driver/runner_test.py @@ -2115,6 +2115,52 @@ async def test_param_indices_mandatory(self, es): self.assertEqual(0, es.indices.create.call_count) +class CreateDataStreamRunnerTests(TestCase): + @mock.patch("elasticsearch.Elasticsearch") + @run_async + async def test_creates_multiple_data_streams(self, es): + es.indices.create_data_stream.return_value = as_future() + + r = runner.CreateDataStream() + + request_params = { + "wait_for_active_shards": "true" + } + + params = { + "data-streams": [ + "data-stream-A", + "data-stream-B" + ], + "request-params": request_params + } + + result = await r(es, params) + + self.assertEqual((2, "ops"), result) + + es.indices.create_data_stream.assert_has_calls([ + mock.call("data-stream-A", params=request_params), + mock.call("data-stream-B", params=request_params) + ]) + + + @mock.patch("elasticsearch.Elasticsearch") + @run_async + async def test_param_data_streams_mandatory(self, es): + es.indices.create_data_stream.return_value = as_future() + + r = runner.CreateDataStream() + + params = {} + with self.assertRaisesRegex(exceptions.DataError, + "Parameter source for operation 'create-data-stream' did not provide the mandatory parameter 'data-streams'. 
" + "Please add it to your parameter source."): + await r(es, params) + + self.assertEqual(0, es.indices.create_data_stream.call_count) + + class DeleteIndexRunnerTests(TestCase): @mock.patch("elasticsearch.Elasticsearch") @run_async @@ -2162,6 +2208,53 @@ async def test_deletes_all_indices(self, es): self.assertEqual(0, es.indices.exists.call_count) +class DeleteDataStreamRunnerTests(TestCase): + @mock.patch("elasticsearch.Elasticsearch") + @run_async + async def test_deletes_existing_data_streams(self, es): + es.indices.exists.side_effect = [as_future(False), as_future(True)] + es.indices.delete_data_stream.return_value = as_future() + + r = runner.DeleteDataStream() + + params = { + "data-streams": ["data-stream-A", "data-stream-B"], + "only-if-exists": True + } + + result = await r(es, params) + + self.assertEqual((1, "ops"), result) + + es.indices.delete_data_stream.assert_called_once_with("data-stream-B", params={}) + + @mock.patch("elasticsearch.Elasticsearch") + @run_async + async def test_deletes_all_data_streams(self, es): + es.indices.delete_data_stream.return_value = as_future() + + r = runner.DeleteDataStream() + + params = { + "data-streams": ["data-stream-A", "data-stream-B"], + "only-if-exists": False, + "request-params": { + "ignore_unavailable": "true", + "expand_wildcards": "none" + } + } + + result = await r(es, params) + + self.assertEqual((2, "ops"), result) + + es.indices.delete_data_stream.assert_has_calls([ + mock.call("data-stream-A", ignore=[404], params=params["request-params"]), + mock.call("data-stream-B", ignore=[404], params=params["request-params"]) + ]) + self.assertEqual(0, es.indices.exists.call_count) + + class CreateIndexTemplateRunnerTests(TestCase): @mock.patch("elasticsearch.Elasticsearch") @run_async diff --git a/tests/track/track_test.py b/tests/track/track_test.py index 6b768efc8..14d0e2950 100644 --- a/tests/track/track_test.py +++ b/tests/track/track_test.py @@ -86,6 +86,22 @@ def test_str(self): self.assertEqual("test", str(track.Index("test"))) +class DataStreamTests(TestCase): + def test_matches_exactly(self): + self.assertTrue(track.DataStream("test").matches("test")) + self.assertFalse(track.DataStream("test").matches(" test")) + + def test_matches_if_no_pattern_is_defined(self): + self.assertTrue(track.DataStream("test").matches(pattern=None)) + + def test_matches_if_catch_all_pattern_is_defined(self): + self.assertTrue(track.DataStream("test").matches(pattern="*")) + self.assertTrue(track.DataStream("test").matches(pattern="_all")) + + def test_str(self): + self.assertEqual("test", str(track.DataStream("test"))) + + class DocumentCorpusTests(TestCase): def test_do_not_filter(self): corpus = track.DocumentCorpus("test", documents=[ @@ -129,6 +145,21 @@ def test_filter_documents_by_indices(self): self.assertEqual(1, len(filtered_corpus.documents)) self.assertEqual("logs-02", filtered_corpus.documents[0].target_index) + def test_filter_documents_by_data_streams(self): + corpus = track.DocumentCorpus("test", documents=[ + track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, number_of_documents=5, + target_data_stream="logs-01"), + track.Documents(source_format="other", number_of_documents=6, target_data_stream="logs-02"), + track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, number_of_documents=7, + target_data_stream="logs-03"), + track.Documents(source_format=None, number_of_documents=8, target_data_stream=None) + ]) + + filtered_corpus = corpus.filter(target_data_streams=["logs-02"]) + self.assertEqual("test", 
filtered_corpus.name) + self.assertEqual(1, len(filtered_corpus.documents)) + self.assertEqual("logs-02", filtered_corpus.documents[0].target_data_stream) + def test_filter_documents_by_format_and_indices(self): corpus = track.DocumentCorpus("test", documents=[ track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, number_of_documents=5, target_index="logs-01"), From 8bd15ef696cd0fb86c0defb008a85ace2b030f99 Mon Sep 17 00:00:00 2001 From: Dale McDiarmid Date: Fri, 16 Oct 2020 12:50:30 +0100 Subject: [PATCH 4/9] More tests --- esrally/track/params.py | 26 +++-- tests/track/params_test.py | 211 ++++++++++++++++++++++++++++++++++++- 2 files changed, 228 insertions(+), 9 deletions(-) diff --git a/esrally/track/params.py b/esrally/track/params.py index 143a4ba79..1d32b8f43 100644 --- a/esrally/track/params.py +++ b/esrally/track/params.py @@ -229,7 +229,7 @@ def params(self): # ensure we pass all parameters... p.update(self._params) p.update({ - "data_streams": self.data_stream_definitions, + "data-streams": self.data_stream_definitions, "request-params": self.request_params }) return p @@ -259,7 +259,7 @@ def params(self): # ensure we pass all parameters... p.update(self._params) p.update({ - "data_streams": self.data_stream_definitions, + "data-streams": self.data_stream_definitions, "request-params": self.request_params, "only-if-exists": self.only_if_exists }) @@ -412,7 +412,7 @@ def __init__(self, track, params, **kwargs): default_index = track.data_streams[0].name else: default_index = None - + # indexes are preferred by data streams can also be queried the same way index_name = params.get("index") if not index_name: index_name = params.get("data-stream", default_index) @@ -435,7 +435,7 @@ def __init__(self, track, params, **kwargs): } if not index_name: - raise exceptions.InvalidSyntax("'index' is mandatory and is missing for operation '{}'".format(kwargs.get("operation_name"))) + raise exceptions.InvalidSyntax("'index' or 'data-stream' is mandatory and is missing for operation '{}'".format(kwargs.get("operation_name"))) if pages: self.query_params["pages"] = pages @@ -511,6 +511,9 @@ def __init__(self, track, params, **kwargs): else: raise exceptions.InvalidSyntax("Unknown 'conflicts' setting [%s]" % id_conflicts) + if params.get("data-streams", None) and self.id_conflicts != IndexIdConflict.NoConflicts: + raise exceptions.InvalidSyntax("'conflicts' cannot be used with 'data-streams'") + if self.id_conflicts != IndexIdConflict.NoConflicts: self.conflict_probability = self.float_param(params, name="conflict-probability", default_value=25, min_value=0, max_value=100, min_operator=operator.lt) @@ -683,13 +686,16 @@ def percent_completed(self): class ForceMergeParamSource(ParamSource): def __init__(self, track, params, **kwargs): super().__init__(track, params, **kwargs) - if len(track.indices) > 0: - # force merge data streams and indices + if len(track.indices) > 0 or len(track.data_streams) > 0: + # force merge data streams and indices - API call is the same so treat as indexes default_index = ','.join(map(str, track.indices + track.data_streams)) else: default_index = "_all" - self._index_name = params.get("index", default_index) + self._index_name = params.get("index") + if not self._index_name: + self._index_name = params.get("data-stream", default_index) + self._max_num_segments = params.get("max-num-segments") self._request_timeout = params.get("request-timeout") self._poll_period = params.get("poll-period", 10) @@ -757,6 +763,9 @@ def create_default_reader(docs, offset, 
num_lines, num_docs, batch_size, bulk_si elif docs.target_data_stream: target = docs.target_data_stream use_create = True + if id_conflicts != IndexIdConflict.NoConflicts: + # can only create docs in data streams + raise exceptions.RallyError("Do not create readers for docs targeting data streams with conflicts") if docs.includes_action_and_meta_data: return SourceOnlyIndexDataReader(docs.document_file, batch_size, bulk_size, source, target, docs.target_type) @@ -886,7 +895,8 @@ def __init__(self, index_name, type_name, conflicting_ids=None, conflict_probabi self.meta_data_update_with_id = '{"update": {"_index": "%s", "_id": "%s"}}\n' % (index_name, "%s") self.meta_data_index_no_id = '{"index": {"_index": "%s"}}\n' % index_name self.meta_data_create_no_id = '{"create": {"_index": "%s"}}\n' % index_name - + if use_create and conflicting_ids: + raise exceptions.RallyError("'use_create' be True with 'conflicting_ids'") self.conflicting_ids = conflicting_ids self.on_conflict = on_conflict self.use_create = use_create diff --git a/tests/track/params_test.py b/tests/track/params_test.py index b52c5a165..10832cefd 100644 --- a/tests/track/params_test.py +++ b/tests/track/params_test.py @@ -143,6 +143,16 @@ def test_generate_action_meta_data_without_id_conflicts(self): self.assertEqual(("index", '{"index": {"_index": "test_index", "_type": "test_type"}}\n'), next(params.GenerateActionMetaData("test_index", "test_type"))) + def test_generate_action_meta_data_create(self): + self.assertEqual(("create", '{"create": {"_index": "test_index"}}\n'), + next(params.GenerateActionMetaData("test_index", None, use_create=True))) + + def test_generate_action_meta_data_create_with_conflicts(self): + with self.assertRaises(exceptions.RallyError) as ctx: + params.GenerateActionMetaData("test_index", None, conflicting_ids=[100, 200, 300, 400], use_create=True) + self.assertEqual("'use_create' be True with 'conflicting_ids'", + ctx.exception.args[0]) + def test_generate_action_meta_data_typeless(self): self.assertEqual(("index", '{"index": {"_index": "test_index"}}\n'), next(params.GenerateActionMetaData("test_index", type_name=None))) @@ -814,6 +824,15 @@ def test_create_with_unknown_on_conflict_setting(self): self.assertEqual("Unknown 'on-conflict' setting [delete]", ctx.exception.args[0]) + def test_create_with_conflicts_and_data_streams(self): + with self.assertRaises(exceptions.InvalidSyntax) as ctx: + params.BulkIndexParamSource(track=track.Track(name="unit-test"), params={ + "data-streams": ["test-data-stream-1", "test-data-stream-2"], + "conflicts": "sequential" + }) + + self.assertEqual("'conflicts' cannot be used with 'data-streams'", ctx.exception.args[0]) + def test_create_with_ingest_percentage_too_low(self): corpus = track.DocumentCorpus(name="default", documents=[ track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, @@ -939,6 +958,41 @@ def test_filters_corpora(self): partition = source.partition(0, 1) self.assertEqual(partition.corpora, [corpora[1]]) + def test_filters_corpora_by_data_stream(self): + corpora = [ + track.DocumentCorpus(name="default", documents=[ + track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, + number_of_documents=10, + target_data_stream="test-data-stream-1" + ) + ]), + track.DocumentCorpus(name="special", documents=[ + track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, + number_of_documents=100, + target_index="test-idx2", + target_type="type" + ) + ]), + track.DocumentCorpus(name="special-2", documents=[ + 
track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, + number_of_documents=10, + target_data_stream="test-data-stream-2" + ) + ]) + ] + + source = params.BulkIndexParamSource( + track=track.Track(name="unit-test", corpora=corpora), + params={ + "data-streams": ["test-data-stream-1", "test-data-stream-2"], + "bulk-size": 5000, + "batch-size": 20000, + "pipeline": "test-pipeline" + }) + + partition = source.partition(0, 1) + self.assertEqual(partition.corpora, [corpora[0],corpora[2]]) + def test_raises_exception_if_no_corpus_matches(self): corpus = track.DocumentCorpus(name="default", documents=[ track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, @@ -1499,6 +1553,46 @@ def test_filter_index(self): self.assertEqual("index2", index) +class CreateDataStreamParamSourceTests(TestCase): + def test_create_data_stream(self): + source = params.CreateDataStreamParamSource(track.Track(name="unit-test"), params={ + "data-stream": "test-data-stream" + }) + p = source.params() + self.assertEqual(1, len(p["data-streams"])) + ds = p["data-streams"][0] + self.assertEqual("test-data-stream", ds) + self.assertEqual({}, p["request-params"]) + + def test_create_data_stream_inline_without_body(self): + source = params.CreateDataStreamParamSource(track.Track(name="unit-test"), params={ + "data-stream": "test-data-stream", + "request-params": { + "wait_for_active_shards": True + } + }) + + p = source.params() + self.assertEqual(1, len(p["data-streams"])) + ds = p["data-streams"][0] + self.assertEqual("test-data-stream", ds) + self.assertDictEqual({ + "wait_for_active_shards": True + }, p["request-params"]) + + def test_filter_data_stream(self): + source = params.CreateDataStreamParamSource(track.Track(name="unit-test", data_streams=[track.DataStream(name="data-stream-1"), + track.DataStream(name="data-stream-2"), + track.DataStream(name="data-stream-3")]), + params={ "data-stream": "data-stream-2"}) + + p = source.params() + self.assertEqual(1, len(p["data-streams"])) + + ds = p["data-streams"][0] + self.assertEqual("data-stream-2", ds) + + class DeleteIndexParamSourceTests(TestCase): def test_delete_index_from_track(self): source = params.DeleteIndexParamSource(track.Track(name="unit-test", indices=[ @@ -1539,6 +1633,46 @@ def test_delete_no_index(self): self.assertEqual("delete-index operation targets no index", ctx.exception.args[0]) +class DeleteDataStreamParamSourceTests(TestCase): + def test_delete_data_stream_from_track(self): + source = params.DeleteDataStreamParamSource(track.Track(name="unit-test", data_streams=[ + track.DataStream(name="data-stream-1"), + track.DataStream(name="data-stream-2"), + track.DataStream(name="data-stream-3") + ]), params={}) + + p = source.params() + + self.assertEqual(["data-stream-1", "data-stream-2", "data-stream-3"], p["data-streams"]) + self.assertDictEqual({}, p["request-params"]) + self.assertTrue(p["only-if-exists"]) + + def test_filter_data_stream_from_track(self): + source = params.DeleteDataStreamParamSource(track.Track(name="unit-test", data_streams=[ + track.DataStream(name="data-stream-1"), + track.DataStream(name="data-stream-2"), + track.DataStream(name="data-stream-3") + ]), params={"data-stream": "data-stream-2", "only-if-exists": False, "request-params": {"allow_no_indices": True}}) + + p = source.params() + + self.assertEqual(["data-stream-2"], p["data-streams"]) + self.assertDictEqual({"allow_no_indices": True}, p["request-params"]) + self.assertFalse(p["only-if-exists"]) + + def test_delete_data_stream_by_name(self): + source = 
params.DeleteDataStreamParamSource(track.Track(name="unit-test"), params={"data-stream": "data-stream-2"}) + + p = source.params() + + self.assertEqual(["data-stream-2"], p["data-streams"]) + + def test_delete_no_data_stream(self): + with self.assertRaises(exceptions.InvalidSyntax) as ctx: + params.DeleteDataStreamParamSource(track.Track(name="unit-test"), params={}) + self.assertEqual("delete-data-stream operation targets no data stream", ctx.exception.args[0]) + + class CreateIndexTemplateParamSourceTests(TestCase): def test_create_index_template_inline(self): source = params.CreateIndexTemplateParamSource(track=track.Track(name="unit-test"), params={ @@ -1711,6 +1845,31 @@ def test_passes_cache(self): } }, p["body"]) + def test_uses_data_stream(self): + ds1 = track.DataStream(name="data-stream-1") + + source = params.SearchParamSource(track=track.Track(name="unit-test", data_streams=[ds1]), params={ + "body": { + "query": { + "match_all": {} + } + }, + "cache": True + }) + p = source.params() + + self.assertEqual(6, len(p)) + self.assertEqual("data-stream-1", p["index"]) + self.assertIsNone(p["type"]) + self.assertEqual({}, p["request-params"]) + self.assertEqual(True, p["cache"]) + self.assertEqual(True, p["response-compression-enabled"]) + self.assertEqual({ + "query": { + "match_all": {} + } + }, p["body"]) + def test_create_without_index(self): with self.assertRaises(exceptions.InvalidSyntax) as ctx: params.SearchParamSource(track=track.Track(name="unit-test"), params={ @@ -1722,7 +1881,7 @@ def test_create_without_index(self): } }, operation_name="test_operation") - self.assertEqual("'index' is mandatory and is missing for operation 'test_operation'", ctx.exception.args[0]) + self.assertEqual("'index' or 'data-stream' is mandatory and is missing for operation 'test_operation'", ctx.exception.args[0]) def test_passes_request_parameters(self): index1 = track.Index(name="index1", types=["type1"]) @@ -1782,6 +1941,34 @@ def test_user_specified_overrides_defaults(self): } }, p["body"]) + def test_user_specified_data_stream_overrides_defaults(self): + ds1 = track.DataStream(name="data-stream-1") + + source = params.SearchParamSource(track=track.Track(name="unit-test", data_streams=[ds1]), params={ + "data-stream": "data-stream-2", + "cache": False, + "response-compression-enabled": False, + "body": { + "query": { + "match_all": {} + } + } + }) + p = source.params() + + self.assertEqual(6, len(p)) + self.assertEqual("data-stream-2", p["index"]) + self.assertIsNone(p["type"]) + self.assertDictEqual({}, p["request-params"]) + # Explicitly check for equality to `False` - assertFalse would also succeed if it is `None`. 
+ self.assertEqual(False, p["cache"]) + self.assertEqual(False, p["response-compression-enabled"]) + self.assertEqual({ + "query": { + "match_all": {} + } + }, p["body"]) + def test_replaces_body_params(self): import copy @@ -1825,6 +2012,18 @@ def test_force_merge_index_from_track(self): self.assertEqual("index1,index2,index3", p["index"]) self.assertEqual("blocking", p["mode"]) + def test_force_merge_data_stream_from_track(self): + source = params.ForceMergeParamSource(track.Track(name="unit-test", data_streams=[ + track.DataStream(name="data-stream-1"), + track.DataStream(name="data-stream-2"), + track.DataStream(name="data-stream-3") + ]), params={}) + + p = source.params() + + self.assertEqual("data-stream-1,data-stream-2,data-stream-3", p["index"]) + self.assertEqual("blocking", p["mode"]) + def test_force_merge_index_by_name(self): source = params.ForceMergeParamSource(track.Track(name="unit-test"), params={"index": "index2"}) @@ -1833,6 +2032,14 @@ def test_force_merge_index_by_name(self): self.assertEqual("index2", p["index"]) self.assertEqual("blocking", p["mode"]) + def test_force_merge_by_data_stream_name(self): + source = params.ForceMergeParamSource(track.Track(name="unit-test"), params={"data-stream": "data-stream-2"}) + + p = source.params() + + self.assertEqual("data-stream-2", p["index"]) + self.assertEqual("blocking", p["mode"]) + def test_default_force_merge_index(self): source = params.ForceMergeParamSource(track.Track(name="unit-test"), params={}) @@ -1854,3 +2061,5 @@ def test_force_merge_all_params(self): self.assertEqual(30, p["request-timeout"]) self.assertEqual(1, p["max-num-segments"]) self.assertEqual("polling", p["mode"]) + + From 107d3e534d19b12ee787eef8e708b310349766d1 Mon Sep 17 00:00:00 2001 From: Dale McDiarmid Date: Fri, 16 Oct 2020 13:16:07 +0100 Subject: [PATCH 5/9] Lint fixes --- esrally/track/loader.py | 23 +++++++++++++---------- esrally/track/params.py | 4 +++- esrally/track/track.py | 23 ++++++++++++++--------- tests/driver/runner_test.py | 4 ++-- tests/track/params_test.py | 2 -- 5 files changed, 32 insertions(+), 24 deletions(-) diff --git a/esrally/track/loader.py b/esrally/track/loader.py index 73a7ccda0..f2ab690e2 100644 --- a/esrally/track/loader.py +++ b/esrally/track/loader.py @@ -1114,22 +1114,25 @@ def _create_corpora(self, corpora_specs, indices, data_streams): target_idx = None target_type = None else: + target_ds = self._r(doc_spec, "target-data-stream", mandatory=False, + error_ctx=docs) + # we need an index if no meta-data are present. target_idx = self._r(doc_spec, "target-index", - mandatory=corpus_target_idx is None and corpus_target_ds is None, + mandatory=corpus_target_idx is None and corpus_target_ds is None + and target_ds is None, error_ctx=docs) target_type = self._r(doc_spec, "target-type", mandatory=False, default_value=corpus_target_type, error_ctx=docs) - # not mandatory as we defn have corpus_target_idx or corpus_target_ds here - target_ds = self._r(doc_spec, "target-data-stream", mandatory=False, - error_ctx=docs) + # here we choose to use either an index or data streams. If either are explicitly specified - # (index takes precedence) this is preferred over any defaults - if not target_idx and not target_ds: - if corpus_target_idx: - target_idx = corpus_target_idx - else: - target_ds = corpus_target_ds + # (index takes precedence) this is preferred over any defaults. Index then takes precedence. 
+ if target_idx: + target_ds = None + elif target_ds is None and corpus_target_idx: + target_idx = corpus_target_idx + elif target_ds is None: + target_ds = corpus_target_ds docs = track.Documents(source_format=source_format, document_file=document_file, diff --git a/esrally/track/params.py b/esrally/track/params.py index 1d32b8f43..87bad0122 100644 --- a/esrally/track/params.py +++ b/esrally/track/params.py @@ -435,7 +435,9 @@ def __init__(self, track, params, **kwargs): } if not index_name: - raise exceptions.InvalidSyntax("'index' or 'data-stream' is mandatory and is missing for operation '{}'".format(kwargs.get("operation_name"))) + raise exceptions.InvalidSyntax( + "'index' or 'data-stream' is mandatory and is missing for operation '{}'".format( + kwargs.get("operation_name"))) if pages: self.query_params["pages"] = pages diff --git a/esrally/track/track.py b/esrally/track/track.py index 00f1165ec..5a6292710 100644 --- a/esrally/track/track.py +++ b/esrally/track/track.py @@ -147,8 +147,10 @@ def __eq__(self, other): class Documents: SOURCE_FORMAT_BULK = "bulk" - def __init__(self, source_format, document_file=None, document_archive=None, base_url=None, includes_action_and_meta_data=False, - number_of_documents=0, compressed_size_in_bytes=0, uncompressed_size_in_bytes=0, target_index=None, target_data_stream=None, target_type=None): + def __init__(self, source_format, document_file=None, document_archive=None, base_url=None, + includes_action_and_meta_data=False, + number_of_documents=0, compressed_size_in_bytes=0, uncompressed_size_in_bytes=0, target_index=None, + target_data_stream=None, target_type=None): """ :param source_format: The format of these documents. Mandatory. @@ -163,10 +165,11 @@ def __init__(self, source_format, document_file=None, document_archive=None, bas a document_archive is given. :param compressed_size_in_bytes: The compressed size in bytes of the benchmark document. Needed for verification of the download and user reporting. Only useful if a document_archive is given (optional but recommended to be set). - :param uncompressed_size_in_bytes: The size in bytes of the benchmark document after decompressing it. Only useful if a - document_archive is given (optional but recommended to be set). + :param uncompressed_size_in_bytes: The size in bytes of the benchmark document after decompressing it. + Only useful if a document_archive is given (optional but recommended to be set). :param target_index: The index to target for bulk operations. May be ``None`` if ``includes_action_and_meta_data`` is ``False``. - :param target_data_stream: The datastream to target for bulk operations. Maybe be ``None`` if ``includes_action_and_meta_data`` is ``False``. + :param target_data_stream: The data stream to target for bulk operations. + Maybe be ``None`` if ``includes_action_and_meta_data`` is ``False``. :param target_type: The document type to target for bulk operations. May be ``None`` if ``includes_action_and_meta_data`` is ``False``. """ @@ -306,8 +309,8 @@ class Track: A track defines the data set that is used. It corresponds loosely to a use case (e.g. logging, event processing, analytics, ...) """ - def __init__(self, name, description=None, meta_data=None, challenges=None, indices=None, data_streams=None, templates=None, corpora=None, - has_plugins=False): + def __init__(self, name, description=None, meta_data=None, challenges=None, indices=None, data_streams=None, + templates=None, corpora=None, has_plugins=False): """ Creates a new track. 
@@ -404,8 +407,10 @@ def __hash__(self): def __eq__(self, othr): return (isinstance(othr, type(self)) and - (self.name, self.meta_data, self.description, self.challenges, self.indices, self.data_streams, self.templates, self.corpora) == - (othr.name, othr.meta_data, othr.description, othr.challenges, othr.indices, othr.data_streams, othr.templates, othr.corpora)) + (self.name, self.meta_data, self.description, self.challenges, self.indices, self.data_streams, + self.templates, self.corpora) == + (othr.name, othr.meta_data, othr.description, othr.challenges, othr.indices, othr.data_streams, + othr.templates, othr.corpora)) class Challenge: diff --git a/tests/driver/runner_test.py b/tests/driver/runner_test.py index a1fca526c..049b6349b 100644 --- a/tests/driver/runner_test.py +++ b/tests/driver/runner_test.py @@ -2154,8 +2154,8 @@ async def test_param_data_streams_mandatory(self, es): params = {} with self.assertRaisesRegex(exceptions.DataError, - "Parameter source for operation 'create-data-stream' did not provide the mandatory parameter 'data-streams'. " - "Please add it to your parameter source."): + "Parameter source for operation 'create-data-stream' did not provide the " + "mandatory parameter 'data-streams'. Please add it to your parameter source."): await r(es, params) self.assertEqual(0, es.indices.create_data_stream.call_count) diff --git a/tests/track/params_test.py b/tests/track/params_test.py index 10832cefd..79f688241 100644 --- a/tests/track/params_test.py +++ b/tests/track/params_test.py @@ -2061,5 +2061,3 @@ def test_force_merge_all_params(self): self.assertEqual(30, p["request-timeout"]) self.assertEqual(1, p["max-num-segments"]) self.assertEqual("polling", p["mode"]) - - From 0e10cc22c020fcfc654d5b31930b0a41a166a56c Mon Sep 17 00:00:00 2001 From: Dale McDiarmid Date: Wed, 21 Oct 2020 12:03:17 +0100 Subject: [PATCH 6/9] Changes post review + don't allow indices + data-streams --- esrally/driver/runner.py | 6 +- esrally/resources/track-schema.json | 2 +- esrally/track/loader.py | 199 ++++++---- esrally/track/params.py | 36 +- tests/driver/runner_test.py | 3 +- tests/track/loader_test.py | 573 +++++++++++++++++++++------- tests/track/params_test.py | 195 ++++++---- 7 files changed, 711 insertions(+), 303 deletions(-) diff --git a/esrally/driver/runner.py b/esrally/driver/runner.py index 9cf8ad7b0..ca0315081 100644 --- a/esrally/driver/runner.py +++ b/esrally/driver/runner.py @@ -1040,7 +1040,7 @@ class CreateDataStream(Runner): async def __call__(self, es, params): data_streams = mandatory(params, "data-streams", self) - request_params = params.get("request-params", {}) + request_params = mandatory(params, "request-params", self) for data_stream in data_streams: await es.indices.create_data_stream(data_stream, params=request_params) return len(data_streams), "ops" @@ -1085,8 +1085,8 @@ async def __call__(self, es, params): ops = 0 data_streams = mandatory(params, "data-streams", self) - only_if_exists = params.get("only-if-exists", False) - request_params = params.get("request-params", {}) + only_if_exists = mandatory(params, "only-if-exists", self) + request_params = mandatory(params, "request-params", self) for data_stream in data_streams: if not only_if_exists: diff --git a/esrally/resources/track-schema.json b/esrally/resources/track-schema.json index ce016a649..ef2edf9da 100644 --- a/esrally/resources/track-schema.json +++ b/esrally/resources/track-schema.json @@ -267,7 +267,7 @@ "minItems": 1, "uniqueItems": true, "items": { - "title": "Index", + "title": "Data 
Stream", "type": "object", "properties": { "name": { diff --git a/esrally/track/loader.py b/esrally/track/loader.py index f2ab690e2..662937f28 100644 --- a/esrally/track/loader.py +++ b/esrally/track/loader.py @@ -301,7 +301,8 @@ def track_names(self): return [self.track_name] def track_dir(self, track_name): - assert track_name == self.track_name, "Expect provided track name [%s] to match [%s]" % (track_name, self.track_name) + assert track_name == self.track_name, "Expect provided track name [%s] to match [%s]" % ( + track_name, self.track_name) return self._track_dir def track_file(self, track_name): @@ -343,7 +344,8 @@ def prepare_track(t, cfg): test_mode = cfg.opts("track", "test.mode.enabled") for corpus in used_corpora(t, cfg): data_root = data_dir(cfg, t.name, corpus.name) - logger.info("Resolved data root directory for document corpus [%s] in track [%s] to %s.", corpus.name, t.name, data_root) + logger.info("Resolved data root directory for document corpus [%s] in track [%s] to %s.", corpus.name, t.name, + data_root) prep = DocumentSetPreparator(t.name, offline, test_mode) for document_set in corpus.documents: @@ -380,8 +382,9 @@ def decompress(self, archive_path, documents_path, uncompressed_size): io.decompress(archive_path, io.dirname(archive_path)) console.println("[OK]") if not os.path.isfile(documents_path): - raise exceptions.DataError("Decompressing [%s] did not create [%s]. Please check with the track author if the compressed " - "archive has been created correctly." % (archive_path, documents_path)) + raise exceptions.DataError( + "Decompressing [%s] did not create [%s]. Please check with the track author if the compressed " + "archive has been created correctly." % (archive_path, documents_path)) extracted_bytes = os.path.getsize(documents_path) if uncompressed_size is not None and extracted_bytes != uncompressed_size: @@ -395,7 +398,8 @@ def download(self, base_url, target_path, size_in_bytes, detail_on_missing_root_ raise exceptions.DataError("%s and it cannot be downloaded because no base URL is provided." % detail_on_missing_root_url) if self.offline: - raise exceptions.SystemSetupError("Cannot find %s. Please disable offline mode and retry again." % target_path) + raise exceptions.SystemSetupError( + "Cannot find %s. Please disable offline mode and retry again." % target_path) data_url = "%s/%s" % (base_url, file_name) try: @@ -413,8 +417,9 @@ def download(self, base_url, target_path, size_in_bytes, detail_on_missing_root_ self.logger.info("Downloaded data from [%s] to [%s].", data_url, target_path) except urllib.error.HTTPError as e: if e.code == 404 and self.test_mode: - raise exceptions.DataError("Track [%s] does not support test mode. Please ask the track author to add it or " - "disable test mode and retry." % self.track_name) + raise exceptions.DataError( + "Track [%s] does not support test mode. Please ask the track author to add it or " + "disable test mode and retry." % self.track_name) else: msg = "Could not download [%s] to [%s]" % (data_url, target_path) if e.reason: @@ -461,7 +466,8 @@ def prepare_document_set(self, document_set, data_root): :param data_root: The data root directory for this document set. 
""" doc_path = os.path.join(data_root, document_set.document_file) - archive_path = os.path.join(data_root, document_set.document_archive) if document_set.has_compressed_corpus() else None + archive_path = os.path.join(data_root, + document_set.document_archive) if document_set.has_compressed_corpus() else None while True: if self.is_locally_available(doc_path) and \ self.has_expected_size(doc_path, document_set.uncompressed_size_in_bytes): @@ -483,7 +489,8 @@ def prepare_document_set(self, document_set, data_root): # provide a specific error message in case there is no download URL if self.is_locally_available(target_path): # convert expected_size eagerly to a string as it might be None (but in that case we'll never see that error message) - msg = "%s is present but does not have the expected size of %s bytes" % (target_path, str(expected_size)) + msg = "%s is present but does not have the expected size of %s bytes" % ( + target_path, str(expected_size)) else: msg = "%s is missing" % target_path @@ -511,7 +518,8 @@ def prepare_bundled_document_set(self, document_set, data_root): :return: See postcondition. """ doc_path = os.path.join(data_root, document_set.document_file) - archive_path = os.path.join(data_root, document_set.document_archive) if document_set.has_compressed_corpus() else None + archive_path = os.path.join(data_root, + document_set.document_archive) if document_set.has_compressed_corpus() else None while True: if self.is_locally_available(doc_path): @@ -571,7 +579,8 @@ def replace_includes(self, base_path, track_fragment): for glob_pattern in match: full_glob_path = os.path.join(base_path, glob_pattern) sub_source = self.read_glob_files(full_glob_path) - repl[glob_pattern] = self.replace_includes(base_path=io.dirname(full_glob_path), track_fragment=sub_source) + repl[glob_pattern] = self.replace_includes(base_path=io.dirname(full_glob_path), + track_fragment=sub_source) def replstring(matchobj): # matchobj.groups() is a tuple and first element contains the matched group id @@ -683,7 +692,8 @@ def relative_glob(start, f): return render_template(loader=jinja2.FileSystemLoader(base_path), template_source=template_source.assembled_source, template_vars=template_vars, - template_internal_vars=default_internal_template_vars(glob_helper=lambda f: relative_glob(base_path, f))) + template_internal_vars=default_internal_template_vars( + glob_helper=lambda f: relative_glob(base_path, f))) def filter_tasks(t, filters, exclude=False): @@ -723,6 +733,7 @@ def filter_out_match(task, filters, exclude): return t + def filters_from_filtered_tasks(filtered_tasks): filters = [] if filtered_tasks: @@ -762,8 +773,9 @@ def post_process_for_test_mode(t): path, ext = io.splitext(document_set.document_file) document_set.document_file = "%s-1k%s" % (path, ext) else: - raise exceptions.RallyAssertionError("Document corpus [%s] has neither compressed nor uncompressed corpus." % - corpus.name) + raise exceptions.RallyAssertionError( + "Document corpus [%s] has neither compressed nor uncompressed corpus." 
% + corpus.name) # we don't want to check sizes document_set.compressed_size_in_bytes = None @@ -787,11 +799,13 @@ def post_process_for_test_mode(t): if leaf_task.warmup_time_period is not None and leaf_task.warmup_time_period > 0: leaf_task.warmup_time_period = 0 if logger.isEnabledFor(logging.DEBUG): - logger.debug("Resetting warmup time period for [%s] to [%d] seconds.", str(leaf_task), leaf_task.warmup_time_period) + logger.debug("Resetting warmup time period for [%s] to [%d] seconds.", str(leaf_task), + leaf_task.warmup_time_period) if leaf_task.time_period is not None and leaf_task.time_period > 10: leaf_task.time_period = 10 if logger.isEnabledFor(logging.DEBUG): - logger.debug("Resetting measurement time period for [%s] to [%d] seconds.", str(leaf_task), leaf_task.time_period) + logger.debug("Resetting measurement time period for [%s] to [%d] seconds.", str(leaf_task), + leaf_task.time_period) leaf_task.params.pop("target-throughput", None) leaf_task.params.pop("target-interval", None) @@ -851,7 +865,8 @@ def read(self, track_name, track_spec_file, mapping_dir): # involving lines numbers and it also does not bloat Rally's log file so much. tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".json") try: - rendered = render_template_from_file(track_spec_file, self.track_params, complete_track_params=self.complete_track_params) + rendered = render_template_from_file(track_spec_file, self.track_params, + complete_track_params=self.complete_track_params) with open(tmp.name, "wt", encoding="utf-8") as f: f.write(rendered) self.logger.info("Final rendered track for '%s' has been written to '%s'.", track_spec_file, tmp.name) @@ -875,7 +890,8 @@ def read(self, track_name, track_spec_file, mapping_dir): raise TrackSyntaxError(msg) except Exception as e: self.logger.exception("Could not load [%s].", track_spec_file) - msg = "Could not load '{}'. The complete track has been written to '{}' for diagnosis.".format(track_spec_file, tmp.name) + msg = "Could not load '{}'. The complete track has been written to '{}' for diagnosis.".format( + track_spec_file, tmp.name) # Convert to string early on to avoid serialization errors with Jinja exceptions. raise TrackSyntaxError(msg, str(e)) # check the track version before even attempting to validate the JSON format to avoid bogus errors. @@ -883,21 +899,25 @@ def read(self, track_name, track_spec_file, mapping_dir): try: track_version = int(raw_version) except ValueError: - raise exceptions.InvalidSyntax("version identifier for track %s must be numeric but was [%s]" % (track_name, str(raw_version))) + raise exceptions.InvalidSyntax( + "version identifier for track %s must be numeric but was [%s]" % (track_name, str(raw_version))) if TrackFileReader.MINIMUM_SUPPORTED_TRACK_VERSION > track_version: - raise exceptions.RallyError("Track {} is on version {} but needs to be updated at least to version {} to work with the " - "current version of Rally.".format(track_name, track_version, - TrackFileReader.MINIMUM_SUPPORTED_TRACK_VERSION)) + raise exceptions.RallyError( + "Track {} is on version {} but needs to be updated at least to version {} to work with the " + "current version of Rally.".format(track_name, track_version, + TrackFileReader.MINIMUM_SUPPORTED_TRACK_VERSION)) if TrackFileReader.MAXIMUM_SUPPORTED_TRACK_VERSION < track_version: - raise exceptions.RallyError("Track {} requires a newer version of Rally. 
Please upgrade Rally (supported track version: {}, " - "required track version: {}).".format(track_name, TrackFileReader.MAXIMUM_SUPPORTED_TRACK_VERSION, - track_version)) + raise exceptions.RallyError( + "Track {} requires a newer version of Rally. Please upgrade Rally (supported track version: {}, " + "required track version: {}).".format(track_name, TrackFileReader.MAXIMUM_SUPPORTED_TRACK_VERSION, + track_version)) try: jsonschema.validate(track_spec, self.track_schema) except jsonschema.exceptions.ValidationError as ve: raise TrackSyntaxError( "Track '{}' is invalid.\n\nError details: {}\nInstance: {}\nPath: {}\nSchema path: {}".format( - track_name, ve.message, json.dumps(ve.instance, indent=4, sort_keys=True), ve.absolute_path, ve.absolute_schema_path)) + track_name, ve.message, json.dumps(ve.instance, indent=4, sort_keys=True), ve.absolute_path, + ve.absolute_schema_path)) current_track = self.read_track(track_name, track_spec, mapping_dir) @@ -988,10 +1008,14 @@ def __call__(self, track_name, track_specification, mapping_dir): indices = [self._create_index(idx, mapping_dir) for idx in self._r(track_specification, "indices", mandatory=False, default_value=[])] data_streams = [self._create_data_stream(idx) - for idx in self._r(track_specification, "data-streams", mandatory=False, default_value=[])] + for idx in self._r(track_specification, "data-streams", mandatory=False, default_value=[])] + if len(indices) > 0 and len(data_streams) > 0: + # we guard against this early and support either or + raise TrackSyntaxError("indices and data-streams cannot both be specified") templates = [self._create_index_template(tpl, mapping_dir) for tpl in self._r(track_specification, "templates", mandatory=False, default_value=[])] - corpora = self._create_corpora(self._r(track_specification, "corpora", mandatory=False, default_value=[]), indices, data_streams) + corpora = self._create_corpora(self._r(track_specification, "corpora", mandatory=False, default_value=[]), + indices, data_streams) challenges = self._create_challenges(track_specification) # at this point, *all* track params must have been referenced in the templates return track.Track(name=self.name, meta_data=meta_data, description=description, challenges=challenges, @@ -1031,7 +1055,8 @@ def _create_index(self, index_spec, mapping_dir): else: body = None - return track.Index(name=index_name, body=body, types=self._r(index_spec, "types", mandatory=False, default_value=[])) + return track.Index(name=index_name, body=body, + types=self._r(index_spec, "types", mandatory=False, default_value=[])) def _create_data_stream(self, data_stream_spec): return track.DataStream(name=self._r(data_stream_spec, "name")) @@ -1062,6 +1087,8 @@ def _load_template(self, contents, description): raise TrackSyntaxError("Could not load file template for '%s'" % description, str(e)) def _create_corpora(self, corpora_specs, indices, data_streams): + if len(indices) > 0 and len(data_streams) > 0: + raise TrackSyntaxError("indices and data-streams cannot both be specified") document_corpora = [] known_corpora_names = set() for corpus_spec in corpora_specs: @@ -1074,22 +1101,29 @@ def _create_corpora(self, corpora_specs, indices, data_streams): corpus = track.DocumentCorpus(name=name) # defaults on corpus level default_base_url = self._r(corpus_spec, "base-url", mandatory=False, default_value=None) - default_source_format = self._r(corpus_spec, "source-format", mandatory=False, default_value=track.Documents.SOURCE_FORMAT_BULK) - default_action_and_meta_data = 
self._r(corpus_spec, "includes-action-and-meta-data", mandatory=False, default_value=False) + default_source_format = self._r(corpus_spec, "source-format", mandatory=False, + default_value=track.Documents.SOURCE_FORMAT_BULK) + default_action_and_meta_data = self._r(corpus_spec, "includes-action-and-meta-data", mandatory=False, + default_value=False) + corpus_target_idx = None + corpus_target_ds = None + corpus_target_type = None if len(indices) == 1: corpus_target_idx = self._r(corpus_spec, "target-index", mandatory=False, default_value=indices[0].name) - else: + elif len(indices) > 0: corpus_target_idx = self._r(corpus_spec, "target-index", mandatory=False) if len(data_streams) == 1: - corpus_target_ds = self._r(corpus_spec, "target-data-stream", mandatory=False, default_value=data_streams[0].name) - else: + corpus_target_ds = self._r(corpus_spec, "target-data-stream", mandatory=False, + default_value=data_streams[0].name) + elif len(data_streams) > 0: corpus_target_ds = self._r(corpus_spec, "target-data-stream", mandatory=False) if len(indices) == 1 and len(indices[0].types) == 1: - corpus_target_type = self._r(corpus_spec, "target-type", mandatory=False, default_value=indices[0].types[0]) - else: + corpus_target_type = self._r(corpus_spec, "target-type", mandatory=False, + default_value=indices[0].types[0]) + elif len(indices) > 0: corpus_target_type = self._r(corpus_spec, "target-type", mandatory=False) for doc_spec in self._r(corpus_spec, "documents"): @@ -1113,26 +1147,36 @@ def _create_corpora(self, corpora_specs, indices, data_streams): if includes_action_and_meta_data: target_idx = None target_type = None + target_ds = None else: - target_ds = self._r(doc_spec, "target-data-stream", mandatory=False, - error_ctx=docs) + target_type = self._r(doc_spec, "target-type", mandatory=False, + default_value=corpus_target_type, error_ctx=docs) - # we need an index if no meta-data are present. + # require to be specified id we're using data streams and we have no default + target_ds = self._r(doc_spec, "target-data-stream", + mandatory=len(data_streams) > 0 and corpus_target_ds is None, + default_value=corpus_target_ds, + error_ctx=docs) + if target_ds and len(indices) > 0: + # if indices are in use we error + raise TrackSyntaxError("target-data-stream cannot be used when using indices") + elif target_ds and target_type: + raise TrackSyntaxError("target-type cannot be used when using data-streams") + + # need an index if we're using indices and no meta-data are present and we don't have a default target_idx = self._r(doc_spec, "target-index", - mandatory=corpus_target_idx is None and corpus_target_ds is None - and target_ds is None, + mandatory=len(indices) > 0 and corpus_target_idx is None, + default_value=corpus_target_idx, error_ctx=docs) - target_type = self._r(doc_spec, "target-type", mandatory=False, - default_value=corpus_target_type, error_ctx=docs) + # either target_idx or target_ds + if target_idx and len(data_streams) > 0: + # if data streams are in use we error + raise TrackSyntaxError("target-index cannot be used when using data-streams") - # here we choose to use either an index or data streams. If either are explicitly specified - # (index takes precedence) this is preferred over any defaults. Index then takes precedence. 
- if target_idx: - target_ds = None - elif target_ds is None and corpus_target_idx: - target_idx = corpus_target_idx - elif target_ds is None: - target_ds = corpus_target_ds + # we need one or the other + if target_idx is None and target_ds is None: + raise TrackSyntaxError(f"a {'target-index' if len(indices) > 0 else 'target-data-stream'} " + f"is required for {docs}" ) docs = track.Documents(source_format=source_format, document_file=document_file, @@ -1147,7 +1191,6 @@ def _create_corpora(self, corpora_specs, indices, data_streams): corpus.documents.append(docs) else: self._error("Unknown source-format [%s] in document corpus [%s]." % (source_format, name)) - document_corpora.append(corpus) return document_corpora @@ -1171,8 +1214,9 @@ def _create_challenges(self, track_spec): "explicit call to the cluster settings API.".format(self.name), logger=self.logger) if default and default_challenge is not None: - self._error("Both '%s' and '%s' are defined as default challenges. Please define only one of them as default." - % (default_challenge.name, name)) + self._error( + "Both '%s' and '%s' are defined as default challenges. Please define only one of them as default." + % (default_challenge.name, name)) if name in known_challenge_names: self._error("Duplicate challenge with name '%s'." % name) known_challenge_names.add(name) @@ -1191,8 +1235,9 @@ def _create_challenges(self, track_spec): for task in schedule: for sub_task in task: if sub_task.name in known_task_names: - self._error("Challenge '%s' contains multiple tasks with the name '%s'. Please use the task's name property to " - "assign a unique name for each task." % (name, sub_task.name)) + self._error( + "Challenge '%s' contains multiple tasks with the name '%s'. Please use the task's name property to " + "assign a unique name for each task." % (name, sub_task.name)) else: known_task_names.add(sub_task.name) @@ -1210,8 +1255,9 @@ def _create_challenges(self, track_spec): challenges.append(challenge) if challenges and default_challenge is None: - self._error("No default challenge specified. Please edit the track and add \"default\": true to one of the challenges %s." - % ", ".join([c.name for c in challenges])) + self._error( + "No default challenge specified. Please edit the track and add \"default\": true to one of the challenges %s." 
+ % ", ".join([c.name for c in challenges])) return challenges def _get_challenge_specs(self, track_spec): @@ -1224,7 +1270,8 @@ def _get_challenge_specs(self, track_spec): if count_defined == 0: self._error("You must define 'challenge', 'challenges' or 'schedule' but none is specified.") elif count_defined > 1: - self._error("Multiple out of 'challenge', 'challenges' or 'schedule' are defined but only one of them is allowed.") + self._error( + "Multiple out of 'challenge', 'challenges' or 'schedule' are defined but only one of them is allowed.") elif challenge is not None: return [challenge], False elif challenges is not None: @@ -1235,7 +1282,8 @@ def _get_challenge_specs(self, track_spec): "schedule": schedule }], True else: - raise AssertionError("Unexpected: schedule=[{}], challenge=[{}], challenges=[{}]".format(schedule, challenge, challenges)) + raise AssertionError( + "Unexpected: schedule=[{}], challenge=[{}], challenges=[{}]".format(schedule, challenge, challenges)) def parse_parallel(self, ops_spec, ops, challenge_name): # use same default values as #parseTask() in case the 'parallel' element did not specify anything @@ -1257,11 +1305,13 @@ def parse_parallel(self, ops_spec, ops, challenge_name): if task.completes_parent and not completion_task: completion_task = task elif task.completes_parent: - self._error("'parallel' element for challenge '%s' contains multiple tasks with the name '%s' which are marked with " - "'completed-by' but only task is allowed to match." % (challenge_name, completed_by)) + self._error( + "'parallel' element for challenge '%s' contains multiple tasks with the name '%s' which are marked with " + "'completed-by' but only task is allowed to match." % (challenge_name, completed_by)) if not completion_task: - self._error("'parallel' element for challenge '%s' is marked with 'completed-by' with task name '%s' but no task with " - "this name exists." % (challenge_name, completed_by)) + self._error( + "'parallel' element for challenge '%s' is marked with 'completed-by' with task name '%s' but no task with " + "this name exists." % (challenge_name, completed_by)) return track.Parallel(tasks, clients) def parse_task(self, task_spec, ops, challenge_name, default_warmup_iterations=None, default_iterations=None, @@ -1281,8 +1331,10 @@ def parse_task(self, task_spec, ops, challenge_name, default_warmup_iterations=N meta_data=self._r(task_spec, "meta", error_ctx=op.name, mandatory=False), warmup_iterations=self._r(task_spec, "warmup-iterations", error_ctx=op.name, mandatory=False, default_value=default_warmup_iterations), - iterations=self._r(task_spec, "iterations", error_ctx=op.name, mandatory=False, default_value=default_iterations), - warmup_time_period=self._r(task_spec, "warmup-time-period", error_ctx=op.name, mandatory=False, + iterations=self._r(task_spec, "iterations", error_ctx=op.name, mandatory=False, + default_value=default_iterations), + warmup_time_period=self._r(task_spec, "warmup-time-period", error_ctx=op.name, + mandatory=False, default_value=default_warmup_time_period), time_period=self._r(task_spec, "time-period", error_ctx=op.name, mandatory=False, default_value=default_time_period), @@ -1292,11 +1344,15 @@ def parse_task(self, task_spec, ops, challenge_name, default_warmup_iterations=N # this is to provide scheduler-specific parameters for custom schedulers. 
params=task_spec) if task.warmup_iterations is not None and task.time_period is not None: - self._error("Operation '%s' in challenge '%s' defines '%d' warmup iterations and a time period of '%d' seconds. Please do not " - "mix time periods and iterations." % (op.name, challenge_name, task.warmup_iterations, task.time_period)) + self._error( + "Operation '%s' in challenge '%s' defines '%d' warmup iterations and a time period of '%d' seconds. Please do not " + "mix time periods and iterations." % ( + op.name, challenge_name, task.warmup_iterations, task.time_period)) elif task.warmup_time_period is not None and task.iterations is not None: - self._error("Operation '%s' in challenge '%s' defines a warmup time period of '%d' seconds and '%d' iterations. Please do not " - "mix time periods and iterations." % (op.name, challenge_name, task.warmup_time_period, task.iterations)) + self._error( + "Operation '%s' in challenge '%s' defines a warmup time period of '%d' seconds and '%d' iterations. Please do not " + "mix time periods and iterations." % ( + op.name, challenge_name, task.warmup_time_period, task.iterations)) return task @@ -1341,6 +1397,7 @@ def parse_operation(self, op_spec, error_ctx="operations"): op_type = op_type_name try: - return track.Operation(name=op_name, meta_data=meta_data, operation_type=op_type, params=params, param_source=param_source) + return track.Operation(name=op_name, meta_data=meta_data, operation_type=op_type, params=params, + param_source=param_source) except exceptions.InvalidSyntax as e: raise TrackSyntaxError("Invalid operation [%s]: %s" % (op_name, str(e))) diff --git a/esrally/track/params.py b/esrally/track/params.py index 87bad0122..57ff35e73 100644 --- a/esrally/track/params.py +++ b/esrally/track/params.py @@ -385,7 +385,7 @@ def params(self): # "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.53 Safari/525.19", # "Mozilla/5.0 (IE 11.0; Windows NT 6.3; Trident/7.0; .NET4.0E; .NET4.0C; rv:11.0) like Gecko", # "Mozilla/5.0 (IE 11.0; Windows NT 6.3; WOW64; Trident/7.0; Touch; rv:11.0) like Gecko" -# ] +# ] if not index_name: # }, # "index": "logs-*", # "body": { @@ -407,16 +407,19 @@ class SearchParamSource(ParamSource): def __init__(self, track, params, **kwargs): super().__init__(track, params, **kwargs) if len(track.indices) == 1: - default_index = track.indices[0].name + default_target = track.indices[0].name elif len(track.data_streams) == 1: - default_index = track.data_streams[0].name + default_target = track.data_streams[0].name else: - default_index = None + default_target = None # indexes are preferred by data streams can also be queried the same way - index_name = params.get("index") - if not index_name: - index_name = params.get("data-stream", default_index) + target_name = params.get("index") type_name = params.get("type") + if not target_name: + target_name = params.get("data-stream", default_target) + if target_name and type_name: + raise exceptions.InvalidSyntax( + f"'type' not supported with 'data-stream' for operation '{kwargs.get('operation_name')}'") request_cache = params.get("cache", None) query_body = params.get("body", None) query_body_params = params.get("body-params", None) @@ -426,7 +429,7 @@ def __init__(self, track, params, **kwargs): response_compression_enabled = params.get("response-compression-enabled", True) self.query_params = { - "index": index_name, + "index": target_name, "type": type_name, "cache": request_cache, "request-params": request_params, @@ -434,10 +437,9 
@@ def __init__(self, track, params, **kwargs): "body": query_body } - if not index_name: + if not target_name: raise exceptions.InvalidSyntax( - "'index' or 'data-stream' is mandatory and is missing for operation '{}'".format( - kwargs.get("operation_name"))) + f"'index' or 'data-stream' is mandatory and is missing for operation '{kwargs.get('operation_name')}'") if pages: self.query_params["pages"] = pages @@ -690,13 +692,13 @@ def __init__(self, track, params, **kwargs): super().__init__(track, params, **kwargs) if len(track.indices) > 0 or len(track.data_streams) > 0: # force merge data streams and indices - API call is the same so treat as indexes - default_index = ','.join(map(str, track.indices + track.data_streams)) + default_target = ','.join(map(str, track.indices + track.data_streams)) else: - default_index = "_all" + default_target = "_all" - self._index_name = params.get("index") - if not self._index_name: - self._index_name = params.get("data-stream", default_index) + self._target_name = params.get("index") + if not self._target_name: + self._target_name = params.get("data-stream", default_target) self._max_num_segments = params.get("max-num-segments") self._request_timeout = params.get("request-timeout") @@ -705,7 +707,7 @@ def __init__(self, track, params, **kwargs): def params(self): return { - "index": self._index_name, + "index": self._target_name, "max-num-segments": self._max_num_segments, "request-timeout": self._request_timeout, "mode": self._mode, diff --git a/tests/driver/runner_test.py b/tests/driver/runner_test.py index 049b6349b..51527738b 100644 --- a/tests/driver/runner_test.py +++ b/tests/driver/runner_test.py @@ -2219,7 +2219,8 @@ async def test_deletes_existing_data_streams(self, es): params = { "data-streams": ["data-stream-A", "data-stream-B"], - "only-if-exists": True + "only-if-exists": True, + "request-params": {} } result = await r(es, params) diff --git a/tests/track/loader_test.py b/tests/track/loader_test.py index c3213a231..e5febbaa2 100644 --- a/tests/track/loader_test.py +++ b/tests/track/loader_test.py @@ -79,7 +79,8 @@ def test_track_from_named_pipe(self, is_file, is_dir, path_exists): with self.assertRaises(exceptions.SystemSetupError) as ctx: loader.SimpleTrackRepository("a named pipe cannot point to a track") - self.assertEqual("a named pipe cannot point to a track is neither a file nor a directory", ctx.exception.args[0]) + self.assertEqual("a named pipe cannot point to a track is neither a file nor a directory", + ctx.exception.args[0]) @mock.patch("os.path.exists") def test_track_from_non_existing_path(self, path_exists): @@ -199,7 +200,8 @@ def test_raise_error_on_wrong_uncompressed_file_size(self, is_file, get_size, de compressed_size_in_bytes=200, uncompressed_size_in_bytes=2000), data_root="/tmp") - self.assertEqual("[/tmp/docs.json] is corrupt. Extracted [1] bytes but [2000] bytes are expected.", ctx.exception.args[0]) + self.assertEqual("[/tmp/docs.json] is corrupt. Extracted [1] bytes but [2000] bytes are expected.", + ctx.exception.args[0]) decompress.assert_called_with("/tmp/docs.json.bz2", "/tmp") @@ -225,8 +227,9 @@ def test_raise_error_if_compressed_does_not_contain_expected_document_file(self, compressed_size_in_bytes=200, uncompressed_size_in_bytes=2000), data_root="/tmp") - self.assertEqual("Decompressing [/tmp/docs.json.bz2] did not create [/tmp/docs.json]. 
Please check with the track author if the " - "compressed archive has been created correctly.", ctx.exception.args[0]) + self.assertEqual( + "Decompressing [/tmp/docs.json.bz2] did not create [/tmp/docs.json]. Please check with the track author if the " + "compressed archive has been created correctly.", ctx.exception.args[0]) decompress.assert_called_with("/tmp/docs.json.bz2", "/tmp") @@ -276,7 +279,8 @@ def test_download_document_archive_if_no_file_available(self, is_file, get_size, @mock.patch("esrally.utils.io.ensure_dir") @mock.patch("os.path.getsize") @mock.patch("os.path.isfile") - def test_download_document_file_if_no_file_available(self, is_file, get_size, ensure_dir, download, prepare_file_offset_table): + def test_download_document_file_if_no_file_available(self, is_file, get_size, ensure_dir, download, + prepare_file_offset_table): # uncompressed file does not exist # file check for uncompressed file before download attempt (for potential error message) # after download uncompressed file exists @@ -321,7 +325,8 @@ def test_raise_download_error_if_offline(self, is_file, ensure_dir, download): uncompressed_size_in_bytes=2000), data_root="/tmp") - self.assertEqual("Cannot find /tmp/docs.json. Please disable offline mode and retry again.", ctx.exception.args[0]) + self.assertEqual("Cannot find /tmp/docs.json. Please disable offline mode and retry again.", + ctx.exception.args[0]) self.assertEqual(0, ensure_dir.call_count) self.assertEqual(0, download.call_count) @@ -369,8 +374,9 @@ def test_raise_download_error_if_no_url_provided_and_wrong_file_size(self, is_fi uncompressed_size_in_bytes=2000), data_root="/tmp") - self.assertEqual("/tmp/docs.json is present but does not have the expected size of 2000 bytes and it cannot be downloaded because " - "no base URL is provided.", ctx.exception.args[0]) + self.assertEqual( + "/tmp/docs.json is present but does not have the expected size of 2000 bytes and it cannot be downloaded because " + "no base URL is provided.", ctx.exception.args[0]) self.assertEqual(0, ensure_dir.call_count) self.assertEqual(0, download.call_count) @@ -384,8 +390,9 @@ def test_raise_download_error_no_test_mode_file(self, is_file, ensure_dir, downl # uncompressed file does not exist is_file.return_value = False - download.side_effect = urllib.error.HTTPError("http://benchmarks.elasticsearch.org.s3.amazonaws.com/corpora/unit-test/docs-1k.json", - 404, "", None, None) + download.side_effect = urllib.error.HTTPError( + "http://benchmarks.elasticsearch.org.s3.amazonaws.com/corpora/unit-test/docs-1k.json", + 404, "", None, None) p = loader.DocumentSetPreparator(track_name="unit-test", offline=False, test_mode=True) @@ -397,8 +404,9 @@ def test_raise_download_error_no_test_mode_file(self, is_file, ensure_dir, downl uncompressed_size_in_bytes=None), data_root="/tmp") - self.assertEqual("Track [unit-test] does not support test mode. Please ask the track author to add it or disable test mode " - "and retry.", ctx.exception.args[0]) + self.assertEqual( + "Track [unit-test] does not support test mode. 
Please ask the track author to add it or disable test mode " + "and retry.", ctx.exception.args[0]) ensure_dir.assert_called_with("/tmp") download.assert_called_with("http://benchmarks.elasticsearch.org/corpora/unit-test/docs-1k.json", @@ -437,7 +445,8 @@ def test_raise_download_error_on_connection_problems(self, is_file, ensure_dir, @mock.patch("esrally.utils.io.decompress") @mock.patch("os.path.getsize") @mock.patch("os.path.isfile") - def test_prepare_bundled_document_set_if_document_file_available(self, is_file, get_size, decompress, prepare_file_offset_table): + def test_prepare_bundled_document_set_if_document_file_available(self, is_file, get_size, decompress, + prepare_file_offset_table): is_file.return_value = True # check only uncompressed get_size.side_effect = [2000] @@ -445,13 +454,14 @@ def test_prepare_bundled_document_set_if_document_file_available(self, is_file, p = loader.DocumentSetPreparator(track_name="unit-test", offline=False, test_mode=False) - self.assertTrue(p.prepare_bundled_document_set(document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, - document_file="docs.json", - document_archive="docs.json.bz2", - number_of_documents=5, - compressed_size_in_bytes=200, - uncompressed_size_in_bytes=2000), - data_root=".")) + self.assertTrue(p.prepare_bundled_document_set( + document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, + document_file="docs.json", + document_archive="docs.json.bz2", + number_of_documents=5, + compressed_size_in_bytes=200, + uncompressed_size_in_bytes=2000), + data_root=".")) prepare_file_offset_table.assert_called_with("./docs.json") @@ -459,19 +469,21 @@ def test_prepare_bundled_document_set_if_document_file_available(self, is_file, @mock.patch("esrally.utils.io.decompress") @mock.patch("os.path.getsize") @mock.patch("os.path.isfile") - def test_prepare_bundled_document_set_does_nothing_if_no_document_files(self, is_file, get_size, decompress, prepare_file_offset_table): + def test_prepare_bundled_document_set_does_nothing_if_no_document_files(self, is_file, get_size, decompress, + prepare_file_offset_table): # no files present is_file.return_value = False p = loader.DocumentSetPreparator(track_name="unit-test", offline=False, test_mode=False) - self.assertFalse(p.prepare_bundled_document_set(document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, - document_file="docs.json", - document_archive="docs.json.bz2", - number_of_documents=5, - compressed_size_in_bytes=200, - uncompressed_size_in_bytes=2000), - data_root=".")) + self.assertFalse(p.prepare_bundled_document_set( + document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, + document_file="docs.json", + document_archive="docs.json.bz2", + number_of_documents=5, + compressed_size_in_bytes=200, + uncompressed_size_in_bytes=2000), + data_root=".")) self.assertEqual(0, decompress.call_count) self.assertEqual(0, prepare_file_offset_table.call_count) @@ -608,13 +620,15 @@ def test_used_corpora(self): {d.document_archive for d in used_corpora[0].documents}) self.assertEqual("http_logs_unparsed", used_corpora[1].name) - self.assertEqual({"documents-201998.unparsed.json.bz2"}, {d.document_archive for d in used_corpora[1].documents}) + self.assertEqual({"documents-201998.unparsed.json.bz2"}, + {d.document_archive for d in used_corpora[1].documents}) @mock.patch("esrally.utils.io.prepare_file_offset_table") @mock.patch("esrally.utils.io.decompress") @mock.patch("os.path.getsize") @mock.patch("os.path.isfile") 
- def test_prepare_bundled_document_set_decompresses_compressed_docs(self, is_file, get_size, decompress, prepare_file_offset_table): + def test_prepare_bundled_document_set_decompresses_compressed_docs(self, is_file, get_size, decompress, + prepare_file_offset_table): # uncompressed is missing # decompressed is present # check if uncompressed is present after decompression @@ -628,13 +642,14 @@ def test_prepare_bundled_document_set_decompresses_compressed_docs(self, is_file p = loader.DocumentSetPreparator(track_name="unit-test", offline=False, test_mode=False) - self.assertTrue(p.prepare_bundled_document_set(document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, - document_file="docs.json", - document_archive="docs.json.bz2", - number_of_documents=5, - compressed_size_in_bytes=200, - uncompressed_size_in_bytes=2000), - data_root=".")) + self.assertTrue(p.prepare_bundled_document_set( + document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, + document_file="docs.json", + document_archive="docs.json.bz2", + number_of_documents=5, + compressed_size_in_bytes=200, + uncompressed_size_in_bytes=2000), + data_root=".")) prepare_file_offset_table.assert_called_with("./docs.json") @@ -650,21 +665,24 @@ def test_prepare_bundled_document_set_error_compressed_docs_wrong_size(self, is_ p = loader.DocumentSetPreparator(track_name="unit-test", offline=False, test_mode=False) with self.assertRaises(exceptions.DataError) as ctx: - p.prepare_bundled_document_set(document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, - document_file="docs.json", - document_archive="docs.json.bz2", - number_of_documents=5, - compressed_size_in_bytes=200, - uncompressed_size_in_bytes=2000), - data_root=".") - - self.assertEqual("./docs.json.bz2 is present but does not have the expected size of 200 bytes.", ctx.exception.args[0]) + p.prepare_bundled_document_set( + document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, + document_file="docs.json", + document_archive="docs.json.bz2", + number_of_documents=5, + compressed_size_in_bytes=200, + uncompressed_size_in_bytes=2000), + data_root=".") + + self.assertEqual("./docs.json.bz2 is present but does not have the expected size of 200 bytes.", + ctx.exception.args[0]) @mock.patch("esrally.utils.io.prepare_file_offset_table") @mock.patch("esrally.utils.io.decompress") @mock.patch("os.path.getsize") @mock.patch("os.path.isfile") - def test_prepare_bundled_document_set_uncompressed_docs_wrong_size(self, is_file, get_size, decompress, prepare_file_offset_table): + def test_prepare_bundled_document_set_uncompressed_docs_wrong_size(self, is_file, get_size, decompress, + prepare_file_offset_table): # uncompressed is present is_file.side_effect = [True] # uncompressed @@ -673,14 +691,16 @@ def test_prepare_bundled_document_set_uncompressed_docs_wrong_size(self, is_file p = loader.DocumentSetPreparator(track_name="unit-test", offline=False, test_mode=False) with self.assertRaises(exceptions.DataError) as ctx: - p.prepare_bundled_document_set(document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, - document_file="docs.json", - document_archive="docs.json.bz2", - number_of_documents=5, - compressed_size_in_bytes=200, - uncompressed_size_in_bytes=2000), - data_root=".") - self.assertEqual("./docs.json is present but does not have the expected size of 2000 bytes.", ctx.exception.args[0]) + p.prepare_bundled_document_set( + 
document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, + document_file="docs.json", + document_archive="docs.json.bz2", + number_of_documents=5, + compressed_size_in_bytes=200, + uncompressed_size_in_bytes=2000), + data_root=".") + self.assertEqual("./docs.json is present but does not have the expected size of 2000 bytes.", + ctx.exception.args[0]) self.assertEqual(0, prepare_file_offset_table.call_count) @@ -799,7 +819,8 @@ def test_render_simple_template(self): } """ - rendered = loader.render_template(template, template_internal_vars=TemplateRenderTests.unittest_template_internal_vars) + rendered = loader.render_template(template, + template_internal_vars=TemplateRenderTests.unittest_template_internal_vars) expected = """ { @@ -851,7 +872,7 @@ def key_globber(e): source = io.DictStringFileSourceFactory({ "dynamic-key-1": [ textwrap.dedent('"dkey1": "value1"') - ], + ], "dynamic-key-2": [ textwrap.dedent('"dkey2": "value2"') ], @@ -1199,7 +1220,7 @@ def test_post_processes_track_spec(self): } complete_track_params = loader.CompleteTrackParams() - index_body = '{"settings": {"index.number_of_shards": {{ number_of_shards | default(5) }}, '\ + index_body = '{"settings": {"index.number_of_shards": {{ number_of_shards | default(5) }}, ' \ '"index.number_of_replicas": {{ number_of_replicas | default(0)}} }}' self.assertEqual( @@ -1237,7 +1258,8 @@ def test_sets_absolute_path(self, path_exists): cfg.add(config.Scope.application, "benchmarks", "local.dataset.cache", "/data") default_challenge = track.Challenge("default", default=True, schedule=[ - track.Task(name="index", operation=track.Operation("index", operation_type=track.OperationType.Bulk), clients=4) + track.Task(name="index", operation=track.Operation("index", operation_type=track.OperationType.Bulk), + clients=4) ]) another_challenge = track.Challenge("other", default=False) t = track.Track(name="u", challenges=[another_challenge, default_challenge], @@ -1273,7 +1295,8 @@ def test_rejects_invalid_syntax(self): def test_rejects_unknown_filter_type(self): with self.assertRaises(exceptions.SystemSetupError) as ctx: loader.filters_from_filtered_tasks(["valid", "op-type:index"]) - self.assertEqual("Invalid format for filtered tasks: [op-type:index]. Expected [type] but got [op-type].", ctx.exception.args[0]) + self.assertEqual("Invalid format for filtered tasks: [op-type:index]. Expected [type] but got [op-type].", + ctx.exception.args[0]) def test_filters_tasks(self): track_specification = { @@ -1347,10 +1370,10 @@ def test_filters_tasks(self): self.assertEqual(4, len(full_track.challenges[0].schedule)) filtered = loader.filter_tasks(full_track, [track.TaskNameFilter("index-3"), - track.TaskOpTypeFilter("search"), - # Filtering should also work for non-core operation types. - track.TaskOpTypeFilter("custom-operation-type") - ]) + track.TaskOpTypeFilter("search"), + # Filtering should also work for non-core operation types. 
+ track.TaskOpTypeFilter("custom-operation-type") + ]) schedule = filtered.challenges[0].schedule self.assertEqual(3, len(schedule)) @@ -1429,14 +1452,16 @@ def test_filters_exclude_tasks(self): full_track = reader("unittest", track_specification, "/mappings") self.assertEqual(4, len(full_track.challenges[0].schedule)) - filtered = loader.filter_tasks(full_track, [track.TaskNameFilter("index-3"), track.TaskOpTypeFilter("search")], exclude=True) + filtered = loader.filter_tasks(full_track, [track.TaskNameFilter("index-3"), track.TaskOpTypeFilter("search")], + exclude=True) schedule = filtered.challenges[0].schedule self.assertEqual(3, len(schedule)) - self.assertEqual(["index-1",'index-2'], [t.name for t in schedule[0].tasks]) + self.assertEqual(["index-1", 'index-2'], [t.name for t in schedule[0].tasks]) self.assertEqual("node-stats", schedule[1].name) self.assertEqual("cluster-stats", schedule[2].name) + class TrackSpecificationReaderTests(TestCase): def test_description_is_optional(self): track_specification = { @@ -1471,8 +1496,8 @@ def test_document_count_mandatory_if_file_present(self): { "name": "test", "base-url": "https://localhost/data", - "documents": [{ "source-file": "documents-main.json.bz2"} - ] + "documents": [{"source-file": "documents-main.json.bz2"} + ] } ], "challenges": [] @@ -1480,7 +1505,8 @@ def test_document_count_mandatory_if_file_present(self): reader = loader.TrackSpecificationReader() with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual("Track 'unittest' is invalid. Mandatory element 'document-count' is missing.", ctx.exception.args[0]) + self.assertEqual("Track 'unittest' is invalid. Mandatory element 'document-count' is missing.", + ctx.exception.args[0]) @mock.patch("esrally.track.loader.register_all_params_in_track") def test_parse_with_mixed_warmup_iterations_and_measurement(self, mocked_params_checker): @@ -1490,12 +1516,7 @@ def test_parse_with_mixed_warmup_iterations_and_measurement(self, mocked_params_ { "name": "test-index", "body": "index.json", - "types": [ "docs" ] - } - ], - "data-streams": [ - { - "name": "test-data-stream" + "types": ["docs"] } ], "corpora": [ @@ -1539,9 +1560,10 @@ def test_parse_with_mixed_warmup_iterations_and_measurement(self, mocked_params_ })) with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual("Track 'unittest' is invalid. Operation 'index-append' in challenge 'default-challenge' defines '3' warmup " - "iterations and a time period of '60' seconds. Please do not mix time periods and iterations.", - ctx.exception.args[0]) + self.assertEqual( + "Track 'unittest' is invalid. Operation 'index-append' in challenge 'default-challenge' defines '3' warmup " + "iterations and a time period of '60' seconds. Please do not mix time periods and iterations.", + ctx.exception.args[0]) @mock.patch("esrally.track.loader.register_all_params_in_track") def test_parse_missing_challenge_or_challenges(self, mocked_params_checker): @@ -1551,7 +1573,7 @@ def test_parse_missing_challenge_or_challenges(self, mocked_params_checker): { "name": "test-index", "body": "index.json", - "types": [ "docs" ] + "types": ["docs"] } ], "corpora": [ @@ -1574,8 +1596,9 @@ def test_parse_missing_challenge_or_challenges(self, mocked_params_checker): })) with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual("Track 'unittest' is invalid. 
You must define 'challenge', 'challenges' or 'schedule' but none is specified.", - ctx.exception.args[0]) + self.assertEqual( + "Track 'unittest' is invalid. You must define 'challenge', 'challenges' or 'schedule' but none is specified.", + ctx.exception.args[0]) @mock.patch("esrally.track.loader.register_all_params_in_track") def test_parse_challenge_and_challenges_are_defined(self, mocked_params_checker): @@ -1585,7 +1608,7 @@ def test_parse_challenge_and_challenges_are_defined(self, mocked_params_checker) { "name": "test-index", "body": "index.json", - "types": [ "docs" ] + "types": ["docs"] } ], "corpora": [ @@ -1610,8 +1633,9 @@ def test_parse_challenge_and_challenges_are_defined(self, mocked_params_checker) })) with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual("Track 'unittest' is invalid. Multiple out of 'challenge', 'challenges' or 'schedule' are defined but only " - "one of them is allowed.", ctx.exception.args[0]) + self.assertEqual( + "Track 'unittest' is invalid. Multiple out of 'challenge', 'challenges' or 'schedule' are defined but only " + "one of them is allowed.", ctx.exception.args[0]) @mock.patch("esrally.track.loader.register_all_params_in_track") def test_parse_with_mixed_warmup_time_period_and_iterations(self, mocked_params_checker): @@ -1665,9 +1689,10 @@ def test_parse_with_mixed_warmup_time_period_and_iterations(self, mocked_params_ })) with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual("Track 'unittest' is invalid. Operation 'index-append' in challenge 'default-challenge' defines a warmup time " - "period of '20' seconds and '1000' iterations. Please do not mix time periods and iterations.", - ctx.exception.args[0]) + self.assertEqual( + "Track 'unittest' is invalid. Operation 'index-append' in challenge 'default-challenge' defines a warmup time " + "period of '20' seconds and '1000' iterations. Please do not mix time periods and iterations.", + ctx.exception.args[0]) def test_parse_duplicate_implicit_task_names(self): track_specification = { @@ -1696,9 +1721,10 @@ def test_parse_duplicate_implicit_task_names(self): reader = loader.TrackSpecificationReader() with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual("Track 'unittest' is invalid. Challenge 'default-challenge' contains multiple tasks with the name 'search'. Please" - " use the task's name property to assign a unique name for each task.", - ctx.exception.args[0]) + self.assertEqual( + "Track 'unittest' is invalid. Challenge 'default-challenge' contains multiple tasks with the name 'search'. Please" + " use the task's name property to assign a unique name for each task.", + ctx.exception.args[0]) def test_parse_duplicate_explicit_task_names(self): track_specification = { @@ -1729,9 +1755,10 @@ def test_parse_duplicate_explicit_task_names(self): reader = loader.TrackSpecificationReader() with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual("Track 'unittest' is invalid. Challenge 'default-challenge' contains multiple tasks with the name " - "'duplicate-task-name'. Please use the task's name property to assign a unique name for each task.", - ctx.exception.args[0]) + self.assertEqual( + "Track 'unittest' is invalid. Challenge 'default-challenge' contains multiple tasks with the name " + "'duplicate-task-name'. 
Please use the task's name property to assign a unique name for each task.", + ctx.exception.args[0]) @mock.patch("esrally.track.loader.register_all_params_in_track") def test_load_invalid_index_body(self, mocked_params_checker): @@ -1784,7 +1811,8 @@ def test_load_invalid_index_body(self, mocked_params_checker): })) with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual("Could not load file template for 'definition for index index-historical in body.json'", ctx.exception.args[0]) + self.assertEqual("Could not load file template for 'definition for index index-historical in body.json'", + ctx.exception.args[0]) def test_parse_unique_task_names(self): track_specification = { @@ -1822,7 +1850,7 @@ def test_parse_unique_task_names(self): self.assertEqual("search-two-clients", schedule[1].name) self.assertEqual("search", schedule[1].operation.name) - def test_parse_valid_track_specification(self): + def test_parse_indices_valid_track_specification(self): track_specification = { "description": "description for unit test", "indices": [ @@ -1832,11 +1860,6 @@ def test_parse_valid_track_specification(self): "types": ["main", "secondary"] } ], - "data-streams": [ - { - "name": "historical-data-stream" - } - ], "corpora": [ { "name": "test", @@ -1856,13 +1879,6 @@ def test_parse_valid_track_specification(self): "document-count": 20, "compressed-bytes": 200, "uncompressed-bytes": 20000 - }, - { - "source-file": "documents-main.json.bz2", - "document-count": 10, - "compressed-bytes": 100, - "uncompressed-bytes": 10000, - "target-data-stream": "historical-data-stream" } ] } @@ -1911,7 +1927,7 @@ def test_parse_valid_track_specification(self): track_params={"number_of_shards": 3}, complete_track_params=complete_track_params, source=io.DictStringFileSourceFactory({ - "/mappings/body.json": [""" + "/mappings/body.json": [""" { "settings": { "number_of_shards": {{ number_of_shards }} @@ -1922,7 +1938,7 @@ def test_parse_valid_track_specification(self): } } """] - })) + })) resulting_track = reader("unittest", track_specification, "/mappings") # j2 variables defined in the track -- used for checking mismatching user track params self.assertEqual( @@ -1947,12 +1963,10 @@ def test_parse_valid_track_specification(self): self.assertEqual(2, len(resulting_track.indices[0].types)) self.assertEqual("main", resulting_track.indices[0].types[0]) self.assertEqual("secondary", resulting_track.indices[0].types[1]) - # data streams - self.assertEqual("historical-data-stream", resulting_track.data_streams[0].name) # corpora self.assertEqual(1, len(resulting_track.corpora)) self.assertEqual("test", resulting_track.corpora[0].name) - self.assertEqual(3, len(resulting_track.corpora[0].documents)) + self.assertEqual(2, len(resulting_track.corpora[0].documents)) docs_primary = resulting_track.corpora[0].documents[0] self.assertEqual(track.Documents.SOURCE_FORMAT_BULK, docs_primary.source_format) @@ -1979,6 +1993,132 @@ def test_parse_valid_track_specification(self): self.assertIsNone(docs_secondary.target_index) self.assertIsNone(docs_secondary.target_type) + # challenges + self.assertEqual(1, len(resulting_track.challenges)) + self.assertEqual("default-challenge", resulting_track.challenges[0].name) + self.assertEqual("Default challenge", resulting_track.challenges[0].description) + self.assertEqual({"mixed": True, "max-clients": 8}, resulting_track.challenges[0].meta_data) + self.assertEqual({"append": True}, 
resulting_track.challenges[0].schedule[0].operation.meta_data) + self.assertEqual({"operation-index": 0}, resulting_track.challenges[0].schedule[0].meta_data) + + def test_parse_data_streams_valid_track_specification(self): + track_specification = { + "description": "description for unit test", + "data-streams": [ + { + "name": "data-stream-historical" + } + ], + "corpora": [ + { + "name": "test", + "base-url": "https://localhost/data", + "documents": [ + { + "source-file": "documents-main.json.bz2", + "document-count": 10, + "compressed-bytes": 100, + "uncompressed-bytes": 10000, + "target-data-stream": "data-stream-historical" + }, + { + "source-file": "documents-secondary.json.bz2", + "includes-action-and-meta-data": True, + "document-count": 20, + "compressed-bytes": 200, + "uncompressed-bytes": 20000 + }, + { + "source-file": "documents-main.json.bz2", + "document-count": 10, + "compressed-bytes": 100, + "uncompressed-bytes": 10000, + "target-data-stream": "data-stream-historical" + } + ] + } + ], + "operations": [ + { + "name": "index-append", + "operation-type": "index", + "bulk-size": 5000, + "meta": { + "append": True + } + }, + { + "name": "search", + "operation-type": "search", + "data-stream": "data-stream-historical" + } + ], + "challenges": [ + { + "name": "default-challenge", + "description": "Default challenge", + "meta": { + "mixed": True, + "max-clients": 8 + }, + "schedule": [ + { + "clients": 8, + "operation": "index-append", + "meta": { + "operation-index": 0 + } + }, + { + "clients": 1, + "operation": "search" + } + ] + } + ] + } + complete_track_params = loader.CompleteTrackParams() + reader = loader.TrackSpecificationReader( + complete_track_params=complete_track_params) + resulting_track = reader("unittest", track_specification, "/mappings") + # j2 variables defined in the track -- used for checking mismatching user track params + self.assertEqual("unittest", resulting_track.name) + self.assertEqual("description for unit test", resulting_track.description) + # data streams + self.assertEqual(1, len(resulting_track.data_streams)) + self.assertEqual("data-stream-historical", resulting_track.data_streams[0].name) + # corpora + self.assertEqual(1, len(resulting_track.corpora)) + self.assertEqual("test", resulting_track.corpora[0].name) + self.assertEqual(3, len(resulting_track.corpora[0].documents)) + + docs_primary = resulting_track.corpora[0].documents[0] + self.assertEqual(track.Documents.SOURCE_FORMAT_BULK, docs_primary.source_format) + self.assertEqual("documents-main.json", docs_primary.document_file) + self.assertEqual("documents-main.json.bz2", docs_primary.document_archive) + self.assertEqual("https://localhost/data", docs_primary.base_url) + self.assertFalse(docs_primary.includes_action_and_meta_data) + self.assertEqual(10, docs_primary.number_of_documents) + self.assertEqual(100, docs_primary.compressed_size_in_bytes) + self.assertEqual(10000, docs_primary.uncompressed_size_in_bytes) + self.assertEqual("data-stream-historical", docs_primary.target_data_stream) + self.assertIsNone(docs_primary.target_index) + self.assertIsNone(docs_primary.target_type) + + docs_secondary = resulting_track.corpora[0].documents[1] + self.assertEqual(track.Documents.SOURCE_FORMAT_BULK, docs_secondary.source_format) + self.assertEqual("documents-secondary.json", docs_secondary.document_file) + self.assertEqual("documents-secondary.json.bz2", docs_secondary.document_archive) + self.assertEqual("https://localhost/data", docs_secondary.base_url) + 
self.assertTrue(docs_secondary.includes_action_and_meta_data) + self.assertEqual(20, docs_secondary.number_of_documents) + self.assertEqual(200, docs_secondary.compressed_size_in_bytes) + self.assertEqual(20000, docs_secondary.uncompressed_size_in_bytes) + # This is defined by the action-and-meta-data line! + self.assertIsNone(docs_secondary.target_data_stream) + self.assertIsNone(docs_secondary.target_index) + self.assertIsNone(docs_secondary.target_type) + docs_tertiary = resulting_track.corpora[0].documents[2] self.assertEqual(track.Documents.SOURCE_FORMAT_BULK, docs_tertiary.source_format) self.assertEqual("documents-main.json", docs_tertiary.document_file) @@ -1989,7 +2129,7 @@ def test_parse_valid_track_specification(self): self.assertEqual(100, docs_tertiary.compressed_size_in_bytes) self.assertIsNone(docs_tertiary.target_index) self.assertIsNone(docs_tertiary.target_type) - self.assertEqual("historical-data-stream", docs_tertiary.target_data_stream) + self.assertEqual("data-stream-historical", docs_tertiary.target_data_stream) # challenges self.assertEqual(1, len(resulting_track.challenges)) @@ -1999,7 +2139,6 @@ def test_parse_valid_track_specification(self): self.assertEqual({"append": True}, resulting_track.challenges[0].schedule[0].operation.meta_data) self.assertEqual({"operation-index": 0}, resulting_track.challenges[0].schedule[0].meta_data) - @mock.patch("esrally.track.loader.register_all_params_in_track") def test_parse_valid_without_types(self, mocked_param_checker): track_specification = { @@ -2011,11 +2150,6 @@ def test_parse_valid_without_types(self, mocked_param_checker): # no type information here } ], - "data-streams": [ - { - "name": "historical-data-stream" - } - ], "corpora": [ { "name": "test", @@ -2058,15 +2192,12 @@ def test_parse_valid_without_types(self, mocked_param_checker): # indices self.assertEqual(1, len(resulting_track.indices)) self.assertEqual("index-historical", resulting_track.indices[0].name) - self.assertEqual("historical-data-stream", resulting_track.data_streams[0].name) self.assertDictEqual({ "settings": { "number_of_shards": 3 } }, resulting_track.indices[0].body) self.assertEqual(0, len(resulting_track.indices[0].types)) - # data streams - self.assertEqual(1, len(resulting_track.data_streams)) # corpora self.assertEqual(1, len(resulting_track.corpora)) self.assertEqual("test", resulting_track.corpora[0].name) @@ -2088,6 +2219,177 @@ def test_parse_valid_without_types(self, mocked_param_checker): # challenges self.assertEqual(1, len(resulting_track.challenges)) + @mock.patch("esrally.track.loader.register_all_params_in_track") + def test_parse_invalid_data_streams_with_indices(self, mocked_param_checker): + track_specification = { + "description": "description for unit test", + "indices": [ + { + "name": "index-historical", + # no type information here + } + ], + "data-streams": [ + { + "name": "historical-data-stream" + } + ], + "corpora": [ + { + "name": "test", + "base-url": "https://localhost/data", + "documents": [ + { + "source-file": "documents-main.json.bz2", + "document-count": 10, + "compressed-bytes": 100, + "uncompressed-bytes": 10000, + }, + ] + } + ], + "schedule": [ + { + "clients": 8, + "operation": { + "name": "index-append", + "operation-type": "bulk", + "bulk-size": 5000 + } + } + ] + } + complete_track_params = loader.CompleteTrackParams() + reader = loader.TrackSpecificationReader( + complete_track_params=complete_track_params) + with self.assertRaises(loader.TrackSyntaxError) as ctx: + reader("unittest", 
track_specification, "/mapping") + + @mock.patch("esrally.track.loader.register_all_params_in_track") + def test_parse_invalid_data_streams_with_target_index(self, mocked_param_checker): + track_specification = { + "description": "description for unit test", + "data-streams": [ + { + "name": "historical-data-stream" + } + ], + "corpora": [ + { + "name": "test", + "base-url": "https://localhost/data", + "documents": [ + { + "source-file": "documents-main.json.bz2", + "document-count": 10, + "compressed-bytes": 100, + "uncompressed-bytes": 10000, + "target-index": "historical-index", + }, + ] + } + ], + "schedule": [ + { + "clients": 8, + "operation": { + "name": "index-append", + "operation-type": "bulk", + "bulk-size": 5000 + } + } + ] + } + complete_track_params = loader.CompleteTrackParams() + reader = loader.TrackSpecificationReader( + complete_track_params=complete_track_params) + with self.assertRaises(loader.TrackSyntaxError) as ctx: + reader("unittest", track_specification, "/mapping") + + @mock.patch("esrally.track.loader.register_all_params_in_track") + def test_parse_invalid_data_streams_with_target_type(self, mocked_param_checker): + track_specification = { + "description": "description for unit test", + "data-streams": [ + { + "name": "historical-data-stream" + } + ], + "corpora": [ + { + "name": "test", + "base-url": "https://localhost/data", + "documents": [ + { + "source-file": "documents-main.json.bz2", + "document-count": 10, + "compressed-bytes": 100, + "uncompressed-bytes": 10000, + "target-type": "_doc", + }, + ] + } + ], + "schedule": [ + { + "clients": 8, + "operation": { + "name": "index-append", + "operation-type": "bulk", + "bulk-size": 5000 + } + } + ] + } + complete_track_params = loader.CompleteTrackParams() + reader = loader.TrackSpecificationReader( + complete_track_params=complete_track_params) + with self.assertRaises(loader.TrackSyntaxError) as ctx: + reader("unittest", track_specification, "/mapping") + + @mock.patch("esrally.track.loader.register_all_params_in_track") + def test_parse_invalid_no_data_stream_target(self, mocked_param_checker): + track_specification = { + "description": "description for unit test", + "data-streams": [ + { + "name": "historical-data-stream" + }, + { + "name": "historical-data-stream-2" + } + ], + "corpora": [ + { + "name": "test", + "base-url": "https://localhost/data", + "documents": [ + { + "source-file": "documents-main.json.bz2", + "document-count": 10, + "compressed-bytes": 100, + "uncompressed-bytes": 10000 + } + ] + } + ], + "schedule": [ + { + "clients": 8, + "operation": { + "name": "index-append", + "operation-type": "bulk", + "bulk-size": 5000 + } + } + ] + } + complete_track_params = loader.CompleteTrackParams() + reader = loader.TrackSpecificationReader( + complete_track_params=complete_track_params) + with self.assertRaises(loader.TrackSyntaxError) as ctx: + reader("unittest", track_specification, "/mapping") + @mock.patch("esrally.track.loader.register_all_params_in_track") def test_parse_valid_without_indices(self, mocked_param_checker): track_specification = { @@ -2188,7 +2490,7 @@ def test_parse_valid_track_specification_with_index_template(self): } } """], - })) + })) resulting_track = reader("unittest", track_specification, "/mappings") self.assertEqual( ["index_pattern", "number_of_shards"], @@ -2245,7 +2547,8 @@ def test_unique_challenge_names(self): reader = loader.TrackSpecificationReader() with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - 
self.assertEqual("Track 'unittest' is invalid. Duplicate challenge with name 'test-challenge'.", ctx.exception.args[0]) + self.assertEqual("Track 'unittest' is invalid. Duplicate challenge with name 'test-challenge'.", + ctx.exception.args[0]) def test_not_more_than_one_default_challenge_possible(self): track_specification = { @@ -2284,8 +2587,9 @@ def test_not_more_than_one_default_challenge_possible(self): reader = loader.TrackSpecificationReader() with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual("Track 'unittest' is invalid. Both 'default-challenge' and 'another-challenge' are defined as default challenges. " - "Please define only one of them as default.", ctx.exception.args[0]) + self.assertEqual( + "Track 'unittest' is invalid. Both 'default-challenge' and 'another-challenge' are defined as default challenges. " + "Please define only one of them as default.", ctx.exception.args[0]) def test_at_least_one_default_challenge(self): track_specification = { @@ -2320,8 +2624,9 @@ def test_at_least_one_default_challenge(self): reader = loader.TrackSpecificationReader() with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual("Track 'unittest' is invalid. No default challenge specified. Please edit the track and add \"default\": true " - "to one of the challenges challenge, another-challenge.", ctx.exception.args[0]) + self.assertEqual( + "Track 'unittest' is invalid. No default challenge specified. Please edit the track and add \"default\": true " + "to one of the challenges challenge, another-challenge.", ctx.exception.args[0]) def test_exactly_one_default_challenge(self): track_specification = { @@ -2760,8 +3065,9 @@ def test_parallel_tasks_with_completed_by_set_no_task_matches(self): with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual("Track 'unittest' is invalid. 'parallel' element for challenge 'default-challenge' is marked with 'completed-by' " - "with task name 'non-existing-task' but no task with this name exists.", ctx.exception.args[0]) + self.assertEqual( + "Track 'unittest' is invalid. 'parallel' element for challenge 'default-challenge' is marked with 'completed-by' " + "with task name 'non-existing-task' but no task with this name exists.", ctx.exception.args[0]) def test_parallel_tasks_with_completed_by_set_multiple_tasks_match(self): track_specification = { @@ -2798,6 +3104,7 @@ def test_parallel_tasks_with_completed_by_set_multiple_tasks_match(self): with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual("Track 'unittest' is invalid. 'parallel' element for challenge 'default-challenge' contains multiple tasks with " - "the name 'index-1' which are marked with 'completed-by' but only task is allowed to match.", - ctx.exception.args[0]) + self.assertEqual( + "Track 'unittest' is invalid. 
'parallel' element for challenge 'default-challenge' contains multiple tasks with " + "the name 'index-1' which are marked with 'completed-by' but only task is allowed to match.", + ctx.exception.args[0]) diff --git a/tests/track/params_test.py b/tests/track/params_test.py index 79f688241..97c10304e 100644 --- a/tests/track/params_test.py +++ b/tests/track/params_test.py @@ -211,7 +211,8 @@ def idx(type_name, id): def conflict(action, type_name, id): if type_name: - return action, '{"%s": {"_index": "test_index", "_type": "%s", "_id": "%s"}}\n' % (action, type_name, id) + return action, '{"%s": {"_index": "test_index", "_type": "%s", "_id": "%s"}}\n' % ( + action, type_name, id) else: return action, '{"%s": {"_index": "test_index", "_id": "%s"}}\n' % (action, id) @@ -481,16 +482,16 @@ def test_read_bulk_with_id_conflicts(self): bulks.append(bulk) self.assertEqual([ - b'{"index": {"_index": "test_index", "_type": "test_type", "_id": "100"}}\n' + - b'{"key": "value1"}\n' + - b'{"update": {"_index": "test_index", "_type": "test_type", "_id": "200"}}\n' + - b'{"doc":{"key": "value2"}}\n', - b'{"update": {"_index": "test_index", "_type": "test_type", "_id": "400"}}\n' + - b'{"doc":{"key": "value3"}}\n' + - b'{"update": {"_index": "test_index", "_type": "test_type", "_id": "300"}}\n' + - b'{"doc":{"key": "value4"}}\n', - b'{"index": {"_index": "test_index", "_type": "test_type", "_id": "200"}}\n' + - b'{"key": "value5"}\n' + b'{"index": {"_index": "test_index", "_type": "test_type", "_id": "100"}}\n' + + b'{"key": "value1"}\n' + + b'{"update": {"_index": "test_index", "_type": "test_type", "_id": "200"}}\n' + + b'{"doc":{"key": "value2"}}\n', + b'{"update": {"_index": "test_index", "_type": "test_type", "_id": "400"}}\n' + + b'{"doc":{"key": "value3"}}\n' + + b'{"update": {"_index": "test_index", "_type": "test_type", "_id": "300"}}\n' + + b'{"doc":{"key": "value4"}}\n', + b'{"index": {"_index": "test_index", "_type": "test_type", "_id": "200"}}\n' + + b'{"key": "value5"}\n' ], bulks) def test_read_bulk_with_external_id_and_zero_conflict_probability(self): @@ -523,15 +524,15 @@ def test_read_bulk_with_external_id_and_zero_conflict_probability(self): bulks.append(bulk) self.assertEqual([ - b'{"index": {"_index": "test_index", "_type": "test_type", "_id": "100"}}\n' + - b'{"key": "value1"}\n' + - b'{"index": {"_index": "test_index", "_type": "test_type", "_id": "200"}}\n' + - b'{"key": "value2"}\n', - - b'{"index": {"_index": "test_index", "_type": "test_type", "_id": "300"}}\n' + - b'{"key": "value3"}\n' + - b'{"index": {"_index": "test_index", "_type": "test_type", "_id": "400"}}\n' + - b'{"key": "value4"}\n' + b'{"index": {"_index": "test_index", "_type": "test_type", "_id": "100"}}\n' + + b'{"key": "value1"}\n' + + b'{"index": {"_index": "test_index", "_type": "test_type", "_id": "200"}}\n' + + b'{"key": "value2"}\n', + + b'{"index": {"_index": "test_index", "_type": "test_type", "_id": "300"}}\n' + + b'{"key": "value3"}\n' + + b'{"index": {"_index": "test_index", "_type": "test_type", "_id": "400"}}\n' + + b'{"key": "value4"}\n' ], bulks) def assert_bulks_sized(self, reader, expected_bulk_sizes, expected_line_sizes): @@ -598,22 +599,22 @@ def test_calculate_bounds(self): num_docs = 1000 clients = 2 - self.assertEqual(( 0, 500, 500), params.bounds(num_docs, 0, 0, clients, includes_action_and_meta_data=False)) + self.assertEqual((0, 500, 500), params.bounds(num_docs, 0, 0, clients, includes_action_and_meta_data=False)) self.assertEqual((500, 500, 500), params.bounds(num_docs, 1, 1, clients, 
includes_action_and_meta_data=False)) num_docs = 800 clients = 4 - self.assertEqual(( 0, 200, 400), params.bounds(num_docs, 0, 0, clients, includes_action_and_meta_data=True)) - self.assertEqual(( 400, 200, 400), params.bounds(num_docs, 1, 1, clients, includes_action_and_meta_data=True)) - self.assertEqual(( 800, 200, 400), params.bounds(num_docs, 2, 2, clients, includes_action_and_meta_data=True)) + self.assertEqual((0, 200, 400), params.bounds(num_docs, 0, 0, clients, includes_action_and_meta_data=True)) + self.assertEqual((400, 200, 400), params.bounds(num_docs, 1, 1, clients, includes_action_and_meta_data=True)) + self.assertEqual((800, 200, 400), params.bounds(num_docs, 2, 2, clients, includes_action_and_meta_data=True)) self.assertEqual((1200, 200, 400), params.bounds(num_docs, 3, 3, clients, includes_action_and_meta_data=True)) num_docs = 2000 clients = 8 - self.assertEqual(( 0, 250, 250), params.bounds(num_docs, 0, 0, clients, includes_action_and_meta_data=False)) - self.assertEqual(( 250, 250, 250), params.bounds(num_docs, 1, 1, clients, includes_action_and_meta_data=False)) - self.assertEqual(( 500, 250, 250), params.bounds(num_docs, 2, 2, clients, includes_action_and_meta_data=False)) - self.assertEqual(( 750, 250, 250), params.bounds(num_docs, 3, 3, clients, includes_action_and_meta_data=False)) + self.assertEqual((0, 250, 250), params.bounds(num_docs, 0, 0, clients, includes_action_and_meta_data=False)) + self.assertEqual((250, 250, 250), params.bounds(num_docs, 1, 1, clients, includes_action_and_meta_data=False)) + self.assertEqual((500, 250, 250), params.bounds(num_docs, 2, 2, clients, includes_action_and_meta_data=False)) + self.assertEqual((750, 250, 250), params.bounds(num_docs, 3, 3, clients, includes_action_and_meta_data=False)) self.assertEqual((1000, 250, 250), params.bounds(num_docs, 4, 4, clients, includes_action_and_meta_data=False)) self.assertEqual((1250, 250, 250), params.bounds(num_docs, 5, 5, clients, includes_action_and_meta_data=False)) self.assertEqual((1500, 250, 250), params.bounds(num_docs, 6, 6, clients, includes_action_and_meta_data=False)) @@ -624,25 +625,36 @@ def test_calculate_non_multiple_bounds_16_clients(self): # lines and every third client, one line more (1334). 
num_docs = 16000 clients = 12 - self.assertEqual(( 0, 1333, 1333), params.bounds(num_docs, 0, 0, clients, includes_action_and_meta_data=False)) - self.assertEqual(( 1333, 1334, 1334), params.bounds(num_docs, 1, 1, clients, includes_action_and_meta_data=False)) - self.assertEqual(( 2667, 1333, 1333), params.bounds(num_docs, 2, 2, clients, includes_action_and_meta_data=False)) - self.assertEqual(( 4000, 1333, 1333), params.bounds(num_docs, 3, 3, clients, includes_action_and_meta_data=False)) - self.assertEqual(( 5333, 1334, 1334), params.bounds(num_docs, 4, 4, clients, includes_action_and_meta_data=False)) - self.assertEqual(( 6667, 1333, 1333), params.bounds(num_docs, 5, 5, clients, includes_action_and_meta_data=False)) - self.assertEqual(( 8000, 1333, 1333), params.bounds(num_docs, 6, 6, clients, includes_action_and_meta_data=False)) - self.assertEqual(( 9333, 1334, 1334), params.bounds(num_docs, 7, 7, clients, includes_action_and_meta_data=False)) - self.assertEqual((10667, 1333, 1333), params.bounds(num_docs, 8, 8, clients, includes_action_and_meta_data=False)) - self.assertEqual((12000, 1333, 1333), params.bounds(num_docs, 9, 9, clients, includes_action_and_meta_data=False)) - self.assertEqual((13333, 1334, 1334), params.bounds(num_docs, 10, 10, clients, includes_action_and_meta_data=False)) - self.assertEqual((14667, 1333, 1333), params.bounds(num_docs, 11, 11, clients, includes_action_and_meta_data=False)) + self.assertEqual((0, 1333, 1333), params.bounds(num_docs, 0, 0, clients, includes_action_and_meta_data=False)) + self.assertEqual((1333, 1334, 1334), + params.bounds(num_docs, 1, 1, clients, includes_action_and_meta_data=False)) + self.assertEqual((2667, 1333, 1333), + params.bounds(num_docs, 2, 2, clients, includes_action_and_meta_data=False)) + self.assertEqual((4000, 1333, 1333), + params.bounds(num_docs, 3, 3, clients, includes_action_and_meta_data=False)) + self.assertEqual((5333, 1334, 1334), + params.bounds(num_docs, 4, 4, clients, includes_action_and_meta_data=False)) + self.assertEqual((6667, 1333, 1333), + params.bounds(num_docs, 5, 5, clients, includes_action_and_meta_data=False)) + self.assertEqual((8000, 1333, 1333), + params.bounds(num_docs, 6, 6, clients, includes_action_and_meta_data=False)) + self.assertEqual((9333, 1334, 1334), + params.bounds(num_docs, 7, 7, clients, includes_action_and_meta_data=False)) + self.assertEqual((10667, 1333, 1333), + params.bounds(num_docs, 8, 8, clients, includes_action_and_meta_data=False)) + self.assertEqual((12000, 1333, 1333), + params.bounds(num_docs, 9, 9, clients, includes_action_and_meta_data=False)) + self.assertEqual((13333, 1334, 1334), + params.bounds(num_docs, 10, 10, clients, includes_action_and_meta_data=False)) + self.assertEqual((14667, 1333, 1333), + params.bounds(num_docs, 11, 11, clients, includes_action_and_meta_data=False)) def test_calculate_non_multiple_bounds_6_clients(self): # With 3500 docs and 6 clients, every client needs to read 583.33 docs. We have two lines per doc, which makes it # 2 * 583.333 docs = 1166.6666 lines per client. We let them read 1166 and 1168 lines respectively (583 and 584 docs). 
num_docs = 3500 clients = 6 - self.assertEqual(( 0, 583, 1166), params.bounds(num_docs, 0, 0, clients, includes_action_and_meta_data=True)) + self.assertEqual((0, 583, 1166), params.bounds(num_docs, 0, 0, clients, includes_action_and_meta_data=True)) self.assertEqual((1166, 584, 1168), params.bounds(num_docs, 1, 1, clients, includes_action_and_meta_data=True)) self.assertEqual((2334, 583, 1166), params.bounds(num_docs, 2, 2, clients, includes_action_and_meta_data=True)) self.assertEqual((3500, 583, 1166), params.bounds(num_docs, 3, 3, clients, includes_action_and_meta_data=True)) @@ -653,11 +665,12 @@ def test_calculate_bounds_for_multiple_clients_per_worker(self): num_docs = 2000 clients = 8 # four clients per worker, each reads 250 lines - self.assertEqual(( 0, 1000, 1000), params.bounds(num_docs, 0, 3, clients, includes_action_and_meta_data=False)) - self.assertEqual((1000, 1000, 1000), params.bounds(num_docs, 4, 7, clients, includes_action_and_meta_data=False)) + self.assertEqual((0, 1000, 1000), params.bounds(num_docs, 0, 3, clients, includes_action_and_meta_data=False)) + self.assertEqual((1000, 1000, 1000), + params.bounds(num_docs, 4, 7, clients, includes_action_and_meta_data=False)) # four clients per worker, each reads 500 lines (includes action and metadata) - self.assertEqual(( 0, 1000, 2000), params.bounds(num_docs, 0, 3, clients, includes_action_and_meta_data=True)) + self.assertEqual((0, 1000, 2000), params.bounds(num_docs, 0, 3, clients, includes_action_and_meta_data=True)) self.assertEqual((2000, 1000, 2000), params.bounds(num_docs, 4, 7, clients, includes_action_and_meta_data=True)) def test_calculate_number_of_bulks(self): @@ -725,8 +738,8 @@ def test_create_without_corpora_definition(self): params.BulkIndexParamSource(track=track.Track(name="unit-test"), params={}) self.assertEqual("There is no document corpus definition for track unit-test. 
" - "You must add at least one before making bulk requests to Elasticsearch.", ctx.exception.args[0]) - + "You must add at least one before making bulk requests to Elasticsearch.", + ctx.exception.args[0]) def test_create_with_non_numeric_bulk_size(self): corpus = track.DocumentCorpus(name="default", documents=[ @@ -804,8 +817,9 @@ def test_create_with_metadata_in_source_file_but_conflicts(self): "conflicts": "random" }) - self.assertEqual("Cannot generate id conflicts [random] as [docs.json.bz2] in document corpus [default] already contains " - "an action and meta-data line.", ctx.exception.args[0]) + self.assertEqual( + "Cannot generate id conflicts [random] as [docs.json.bz2] in document corpus [default] already contains " + "an action and meta-data line.", ctx.exception.args[0]) def test_create_with_unknown_id_conflicts(self): with self.assertRaises(exceptions.InvalidSyntax) as ctx: @@ -991,7 +1005,7 @@ def test_filters_corpora_by_data_stream(self): }) partition = source.partition(0, 1) - self.assertEqual(partition.corpora, [corpora[0],corpora[2]]) + self.assertEqual(partition.corpora, [corpora[0], corpora[2]]) def test_raises_exception_if_no_corpus_matches(self): corpus = track.DocumentCorpus(name="default", documents=[ @@ -1012,7 +1026,8 @@ def test_raises_exception_if_no_corpus_matches(self): "pipeline": "test-pipeline" }) - self.assertEqual("The provided corpus ['does_not_exist'] does not match any of the corpora ['default'].", ctx.exception.args[0]) + self.assertEqual("The provided corpus ['does_not_exist'] does not match any of the corpora ['default'].", + ctx.exception.args[0]) def test_ingests_all_documents_by_default(self): corpora = [ @@ -1128,7 +1143,8 @@ def test_create_with_conflict_probability_too_high(self): "conflict-probability": 100.1 }) - self.assertEqual("'conflict-probability' must be in the range [0.0, 100.0] but was 100.1", ctx.exception.args[0]) + self.assertEqual("'conflict-probability' must be in the range [0.0, 100.0] but was 100.1", + ctx.exception.args[0]) def test_create_with_conflict_probability_not_numeric(self): with self.assertRaises(exceptions.InvalidSyntax) as ctx: @@ -1161,7 +1177,8 @@ def test_generate_two_bulks(self): bulks = params.bulk_data_based(num_clients=1, start_client_index=0, end_client_index=0, corpora=[corpus], batch_size=5, bulk_size=5, - id_conflicts=params.IndexIdConflict.NoConflicts, conflict_probability=None, on_conflict=None, + id_conflicts=params.IndexIdConflict.NoConflicts, conflict_probability=None, + on_conflict=None, recency=None, pipeline=None, original_params={ "my-custom-parameter": "foo", @@ -1193,18 +1210,18 @@ def test_generate_two_bulks(self): def test_generate_bulks_from_multiple_corpora(self): corpora = [ track.DocumentCorpus(name="default", documents=[ - track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, - number_of_documents=5, - target_index="logs-2018-01", - target_type="docs" - ), - track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, - number_of_documents=5, - target_index="logs-2018-02", - target_type="docs" - ), - - ]), + track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, + number_of_documents=5, + target_index="logs-2018-01", + target_type="docs" + ), + track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, + number_of_documents=5, + target_index="logs-2018-02", + target_type="docs" + ), + + ]), track.DocumentCorpus(name="special", documents=[ track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, number_of_documents=5, @@ -1213,11 +1230,12 @@ 
def test_generate_bulks_from_multiple_corpora(self): ) ]) - ] + ] bulks = params.bulk_data_based(num_clients=1, start_client_index=0, end_client_index=0, corpora=corpora, batch_size=5, bulk_size=5, - id_conflicts=params.IndexIdConflict.NoConflicts, conflict_probability=None, on_conflict=None, + id_conflicts=params.IndexIdConflict.NoConflicts, conflict_probability=None, + on_conflict=None, recency=None, pipeline=None, original_params={ "my-custom-parameter": "foo", @@ -1581,10 +1599,11 @@ def test_create_data_stream_inline_without_body(self): }, p["request-params"]) def test_filter_data_stream(self): - source = params.CreateDataStreamParamSource(track.Track(name="unit-test", data_streams=[track.DataStream(name="data-stream-1"), - track.DataStream(name="data-stream-2"), - track.DataStream(name="data-stream-3")]), - params={ "data-stream": "data-stream-2"}) + source = params.CreateDataStreamParamSource( + track.Track(name="unit-test", data_streams=[track.DataStream(name="data-stream-1"), + track.DataStream(name="data-stream-2"), + track.DataStream(name="data-stream-3")]), + params={"data-stream": "data-stream-2"}) p = source.params() self.assertEqual(1, len(p["data-streams"])) @@ -1652,7 +1671,8 @@ def test_filter_data_stream_from_track(self): track.DataStream(name="data-stream-1"), track.DataStream(name="data-stream-2"), track.DataStream(name="data-stream-3") - ]), params={"data-stream": "data-stream-2", "only-if-exists": False, "request-params": {"allow_no_indices": True}}) + ]), params={"data-stream": "data-stream-2", "only-if-exists": False, + "request-params": {"allow_no_indices": True}}) p = source.params() @@ -1661,7 +1681,8 @@ def test_filter_data_stream_from_track(self): self.assertFalse(p["only-if-exists"]) def test_delete_data_stream_by_name(self): - source = params.DeleteDataStreamParamSource(track.Track(name="unit-test"), params={"data-stream": "data-stream-2"}) + source = params.DeleteDataStreamParamSource(track.Track(name="unit-test"), + params={"data-stream": "data-stream-2"}) p = source.params() @@ -1788,8 +1809,9 @@ def test_delete_index_template_by_name_and_matching_indices_missing_index_patter "template": "default", "delete-matching-indices": True }) - self.assertEqual("The property 'index-pattern' is required for delete-index-template if 'delete-matching-indices' is true.", - ctx.exception.args[0]) + self.assertEqual( + "The property 'index-pattern' is required for delete-index-template if 'delete-matching-indices' is true.", + ctx.exception.args[0]) def test_delete_index_template_from_track(self): tpl1 = track.IndexTemplate(name="metrics", pattern="metrics-*", delete_matching_indices=True, content={ @@ -1877,11 +1899,12 @@ def test_create_without_index(self): "body": { "query": { "match_all": {} - } } + } }, operation_name="test_operation") - self.assertEqual("'index' or 'data-stream' is mandatory and is missing for operation 'test_operation'", ctx.exception.args[0]) + self.assertEqual("'index' or 'data-stream' is mandatory and is missing for operation 'test_operation'", + ctx.exception.args[0]) def test_passes_request_parameters(self): index1 = track.Index(name="index1", types=["type1"]) @@ -1995,9 +2018,27 @@ def test_replaces_body_params(self): # the implementation modifies the internal dict in-place (safe because we only have one client per process) hence we need to copy. 
first = copy.deepcopy(search.params(choice=lambda d: d[0])) second = copy.deepcopy(search.params(choice=lambda d: d[1])) - self.assertNotEqual(first, second) + def test_invalid_data_stream_with_type(self): + with self.assertRaises(exceptions.InvalidSyntax) as ctx: + ds1 = track.DataStream(name="data-stream-1") + + params.SearchParamSource(track=track.Track(name="unit-test", data_streams=[ds1]), params={ + "data-stream": "data-stream-2", + "type": "_doc", + "cache": False, + "response-compression-enabled": False, + "body": { + "query": { + "match_all": {} + } + } + }, operation_name="test_operation") + + self.assertEqual("'type' not supported with 'data-stream' for operation 'test_operation'", + ctx.exception.args[0]) + class ForceMergeParamSourceTests(TestCase): def test_force_merge_index_from_track(self): From d8dd0ff1e2bb4218f407dacfd9ea3303b5a2b391 Mon Sep 17 00:00:00 2001 From: Dale McDiarmid Date: Wed, 21 Oct 2020 16:54:09 +0100 Subject: [PATCH 7/9] Response to review --- esrally/track/loader.py | 2 +- esrally/track/params.py | 28 ++++++++++++---------------- tests/track/params_test.py | 2 +- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/esrally/track/loader.py b/esrally/track/loader.py index 662937f28..27d6fb9c1 100644 --- a/esrally/track/loader.py +++ b/esrally/track/loader.py @@ -1152,7 +1152,7 @@ def _create_corpora(self, corpora_specs, indices, data_streams): target_type = self._r(doc_spec, "target-type", mandatory=False, default_value=corpus_target_type, error_ctx=docs) - # require to be specified id we're using data streams and we have no default + # require to be specified if we're using data streams and we have no default target_ds = self._r(doc_spec, "target-data-stream", mandatory=len(data_streams) > 0 and corpus_target_ds is None, default_value=corpus_target_ds, diff --git a/esrally/track/params.py b/esrally/track/params.py index 57ff35e73..bbc26a691 100644 --- a/esrally/track/params.py +++ b/esrally/track/params.py @@ -216,11 +216,10 @@ def __init__(self, track, params, **kwargs): self.data_stream_definitions.append(ds.name) else: try: - idx = params["data-stream"] - if isinstance(idx, str): - idx = [idx] - for i in idx: - self.data_stream_definitions.append(i) + data_stream = params["data-stream"] + data_streams = [data_stream] if isinstance(data_stream, str) else data_stream + for ds in data_streams: + self.data_stream_definitions.append(ds) except KeyError: raise exceptions.InvalidSyntax("Please set the property 'data-stream' for the create-data-stream operation") @@ -244,8 +243,7 @@ def __init__(self, track, params, **kwargs): self.data_stream_definitions = [] target_data_stream = params.get("data-stream") if target_data_stream: - if isinstance(target_data_stream, str): - target_data_stream = [target_data_stream] + target_data_stream = [target_data_stream] if isinstance(target_data_stream, str) else target_data_stream for ds in target_data_stream: self.data_stream_definitions.append(ds) elif track.data_streams: @@ -385,7 +383,7 @@ def params(self): # "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.53 Safari/525.19", # "Mozilla/5.0 (IE 11.0; Windows NT 6.3; Trident/7.0; .NET4.0E; .NET4.0C; rv:11.0) like Gecko", # "Mozilla/5.0 (IE 11.0; Windows NT 6.3; WOW64; Trident/7.0; Touch; rv:11.0) like Gecko" -# ] if not index_name: +# ] # }, # "index": "logs-*", # "body": { @@ -412,7 +410,7 @@ def __init__(self, track, params, **kwargs): default_target = track.data_streams[0].name else: default_target = 
None - # indexes are preferred by data streams can also be queried the same way + # indices are preferred by data streams can also be queried the same way target_name = params.get("index") type_name = params.get("type") if not target_name: @@ -691,7 +689,7 @@ class ForceMergeParamSource(ParamSource): def __init__(self, track, params, **kwargs): super().__init__(track, params, **kwargs) if len(track.indices) > 0 or len(track.data_streams) > 0: - # force merge data streams and indices - API call is the same so treat as indexes + # force merge data streams and indices - API call is the same so treat as indices default_target = ','.join(map(str, track.indices + track.data_streams)) else: default_target = "_all" @@ -769,7 +767,7 @@ def create_default_reader(docs, offset, num_lines, num_docs, batch_size, bulk_si use_create = True if id_conflicts != IndexIdConflict.NoConflicts: # can only create docs in data streams - raise exceptions.RallyError("Do not create readers for docs targeting data streams with conflicts") + raise exceptions.RallyError("Conflicts cannot be generated with append only data streams") if docs.includes_action_and_meta_data: return SourceOnlyIndexDataReader(docs.document_file, batch_size, bulk_size, source, target, docs.target_type) @@ -789,10 +787,8 @@ def create_readers(num_clients, start_client_index, end_client_index, corpora, b offset, num_docs, num_lines = bounds(docs.number_of_documents, start_client_index, end_client_index, num_clients, docs.includes_action_and_meta_data) if num_docs > 0: - target = "/" - if docs.target_index: - target = f"{docs.target_index}/{docs.target_type}" - elif docs.target_data_stream: + target = f"{docs.target_index}/{docs.target_type}" if docs.target_index else "/" + if docs.target_data_stream: target = docs.target_data_stream logger.info("Task-relative clients at index [%d-%d] will bulk index [%d] docs starting from line offset [%d] for [%s] " "from corpus [%s].", start_client_index, end_client_index, num_docs, offset, @@ -900,7 +896,7 @@ def __init__(self, index_name, type_name, conflicting_ids=None, conflict_probabi self.meta_data_index_no_id = '{"index": {"_index": "%s"}}\n' % index_name self.meta_data_create_no_id = '{"create": {"_index": "%s"}}\n' % index_name if use_create and conflicting_ids: - raise exceptions.RallyError("'use_create' be True with 'conflicting_ids'") + raise exceptions.RallyError("Index mode '_create' cannot be used with conflicting ids") self.conflicting_ids = conflicting_ids self.on_conflict = on_conflict self.use_create = use_create diff --git a/tests/track/params_test.py b/tests/track/params_test.py index 97c10304e..b801521e3 100644 --- a/tests/track/params_test.py +++ b/tests/track/params_test.py @@ -150,7 +150,7 @@ def test_generate_action_meta_data_create(self): def test_generate_action_meta_data_create_with_conflicts(self): with self.assertRaises(exceptions.RallyError) as ctx: params.GenerateActionMetaData("test_index", None, conflicting_ids=[100, 200, 300, 400], use_create=True) - self.assertEqual("'use_create' be True with 'conflicting_ids'", + self.assertEqual("Index mode '_create' cannot be used with conflicting ids", ctx.exception.args[0]) def test_generate_action_meta_data_typeless(self): From dc683fb2f6b65ebfa9cd265f47bf09aad58c4f4b Mon Sep 17 00:00:00 2001 From: Dale McDiarmid Date: Thu, 22 Oct 2020 12:39:00 +0100 Subject: [PATCH 8/9] Fix Formatting --- esrally/track/loader.py | 123 ++++++++------------- esrally/track/params.py | 2 +- tests/track/loader_test.py | 221 
+++++++++++++++---------------------- tests/track/params_test.py | 93 ++++++---------- 4 files changed, 174 insertions(+), 265 deletions(-) diff --git a/esrally/track/loader.py b/esrally/track/loader.py index 27d6fb9c1..51bc08c2d 100644 --- a/esrally/track/loader.py +++ b/esrally/track/loader.py @@ -301,8 +301,7 @@ def track_names(self): return [self.track_name] def track_dir(self, track_name): - assert track_name == self.track_name, "Expect provided track name [%s] to match [%s]" % ( - track_name, self.track_name) + assert track_name == self.track_name, "Expect provided track name [%s] to match [%s]" % (track_name, self.track_name) return self._track_dir def track_file(self, track_name): @@ -344,8 +343,7 @@ def prepare_track(t, cfg): test_mode = cfg.opts("track", "test.mode.enabled") for corpus in used_corpora(t, cfg): data_root = data_dir(cfg, t.name, corpus.name) - logger.info("Resolved data root directory for document corpus [%s] in track [%s] to %s.", corpus.name, t.name, - data_root) + logger.info("Resolved data root directory for document corpus [%s] in track [%s] to %s.", corpus.name, t.name, data_root) prep = DocumentSetPreparator(t.name, offline, test_mode) for document_set in corpus.documents: @@ -382,9 +380,8 @@ def decompress(self, archive_path, documents_path, uncompressed_size): io.decompress(archive_path, io.dirname(archive_path)) console.println("[OK]") if not os.path.isfile(documents_path): - raise exceptions.DataError( - "Decompressing [%s] did not create [%s]. Please check with the track author if the compressed " - "archive has been created correctly." % (archive_path, documents_path)) + raise exceptions.DataError("Decompressing [%s] did not create [%s]. Please check with the track author if the compressed " + "archive has been created correctly." % (archive_path, documents_path)) extracted_bytes = os.path.getsize(documents_path) if uncompressed_size is not None and extracted_bytes != uncompressed_size: @@ -398,8 +395,7 @@ def download(self, base_url, target_path, size_in_bytes, detail_on_missing_root_ raise exceptions.DataError("%s and it cannot be downloaded because no base URL is provided." % detail_on_missing_root_url) if self.offline: - raise exceptions.SystemSetupError( - "Cannot find %s. Please disable offline mode and retry again." % target_path) + raise exceptions.SystemSetupError("Cannot find %s. Please disable offline mode and retry again." % target_path) data_url = "%s/%s" % (base_url, file_name) try: @@ -417,9 +413,8 @@ def download(self, base_url, target_path, size_in_bytes, detail_on_missing_root_ self.logger.info("Downloaded data from [%s] to [%s].", data_url, target_path) except urllib.error.HTTPError as e: if e.code == 404 and self.test_mode: - raise exceptions.DataError( - "Track [%s] does not support test mode. Please ask the track author to add it or " - "disable test mode and retry." % self.track_name) + raise exceptions.DataError("Track [%s] does not support test mode. Please ask the track author to add it or " + "disable test mode and retry." % self.track_name) else: msg = "Could not download [%s] to [%s]" % (data_url, target_path) if e.reason: @@ -466,8 +461,7 @@ def prepare_document_set(self, document_set, data_root): :param data_root: The data root directory for this document set. 
""" doc_path = os.path.join(data_root, document_set.document_file) - archive_path = os.path.join(data_root, - document_set.document_archive) if document_set.has_compressed_corpus() else None + archive_path = os.path.join(data_root, document_set.document_archive) if document_set.has_compressed_corpus() else None while True: if self.is_locally_available(doc_path) and \ self.has_expected_size(doc_path, document_set.uncompressed_size_in_bytes): @@ -489,8 +483,7 @@ def prepare_document_set(self, document_set, data_root): # provide a specific error message in case there is no download URL if self.is_locally_available(target_path): # convert expected_size eagerly to a string as it might be None (but in that case we'll never see that error message) - msg = "%s is present but does not have the expected size of %s bytes" % ( - target_path, str(expected_size)) + msg = "%s is present but does not have the expected size of %s bytes" % (target_path, str(expected_size)) else: msg = "%s is missing" % target_path @@ -518,8 +511,7 @@ def prepare_bundled_document_set(self, document_set, data_root): :return: See postcondition. """ doc_path = os.path.join(data_root, document_set.document_file) - archive_path = os.path.join(data_root, - document_set.document_archive) if document_set.has_compressed_corpus() else None + archive_path = os.path.join(data_root, document_set.document_archive) if document_set.has_compressed_corpus() else None while True: if self.is_locally_available(doc_path): @@ -579,8 +571,7 @@ def replace_includes(self, base_path, track_fragment): for glob_pattern in match: full_glob_path = os.path.join(base_path, glob_pattern) sub_source = self.read_glob_files(full_glob_path) - repl[glob_pattern] = self.replace_includes(base_path=io.dirname(full_glob_path), - track_fragment=sub_source) + repl[glob_pattern] = self.replace_includes(base_path=io.dirname(full_glob_path), track_fragment=sub_source) def replstring(matchobj): # matchobj.groups() is a tuple and first element contains the matched group id @@ -692,8 +683,7 @@ def relative_glob(start, f): return render_template(loader=jinja2.FileSystemLoader(base_path), template_source=template_source.assembled_source, template_vars=template_vars, - template_internal_vars=default_internal_template_vars( - glob_helper=lambda f: relative_glob(base_path, f))) + template_internal_vars=default_internal_template_vars(glob_helper=lambda f: relative_glob(base_path, f))) def filter_tasks(t, filters, exclude=False): @@ -773,9 +763,8 @@ def post_process_for_test_mode(t): path, ext = io.splitext(document_set.document_file) document_set.document_file = "%s-1k%s" % (path, ext) else: - raise exceptions.RallyAssertionError( - "Document corpus [%s] has neither compressed nor uncompressed corpus." % - corpus.name) + raise exceptions.RallyAssertionError("Document corpus [%s] has neither compressed nor uncompressed corpus." 
% + corpus.name) # we don't want to check sizes document_set.compressed_size_in_bytes = None @@ -799,13 +788,11 @@ def post_process_for_test_mode(t): if leaf_task.warmup_time_period is not None and leaf_task.warmup_time_period > 0: leaf_task.warmup_time_period = 0 if logger.isEnabledFor(logging.DEBUG): - logger.debug("Resetting warmup time period for [%s] to [%d] seconds.", str(leaf_task), - leaf_task.warmup_time_period) + logger.debug("Resetting warmup time period for [%s] to [%d] seconds.", str(leaf_task), leaf_task.warmup_time_period) if leaf_task.time_period is not None and leaf_task.time_period > 10: leaf_task.time_period = 10 if logger.isEnabledFor(logging.DEBUG): - logger.debug("Resetting measurement time period for [%s] to [%d] seconds.", str(leaf_task), - leaf_task.time_period) + logger.debug("Resetting measurement time period for [%s] to [%d] seconds.", str(leaf_task), leaf_task.time_period) leaf_task.params.pop("target-throughput", None) leaf_task.params.pop("target-interval", None) @@ -865,8 +852,7 @@ def read(self, track_name, track_spec_file, mapping_dir): # involving lines numbers and it also does not bloat Rally's log file so much. tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".json") try: - rendered = render_template_from_file(track_spec_file, self.track_params, - complete_track_params=self.complete_track_params) + rendered = render_template_from_file(track_spec_file, self.track_params, complete_track_params=self.complete_track_params) with open(tmp.name, "wt", encoding="utf-8") as f: f.write(rendered) self.logger.info("Final rendered track for '%s' has been written to '%s'.", track_spec_file, tmp.name) @@ -890,8 +876,7 @@ def read(self, track_name, track_spec_file, mapping_dir): raise TrackSyntaxError(msg) except Exception as e: self.logger.exception("Could not load [%s].", track_spec_file) - msg = "Could not load '{}'. The complete track has been written to '{}' for diagnosis.".format( - track_spec_file, tmp.name) + msg = "Could not load '{}'. The complete track has been written to '{}' for diagnosis.".format(track_spec_file, tmp.name) # Convert to string early on to avoid serialization errors with Jinja exceptions. raise TrackSyntaxError(msg, str(e)) # check the track version before even attempting to validate the JSON format to avoid bogus errors. @@ -899,25 +884,21 @@ def read(self, track_name, track_spec_file, mapping_dir): try: track_version = int(raw_version) except ValueError: - raise exceptions.InvalidSyntax( - "version identifier for track %s must be numeric but was [%s]" % (track_name, str(raw_version))) + raise exceptions.InvalidSyntax("version identifier for track %s must be numeric but was [%s]" % (track_name, str(raw_version))) if TrackFileReader.MINIMUM_SUPPORTED_TRACK_VERSION > track_version: - raise exceptions.RallyError( - "Track {} is on version {} but needs to be updated at least to version {} to work with the " - "current version of Rally.".format(track_name, track_version, - TrackFileReader.MINIMUM_SUPPORTED_TRACK_VERSION)) + raise exceptions.RallyError("Track {} is on version {} but needs to be updated at least to version {} to work with the " + "current version of Rally.".format(track_name, track_version, + TrackFileReader.MINIMUM_SUPPORTED_TRACK_VERSION)) if TrackFileReader.MAXIMUM_SUPPORTED_TRACK_VERSION < track_version: - raise exceptions.RallyError( - "Track {} requires a newer version of Rally. 
Please upgrade Rally (supported track version: {}, " - "required track version: {}).".format(track_name, TrackFileReader.MAXIMUM_SUPPORTED_TRACK_VERSION, - track_version)) + raise exceptions.RallyError("Track {} requires a newer version of Rally. Please upgrade Rally (supported track version: {}, " + "required track version: {}).".format(track_name, TrackFileReader.MAXIMUM_SUPPORTED_TRACK_VERSION, + track_version)) try: jsonschema.validate(track_spec, self.track_schema) except jsonschema.exceptions.ValidationError as ve: raise TrackSyntaxError( "Track '{}' is invalid.\n\nError details: {}\nInstance: {}\nPath: {}\nSchema path: {}".format( - track_name, ve.message, json.dumps(ve.instance, indent=4, sort_keys=True), ve.absolute_path, - ve.absolute_schema_path)) + track_name, ve.message, json.dumps(ve.instance, indent=4, sort_keys=True), ve.absolute_path, ve.absolute_schema_path)) current_track = self.read_track(track_name, track_spec, mapping_dir) @@ -1055,8 +1036,7 @@ def _create_index(self, index_spec, mapping_dir): else: body = None - return track.Index(name=index_name, body=body, - types=self._r(index_spec, "types", mandatory=False, default_value=[])) + return track.Index(name=index_name, body=body, types=self._r(index_spec, "types", mandatory=False, default_value=[])) def _create_data_stream(self, data_stream_spec): return track.DataStream(name=self._r(data_stream_spec, "name")) @@ -1214,9 +1194,8 @@ def _create_challenges(self, track_spec): "explicit call to the cluster settings API.".format(self.name), logger=self.logger) if default and default_challenge is not None: - self._error( - "Both '%s' and '%s' are defined as default challenges. Please define only one of them as default." - % (default_challenge.name, name)) + self._error("Both '%s' and '%s' are defined as default challenges. Please define only one of them as default." + % (default_challenge.name, name)) if name in known_challenge_names: self._error("Duplicate challenge with name '%s'." % name) known_challenge_names.add(name) @@ -1235,9 +1214,8 @@ def _create_challenges(self, track_spec): for task in schedule: for sub_task in task: if sub_task.name in known_task_names: - self._error( - "Challenge '%s' contains multiple tasks with the name '%s'. Please use the task's name property to " - "assign a unique name for each task." % (name, sub_task.name)) + self._error("Challenge '%s' contains multiple tasks with the name '%s'. Please use the task's name property to " + "assign a unique name for each task." % (name, sub_task.name)) else: known_task_names.add(sub_task.name) @@ -1255,9 +1233,8 @@ def _create_challenges(self, track_spec): challenges.append(challenge) if challenges and default_challenge is None: - self._error( - "No default challenge specified. Please edit the track and add \"default\": true to one of the challenges %s." - % ", ".join([c.name for c in challenges])) + self._error("No default challenge specified. Please edit the track and add \"default\": true to one of the challenges %s." 
+ % ", ".join([c.name for c in challenges])) return challenges def _get_challenge_specs(self, track_spec): @@ -1270,8 +1247,7 @@ def _get_challenge_specs(self, track_spec): if count_defined == 0: self._error("You must define 'challenge', 'challenges' or 'schedule' but none is specified.") elif count_defined > 1: - self._error( - "Multiple out of 'challenge', 'challenges' or 'schedule' are defined but only one of them is allowed.") + self._error("Multiple out of 'challenge', 'challenges' or 'schedule' are defined but only one of them is allowed.") elif challenge is not None: return [challenge], False elif challenges is not None: @@ -1282,8 +1258,7 @@ def _get_challenge_specs(self, track_spec): "schedule": schedule }], True else: - raise AssertionError( - "Unexpected: schedule=[{}], challenge=[{}], challenges=[{}]".format(schedule, challenge, challenges)) + raise AssertionError("Unexpected: schedule=[{}], challenge=[{}], challenges=[{}]".format(schedule, challenge, challenges)) def parse_parallel(self, ops_spec, ops, challenge_name): # use same default values as #parseTask() in case the 'parallel' element did not specify anything @@ -1305,13 +1280,11 @@ def parse_parallel(self, ops_spec, ops, challenge_name): if task.completes_parent and not completion_task: completion_task = task elif task.completes_parent: - self._error( - "'parallel' element for challenge '%s' contains multiple tasks with the name '%s' which are marked with " - "'completed-by' but only task is allowed to match." % (challenge_name, completed_by)) + self._error("'parallel' element for challenge '%s' contains multiple tasks with the name '%s' which are marked with " + "'completed-by' but only task is allowed to match." % (challenge_name, completed_by)) if not completion_task: - self._error( - "'parallel' element for challenge '%s' is marked with 'completed-by' with task name '%s' but no task with " - "this name exists." % (challenge_name, completed_by)) + self._error("'parallel' element for challenge '%s' is marked with 'completed-by' with task name '%s' but no task with " + "this name exists." % (challenge_name, completed_by)) return track.Parallel(tasks, clients) def parse_task(self, task_spec, ops, challenge_name, default_warmup_iterations=None, default_iterations=None, @@ -1331,8 +1304,7 @@ def parse_task(self, task_spec, ops, challenge_name, default_warmup_iterations=N meta_data=self._r(task_spec, "meta", error_ctx=op.name, mandatory=False), warmup_iterations=self._r(task_spec, "warmup-iterations", error_ctx=op.name, mandatory=False, default_value=default_warmup_iterations), - iterations=self._r(task_spec, "iterations", error_ctx=op.name, mandatory=False, - default_value=default_iterations), + iterations=self._r(task_spec, "iterations", error_ctx=op.name, mandatory=False, default_value=default_iterations), warmup_time_period=self._r(task_spec, "warmup-time-period", error_ctx=op.name, mandatory=False, default_value=default_warmup_time_period), @@ -1344,15 +1316,11 @@ def parse_task(self, task_spec, ops, challenge_name, default_warmup_iterations=N # this is to provide scheduler-specific parameters for custom schedulers. params=task_spec) if task.warmup_iterations is not None and task.time_period is not None: - self._error( - "Operation '%s' in challenge '%s' defines '%d' warmup iterations and a time period of '%d' seconds. Please do not " - "mix time periods and iterations." 
% ( - op.name, challenge_name, task.warmup_iterations, task.time_period)) + self._error("Operation '%s' in challenge '%s' defines '%d' warmup iterations and a time period of '%d' seconds. Please do not " + "mix time periods and iterations." % (op.name, challenge_name, task.warmup_iterations, task.time_period)) elif task.warmup_time_period is not None and task.iterations is not None: - self._error( - "Operation '%s' in challenge '%s' defines a warmup time period of '%d' seconds and '%d' iterations. Please do not " - "mix time periods and iterations." % ( - op.name, challenge_name, task.warmup_time_period, task.iterations)) + self._error("Operation '%s' in challenge '%s' defines a warmup time period of '%d' seconds and '%d' iterations. Please do not " + "mix time periods and iterations." % (op.name, challenge_name, task.warmup_time_period, task.iterations)) return task @@ -1397,7 +1365,6 @@ def parse_operation(self, op_spec, error_ctx="operations"): op_type = op_type_name try: - return track.Operation(name=op_name, meta_data=meta_data, operation_type=op_type, params=params, - param_source=param_source) + return track.Operation(name=op_name, meta_data=meta_data, operation_type=op_type, params=params, param_source=param_source) except exceptions.InvalidSyntax as e: raise TrackSyntaxError("Invalid operation [%s]: %s" % (op_name, str(e))) diff --git a/esrally/track/params.py b/esrally/track/params.py index bbc26a691..18f9c7352 100644 --- a/esrally/track/params.py +++ b/esrally/track/params.py @@ -513,7 +513,7 @@ def __init__(self, track, params, **kwargs): else: raise exceptions.InvalidSyntax("Unknown 'conflicts' setting [%s]" % id_conflicts) - if params.get("data-streams", None) and self.id_conflicts != IndexIdConflict.NoConflicts: + if "data-streams" in params and self.id_conflicts != IndexIdConflict.NoConflicts: raise exceptions.InvalidSyntax("'conflicts' cannot be used with 'data-streams'") if self.id_conflicts != IndexIdConflict.NoConflicts: diff --git a/tests/track/loader_test.py b/tests/track/loader_test.py index e5febbaa2..f08982f06 100644 --- a/tests/track/loader_test.py +++ b/tests/track/loader_test.py @@ -79,8 +79,7 @@ def test_track_from_named_pipe(self, is_file, is_dir, path_exists): with self.assertRaises(exceptions.SystemSetupError) as ctx: loader.SimpleTrackRepository("a named pipe cannot point to a track") - self.assertEqual("a named pipe cannot point to a track is neither a file nor a directory", - ctx.exception.args[0]) + self.assertEqual("a named pipe cannot point to a track is neither a file nor a directory", ctx.exception.args[0]) @mock.patch("os.path.exists") def test_track_from_non_existing_path(self, path_exists): @@ -200,8 +199,7 @@ def test_raise_error_on_wrong_uncompressed_file_size(self, is_file, get_size, de compressed_size_in_bytes=200, uncompressed_size_in_bytes=2000), data_root="/tmp") - self.assertEqual("[/tmp/docs.json] is corrupt. Extracted [1] bytes but [2000] bytes are expected.", - ctx.exception.args[0]) + self.assertEqual("[/tmp/docs.json] is corrupt. Extracted [1] bytes but [2000] bytes are expected.", ctx.exception.args[0]) decompress.assert_called_with("/tmp/docs.json.bz2", "/tmp") @@ -227,9 +225,8 @@ def test_raise_error_if_compressed_does_not_contain_expected_document_file(self, compressed_size_in_bytes=200, uncompressed_size_in_bytes=2000), data_root="/tmp") - self.assertEqual( - "Decompressing [/tmp/docs.json.bz2] did not create [/tmp/docs.json]. 
Please check with the track author if the " - "compressed archive has been created correctly.", ctx.exception.args[0]) + self.assertEqual("Decompressing [/tmp/docs.json.bz2] did not create [/tmp/docs.json]. Please check with the track author if the " + "compressed archive has been created correctly.", ctx.exception.args[0]) decompress.assert_called_with("/tmp/docs.json.bz2", "/tmp") @@ -279,8 +276,7 @@ def test_download_document_archive_if_no_file_available(self, is_file, get_size, @mock.patch("esrally.utils.io.ensure_dir") @mock.patch("os.path.getsize") @mock.patch("os.path.isfile") - def test_download_document_file_if_no_file_available(self, is_file, get_size, ensure_dir, download, - prepare_file_offset_table): + def test_download_document_file_if_no_file_available(self, is_file, get_size, ensure_dir, download, prepare_file_offset_table): # uncompressed file does not exist # file check for uncompressed file before download attempt (for potential error message) # after download uncompressed file exists @@ -325,8 +321,7 @@ def test_raise_download_error_if_offline(self, is_file, ensure_dir, download): uncompressed_size_in_bytes=2000), data_root="/tmp") - self.assertEqual("Cannot find /tmp/docs.json. Please disable offline mode and retry again.", - ctx.exception.args[0]) + self.assertEqual("Cannot find /tmp/docs.json. Please disable offline mode and retry again.", ctx.exception.args[0]) self.assertEqual(0, ensure_dir.call_count) self.assertEqual(0, download.call_count) @@ -374,9 +369,8 @@ def test_raise_download_error_if_no_url_provided_and_wrong_file_size(self, is_fi uncompressed_size_in_bytes=2000), data_root="/tmp") - self.assertEqual( - "/tmp/docs.json is present but does not have the expected size of 2000 bytes and it cannot be downloaded because " - "no base URL is provided.", ctx.exception.args[0]) + self.assertEqual("/tmp/docs.json is present but does not have the expected size of 2000 bytes and it cannot be downloaded because " + "no base URL is provided.", ctx.exception.args[0]) self.assertEqual(0, ensure_dir.call_count) self.assertEqual(0, download.call_count) @@ -390,9 +384,8 @@ def test_raise_download_error_no_test_mode_file(self, is_file, ensure_dir, downl # uncompressed file does not exist is_file.return_value = False - download.side_effect = urllib.error.HTTPError( - "http://benchmarks.elasticsearch.org.s3.amazonaws.com/corpora/unit-test/docs-1k.json", - 404, "", None, None) + download.side_effect = urllib.error.HTTPError("http://benchmarks.elasticsearch.org.s3.amazonaws.com/corpora/unit-test/docs-1k.json", + 404, "", None, None) p = loader.DocumentSetPreparator(track_name="unit-test", offline=False, test_mode=True) @@ -404,9 +397,8 @@ def test_raise_download_error_no_test_mode_file(self, is_file, ensure_dir, downl uncompressed_size_in_bytes=None), data_root="/tmp") - self.assertEqual( - "Track [unit-test] does not support test mode. Please ask the track author to add it or disable test mode " - "and retry.", ctx.exception.args[0]) + self.assertEqual("Track [unit-test] does not support test mode. 
Please ask the track author to add it or disable test mode " + "and retry.", ctx.exception.args[0]) ensure_dir.assert_called_with("/tmp") download.assert_called_with("http://benchmarks.elasticsearch.org/corpora/unit-test/docs-1k.json", @@ -445,8 +437,7 @@ def test_raise_download_error_on_connection_problems(self, is_file, ensure_dir, @mock.patch("esrally.utils.io.decompress") @mock.patch("os.path.getsize") @mock.patch("os.path.isfile") - def test_prepare_bundled_document_set_if_document_file_available(self, is_file, get_size, decompress, - prepare_file_offset_table): + def test_prepare_bundled_document_set_if_document_file_available(self, is_file, get_size, decompress, prepare_file_offset_table): is_file.return_value = True # check only uncompressed get_size.side_effect = [2000] @@ -454,14 +445,13 @@ def test_prepare_bundled_document_set_if_document_file_available(self, is_file, p = loader.DocumentSetPreparator(track_name="unit-test", offline=False, test_mode=False) - self.assertTrue(p.prepare_bundled_document_set( - document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, - document_file="docs.json", - document_archive="docs.json.bz2", - number_of_documents=5, - compressed_size_in_bytes=200, - uncompressed_size_in_bytes=2000), - data_root=".")) + self.assertTrue(p.prepare_bundled_document_set(document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, + document_file="docs.json", + document_archive="docs.json.bz2", + number_of_documents=5, + compressed_size_in_bytes=200, + uncompressed_size_in_bytes=2000), + data_root=".")) prepare_file_offset_table.assert_called_with("./docs.json") @@ -469,21 +459,19 @@ def test_prepare_bundled_document_set_if_document_file_available(self, is_file, @mock.patch("esrally.utils.io.decompress") @mock.patch("os.path.getsize") @mock.patch("os.path.isfile") - def test_prepare_bundled_document_set_does_nothing_if_no_document_files(self, is_file, get_size, decompress, - prepare_file_offset_table): + def test_prepare_bundled_document_set_does_nothing_if_no_document_files(self, is_file, get_size, decompress, prepare_file_offset_table): # no files present is_file.return_value = False p = loader.DocumentSetPreparator(track_name="unit-test", offline=False, test_mode=False) - self.assertFalse(p.prepare_bundled_document_set( - document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, - document_file="docs.json", - document_archive="docs.json.bz2", - number_of_documents=5, - compressed_size_in_bytes=200, - uncompressed_size_in_bytes=2000), - data_root=".")) + self.assertFalse(p.prepare_bundled_document_set(document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, + document_file="docs.json", + document_archive="docs.json.bz2", + number_of_documents=5, + compressed_size_in_bytes=200, + uncompressed_size_in_bytes=2000), + data_root=".")) self.assertEqual(0, decompress.call_count) self.assertEqual(0, prepare_file_offset_table.call_count) @@ -620,15 +608,13 @@ def test_used_corpora(self): {d.document_archive for d in used_corpora[0].documents}) self.assertEqual("http_logs_unparsed", used_corpora[1].name) - self.assertEqual({"documents-201998.unparsed.json.bz2"}, - {d.document_archive for d in used_corpora[1].documents}) + self.assertEqual({"documents-201998.unparsed.json.bz2"}, {d.document_archive for d in used_corpora[1].documents}) @mock.patch("esrally.utils.io.prepare_file_offset_table") @mock.patch("esrally.utils.io.decompress") @mock.patch("os.path.getsize") @mock.patch("os.path.isfile") 
- def test_prepare_bundled_document_set_decompresses_compressed_docs(self, is_file, get_size, decompress, - prepare_file_offset_table): + def test_prepare_bundled_document_set_decompresses_compressed_docs(self, is_file, get_size, decompress, prepare_file_offset_table): # uncompressed is missing # decompressed is present # check if uncompressed is present after decompression @@ -642,14 +628,13 @@ def test_prepare_bundled_document_set_decompresses_compressed_docs(self, is_file p = loader.DocumentSetPreparator(track_name="unit-test", offline=False, test_mode=False) - self.assertTrue(p.prepare_bundled_document_set( - document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, - document_file="docs.json", - document_archive="docs.json.bz2", - number_of_documents=5, - compressed_size_in_bytes=200, - uncompressed_size_in_bytes=2000), - data_root=".")) + self.assertTrue(p.prepare_bundled_document_set(document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, + document_file="docs.json", + document_archive="docs.json.bz2", + number_of_documents=5, + compressed_size_in_bytes=200, + uncompressed_size_in_bytes=2000), + data_root=".")) prepare_file_offset_table.assert_called_with("./docs.json") @@ -665,14 +650,13 @@ def test_prepare_bundled_document_set_error_compressed_docs_wrong_size(self, is_ p = loader.DocumentSetPreparator(track_name="unit-test", offline=False, test_mode=False) with self.assertRaises(exceptions.DataError) as ctx: - p.prepare_bundled_document_set( - document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, - document_file="docs.json", - document_archive="docs.json.bz2", - number_of_documents=5, - compressed_size_in_bytes=200, - uncompressed_size_in_bytes=2000), - data_root=".") + p.prepare_bundled_document_set(document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, + document_file="docs.json", + document_archive="docs.json.bz2", + number_of_documents=5, + compressed_size_in_bytes=200, + uncompressed_size_in_bytes=2000), + data_root=".") self.assertEqual("./docs.json.bz2 is present but does not have the expected size of 200 bytes.", ctx.exception.args[0]) @@ -681,8 +665,7 @@ def test_prepare_bundled_document_set_error_compressed_docs_wrong_size(self, is_ @mock.patch("esrally.utils.io.decompress") @mock.patch("os.path.getsize") @mock.patch("os.path.isfile") - def test_prepare_bundled_document_set_uncompressed_docs_wrong_size(self, is_file, get_size, decompress, - prepare_file_offset_table): + def test_prepare_bundled_document_set_uncompressed_docs_wrong_size(self, is_file, get_size, decompress, prepare_file_offset_table): # uncompressed is present is_file.side_effect = [True] # uncompressed @@ -691,16 +674,14 @@ def test_prepare_bundled_document_set_uncompressed_docs_wrong_size(self, is_file p = loader.DocumentSetPreparator(track_name="unit-test", offline=False, test_mode=False) with self.assertRaises(exceptions.DataError) as ctx: - p.prepare_bundled_document_set( - document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, - document_file="docs.json", - document_archive="docs.json.bz2", - number_of_documents=5, - compressed_size_in_bytes=200, - uncompressed_size_in_bytes=2000), - data_root=".") - self.assertEqual("./docs.json is present but does not have the expected size of 2000 bytes.", - ctx.exception.args[0]) + p.prepare_bundled_document_set(document_set=track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, + document_file="docs.json", + 
document_archive="docs.json.bz2", + number_of_documents=5, + compressed_size_in_bytes=200, + uncompressed_size_in_bytes=2000), + data_root=".") + self.assertEqual("./docs.json is present but does not have the expected size of 2000 bytes.", ctx.exception.args[0]) self.assertEqual(0, prepare_file_offset_table.call_count) @@ -819,8 +800,7 @@ def test_render_simple_template(self): } """ - rendered = loader.render_template(template, - template_internal_vars=TemplateRenderTests.unittest_template_internal_vars) + rendered = loader.render_template(template, template_internal_vars=TemplateRenderTests.unittest_template_internal_vars) expected = """ { @@ -1220,7 +1200,7 @@ def test_post_processes_track_spec(self): } complete_track_params = loader.CompleteTrackParams() - index_body = '{"settings": {"index.number_of_shards": {{ number_of_shards | default(5) }}, ' \ + index_body = '{"settings": {"index.number_of_shards": {{ number_of_shards | default(5) }}, '\ '"index.number_of_replicas": {{ number_of_replicas | default(0)}} }}' self.assertEqual( @@ -1258,8 +1238,7 @@ def test_sets_absolute_path(self, path_exists): cfg.add(config.Scope.application, "benchmarks", "local.dataset.cache", "/data") default_challenge = track.Challenge("default", default=True, schedule=[ - track.Task(name="index", operation=track.Operation("index", operation_type=track.OperationType.Bulk), - clients=4) + track.Task(name="index", operation=track.Operation("index", operation_type=track.OperationType.Bulk), clients=4) ]) another_challenge = track.Challenge("other", default=False) t = track.Track(name="u", challenges=[another_challenge, default_challenge], @@ -1295,8 +1274,7 @@ def test_rejects_invalid_syntax(self): def test_rejects_unknown_filter_type(self): with self.assertRaises(exceptions.SystemSetupError) as ctx: loader.filters_from_filtered_tasks(["valid", "op-type:index"]) - self.assertEqual("Invalid format for filtered tasks: [op-type:index]. Expected [type] but got [op-type].", - ctx.exception.args[0]) + self.assertEqual("Invalid format for filtered tasks: [op-type:index]. Expected [type] but got [op-type].", ctx.exception.args[0]) def test_filters_tasks(self): track_specification = { @@ -1370,10 +1348,10 @@ def test_filters_tasks(self): self.assertEqual(4, len(full_track.challenges[0].schedule)) filtered = loader.filter_tasks(full_track, [track.TaskNameFilter("index-3"), - track.TaskOpTypeFilter("search"), - # Filtering should also work for non-core operation types. - track.TaskOpTypeFilter("custom-operation-type") - ]) + track.TaskOpTypeFilter("search"), + # Filtering should also work for non-core operation types. 
+ track.TaskOpTypeFilter("custom-operation-type") + ]) schedule = filtered.challenges[0].schedule self.assertEqual(3, len(schedule)) @@ -1452,12 +1430,11 @@ def test_filters_exclude_tasks(self): full_track = reader("unittest", track_specification, "/mappings") self.assertEqual(4, len(full_track.challenges[0].schedule)) - filtered = loader.filter_tasks(full_track, [track.TaskNameFilter("index-3"), track.TaskOpTypeFilter("search")], - exclude=True) + filtered = loader.filter_tasks(full_track, [track.TaskNameFilter("index-3"), track.TaskOpTypeFilter("search")], exclude=True) schedule = filtered.challenges[0].schedule self.assertEqual(3, len(schedule)) - self.assertEqual(["index-1", 'index-2'], [t.name for t in schedule[0].tasks]) + self.assertEqual(["index-1",'index-2'], [t.name for t in schedule[0].tasks]) self.assertEqual("node-stats", schedule[1].name) self.assertEqual("cluster-stats", schedule[2].name) @@ -1496,8 +1473,7 @@ def test_document_count_mandatory_if_file_present(self): { "name": "test", "base-url": "https://localhost/data", - "documents": [{"source-file": "documents-main.json.bz2"} - ] + "documents": [{"source-file": "documents-main.json.bz2"}] } ], "challenges": [] @@ -1505,8 +1481,7 @@ def test_document_count_mandatory_if_file_present(self): reader = loader.TrackSpecificationReader() with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual("Track 'unittest' is invalid. Mandatory element 'document-count' is missing.", - ctx.exception.args[0]) + self.assertEqual("Track 'unittest' is invalid. Mandatory element 'document-count' is missing.", ctx.exception.args[0]) @mock.patch("esrally.track.loader.register_all_params_in_track") def test_parse_with_mixed_warmup_iterations_and_measurement(self, mocked_params_checker): @@ -1560,10 +1535,9 @@ def test_parse_with_mixed_warmup_iterations_and_measurement(self, mocked_params_ })) with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual( - "Track 'unittest' is invalid. Operation 'index-append' in challenge 'default-challenge' defines '3' warmup " - "iterations and a time period of '60' seconds. Please do not mix time periods and iterations.", - ctx.exception.args[0]) + self.assertEqual("Track 'unittest' is invalid. Operation 'index-append' in challenge 'default-challenge' defines '3' warmup " + "iterations and a time period of '60' seconds. Please do not mix time periods and iterations.", + ctx.exception.args[0]) @mock.patch("esrally.track.loader.register_all_params_in_track") def test_parse_missing_challenge_or_challenges(self, mocked_params_checker): @@ -1596,9 +1570,8 @@ def test_parse_missing_challenge_or_challenges(self, mocked_params_checker): })) with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual( - "Track 'unittest' is invalid. You must define 'challenge', 'challenges' or 'schedule' but none is specified.", - ctx.exception.args[0]) + self.assertEqual("Track 'unittest' is invalid. 
You must define 'challenge', 'challenges' or 'schedule' but none is specified.", + ctx.exception.args[0]) @mock.patch("esrally.track.loader.register_all_params_in_track") def test_parse_challenge_and_challenges_are_defined(self, mocked_params_checker): @@ -1633,9 +1606,8 @@ def test_parse_challenge_and_challenges_are_defined(self, mocked_params_checker) })) with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual( - "Track 'unittest' is invalid. Multiple out of 'challenge', 'challenges' or 'schedule' are defined but only " - "one of them is allowed.", ctx.exception.args[0]) + self.assertEqual("Track 'unittest' is invalid. Multiple out of 'challenge', 'challenges' or 'schedule' are defined but only " + "one of them is allowed.", ctx.exception.args[0]) @mock.patch("esrally.track.loader.register_all_params_in_track") def test_parse_with_mixed_warmup_time_period_and_iterations(self, mocked_params_checker): @@ -1689,10 +1661,9 @@ def test_parse_with_mixed_warmup_time_period_and_iterations(self, mocked_params_ })) with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual( - "Track 'unittest' is invalid. Operation 'index-append' in challenge 'default-challenge' defines a warmup time " - "period of '20' seconds and '1000' iterations. Please do not mix time periods and iterations.", - ctx.exception.args[0]) + self.assertEqual("Track 'unittest' is invalid. Operation 'index-append' in challenge 'default-challenge' defines a warmup time " + "period of '20' seconds and '1000' iterations. Please do not mix time periods and iterations.", + ctx.exception.args[0]) def test_parse_duplicate_implicit_task_names(self): track_specification = { @@ -1721,10 +1692,9 @@ def test_parse_duplicate_implicit_task_names(self): reader = loader.TrackSpecificationReader() with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual( - "Track 'unittest' is invalid. Challenge 'default-challenge' contains multiple tasks with the name 'search'. Please" - " use the task's name property to assign a unique name for each task.", - ctx.exception.args[0]) + self.assertEqual("Track 'unittest' is invalid. Challenge 'default-challenge' contains multiple tasks with the name 'search'. Please" + " use the task's name property to assign a unique name for each task.", + ctx.exception.args[0]) def test_parse_duplicate_explicit_task_names(self): track_specification = { @@ -1755,10 +1725,9 @@ def test_parse_duplicate_explicit_task_names(self): reader = loader.TrackSpecificationReader() with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual( - "Track 'unittest' is invalid. Challenge 'default-challenge' contains multiple tasks with the name " - "'duplicate-task-name'. Please use the task's name property to assign a unique name for each task.", - ctx.exception.args[0]) + self.assertEqual("Track 'unittest' is invalid. Challenge 'default-challenge' contains multiple tasks with the name " + "'duplicate-task-name'. 
Please use the task's name property to assign a unique name for each task.", + ctx.exception.args[0]) @mock.patch("esrally.track.loader.register_all_params_in_track") def test_load_invalid_index_body(self, mocked_params_checker): @@ -1811,8 +1780,7 @@ def test_load_invalid_index_body(self, mocked_params_checker): })) with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual("Could not load file template for 'definition for index index-historical in body.json'", - ctx.exception.args[0]) + self.assertEqual("Could not load file template for 'definition for index index-historical in body.json'", ctx.exception.args[0]) def test_parse_unique_task_names(self): track_specification = { @@ -2547,8 +2515,7 @@ def test_unique_challenge_names(self): reader = loader.TrackSpecificationReader() with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual("Track 'unittest' is invalid. Duplicate challenge with name 'test-challenge'.", - ctx.exception.args[0]) + self.assertEqual("Track 'unittest' is invalid. Duplicate challenge with name 'test-challenge'.", ctx.exception.args[0]) def test_not_more_than_one_default_challenge_possible(self): track_specification = { @@ -2587,9 +2554,8 @@ def test_not_more_than_one_default_challenge_possible(self): reader = loader.TrackSpecificationReader() with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual( - "Track 'unittest' is invalid. Both 'default-challenge' and 'another-challenge' are defined as default challenges. " - "Please define only one of them as default.", ctx.exception.args[0]) + self.assertEqual("Track 'unittest' is invalid. Both 'default-challenge' and 'another-challenge' are defined as default challenges. " + "Please define only one of them as default.", ctx.exception.args[0]) def test_at_least_one_default_challenge(self): track_specification = { @@ -2624,9 +2590,8 @@ def test_at_least_one_default_challenge(self): reader = loader.TrackSpecificationReader() with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual( - "Track 'unittest' is invalid. No default challenge specified. Please edit the track and add \"default\": true " - "to one of the challenges challenge, another-challenge.", ctx.exception.args[0]) + self.assertEqual("Track 'unittest' is invalid. No default challenge specified. Please edit the track and add \"default\": true " + "to one of the challenges challenge, another-challenge.", ctx.exception.args[0]) def test_exactly_one_default_challenge(self): track_specification = { @@ -3065,9 +3030,8 @@ def test_parallel_tasks_with_completed_by_set_no_task_matches(self): with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual( - "Track 'unittest' is invalid. 'parallel' element for challenge 'default-challenge' is marked with 'completed-by' " - "with task name 'non-existing-task' but no task with this name exists.", ctx.exception.args[0]) + self.assertEqual("Track 'unittest' is invalid. 
'parallel' element for challenge 'default-challenge' is marked with 'completed-by' " + "with task name 'non-existing-task' but no task with this name exists.", ctx.exception.args[0]) def test_parallel_tasks_with_completed_by_set_multiple_tasks_match(self): track_specification = { @@ -3104,7 +3068,6 @@ def test_parallel_tasks_with_completed_by_set_multiple_tasks_match(self): with self.assertRaises(loader.TrackSyntaxError) as ctx: reader("unittest", track_specification, "/mappings") - self.assertEqual( - "Track 'unittest' is invalid. 'parallel' element for challenge 'default-challenge' contains multiple tasks with " - "the name 'index-1' which are marked with 'completed-by' but only task is allowed to match.", - ctx.exception.args[0]) + self.assertEqual("Track 'unittest' is invalid. 'parallel' element for challenge 'default-challenge' contains multiple tasks with " + "the name 'index-1' which are marked with 'completed-by' but only task is allowed to match.", + ctx.exception.args[0]) diff --git a/tests/track/params_test.py b/tests/track/params_test.py index b801521e3..83833d767 100644 --- a/tests/track/params_test.py +++ b/tests/track/params_test.py @@ -211,8 +211,7 @@ def idx(type_name, id): def conflict(action, type_name, id): if type_name: - return action, '{"%s": {"_index": "test_index", "_type": "%s", "_id": "%s"}}\n' % ( - action, type_name, id) + return action, '{"%s": {"_index": "test_index", "_type": "%s", "_id": "%s"}}\n' % (action, type_name, id) else: return action, '{"%s": {"_index": "test_index", "_id": "%s"}}\n' % (action, id) @@ -626,28 +625,17 @@ def test_calculate_non_multiple_bounds_16_clients(self): num_docs = 16000 clients = 12 self.assertEqual((0, 1333, 1333), params.bounds(num_docs, 0, 0, clients, includes_action_and_meta_data=False)) - self.assertEqual((1333, 1334, 1334), - params.bounds(num_docs, 1, 1, clients, includes_action_and_meta_data=False)) - self.assertEqual((2667, 1333, 1333), - params.bounds(num_docs, 2, 2, clients, includes_action_and_meta_data=False)) - self.assertEqual((4000, 1333, 1333), - params.bounds(num_docs, 3, 3, clients, includes_action_and_meta_data=False)) - self.assertEqual((5333, 1334, 1334), - params.bounds(num_docs, 4, 4, clients, includes_action_and_meta_data=False)) - self.assertEqual((6667, 1333, 1333), - params.bounds(num_docs, 5, 5, clients, includes_action_and_meta_data=False)) - self.assertEqual((8000, 1333, 1333), - params.bounds(num_docs, 6, 6, clients, includes_action_and_meta_data=False)) - self.assertEqual((9333, 1334, 1334), - params.bounds(num_docs, 7, 7, clients, includes_action_and_meta_data=False)) - self.assertEqual((10667, 1333, 1333), - params.bounds(num_docs, 8, 8, clients, includes_action_and_meta_data=False)) - self.assertEqual((12000, 1333, 1333), - params.bounds(num_docs, 9, 9, clients, includes_action_and_meta_data=False)) - self.assertEqual((13333, 1334, 1334), - params.bounds(num_docs, 10, 10, clients, includes_action_and_meta_data=False)) - self.assertEqual((14667, 1333, 1333), - params.bounds(num_docs, 11, 11, clients, includes_action_and_meta_data=False)) + self.assertEqual((1333, 1334, 1334), params.bounds(num_docs, 1, 1, clients, includes_action_and_meta_data=False)) + self.assertEqual((2667, 1333, 1333), params.bounds(num_docs, 2, 2, clients, includes_action_and_meta_data=False)) + self.assertEqual((4000, 1333, 1333), params.bounds(num_docs, 3, 3, clients, includes_action_and_meta_data=False)) + self.assertEqual((5333, 1334, 1334), params.bounds(num_docs, 4, 4, clients, 
includes_action_and_meta_data=False)) + self.assertEqual((6667, 1333, 1333), params.bounds(num_docs, 5, 5, clients, includes_action_and_meta_data=False)) + self.assertEqual((8000, 1333, 1333), params.bounds(num_docs, 6, 6, clients, includes_action_and_meta_data=False)) + self.assertEqual((9333, 1334, 1334), params.bounds(num_docs, 7, 7, clients, includes_action_and_meta_data=False)) + self.assertEqual((10667, 1333, 1333), params.bounds(num_docs, 8, 8, clients, includes_action_and_meta_data=False)) + self.assertEqual((12000, 1333, 1333), params.bounds(num_docs, 9, 9, clients, includes_action_and_meta_data=False)) + self.assertEqual((13333, 1334, 1334), params.bounds(num_docs, 10, 10, clients, includes_action_and_meta_data=False)) + self.assertEqual((14667, 1333, 1333), params.bounds(num_docs, 11, 11, clients, includes_action_and_meta_data=False)) def test_calculate_non_multiple_bounds_6_clients(self): # With 3500 docs and 6 clients, every client needs to read 583.33 docs. We have two lines per doc, which makes it @@ -666,8 +654,7 @@ def test_calculate_bounds_for_multiple_clients_per_worker(self): clients = 8 # four clients per worker, each reads 250 lines self.assertEqual((0, 1000, 1000), params.bounds(num_docs, 0, 3, clients, includes_action_and_meta_data=False)) - self.assertEqual((1000, 1000, 1000), - params.bounds(num_docs, 4, 7, clients, includes_action_and_meta_data=False)) + self.assertEqual((1000, 1000, 1000), params.bounds(num_docs, 4, 7, clients, includes_action_and_meta_data=False)) # four clients per worker, each reads 500 lines (includes action and metadata) self.assertEqual((0, 1000, 2000), params.bounds(num_docs, 0, 3, clients, includes_action_and_meta_data=True)) @@ -738,8 +725,7 @@ def test_create_without_corpora_definition(self): params.BulkIndexParamSource(track=track.Track(name="unit-test"), params={}) self.assertEqual("There is no document corpus definition for track unit-test. 
" - "You must add at least one before making bulk requests to Elasticsearch.", - ctx.exception.args[0]) + "You must add at least one before making bulk requests to Elasticsearch.", ctx.exception.args[0]) def test_create_with_non_numeric_bulk_size(self): corpus = track.DocumentCorpus(name="default", documents=[ @@ -817,9 +803,8 @@ def test_create_with_metadata_in_source_file_but_conflicts(self): "conflicts": "random" }) - self.assertEqual( - "Cannot generate id conflicts [random] as [docs.json.bz2] in document corpus [default] already contains " - "an action and meta-data line.", ctx.exception.args[0]) + self.assertEqual("Cannot generate id conflicts [random] as [docs.json.bz2] in document corpus [default] already contains " + "an action and meta-data line.", ctx.exception.args[0]) def test_create_with_unknown_id_conflicts(self): with self.assertRaises(exceptions.InvalidSyntax) as ctx: @@ -1026,8 +1011,7 @@ def test_raises_exception_if_no_corpus_matches(self): "pipeline": "test-pipeline" }) - self.assertEqual("The provided corpus ['does_not_exist'] does not match any of the corpora ['default'].", - ctx.exception.args[0]) + self.assertEqual("The provided corpus ['does_not_exist'] does not match any of the corpora ['default'].", ctx.exception.args[0]) def test_ingests_all_documents_by_default(self): corpora = [ @@ -1143,8 +1127,7 @@ def test_create_with_conflict_probability_too_high(self): "conflict-probability": 100.1 }) - self.assertEqual("'conflict-probability' must be in the range [0.0, 100.0] but was 100.1", - ctx.exception.args[0]) + self.assertEqual("'conflict-probability' must be in the range [0.0, 100.0] but was 100.1", ctx.exception.args[0]) def test_create_with_conflict_probability_not_numeric(self): with self.assertRaises(exceptions.InvalidSyntax) as ctx: @@ -1177,8 +1160,7 @@ def test_generate_two_bulks(self): bulks = params.bulk_data_based(num_clients=1, start_client_index=0, end_client_index=0, corpora=[corpus], batch_size=5, bulk_size=5, - id_conflicts=params.IndexIdConflict.NoConflicts, conflict_probability=None, - on_conflict=None, + id_conflicts=params.IndexIdConflict.NoConflicts, conflict_probability=None, on_conflict=None, recency=None, pipeline=None, original_params={ "my-custom-parameter": "foo", @@ -1210,18 +1192,18 @@ def test_generate_two_bulks(self): def test_generate_bulks_from_multiple_corpora(self): corpora = [ track.DocumentCorpus(name="default", documents=[ - track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, - number_of_documents=5, - target_index="logs-2018-01", - target_type="docs" - ), - track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, - number_of_documents=5, - target_index="logs-2018-02", - target_type="docs" - ), - - ]), + track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, + number_of_documents=5, + target_index="logs-2018-01", + target_type="docs" + ), + track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, + number_of_documents=5, + target_index="logs-2018-02", + target_type="docs" + ), + + ]), track.DocumentCorpus(name="special", documents=[ track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, number_of_documents=5, @@ -1230,12 +1212,11 @@ def test_generate_bulks_from_multiple_corpora(self): ) ]) - ] + ] bulks = params.bulk_data_based(num_clients=1, start_client_index=0, end_client_index=0, corpora=corpora, batch_size=5, bulk_size=5, - id_conflicts=params.IndexIdConflict.NoConflicts, conflict_probability=None, - on_conflict=None, + id_conflicts=params.IndexIdConflict.NoConflicts, 
conflict_probability=None, on_conflict=None, recency=None, pipeline=None, original_params={ "my-custom-parameter": "foo", @@ -1809,9 +1790,8 @@ def test_delete_index_template_by_name_and_matching_indices_missing_index_patter "template": "default", "delete-matching-indices": True }) - self.assertEqual( - "The property 'index-pattern' is required for delete-index-template if 'delete-matching-indices' is true.", - ctx.exception.args[0]) + self.assertEqual("The property 'index-pattern' is required for delete-index-template if 'delete-matching-indices' is true.", + ctx.exception.args[0]) def test_delete_index_template_from_track(self): tpl1 = track.IndexTemplate(name="metrics", pattern="metrics-*", delete_matching_indices=True, content={ @@ -1903,8 +1883,7 @@ def test_create_without_index(self): } }, operation_name="test_operation") - self.assertEqual("'index' or 'data-stream' is mandatory and is missing for operation 'test_operation'", - ctx.exception.args[0]) + self.assertEqual("'index' or 'data-stream' is mandatory and is missing for operation 'test_operation'", ctx.exception.args[0]) def test_passes_request_parameters(self): index1 = track.Index(name="index1", types=["type1"]) From 5d60e2109a62a961d6fe90ccbba8afcba68fe22d Mon Sep 17 00:00:00 2001 From: Dale McDiarmid Date: Thu, 22 Oct 2020 12:42:04 +0100 Subject: [PATCH 9/9] Formatting changes --- tests/track/loader_test.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/track/loader_test.py b/tests/track/loader_test.py index f08982f06..2795608fd 100644 --- a/tests/track/loader_test.py +++ b/tests/track/loader_test.py @@ -658,8 +658,7 @@ def test_prepare_bundled_document_set_error_compressed_docs_wrong_size(self, is_ uncompressed_size_in_bytes=2000), data_root=".") - self.assertEqual("./docs.json.bz2 is present but does not have the expected size of 200 bytes.", - ctx.exception.args[0]) + self.assertEqual("./docs.json.bz2 is present but does not have the expected size of 200 bytes.", ctx.exception.args[0]) @mock.patch("esrally.utils.io.prepare_file_offset_table") @mock.patch("esrally.utils.io.decompress") @@ -2458,7 +2457,7 @@ def test_parse_valid_track_specification_with_index_template(self): } } """], - })) + })) resulting_track = reader("unittest", track_specification, "/mappings") self.assertEqual( ["index_pattern", "number_of_shards"],