From 2d80949844de04ceb857ee08a71630a166ee6833 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Fri, 15 Apr 2022 15:22:13 +0200 Subject: [PATCH 01/30] Add data_stream_router processor --- docs/reference/ingest/processors.asciidoc | 1 + .../processors/data-stream-router.asciidoc | 43 +++++ .../common/DataStreamRouterProcessor.java | 148 ++++++++++++++++++ .../ingest/common/IngestCommonPlugin.java | 3 +- ...DataStreamRouterProcessorFactoryTests.java | 48 ++++++ .../DataStreamRouterProcessorTests.java | 115 ++++++++++++++ .../300_data_stream_router_processor.yml | 140 +++++++++++++++++ 7 files changed, 497 insertions(+), 1 deletion(-) create mode 100644 docs/reference/ingest/processors/data-stream-router.asciidoc create mode 100644 modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DataStreamRouterProcessor.java create mode 100644 modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DataStreamRouterProcessorFactoryTests.java create mode 100644 modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DataStreamRouterProcessorTests.java create mode 100644 modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/300_data_stream_router_processor.yml diff --git a/docs/reference/ingest/processors.asciidoc b/docs/reference/ingest/processors.asciidoc index 70e3123171640..50ebed445fc3a 100644 --- a/docs/reference/ingest/processors.asciidoc +++ b/docs/reference/ingest/processors.asciidoc @@ -38,6 +38,7 @@ include::processors/circle.asciidoc[] include::processors/community-id.asciidoc[] include::processors/convert.asciidoc[] include::processors/csv.asciidoc[] +include::processors/data-stream-router.asciidoc[] include::processors/date.asciidoc[] include::processors/date-index-name.asciidoc[] include::processors/dissect.asciidoc[] diff --git a/docs/reference/ingest/processors/data-stream-router.asciidoc b/docs/reference/ingest/processors/data-stream-router.asciidoc new file mode 100644 index 
0000000000000..8ef8050c407e0 --- /dev/null +++ b/docs/reference/ingest/processors/data-stream-router.asciidoc @@ -0,0 +1,43 @@ +[[data-stream-router-processor]] +=== Data stream router processor +++++ +Data stream router +++++ + +The `data_stream_router` processor allows to route a document from one data stream to another data stream. +It can use both static values or values from the document to determine the target data stream. + +The name of a data stream is comprised of three parts and looks like this: `<type>-<dataset>-<namespace>`. +See the {fleet-guide}/data-streams.html#data-streams-naming-scheme[data stream naming scheme] documentation for more details. + +NOTE: `data_stream_router` processor can only be used on data streams that follow the data streams naming scheme. +Trying to use this processor on a data stream with a non-compliant name will raise an exception. + +After a `data_stream_router` processor has been executed, all the other processors of the current pipeline are skipped. +This means that at most one `data_stream_router` processor is ever executed within a pipeline, +allowing to define mutually exclusive routing conditions, +similar to a if, else-if, else-if, … condition. + +[[data-stream-router-options]] +.Data stream router options +[options="header"] +|====== +| Name | Required | Default | Description +| `dataset` | no | - | A static value for the dataset part of the data stream name. In addition to the criteria for <<indices-create-api-path-params,index names>>, cannot contain `-` and must be no longer than 100 characters. Example values are `nginx.access` and `nginx.error`. If not set, gets the value of the field `data_stream.dataset` from the document. When using values from the document, the processor replaces invalid characters with `_`. If the option is not set and the document also doesn't contain a corresponding field, it uses the `<dataset>` part of the index name as a fallback. +| `namespace` | no | - | A static value for the namespace part of the data stream name. 
See the criteria for <<indices-create-api-path-params,index names>> for allowed characters. Must be no longer than 100 characters. If not set, gets the value of the field `data_stream.namespace` from the document. When using values from the document, the processor replaces invalid characters with `_`. If the option is not set and the document also doesn't contain a corresponding field, it uses the `<namespace>` part of the index name as a fallback. +include::common-options.asciidoc[] +|====== + +NOTE: It's not possible to change the `type` of the data stream by setting the `data_stream.type` in the document. + +[source,js] +-------------------------------------------------- +{ + "data_stream_router": { + "tag": "nginx", + "if" : "ctx?.log?.file?.path?.contains('nginx')", + "dataset": "nginx" + } +} +-------------------------------------------------- +// NOTCONSOLE diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DataStreamRouterProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DataStreamRouterProcessor.java new file mode 100644 index 0000000000000..037153e049df0 --- /dev/null +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DataStreamRouterProcessor.java @@ -0,0 +1,148 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.common; + +import org.elasticsearch.ingest.AbstractProcessor; +import org.elasticsearch.ingest.ConfigurationUtils; +import org.elasticsearch.ingest.IngestDocument; +import org.elasticsearch.ingest.Processor; + +import java.util.Locale; +import java.util.Map; +import java.util.Objects; + +import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException; + +public final class DataStreamRouterProcessor extends AbstractProcessor { + public static final String TYPE = "data_stream_router"; + + private static final String DATA_STREAM_PREFIX = "data_stream."; + private static final String DATA_STREAM_TYPE = DATA_STREAM_PREFIX + "type"; + private static final String DATA_STREAM_DATASET = DATA_STREAM_PREFIX + "dataset"; + private static final String DATA_STREAM_NAMESPACE = DATA_STREAM_PREFIX + "namespace"; + private static final String EVENT_DATASET = "event.dataset"; + + private static final char[] DISALLOWED_IN_DATASET = new char[] { '\\', '/', '*', '?', '\"', '<', '>', '|', ' ', ',', '#', ':', '-' }; + private static final char[] DISALLOWED_IN_NAMESPACE = new char[] { '\\', '/', '*', '?', '\"', '<', '>', '|', ' ', ',', '#', ':' }; + private static final int MAX_LENGTH = 100; + private static final char REPLACEMENT_CHAR = '_'; + private final String dataset; + private final String namespace; + + DataStreamRouterProcessor(String tag, String description, String dataset, String namespace) { + super(tag, description); + this.dataset = dataset; + this.namespace = namespace; + } + + private static String sanitizeDataStreamField(String s, char[] disallowedInDataset) { + if (s == null) { + return null; + } + s = s.toLowerCase(Locale.ROOT); + s = s.substring(0, Math.min(s.length(), MAX_LENGTH)); + for (char c : disallowedInDataset) { + s = s.replace(c, REPLACEMENT_CHAR); + } + return s; + } + + @Override + public IngestDocument execute(IngestDocument ingestDocument) throws Exception { + final String indexName = 
ingestDocument.getFieldValue(IngestDocument.Metadata.INDEX.getFieldName(), String.class); + final String type; + final String datasetFallback; + final String namespaceFallback; + int indexOfFirstDash = indexName.indexOf('-'); + String illegalDataStreamNameMessage = "invalid data stream name: [" + + indexName + + "]; must follow naming scheme --"; + if (indexOfFirstDash < 0) { + throw new IllegalArgumentException(illegalDataStreamNameMessage); + } + type = indexName.substring(0, indexOfFirstDash); + int indexOfSecondDash = indexName.indexOf('-', indexOfFirstDash + 1); + if (indexOfSecondDash < 0) { + throw new IllegalArgumentException(illegalDataStreamNameMessage); + } + datasetFallback = indexName.substring(indexOfFirstDash + 1, indexOfSecondDash); + namespaceFallback = indexName.substring(indexOfSecondDash + 1); + + String dataset = getDataset(ingestDocument, datasetFallback); + String namespace = getNamespace(ingestDocument, namespaceFallback); + ingestDocument.setFieldValue(DATA_STREAM_TYPE, type); + if (ingestDocument.hasField(EVENT_DATASET)) { + ingestDocument.setFieldValue(EVENT_DATASET, dataset); + } + ingestDocument.setFieldValue(DATA_STREAM_DATASET, dataset); + ingestDocument.setFieldValue(DATA_STREAM_NAMESPACE, namespace); + ingestDocument.setFieldValue(IngestDocument.Metadata.INDEX.getFieldName(), type + "-" + dataset + "-" + namespace); + ingestDocument.skipCurrentPipeline(); + return ingestDocument; + } + + private String getDataset(IngestDocument ingestDocument, String datasetFallback) { + String dataset = this.dataset; + if (dataset == null) { + dataset = sanitizeDataStreamField(ingestDocument.getFieldValue(DATA_STREAM_DATASET, String.class, true), DISALLOWED_IN_DATASET); + } + if (dataset == null) { + dataset = datasetFallback; + } + return dataset; + } + + private String getNamespace(IngestDocument ingestDocument, String namespaceFallback) { + String namespace = this.namespace; + if (namespace == null) { + namespace = sanitizeDataStreamField( + 
ingestDocument.getFieldValue(DATA_STREAM_NAMESPACE, String.class, true), + DISALLOWED_IN_NAMESPACE + ); + } + if (namespace == null) { + namespace = namespaceFallback; + } + return namespace; + } + + @Override + public String getType() { + return TYPE; + } + + public String getDataStreamDataset() { + return dataset; + } + + public String getDataStreamNamespace() { + return namespace; + } + + public static final class Factory implements Processor.Factory { + + @Override + public DataStreamRouterProcessor create( + Map processorFactories, + String tag, + String description, + Map config + ) throws Exception { + String dataset = ConfigurationUtils.readOptionalStringProperty(TYPE, tag, config, "dataset"); + if (Objects.equals(sanitizeDataStreamField(dataset, DISALLOWED_IN_DATASET), dataset) == false) { + throw newConfigurationException(TYPE, tag, "dataset", "contains illegal characters"); + } + String namespace = ConfigurationUtils.readOptionalStringProperty(TYPE, tag, config, "namespace"); + if (Objects.equals(sanitizeDataStreamField(namespace, DISALLOWED_IN_NAMESPACE), namespace) == false) { + throw newConfigurationException(TYPE, tag, "namespace", "contains illegal characters"); + } + return new DataStreamRouterProcessor(tag, description, dataset, namespace); + } + } +} diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java index 4aa57b7928e22..b2f1135ee92fe 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java @@ -86,7 +86,8 @@ public Map getProcessors(Processor.Parameters paramet entry(NetworkDirectionProcessor.TYPE, new NetworkDirectionProcessor.Factory(parameters.scriptService)), entry(CommunityIdProcessor.TYPE, new CommunityIdProcessor.Factory()), 
entry(FingerprintProcessor.TYPE, new FingerprintProcessor.Factory()), - entry(RegisteredDomainProcessor.TYPE, new RegisteredDomainProcessor.Factory()) + entry(RegisteredDomainProcessor.TYPE, new RegisteredDomainProcessor.Factory()), + entry(DataStreamRouterProcessor.TYPE, new DataStreamRouterProcessor.Factory()) ); } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DataStreamRouterProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DataStreamRouterProcessorFactoryTests.java new file mode 100644 index 0000000000000..f9b50a3a8b407 --- /dev/null +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DataStreamRouterProcessorFactoryTests.java @@ -0,0 +1,48 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.common; + +import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.test.ESTestCase; +import org.hamcrest.Matchers; + +import java.util.HashMap; +import java.util.Map; + +import static org.hamcrest.CoreMatchers.nullValue; + +public class DataStreamRouterProcessorFactoryTests extends ESTestCase { + + public void testSuccess() throws Exception { + DataStreamRouterProcessor processor = create(null, null); + assertThat(processor.getDataStreamDataset(), nullValue()); + assertThat(processor.getDataStreamNamespace(), nullValue()); + } + + public void testInvalidDataset() throws Exception { + ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> create("my-service", null)); + assertThat(e.getMessage(), Matchers.equalTo("[dataset] contains illegal characters")); + } + + public void testInvalidNamespace() throws Exception { + ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> create("generic", "foo:bar")); + assertThat(e.getMessage(), Matchers.equalTo("[namespace] contains illegal characters")); + } + + private static DataStreamRouterProcessor create(String dataset, String namespace) throws Exception { + Map config = new HashMap<>(); + if (dataset != null) { + config.put("dataset", dataset); + } + if (namespace != null) { + config.put("namespace", namespace); + } + return new DataStreamRouterProcessor.Factory().create(null, null, null, config); + } +} diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DataStreamRouterProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DataStreamRouterProcessorTests.java new file mode 100644 index 0000000000000..c86aa4bdc5a59 --- /dev/null +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DataStreamRouterProcessorTests.java @@ -0,0 +1,115 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.common; + +import org.elasticsearch.ingest.CompoundProcessor; +import org.elasticsearch.ingest.IngestDocument; +import org.elasticsearch.ingest.Processor; +import org.elasticsearch.ingest.RandomDocumentPicks; +import org.elasticsearch.ingest.WrappingProcessor; +import org.elasticsearch.test.ESTestCase; + +import static org.hamcrest.Matchers.equalTo; + +public class DataStreamRouterProcessorTests extends ESTestCase { + + public void testDefaults() throws Exception { + IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); + + DataStreamRouterProcessor processor = new DataStreamRouterProcessor(null, null, null, null); + processor.execute(ingestDocument); + assertDataSetFields(ingestDocument, "logs", "generic", "default"); + } + + public void testSkipFirstProcessor() throws Exception { + IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); + + DataStreamRouterProcessor skippedProcessor = new DataStreamRouterProcessor(null, null, "skip", null); + DataStreamRouterProcessor executedProcessor = new DataStreamRouterProcessor(null, null, "executed", null); + CompoundProcessor processor = new CompoundProcessor(new SkipProcessor(skippedProcessor), executedProcessor); + processor.execute(ingestDocument); + assertDataSetFields(ingestDocument, "logs", "executed", "default"); + } + + public void testSkipLastProcessor() throws Exception { + IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); + + DataStreamRouterProcessor executedProcessor = new DataStreamRouterProcessor(null, null, "executed", null); + DataStreamRouterProcessor skippedProcessor = new DataStreamRouterProcessor(null, null, 
"skip", null); + CompoundProcessor processor = new CompoundProcessor(executedProcessor, skippedProcessor); + processor.execute(ingestDocument); + assertDataSetFields(ingestDocument, "logs", "executed", "default"); + } + + public void testDataStreamFieldsFromDocument() throws Exception { + IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); + ingestDocument.setFieldValue("data_stream.dataset", "foo"); + ingestDocument.setFieldValue("data_stream.namespace", "bar"); + + DataStreamRouterProcessor processor = new DataStreamRouterProcessor(null, null, null, null); + processor.execute(ingestDocument); + assertDataSetFields(ingestDocument, "logs", "foo", "bar"); + } + + public void testInvalidDataStreamFieldsFromDocument() throws Exception { + IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); + ingestDocument.setFieldValue("data_stream.dataset", "foo-bar"); + ingestDocument.setFieldValue("data_stream.namespace", "baz#qux"); + + DataStreamRouterProcessor processor = new DataStreamRouterProcessor(null, null, null, null); + processor.execute(ingestDocument); + assertDataSetFields(ingestDocument, "logs", "foo_bar", "baz_qux"); + } + + private void assertDataSetFields(IngestDocument ingestDocument, String type, String dataset, String namespace) { + assertThat(ingestDocument.getFieldValue("data_stream.type", String.class), equalTo(type)); + assertThat(ingestDocument.getFieldValue("data_stream.dataset", String.class), equalTo(dataset)); + assertThat(ingestDocument.getFieldValue("data_stream.namespace", String.class), equalTo(namespace)); + assertThat(ingestDocument.getFieldValue("_index", String.class), equalTo(type + "-" + dataset + "-" + namespace)); + } + + private static IngestDocument createIngestDocument(String dataStream) { + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); + ingestDocument.setFieldValue("_index", dataStream); + return ingestDocument; + } + + private static class 
SkipProcessor implements WrappingProcessor { + private final Processor processor; + + SkipProcessor(Processor processor) { + this.processor = processor; + } + + @Override + public IngestDocument execute(IngestDocument ingestDocument) throws Exception { + return ingestDocument; + } + + @Override + public Processor getInnerProcessor() { + return processor; + } + + @Override + public String getType() { + return "skip"; + } + + @Override + public String getTag() { + return null; + } + + @Override + public String getDescription() { + return null; + } + } +} diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/300_data_stream_router_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/300_data_stream_router_processor.yml new file mode 100644 index 0000000000000..5952d279aa6d2 --- /dev/null +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/300_data_stream_router_processor.yml @@ -0,0 +1,140 @@ +--- +teardown: + - do: + ingest.delete_pipeline: + id: "pipeline-with-two-data-stream-processors" + ignore: 404 + - do: + ingest.delete_pipeline: + id: "logs-router-default" + ignore: 404 + - do: + ingest.delete_pipeline: + id: "logs-nginx-default" + ignore: 404 + - do: + indices.delete_index_template: + name: logs-router + ignore: 404 + - do: + indices.delete_index_template: + name: logs-nginx + ignore: 404 + +--- +"Test first matching router terminates pipeline": + - do: + ingest.put_pipeline: + id: "pipeline-with-two-data-stream-processors" + body: > + { + "processors": [ + { + "data_stream_router" : { + "dataset" : "first" + } + }, + { + "data_stream_router" : { + "dataset" : "second" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: logs-generic-default + id: "1" + pipeline: "pipeline-with-two-data-stream-processors" + body: { + foo: "bar" + } + + - do: + get: + index: logs-first-default + id: "1" + - match: { _source.foo: "bar" } +--- +"Test two 
stage routing": + - skip: + features: allowed_warnings + - do: + ingest.put_pipeline: + id: "logs-router" + body: > + { + "processors": [ + { + "data_stream_router" : { + "tag": "nginx", + "if" : "ctx?.log?.file?.path?.contains('nginx')", + "dataset": "nginx" + } + } + ] + } + - match: { acknowledged: true } + - do: + allowed_warnings: + - "index template [logs-router] has index patterns [logs-router-*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [logs-router] will take precedence during new index creation" + indices.put_index_template: + name: logs-router + body: + index_patterns: [ "logs-router-*" ] + template: + settings: + index.default_pipeline: "logs-router" + - do: + ingest.put_pipeline: + id: "logs-nginx" + body: > + { + "processors": [ + { + "data_stream_router": { + "tag": "nginx.access", + "if": "ctx?.log?.file?.path?.contains('access')", + "dataset": "nginx.access" + } + }, + { + "data_stream_router": { + "tag": "nginx.error", + "if": "ctx?.log?.file?.path?.contains('error')", + "dataset": "nginx.error" + } + } + ] + } + - match: { acknowledged: true } + - do: + allowed_warnings: + - "index template [logs-nginx] has index patterns [logs-nginx-*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [logs-nginx] will take precedence during new index creation" + indices.put_index_template: + name: logs-nginx + body: + index_patterns: [ "logs-nginx-*" ] + template: + settings: + index.default_pipeline: "logs-nginx" + + - do: + index: + index: logs-nginx-default + id: "example-log" + op_type: create + body: + "@timestamp": "2022-04-13" + message: "this is an error log" + log: + file: + path: "nginx-error.log" + + - do: + get: + index: logs-nginx.error-default + id: "example-log" + - match: { _source.message: "this is an error log" } From 1ede9bc5ab9fe465b8c8b7185e7b43229b99718a Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Wed, 27 Apr 2022 
13:46:34 +0200 Subject: [PATCH 02/30] Don't override event.dataset --- .../ingest/common/DataStreamRouterProcessor.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DataStreamRouterProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DataStreamRouterProcessor.java index 037153e049df0..e771deeba0919 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DataStreamRouterProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DataStreamRouterProcessor.java @@ -26,8 +26,6 @@ public final class DataStreamRouterProcessor extends AbstractProcessor { private static final String DATA_STREAM_TYPE = DATA_STREAM_PREFIX + "type"; private static final String DATA_STREAM_DATASET = DATA_STREAM_PREFIX + "dataset"; private static final String DATA_STREAM_NAMESPACE = DATA_STREAM_PREFIX + "namespace"; - private static final String EVENT_DATASET = "event.dataset"; - private static final char[] DISALLOWED_IN_DATASET = new char[] { '\\', '/', '*', '?', '\"', '<', '>', '|', ' ', ',', '#', ':', '-' }; private static final char[] DISALLOWED_IN_NAMESPACE = new char[] { '\\', '/', '*', '?', '\"', '<', '>', '|', ' ', ',', '#', ':' }; private static final int MAX_LENGTH = 100; @@ -77,9 +75,6 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { String dataset = getDataset(ingestDocument, datasetFallback); String namespace = getNamespace(ingestDocument, namespaceFallback); ingestDocument.setFieldValue(DATA_STREAM_TYPE, type); - if (ingestDocument.hasField(EVENT_DATASET)) { - ingestDocument.setFieldValue(EVENT_DATASET, dataset); - } ingestDocument.setFieldValue(DATA_STREAM_DATASET, dataset); ingestDocument.setFieldValue(DATA_STREAM_NAMESPACE, namespace); ingestDocument.setFieldValue(IngestDocument.Metadata.INDEX.getFieldName(), type + "-" + dataset + "-" + namespace); From 
39b57eea66f9b8c8adcfe35f1cb97e4bb112a50e Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Mon, 27 Jun 2022 09:43:02 +0200 Subject: [PATCH 03/30] Update docs/changelog/76511.yaml --- docs/changelog/76511.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/76511.yaml diff --git a/docs/changelog/76511.yaml b/docs/changelog/76511.yaml new file mode 100644 index 0000000000000..5e6f26af31074 --- /dev/null +++ b/docs/changelog/76511.yaml @@ -0,0 +1,5 @@ +pr: 76511 +summary: Add `data_stream_router` processor +area: Ingest Node +type: enhancement +issues: [] From 3e25d0e4e5cf5407f523b1d8ddcd192920c84999 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Wed, 22 Feb 2023 08:46:32 +0100 Subject: [PATCH 04/30] Use IngestDocument#redirect --- .../elasticsearch/ingest/common/DataStreamRouterProcessor.java | 3 +-- .../org/elasticsearch/ingest/common/IngestCommonPlugin.java | 2 +- ...uter_processor.yml => 310_data_stream_router_processor.yml} | 0 3 files changed, 2 insertions(+), 3 deletions(-) rename modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/{300_data_stream_router_processor.yml => 310_data_stream_router_processor.yml} (100%) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DataStreamRouterProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DataStreamRouterProcessor.java index e771deeba0919..84a736df95291 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DataStreamRouterProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DataStreamRouterProcessor.java @@ -77,8 +77,7 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { ingestDocument.setFieldValue(DATA_STREAM_TYPE, type); ingestDocument.setFieldValue(DATA_STREAM_DATASET, dataset); ingestDocument.setFieldValue(DATA_STREAM_NAMESPACE, namespace); - 
ingestDocument.setFieldValue(IngestDocument.Metadata.INDEX.getFieldName(), type + "-" + dataset + "-" + namespace); - ingestDocument.skipCurrentPipeline(); + ingestDocument.redirect(type + "-" + dataset + "-" + namespace); return ingestDocument; } diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java index 722f0d56c3a48..d64c7e9cbc6a5 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java @@ -88,7 +88,7 @@ public Map getProcessors(Processor.Parameters paramet entry(CommunityIdProcessor.TYPE, new CommunityIdProcessor.Factory()), entry(FingerprintProcessor.TYPE, new FingerprintProcessor.Factory()), entry(RegisteredDomainProcessor.TYPE, new RegisteredDomainProcessor.Factory()), - entry(RedactProcessor.TYPE, new RedactProcessor.Factory(matcherWatchdog)) + entry(RedactProcessor.TYPE, new RedactProcessor.Factory(matcherWatchdog)), entry(DataStreamRouterProcessor.TYPE, new DataStreamRouterProcessor.Factory()) ); } diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/300_data_stream_router_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/310_data_stream_router_processor.yml similarity index 100% rename from modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/300_data_stream_router_processor.yml rename to modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/310_data_stream_router_processor.yml From c31b51f7ca6af9e7cd0c96c999673b85fa11059c Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Wed, 22 Feb 2023 09:12:36 +0100 Subject: [PATCH 05/30] Extract data stream parsing methods --- .../common/DataStreamRouterProcessor.java | 31 ++++++++++++++----- 1 file changed, 
23 insertions(+), 8 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DataStreamRouterProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DataStreamRouterProcessor.java index 84a736df95291..e949a17be8d14 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DataStreamRouterProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DataStreamRouterProcessor.java @@ -58,19 +58,16 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { final String datasetFallback; final String namespaceFallback; int indexOfFirstDash = indexName.indexOf('-'); - String illegalDataStreamNameMessage = "invalid data stream name: [" - + indexName - + "]; must follow naming scheme --"; if (indexOfFirstDash < 0) { - throw new IllegalArgumentException(illegalDataStreamNameMessage); + throw createInvalidDataStreamNameException(indexName); } - type = indexName.substring(0, indexOfFirstDash); int indexOfSecondDash = indexName.indexOf('-', indexOfFirstDash + 1); if (indexOfSecondDash < 0) { - throw new IllegalArgumentException(illegalDataStreamNameMessage); + throw createInvalidDataStreamNameException(indexName); } - datasetFallback = indexName.substring(indexOfFirstDash + 1, indexOfSecondDash); - namespaceFallback = indexName.substring(indexOfSecondDash + 1); + type = parseDataStreamType(indexName, indexOfFirstDash); + datasetFallback = parseDataStreamDataset(indexName, indexOfFirstDash, indexOfSecondDash); + namespaceFallback = parseDataStreamNamespace(indexName, indexOfSecondDash); String dataset = getDataset(ingestDocument, datasetFallback); String namespace = getNamespace(ingestDocument, namespaceFallback); @@ -81,6 +78,24 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { return ingestDocument; } + private static IllegalArgumentException createInvalidDataStreamNameException(String indexName) { + 
return new IllegalArgumentException( + "invalid data stream name: [" + indexName + "]; must follow naming scheme --" + ); + } + + private static String parseDataStreamType(String dataStreamName, int indexOfFirstDash) { + return dataStreamName.substring(0, indexOfFirstDash); + } + + private static String parseDataStreamDataset(String dataStreamName, int indexOfFirstDash, int indexOfSecondDash) { + return dataStreamName.substring(indexOfFirstDash + 1, indexOfSecondDash); + } + + private static String parseDataStreamNamespace(String dataStreamName, int indexOfSecondDash) { + return dataStreamName.substring(indexOfSecondDash + 1); + } + private String getDataset(IngestDocument ingestDocument, String datasetFallback) { String dataset = this.dataset; if (dataset == null) { From ac5f282b61376c4a4a7383e2c4683ce2a38a9aa6 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Wed, 22 Feb 2023 09:23:39 +0100 Subject: [PATCH 06/30] Add changelog --- docs/changelog/94000.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/94000.yaml diff --git a/docs/changelog/94000.yaml b/docs/changelog/94000.yaml new file mode 100644 index 0000000000000..a67dce242960b --- /dev/null +++ b/docs/changelog/94000.yaml @@ -0,0 +1,5 @@ +pr: 94000 +summary: Introduce redirect method on IngestDocument +area: Ingest Node +type: enhancement +issues: [] From d4bc1ad6181912fc77a3e69d8b2a47110d5834e5 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Wed, 22 Feb 2023 09:24:09 +0100 Subject: [PATCH 07/30] Revert "Add changelog" This reverts commit ac5f282b61376c4a4a7383e2c4683ce2a38a9aa6. 
--- docs/changelog/94000.yaml | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 docs/changelog/94000.yaml diff --git a/docs/changelog/94000.yaml b/docs/changelog/94000.yaml deleted file mode 100644 index a67dce242960b..0000000000000 --- a/docs/changelog/94000.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 94000 -summary: Introduce redirect method on IngestDocument -area: Ingest Node -type: enhancement -issues: [] From c76d5da552cf15bed8b8d8ac8559cceff0fbe87a Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Wed, 1 Mar 2023 09:16:06 +0100 Subject: [PATCH 08/30] Rename processor to reroute and add destination option --- docs/changelog/76511.yaml | 2 +- docs/reference/ingest/processors.asciidoc | 2 +- .../processors/data-stream-router.asciidoc | 43 ------------- .../ingest/processors/reroute.asciidoc | 60 +++++++++++++++++++ .../ingest/common/IngestCommonPlugin.java | 2 +- ...erProcessor.java => RerouteProcessor.java} | 27 +++++++-- ...java => RerouteProcessorFactoryTests.java} | 29 +++++++-- ...rTests.java => RerouteProcessorTests.java} | 28 ++++++--- ...rocessor.yml => 310_reroute_processor.yml} | 10 ++-- 9 files changed, 134 insertions(+), 69 deletions(-) delete mode 100644 docs/reference/ingest/processors/data-stream-router.asciidoc create mode 100644 docs/reference/ingest/processors/reroute.asciidoc rename modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/{DataStreamRouterProcessor.java => RerouteProcessor.java} (85%) rename modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/{DataStreamRouterProcessorFactoryTests.java => RerouteProcessorFactoryTests.java} (57%) rename modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/{DataStreamRouterProcessorTests.java => RerouteProcessorTests.java} (77%) rename modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/{310_data_stream_router_processor.yml => 310_reroute_processor.yml} (94%) diff --git a/docs/changelog/76511.yaml 
b/docs/changelog/76511.yaml index 5e6f26af31074..ef98c99a03f95 100644 --- a/docs/changelog/76511.yaml +++ b/docs/changelog/76511.yaml @@ -1,5 +1,5 @@ pr: 76511 -summary: Add `data_stream_router` processor +summary: Add `reroute` processor area: Ingest Node type: enhancement issues: [] diff --git a/docs/reference/ingest/processors.asciidoc b/docs/reference/ingest/processors.asciidoc index 81a37c96ffde3..d9c3ce1f858d7 100644 --- a/docs/reference/ingest/processors.asciidoc +++ b/docs/reference/ingest/processors.asciidoc @@ -39,7 +39,7 @@ include::processors/circle.asciidoc[] include::processors/community-id.asciidoc[] include::processors/convert.asciidoc[] include::processors/csv.asciidoc[] -include::processors/data-stream-router.asciidoc[] +include::processors/reroute.asciidoc[] include::processors/date.asciidoc[] include::processors/date-index-name.asciidoc[] include::processors/dissect.asciidoc[] diff --git a/docs/reference/ingest/processors/data-stream-router.asciidoc b/docs/reference/ingest/processors/data-stream-router.asciidoc deleted file mode 100644 index 8ef8050c407e0..0000000000000 --- a/docs/reference/ingest/processors/data-stream-router.asciidoc +++ /dev/null @@ -1,43 +0,0 @@ -[[data-stream-router-processor]] -=== Data stream router processor -++++ -Data stream router -++++ - -The `data_stream_router` processor allows to route a document from one data stream to another data stream. -It can use both static values or values from the document to determine the target data stream. - -The name of a data stream is comprised of three parts and looks like this: `--`. -See the {fleet-guide}/data-streams.html#data-streams-naming-scheme[data stream naming scheme] documentation for more details. - -NOTE: `data_stream_router` processor can only be used on data streams that follow the data streams naming scheme. -Trying to use this processor on a data stream with a non-compliant name will raise an exception. 
- -After a `data_stream_router` processor has been executed, all the other processors of the current pipeline are skipped. -This means that at most one `data_stream_router` processor is ever executed within a pipeline, -allowing to define mutually exclusive routing conditions, -similar to a if, else-if, else-if, … condition. - -[[data-stream-router-options]] -.Data stream router options -[options="header"] -|====== -| Name | Required | Default | Description -| `dataset` | no | - | A static value for the dataset part of the data stream name. In addition to the criteria for <>, cannot contain `-` and must be no longer than 100 characters. Example values are `nginx.access` and `nginx.error`. If not set, gets the value of the field `data_stream.dataset` from the document. When using values from the document, the processor replaces invalid characters with `_`. If the option is not set and the document also doesn't contain a corresponding field, it uses the `` part of the index name as a fallback. -| `namespace` | no | - | A static value for the namespace part of the data stream name. See the criteria for <> for allowed characters. Must be no longer than 100 characters. If not set, gets the value of the field `data_stream.namespace` from the document. When using values from the document, the processor replaces invalid characters with `_`. If the option is not set and the document also doesn't contain a corresponding field, it uses the `` part of the index name as a fallback. -include::common-options.asciidoc[] -|====== - -NOTE: It's not possible to change the `type` of the data stream by setting the `data_stream.type` in the document. 
- -[source,js] --------------------------------------------------- -{ - "data_stream_router": { - "tag": "nginx", - "if" : "ctx?.log?.file?.path?.contains('nginx')", - "dataset": "nginx" - } -} --------------------------------------------------- -// NOTCONSOLE diff --git a/docs/reference/ingest/processors/reroute.asciidoc b/docs/reference/ingest/processors/reroute.asciidoc new file mode 100644 index 0000000000000..b4b28928623a9 --- /dev/null +++ b/docs/reference/ingest/processors/reroute.asciidoc @@ -0,0 +1,60 @@ +[[reroute-processor]] +=== Reroute processor +++++ +Reroute +++++ + +The `reroute` processor allows to route a document to another target index or data stream. +It has two main modes: + +When setting the `destination` option, the target is explicitly specified and the `dataset` and `namespace` options can't be set. + +When the `destination` option is not set, this processor is in a data stream mode. +Note that in this mode, the `reroute` processor can only be used on data streams that follow the {fleet-guide}/data-streams.html#data-streams-naming-scheme[data stream naming scheme]. +Trying to use this processor on a data stream with a non-compliant name will raise an exception. + +The name of a data stream is comprised of three parts and looks like this: `--`. +See the {fleet-guide}/data-streams.html#data-streams-naming-scheme[data stream naming scheme] documentation for more details. + +It can use both static values or values from the document to determine the target. +See <> for more details. + +After a `reroute` processor has been executed, all the other processors of the current pipeline are skipped. +If the current pipeline is executed in the context of a <>, the calling pipeline will be skipped, too. +This means that at most one `reroute` processor is ever executed within a pipeline, +allowing to define mutually exclusive routing conditions, +similar to a if, else-if, else-if, … condition. 
+ +Note that the client needs to have permissions to the final target. +Otherwise, the document will be rejected with a security exception which looks like this: + +[source,json] +-------------------------------------------------- +{"type":"security_exception","reason":"action [indices:admin/auto_create] is unauthorized for API key id [8-dt9H8BqGblnY2uSI--] of user [elastic/fleet-server] on indices [logs-foo-default], this action is granted by the index privileges [auto_configure,create_index,manage,all]"} +-------------------------------------------------- +// NOTCONSOLE + +[[reroute-options]] +.Reroute options +[options="header"] +|====== +| Name | Required | Default | Description +| `destination` | no | - | A static value for the target. Can't be set when the `dataset` or `namespace` option is set. +| `dataset` | no | - | A static value for the dataset part of the data stream name. In addition to the criteria for <>, cannot contain `-` and must be no longer than 100 characters. Example values are `nginx.access` and `nginx.error`. If not set, gets the value of the field `data_stream.dataset` from the document. When using values from the document, the processor replaces invalid characters with `_`. If the option is not set and the document also doesn't contain a corresponding field, it uses the `` part of the index name as a fallback. +| `namespace` | no | - | A static value for the namespace part of the data stream name. See the criteria for <> for allowed characters. Must be no longer than 100 characters. If not set, gets the value of the field `data_stream.namespace` from the document. When using values from the document, the processor replaces invalid characters with `_`. If the option is not set and the document also doesn't contain a corresponding field, it uses the `` part of the index name as a fallback. 
+include::common-options.asciidoc[] +|====== + +NOTE: It's not possible to change the `type` of the data stream by setting the `data_stream.type` in the document. + +[source,js] +-------------------------------------------------- +{ + "reroute": { + "tag": "nginx", + "if" : "ctx?.log?.file?.path?.contains('nginx')", + "dataset": "nginx" + } +} +-------------------------------------------------- +// NOTCONSOLE diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java index d64c7e9cbc6a5..df5dad576dec0 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java @@ -89,7 +89,7 @@ public Map getProcessors(Processor.Parameters paramet entry(FingerprintProcessor.TYPE, new FingerprintProcessor.Factory()), entry(RegisteredDomainProcessor.TYPE, new RegisteredDomainProcessor.Factory()), entry(RedactProcessor.TYPE, new RedactProcessor.Factory(matcherWatchdog)), - entry(DataStreamRouterProcessor.TYPE, new DataStreamRouterProcessor.Factory()) + entry(RerouteProcessor.TYPE, new RerouteProcessor.Factory()) ); } diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DataStreamRouterProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java similarity index 85% rename from modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DataStreamRouterProcessor.java rename to modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java index e949a17be8d14..bea5fcee102ad 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DataStreamRouterProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java @@ -19,8 +19,8 @@ import 
static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException; -public final class DataStreamRouterProcessor extends AbstractProcessor { - public static final String TYPE = "data_stream_router"; +public final class RerouteProcessor extends AbstractProcessor { + public static final String TYPE = "reroute"; private static final String DATA_STREAM_PREFIX = "data_stream."; private static final String DATA_STREAM_TYPE = DATA_STREAM_PREFIX + "type"; @@ -32,11 +32,13 @@ public final class DataStreamRouterProcessor extends AbstractProcessor { private static final char REPLACEMENT_CHAR = '_'; private final String dataset; private final String namespace; + private final String destination; - DataStreamRouterProcessor(String tag, String description, String dataset, String namespace) { + RerouteProcessor(String tag, String description, String dataset, String namespace, String destination) { super(tag, description); this.dataset = dataset; this.namespace = namespace; + this.destination = destination; } private static String sanitizeDataStreamField(String s, char[] disallowedInDataset) { @@ -53,6 +55,10 @@ private static String sanitizeDataStreamField(String s, char[] disallowedInDatas @Override public IngestDocument execute(IngestDocument ingestDocument) throws Exception { + if (destination != null) { + ingestDocument.reroute(destination); + return ingestDocument; + } final String indexName = ingestDocument.getFieldValue(IngestDocument.Metadata.INDEX.getFieldName(), String.class); final String type; final String datasetFallback; @@ -74,7 +80,7 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { ingestDocument.setFieldValue(DATA_STREAM_TYPE, type); ingestDocument.setFieldValue(DATA_STREAM_DATASET, dataset); ingestDocument.setFieldValue(DATA_STREAM_NAMESPACE, namespace); - ingestDocument.redirect(type + "-" + dataset + "-" + namespace); + ingestDocument.reroute(type + "-" + dataset + "-" + namespace); return ingestDocument; } @@ 
-134,10 +140,14 @@ public String getDataStreamNamespace() { return namespace; } + public String getDestination() { + return destination; + } + public static final class Factory implements Processor.Factory { @Override - public DataStreamRouterProcessor create( + public RerouteProcessor create( Map processorFactories, String tag, String description, @@ -151,7 +161,12 @@ public DataStreamRouterProcessor create( if (Objects.equals(sanitizeDataStreamField(namespace, DISALLOWED_IN_NAMESPACE), namespace) == false) { throw newConfigurationException(TYPE, tag, "namespace", "contains illegal characters"); } - return new DataStreamRouterProcessor(tag, description, dataset, namespace); + String destination = ConfigurationUtils.readOptionalStringProperty(TYPE, tag, config, "destination"); + if (destination != null && (dataset != null || namespace != null)) { + throw newConfigurationException(TYPE, tag, "destination", "can only be set if dataset and namespace are not set"); + } + + return new RerouteProcessor(tag, description, dataset, namespace, destination); } } } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DataStreamRouterProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java similarity index 57% rename from modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DataStreamRouterProcessorFactoryTests.java rename to modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java index f9b50a3a8b407..b3d325a2d93d0 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DataStreamRouterProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java @@ -15,12 +15,13 @@ import java.util.HashMap; import java.util.Map; +import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.nullValue; -public 
class DataStreamRouterProcessorFactoryTests extends ESTestCase { +public class RerouteProcessorFactoryTests extends ESTestCase { public void testSuccess() throws Exception { - DataStreamRouterProcessor processor = create(null, null); + RerouteProcessor processor = create(null, null); assertThat(processor.getDataStreamDataset(), nullValue()); assertThat(processor.getDataStreamNamespace(), nullValue()); } @@ -35,7 +36,23 @@ public void testInvalidNamespace() throws Exception { assertThat(e.getMessage(), Matchers.equalTo("[namespace] contains illegal characters")); } - private static DataStreamRouterProcessor create(String dataset, String namespace) throws Exception { + public void testDestinationSuccess() throws Exception { + RerouteProcessor processor = create(Map.of("destination", "foo")); + assertThat(processor.getDataStreamDataset(), nullValue()); + assertThat(processor.getDataStreamNamespace(), nullValue()); + assertThat(processor.getDestination(), equalTo("foo")); + } + + public void testDestinationAndDataset() { + ElasticsearchParseException e = expectThrows( + ElasticsearchParseException.class, + () -> create(Map.of("destination", "foo", "dataset", "bar")) + ); + assertThat(e.getMessage(), Matchers.equalTo("[destination] can only be set if dataset and namespace are not set")); + } + + + private static RerouteProcessor create(String dataset, String namespace) throws Exception { Map config = new HashMap<>(); if (dataset != null) { config.put("dataset", dataset); @@ -43,6 +60,10 @@ private static DataStreamRouterProcessor create(String dataset, String namespace if (namespace != null) { config.put("namespace", namespace); } - return new DataStreamRouterProcessor.Factory().create(null, null, null, config); + return create(config); + } + + private static RerouteProcessor create(Map config) throws Exception { + return new RerouteProcessor.Factory().create(null, null, null, new HashMap<>(config)); } } diff --git 
a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DataStreamRouterProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java similarity index 77% rename from modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DataStreamRouterProcessorTests.java rename to modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java index c86aa4bdc5a59..1dc1c4b209fd9 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DataStreamRouterProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java @@ -15,14 +15,17 @@ import org.elasticsearch.ingest.WrappingProcessor; import org.elasticsearch.test.ESTestCase; +import java.util.Map; + import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.nullValue; -public class DataStreamRouterProcessorTests extends ESTestCase { +public class RerouteProcessorTests extends ESTestCase { public void testDefaults() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - DataStreamRouterProcessor processor = new DataStreamRouterProcessor(null, null, null, null); + RerouteProcessor processor = new RerouteProcessor(null, null, null, null, null); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "generic", "default"); } @@ -30,8 +33,8 @@ public void testDefaults() throws Exception { public void testSkipFirstProcessor() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - DataStreamRouterProcessor skippedProcessor = new DataStreamRouterProcessor(null, null, "skip", null); - DataStreamRouterProcessor executedProcessor = new DataStreamRouterProcessor(null, null, "executed", null); + RerouteProcessor skippedProcessor = new RerouteProcessor(null, null, "skip", null, null); + RerouteProcessor executedProcessor = 
new RerouteProcessor(null, null, "executed", null, null); CompoundProcessor processor = new CompoundProcessor(new SkipProcessor(skippedProcessor), executedProcessor); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "executed", "default"); @@ -40,8 +43,8 @@ public void testSkipFirstProcessor() throws Exception { public void testSkipLastProcessor() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - DataStreamRouterProcessor executedProcessor = new DataStreamRouterProcessor(null, null, "executed", null); - DataStreamRouterProcessor skippedProcessor = new DataStreamRouterProcessor(null, null, "skip", null); + RerouteProcessor executedProcessor = new RerouteProcessor(null, null, "executed", null, null); + RerouteProcessor skippedProcessor = new RerouteProcessor(null, null, "skip", null, null); CompoundProcessor processor = new CompoundProcessor(executedProcessor, skippedProcessor); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "executed", "default"); @@ -52,7 +55,7 @@ public void testDataStreamFieldsFromDocument() throws Exception { ingestDocument.setFieldValue("data_stream.dataset", "foo"); ingestDocument.setFieldValue("data_stream.namespace", "bar"); - DataStreamRouterProcessor processor = new DataStreamRouterProcessor(null, null, null, null); + RerouteProcessor processor = new RerouteProcessor(null, null, null, null, null); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "foo", "bar"); } @@ -62,11 +65,20 @@ public void testInvalidDataStreamFieldsFromDocument() throws Exception { ingestDocument.setFieldValue("data_stream.dataset", "foo-bar"); ingestDocument.setFieldValue("data_stream.namespace", "baz#qux"); - DataStreamRouterProcessor processor = new DataStreamRouterProcessor(null, null, null, null); + RerouteProcessor processor = new RerouteProcessor(null, null, null, null, null); processor.execute(ingestDocument); 
assertDataSetFields(ingestDocument, "logs", "foo_bar", "baz_qux"); } + public void testDestination() throws Exception { + IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); + + RerouteProcessor processor = new RerouteProcessor(null, null, null, null, "foo"); + processor.execute(ingestDocument); + assertFalse(ingestDocument.hasField("data_stream")); + assertThat(ingestDocument.getFieldValue("_index", String.class), equalTo("foo")); + } + private void assertDataSetFields(IngestDocument ingestDocument, String type, String dataset, String namespace) { assertThat(ingestDocument.getFieldValue("data_stream.type", String.class), equalTo(type)); assertThat(ingestDocument.getFieldValue("data_stream.dataset", String.class), equalTo(dataset)); diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/310_data_stream_router_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/310_reroute_processor.yml similarity index 94% rename from modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/310_data_stream_router_processor.yml rename to modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/310_reroute_processor.yml index 5952d279aa6d2..dbdd9b9d7e519 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/310_data_stream_router_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/310_reroute_processor.yml @@ -30,12 +30,12 @@ teardown: { "processors": [ { - "data_stream_router" : { + "reroute" : { "dataset" : "first" } }, { - "data_stream_router" : { + "reroute" : { "dataset" : "second" } } @@ -68,7 +68,7 @@ teardown: { "processors": [ { - "data_stream_router" : { + "reroute" : { "tag": "nginx", "if" : "ctx?.log?.file?.path?.contains('nginx')", "dataset": "nginx" @@ -94,14 +94,14 @@ teardown: { "processors": [ { - "data_stream_router": { + "reroute": { "tag": "nginx.access", 
"if": "ctx?.log?.file?.path?.contains('access')", "dataset": "nginx.access" } }, { - "data_stream_router": { + "reroute": { "tag": "nginx.error", "if": "ctx?.log?.file?.path?.contains('error')", "dataset": "nginx.error" From b514a6cd5b373599eae504d20fa40d538f6ff4e6 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Wed, 1 Mar 2023 09:26:31 +0100 Subject: [PATCH 09/30] Fix docs build --- docs/reference/ingest/processors/reroute.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/ingest/processors/reroute.asciidoc b/docs/reference/ingest/processors/reroute.asciidoc index b4b28928623a9..cd6a233933999 100644 --- a/docs/reference/ingest/processors/reroute.asciidoc +++ b/docs/reference/ingest/processors/reroute.asciidoc @@ -28,7 +28,7 @@ similar to a if, else-if, else-if, … condition. Note that the client needs to have permissions to the final target. Otherwise, the document will be rejected with a security exception which looks like this: -[source,json] +[source,js] -------------------------------------------------- {"type":"security_exception","reason":"action [indices:admin/auto_create] is unauthorized for API key id [8-dt9H8BqGblnY2uSI--] of user [elastic/fleet-server] on indices [logs-foo-default], this action is granted by the index privileges [auto_configure,create_index,manage,all]"} -------------------------------------------------- From 6c44e352bc778fb4ec1e31ce1a84536e292d4c10 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Sat, 4 Mar 2023 15:43:00 +0100 Subject: [PATCH 10/30] Add support for field references and multiple fallbacks --- .../ingest/common/RerouteProcessor.java | 133 ++++++++++++------ .../common/RerouteProcessorFactoryTests.java | 14 +- .../ingest/common/RerouteProcessorTests.java | 94 +++++++++++-- .../ingest/ConfigurationUtils.java | 21 +++ 4 files changed, 205 insertions(+), 57 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java 
b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java index bea5fcee102ad..d01afe2584cfd 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java @@ -13,9 +13,12 @@ import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.Processor; +import java.util.Iterator; +import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Objects; +import java.util.function.Function; import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException; @@ -30,11 +33,19 @@ public final class RerouteProcessor extends AbstractProcessor { private static final char[] DISALLOWED_IN_NAMESPACE = new char[] { '\\', '/', '*', '?', '\"', '<', '>', '|', ' ', ',', '#', ':' }; private static final int MAX_LENGTH = 100; private static final char REPLACEMENT_CHAR = '_'; - private final String dataset; - private final String namespace; + private final List dataset; + private final List namespace; private final String destination; - RerouteProcessor(String tag, String description, String dataset, String namespace, String destination) { + RerouteProcessor(List dataset, List namespace) { + this(null, null, dataset, namespace, null); + } + + RerouteProcessor(String destination) { + this(null, null, null, null, destination); + } + + RerouteProcessor(String tag, String description, List dataset, List namespace, String destination) { super(tag, description); this.dataset = dataset; this.namespace = namespace; @@ -53,6 +64,14 @@ private static String sanitizeDataStreamField(String s, char[] disallowedInDatas return s; } + private static String sanitizeDataset(String dataset) { + return sanitizeDataStreamField(dataset, DISALLOWED_IN_DATASET); + } + + private static String sanitizeNamespace(String namespace) { + return sanitizeDataStreamField(namespace, 
DISALLOWED_IN_NAMESPACE); + } + @Override public IngestDocument execute(IngestDocument ingestDocument) throws Exception { if (destination != null) { @@ -61,8 +80,8 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { } final String indexName = ingestDocument.getFieldValue(IngestDocument.Metadata.INDEX.getFieldName(), String.class); final String type; - final String datasetFallback; - final String namespaceFallback; + final String currentDataset; + final String currentNamespace; int indexOfFirstDash = indexName.indexOf('-'); if (indexOfFirstDash < 0) { throw createInvalidDataStreamNameException(indexName); @@ -72,15 +91,19 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { throw createInvalidDataStreamNameException(indexName); } type = parseDataStreamType(indexName, indexOfFirstDash); - datasetFallback = parseDataStreamDataset(indexName, indexOfFirstDash, indexOfSecondDash); - namespaceFallback = parseDataStreamNamespace(indexName, indexOfSecondDash); + currentDataset = parseDataStreamDataset(indexName, indexOfFirstDash, indexOfSecondDash); + currentNamespace = parseDataStreamNamespace(indexName, indexOfSecondDash); - String dataset = getDataset(ingestDocument, datasetFallback); - String namespace = getNamespace(ingestDocument, namespaceFallback); + String dataset = determineDataset(ingestDocument, currentDataset); + String namespace = determineNamespace(ingestDocument, currentNamespace); + if (dataset == null || namespace == null) { + return ingestDocument; + } + String newTarget = type + "-" + dataset + "-" + namespace; + ingestDocument.reroute(newTarget); ingestDocument.setFieldValue(DATA_STREAM_TYPE, type); ingestDocument.setFieldValue(DATA_STREAM_DATASET, dataset); ingestDocument.setFieldValue(DATA_STREAM_NAMESPACE, namespace); - ingestDocument.reroute(type + "-" + dataset + "-" + namespace); return ingestDocument; } @@ -102,29 +125,50 @@ private static String parseDataStreamNamespace(String 
dataStreamName, int indexO return dataStreamName.substring(indexOfSecondDash + 1); } - private String getDataset(IngestDocument ingestDocument, String datasetFallback) { - String dataset = this.dataset; - if (dataset == null) { - dataset = sanitizeDataStreamField(ingestDocument.getFieldValue(DATA_STREAM_DATASET, String.class, true), DISALLOWED_IN_DATASET); - } - if (dataset == null) { - dataset = datasetFallback; - } - return dataset; + private String determineDataset(IngestDocument ingestDocument, String currentDataset) { + return determineDataStreamField(ingestDocument, dataset, currentDataset, RerouteProcessor::sanitizeDataset, DATA_STREAM_DATASET); + } + + private String determineNamespace(IngestDocument ingestDocument, String currentNamespace) { + return determineDataStreamField( + ingestDocument, + namespace, + currentNamespace, + RerouteProcessor::sanitizeNamespace, + DATA_STREAM_NAMESPACE + ); } - private String getNamespace(IngestDocument ingestDocument, String namespaceFallback) { - String namespace = this.namespace; - if (namespace == null) { - namespace = sanitizeDataStreamField( - ingestDocument.getFieldValue(DATA_STREAM_NAMESPACE, String.class, true), - DISALLOWED_IN_NAMESPACE - ); + private String determineDataStreamField( + IngestDocument ingestDocument, + List valueSources, + String fromCurrentTarget, + Function sanitization, + String dataStreamFieldName + ) { + String result = null; + for (Iterator iterator = valueSources.iterator(); iterator.hasNext(); ) { + String value = iterator.next(); + if (value.startsWith("{") && value.endsWith("}")) { + String fieldReference = value.substring(1, value.length() - 1); + result = sanitization.apply(ingestDocument.getFieldValue(fieldReference, String.class, true)); + if (fieldReference.equals(dataStreamFieldName) && fromCurrentTarget.equals(result)) { + result = null; + } + } else { + result = value; + } + if (result != null) { + break; + } } - if (namespace == null) { - namespace = namespaceFallback; + if 
(result == null) { + result = sanitization.apply(ingestDocument.getFieldValue(dataStreamFieldName, String.class, true)); } - return namespace; + if (result == null) { + result = fromCurrentTarget; + } + return result; } @Override @@ -132,11 +176,11 @@ public String getType() { return TYPE; } - public String getDataStreamDataset() { + public List getDataStreamDataset() { return dataset; } - public String getDataStreamNamespace() { + public List getDataStreamNamespace() { return namespace; } @@ -153,16 +197,25 @@ public RerouteProcessor create( String description, Map config ) throws Exception { - String dataset = ConfigurationUtils.readOptionalStringProperty(TYPE, tag, config, "dataset"); - if (Objects.equals(sanitizeDataStreamField(dataset, DISALLOWED_IN_DATASET), dataset) == false) { - throw newConfigurationException(TYPE, tag, "dataset", "contains illegal characters"); - } - String namespace = ConfigurationUtils.readOptionalStringProperty(TYPE, tag, config, "namespace"); - if (Objects.equals(sanitizeDataStreamField(namespace, DISALLOWED_IN_NAMESPACE), namespace) == false) { - throw newConfigurationException(TYPE, tag, "namespace", "contains illegal characters"); - } + List dataset = ConfigurationUtils.readOptionalListOrString(TYPE, tag, config, "dataset"); + dataset.stream() + .filter(ds -> ds.startsWith("{") == false) + .filter(ds -> Objects.equals(sanitizeDataset(ds), ds) == false) + .findAny() + .ifPresent(ds -> { + throw newConfigurationException(TYPE, tag, "dataset", "'" + ds + "' contains disallowed characters"); + }); + List namespace = ConfigurationUtils.readOptionalListOrString(TYPE, tag, config, "namespace"); + namespace.stream() + .filter(ns -> ns.startsWith("{") == false) + .filter(ns -> Objects.equals(sanitizeNamespace(ns), ns) == false) + .findAny() + .ifPresent(ns -> { + throw newConfigurationException(TYPE, tag, "namespace", "'" + ns + "' contains disallowed characters"); + }); + String destination = 
ConfigurationUtils.readOptionalStringProperty(TYPE, tag, config, "destination"); - if (destination != null && (dataset != null || namespace != null)) { + if (destination != null && (dataset.isEmpty() == false || namespace.isEmpty() == false)) { throw newConfigurationException(TYPE, tag, "destination", "can only be set if dataset and namespace are not set"); } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java index b3d325a2d93d0..d887ff0da3cf2 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java @@ -13,33 +13,33 @@ import org.hamcrest.Matchers; import java.util.HashMap; +import java.util.List; import java.util.Map; import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.nullValue; public class RerouteProcessorFactoryTests extends ESTestCase { public void testSuccess() throws Exception { RerouteProcessor processor = create(null, null); - assertThat(processor.getDataStreamDataset(), nullValue()); - assertThat(processor.getDataStreamNamespace(), nullValue()); + assertThat(processor.getDataStreamDataset(), equalTo(List.of())); + assertThat(processor.getDataStreamNamespace(), equalTo(List.of())); } public void testInvalidDataset() throws Exception { ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> create("my-service", null)); - assertThat(e.getMessage(), Matchers.equalTo("[dataset] contains illegal characters")); + assertThat(e.getMessage(), Matchers.equalTo("[dataset] 'my-service' contains disallowed characters")); } public void testInvalidNamespace() throws Exception { ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> 
create("generic", "foo:bar")); - assertThat(e.getMessage(), Matchers.equalTo("[namespace] contains illegal characters")); + assertThat(e.getMessage(), Matchers.equalTo("[namespace] 'foo:bar' contains disallowed characters")); } public void testDestinationSuccess() throws Exception { RerouteProcessor processor = create(Map.of("destination", "foo")); - assertThat(processor.getDataStreamDataset(), nullValue()); - assertThat(processor.getDataStreamNamespace(), nullValue()); + assertThat(processor.getDataStreamDataset(), equalTo(List.of())); + assertThat(processor.getDataStreamNamespace(), equalTo(List.of())); assertThat(processor.getDestination(), equalTo("foo")); } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java index 1dc1c4b209fd9..865a2bc75b5a9 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java @@ -12,20 +12,20 @@ import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.Processor; import org.elasticsearch.ingest.RandomDocumentPicks; +import org.elasticsearch.ingest.TestProcessor; import org.elasticsearch.ingest.WrappingProcessor; import org.elasticsearch.test.ESTestCase; -import java.util.Map; +import java.util.List; import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.nullValue; public class RerouteProcessorTests extends ESTestCase { public void testDefaults() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor processor = new RerouteProcessor(null, null, null, null, null); + RerouteProcessor processor = new RerouteProcessor(List.of(), List.of()); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "generic", 
"default"); } @@ -33,8 +33,8 @@ public void testDefaults() throws Exception { public void testSkipFirstProcessor() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor skippedProcessor = new RerouteProcessor(null, null, "skip", null, null); - RerouteProcessor executedProcessor = new RerouteProcessor(null, null, "executed", null, null); + RerouteProcessor skippedProcessor = new RerouteProcessor(List.of("skip"), List.of()); + RerouteProcessor executedProcessor = new RerouteProcessor(List.of("executed"), List.of()); CompoundProcessor processor = new CompoundProcessor(new SkipProcessor(skippedProcessor), executedProcessor); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "executed", "default"); @@ -43,8 +43,8 @@ public void testSkipFirstProcessor() throws Exception { public void testSkipLastProcessor() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor executedProcessor = new RerouteProcessor(null, null, "executed", null, null); - RerouteProcessor skippedProcessor = new RerouteProcessor(null, null, "skip", null, null); + RerouteProcessor executedProcessor = new RerouteProcessor(List.of("executed"), List.of()); + RerouteProcessor skippedProcessor = new RerouteProcessor(List.of("skip"), List.of()); CompoundProcessor processor = new CompoundProcessor(executedProcessor, skippedProcessor); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "executed", "default"); @@ -55,7 +55,7 @@ public void testDataStreamFieldsFromDocument() throws Exception { ingestDocument.setFieldValue("data_stream.dataset", "foo"); ingestDocument.setFieldValue("data_stream.namespace", "bar"); - RerouteProcessor processor = new RerouteProcessor(null, null, null, null, null); + RerouteProcessor processor = new RerouteProcessor(List.of(), List.of()); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, 
"logs", "foo", "bar"); } @@ -65,7 +65,7 @@ public void testInvalidDataStreamFieldsFromDocument() throws Exception { ingestDocument.setFieldValue("data_stream.dataset", "foo-bar"); ingestDocument.setFieldValue("data_stream.namespace", "baz#qux"); - RerouteProcessor processor = new RerouteProcessor(null, null, null, null, null); + RerouteProcessor processor = new RerouteProcessor(List.of(), List.of()); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "foo_bar", "baz_qux"); } @@ -73,12 +73,86 @@ public void testInvalidDataStreamFieldsFromDocument() throws Exception { public void testDestination() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor processor = new RerouteProcessor(null, null, null, null, "foo"); + RerouteProcessor processor = new RerouteProcessor("foo"); processor.execute(ingestDocument); assertFalse(ingestDocument.hasField("data_stream")); assertThat(ingestDocument.getFieldValue("_index", String.class), equalTo("foo")); } + public void testFieldReference() throws Exception { + IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); + ingestDocument.setFieldValue("service.name", "opbeans-java"); + ingestDocument.setFieldValue("service.environment", "dev"); + + RerouteProcessor processor = new RerouteProcessor(List.of("{service.name}"), List.of("{service.environment}")); + processor.execute(ingestDocument); + assertDataSetFields(ingestDocument, "logs", "opbeans_java", "dev"); + } + + public void testRerouteToCurrentTarget() throws Exception { + IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); + + RerouteProcessor reroute = new RerouteProcessor(List.of("generic"), List.of("default")); + CompoundProcessor processor = new CompoundProcessor( + reroute, + new TestProcessor(doc -> doc.setFieldValue("pipeline_is_continued", true)) + ); + processor.execute(ingestDocument); + assertDataSetFields(ingestDocument, "logs", 
"generic", "default"); + assertFalse(ingestDocument.hasField("pipeline_is_continued")); + } + + public void testFieldReferenceWithMissingReroutesToCurrentTarget() throws Exception { + IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); + + RerouteProcessor reroute = new RerouteProcessor(List.of("{service.name}"), List.of("{service.environment}")); + CompoundProcessor processor = new CompoundProcessor( + reroute, + new TestProcessor(doc -> doc.setFieldValue("pipeline_is_continued", true)) + ); + processor.execute(ingestDocument); + assertThat(ingestDocument.getFieldValue("_index", String.class), equalTo("logs-generic-default")); + assertDataSetFields(ingestDocument, "logs", "generic", "default"); + assertFalse(ingestDocument.hasField("pipeline_is_continued")); + } + + public void testDataStreamFieldReference() throws Exception { + IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); + ingestDocument.setFieldValue("data_stream.dataset", "dataset_from_doc"); + ingestDocument.setFieldValue("data_stream.namespace", "namespace_from_doc"); + + RerouteProcessor processor = new RerouteProcessor( + List.of("{data_stream.dataset}", "fallback"), + List.of("{data_stream.namespace}", "fallback") + ); + processor.execute(ingestDocument); + assertDataSetFields(ingestDocument, "logs", "dataset_from_doc", "namespace_from_doc"); + } + + public void testDatasetFieldReferenceMissingValue() throws Exception { + IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); + + RerouteProcessor processor = new RerouteProcessor( + List.of("{data_stream.dataset}", "fallback"), + List.of("{data_stream.namespace}", "fallback") + ); + processor.execute(ingestDocument); + assertDataSetFields(ingestDocument, "logs", "fallback", "fallback"); + } + + public void testDatasetFieldReference() throws Exception { + IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); + 
ingestDocument.setFieldValue("data_stream.dataset", "generic"); + ingestDocument.setFieldValue("data_stream.namespace", "default"); + + RerouteProcessor processor = new RerouteProcessor( + List.of("{data_stream.dataset}", "fallback"), + List.of("{data_stream.namespace}", "fallback") + ); + processor.execute(ingestDocument); + assertDataSetFields(ingestDocument, "logs", "fallback", "fallback"); + } + private void assertDataSetFields(IngestDocument ingestDocument, String type, String dataset, String namespace) { assertThat(ingestDocument.getFieldValue("data_stream.type", String.class), equalTo(type)); assertThat(ingestDocument.getFieldValue("data_stream.dataset", String.class), equalTo(dataset)); diff --git a/server/src/main/java/org/elasticsearch/ingest/ConfigurationUtils.java b/server/src/main/java/org/elasticsearch/ingest/ConfigurationUtils.java index e8dce119d343b..df5104e9164ab 100644 --- a/server/src/main/java/org/elasticsearch/ingest/ConfigurationUtils.java +++ b/server/src/main/java/org/elasticsearch/ingest/ConfigurationUtils.java @@ -318,6 +318,27 @@ public static List readOptionalList( return readList(processorType, processorTag, propertyName, value); } + /** + * Returns and removes the specified property of type list from the specified configuration map. + * + * If the property value isn't of type list or string an {@link ElasticsearchParseException} is thrown. + */ + public static List readOptionalListOrString( + String processorType, + String processorTag, + Map configuration, + String propertyName + ) { + Object value = configuration.remove(propertyName); + if (value == null) { + return List.of(); + } + if (value instanceof String) { + return List.of(readString(processorType, processorTag, propertyName, value)); + } + return readList(processorType, processorTag, propertyName, value); + } + /** * Returns and removes the specified property of type list from the specified configuration map. 
* From 4888843c845983811a7aee345efedf322ba26994 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 9 Mar 2023 10:21:50 +0100 Subject: [PATCH 11/30] Add skip_if_target_unchanged option --- .../ingest/common/RerouteProcessor.java | 41 ++++++++++++------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java index d01afe2584cfd..f55614164c7ca 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java @@ -36,20 +36,29 @@ public final class RerouteProcessor extends AbstractProcessor { private final List dataset; private final List namespace; private final String destination; + private final boolean skipIfTargetUnchanged; RerouteProcessor(List dataset, List namespace) { - this(null, null, dataset, namespace, null); + this(null, null, dataset, namespace, null, false); } RerouteProcessor(String destination) { - this(null, null, null, null, destination); + this(null, null, null, null, destination, false); } - RerouteProcessor(String tag, String description, List dataset, List namespace, String destination) { + RerouteProcessor( + String tag, + String description, + List dataset, + List namespace, + String destination, + boolean skipIfTargetUnchanged + ) { super(tag, description); this.dataset = dataset; this.namespace = namespace; this.destination = destination; + this.skipIfTargetUnchanged = skipIfTargetUnchanged; } private static String sanitizeDataStreamField(String s, char[] disallowedInDataset) { @@ -78,21 +87,21 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { ingestDocument.reroute(destination); return ingestDocument; } - final String indexName = 
ingestDocument.getFieldValue(IngestDocument.Metadata.INDEX.getFieldName(), String.class); + final String currentTarget = ingestDocument.getFieldValue(IngestDocument.Metadata.INDEX.getFieldName(), String.class); final String type; final String currentDataset; final String currentNamespace; - int indexOfFirstDash = indexName.indexOf('-'); + int indexOfFirstDash = currentTarget.indexOf('-'); if (indexOfFirstDash < 0) { - throw createInvalidDataStreamNameException(indexName); + throw createInvalidDataStreamNameException(currentTarget); } - int indexOfSecondDash = indexName.indexOf('-', indexOfFirstDash + 1); + int indexOfSecondDash = currentTarget.indexOf('-', indexOfFirstDash + 1); if (indexOfSecondDash < 0) { - throw createInvalidDataStreamNameException(indexName); + throw createInvalidDataStreamNameException(currentTarget); } - type = parseDataStreamType(indexName, indexOfFirstDash); - currentDataset = parseDataStreamDataset(indexName, indexOfFirstDash, indexOfSecondDash); - currentNamespace = parseDataStreamNamespace(indexName, indexOfSecondDash); + type = parseDataStreamType(currentTarget, indexOfFirstDash); + currentDataset = parseDataStreamDataset(currentTarget, indexOfFirstDash, indexOfSecondDash); + currentNamespace = parseDataStreamNamespace(currentTarget, indexOfSecondDash); String dataset = determineDataset(ingestDocument, currentDataset); String namespace = determineNamespace(ingestDocument, currentNamespace); @@ -100,6 +109,9 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { return ingestDocument; } String newTarget = type + "-" + dataset + "-" + namespace; + if (newTarget.equals(currentTarget) && skipIfTargetUnchanged) { + return ingestDocument; + } ingestDocument.reroute(newTarget); ingestDocument.setFieldValue(DATA_STREAM_TYPE, type); ingestDocument.setFieldValue(DATA_STREAM_DATASET, dataset); @@ -152,9 +164,6 @@ private String determineDataStreamField( if (value.startsWith("{") && value.endsWith("}")) { String 
fieldReference = value.substring(1, value.length() - 1); result = sanitization.apply(ingestDocument.getFieldValue(fieldReference, String.class, true)); - if (fieldReference.equals(dataStreamFieldName) && fromCurrentTarget.equals(result)) { - result = null; - } } else { result = value; } @@ -218,8 +227,10 @@ public RerouteProcessor create( if (destination != null && (dataset.isEmpty() == false || namespace.isEmpty() == false)) { throw newConfigurationException(TYPE, tag, "destination", "can only be set if dataset and namespace are not set"); } + boolean skipIfTargetUnchanged = ConfigurationUtils.readBooleanProperty(TYPE, tag, config, "skip_if_target_unchanged", false); + - return new RerouteProcessor(tag, description, dataset, namespace, destination); + return new RerouteProcessor(tag, description, dataset, namespace, destination, skipIfTargetUnchanged); } } } From 26b3c793416c9cf9b8a266f5e5226e2a111199dc Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Sun, 19 Mar 2023 18:34:45 -0400 Subject: [PATCH 12/30] Use mustache scripts instead of custom field reference syntax --- .../ingest/common/IngestCommonPlugin.java | 2 +- .../ingest/common/RerouteProcessor.java | 110 +++++++++++------- .../common/RerouteProcessorFactoryTests.java | 7 +- .../ingest/common/RerouteProcessorTests.java | 70 +++++------ 4 files changed, 107 insertions(+), 82 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java index df5dad576dec0..2da82606430e5 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java @@ -89,7 +89,7 @@ public Map getProcessors(Processor.Parameters paramet entry(FingerprintProcessor.TYPE, new FingerprintProcessor.Factory()), entry(RegisteredDomainProcessor.TYPE, new 
RegisteredDomainProcessor.Factory()), entry(RedactProcessor.TYPE, new RedactProcessor.Factory(matcherWatchdog)), - entry(RerouteProcessor.TYPE, new RerouteProcessor.Factory()) + entry(RerouteProcessor.TYPE, new RerouteProcessor.Factory(parameters.scriptService)) ); } diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java index f55614164c7ca..e9f0a0188c9bc 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java @@ -8,12 +8,14 @@ package org.elasticsearch.ingest.common; +import org.elasticsearch.common.Strings; import org.elasticsearch.ingest.AbstractProcessor; import org.elasticsearch.ingest.ConfigurationUtils; import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.Processor; +import org.elasticsearch.script.ScriptService; +import org.elasticsearch.script.TemplateScript; -import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Map; @@ -33,25 +35,25 @@ public final class RerouteProcessor extends AbstractProcessor { private static final char[] DISALLOWED_IN_NAMESPACE = new char[] { '\\', '/', '*', '?', '\"', '<', '>', '|', ' ', ',', '#', ':' }; private static final int MAX_LENGTH = 100; private static final char REPLACEMENT_CHAR = '_'; - private final List dataset; - private final List namespace; - private final String destination; + private final List dataset; + private final List namespace; + private final TemplateScript.Factory destination; private final boolean skipIfTargetUnchanged; - RerouteProcessor(List dataset, List namespace) { + RerouteProcessor(List dataset, List namespace) { this(null, null, dataset, namespace, null, false); } - RerouteProcessor(String destination) { + RerouteProcessor(TemplateScript.Factory 
destination) { this(null, null, null, null, destination, false); } RerouteProcessor( String tag, String description, - List dataset, - List namespace, - String destination, + List dataset, + List namespace, + TemplateScript.Factory destination, boolean skipIfTargetUnchanged ) { super(tag, description); @@ -84,7 +86,7 @@ private static String sanitizeNamespace(String namespace) { @Override public IngestDocument execute(IngestDocument ingestDocument) throws Exception { if (destination != null) { - ingestDocument.reroute(destination); + ingestDocument.reroute(ingestDocument.renderTemplate(destination)); return ingestDocument; } final String currentTarget = ingestDocument.getFieldValue(IngestDocument.Metadata.INDEX.getFieldName(), String.class); @@ -153,31 +155,25 @@ private String determineNamespace(IngestDocument ingestDocument, String currentN private String determineDataStreamField( IngestDocument ingestDocument, - List valueSources, + List valueSources, String fromCurrentTarget, Function sanitization, String dataStreamFieldName ) { - String result = null; - for (Iterator iterator = valueSources.iterator(); iterator.hasNext(); ) { - String value = iterator.next(); - if (value.startsWith("{") && value.endsWith("}")) { - String fieldReference = value.substring(1, value.length() - 1); - result = sanitization.apply(ingestDocument.getFieldValue(fieldReference, String.class, true)); - } else { - result = value; - } - if (result != null) { + String result = ""; + for (TemplateScript.Factory value : valueSources) { + result = ingestDocument.renderTemplate(value); + if (Strings.isNullOrEmpty(result) == false) { break; } } - if (result == null) { - result = sanitization.apply(ingestDocument.getFieldValue(dataStreamFieldName, String.class, true)); + if (Strings.isNullOrEmpty(result)) { + result = ingestDocument.getFieldValue(dataStreamFieldName, String.class, true); } - if (result == null) { + if (Strings.isNullOrEmpty(result)) { result = fromCurrentTarget; } - return result; 
+ return sanitization.apply(result); } @Override @@ -185,20 +181,26 @@ public String getType() { return TYPE; } - public List getDataStreamDataset() { + public List getDataStreamDataset() { return dataset; } - public List getDataStreamNamespace() { + public List getDataStreamNamespace() { return namespace; } - public String getDestination() { + public TemplateScript.Factory getDestination() { return destination; } public static final class Factory implements Processor.Factory { + private final ScriptService scriptService; + + public Factory(ScriptService scriptService) { + this.scriptService = scriptService; + } + @Override public RerouteProcessor create( Map processorFactories, @@ -207,30 +209,50 @@ public RerouteProcessor create( Map config ) throws Exception { List dataset = ConfigurationUtils.readOptionalListOrString(TYPE, tag, config, "dataset"); - dataset.stream() - .filter(ds -> ds.startsWith("{") == false) - .filter(ds -> Objects.equals(sanitizeDataset(ds), ds) == false) - .findAny() - .ifPresent(ds -> { - throw newConfigurationException(TYPE, tag, "dataset", "'" + ds + "' contains disallowed characters"); - }); + validateDataStreamValue(tag, dataset, "dataset", RerouteProcessor::sanitizeDataset); + List datasetTemplate = dataset.stream() + .map(ds -> ConfigurationUtils.compileTemplate(TYPE, tag, "dataset", ds, scriptService)) + .toList(); List namespace = ConfigurationUtils.readOptionalListOrString(TYPE, tag, config, "namespace"); - namespace.stream() - .filter(ns -> ns.startsWith("{") == false) - .filter(ns -> Objects.equals(sanitizeNamespace(ns), ns) == false) - .findAny() - .ifPresent(ns -> { - throw newConfigurationException(TYPE, tag, "namespace", "'" + ns + "' contains disallowed characters"); - }); - + validateDataStreamValue(tag, namespace, "namespace", RerouteProcessor::sanitizeNamespace); + List namespaceTemplate = namespace.stream() + .map(nsp -> ConfigurationUtils.compileTemplate(TYPE, tag, "namespace", nsp, scriptService)) + .toList(); String 
destination = ConfigurationUtils.readOptionalStringProperty(TYPE, tag, config, "destination"); + TemplateScript.Factory destinationTemplate = null; + if (destination != null) { + destinationTemplate = ConfigurationUtils.compileTemplate( + TYPE, + tag, + "destination", + destination, + scriptService + ); + } + if (destination != null && (dataset.isEmpty() == false || namespace.isEmpty() == false)) { throw newConfigurationException(TYPE, tag, "destination", "can only be set if dataset and namespace are not set"); } boolean skipIfTargetUnchanged = ConfigurationUtils.readBooleanProperty(TYPE, tag, config, "skip_if_target_unchanged", false); + return new RerouteProcessor(tag, description, datasetTemplate, namespaceTemplate, destinationTemplate, skipIfTargetUnchanged); + } - return new RerouteProcessor(tag, description, dataset, namespace, destination, skipIfTargetUnchanged); + private static void validateDataStreamValue( + String tag, + List dataset, + String dataStreamComponent, + Function sanitizer + ) { + dataset.stream() + .filter(ds -> ds.contains("{{") == false) + .filter(ds -> Objects.equals(sanitizer.apply(ds), ds) == false) + .findAny() + .ifPresent( + ds -> { + throw newConfigurationException(TYPE, tag, dataStreamComponent, "'" + ds + "' contains disallowed characters"); + } + ); } } } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java index d887ff0da3cf2..97b0e6562990a 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java @@ -9,6 +9,7 @@ package org.elasticsearch.ingest.common; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.script.ScriptService; import org.elasticsearch.test.ESTestCase; import 
org.hamcrest.Matchers; @@ -17,6 +18,7 @@ import java.util.Map; import static org.hamcrest.CoreMatchers.equalTo; +import static org.mockito.Mockito.mock; public class RerouteProcessorFactoryTests extends ESTestCase { @@ -40,7 +42,7 @@ public void testDestinationSuccess() throws Exception { RerouteProcessor processor = create(Map.of("destination", "foo")); assertThat(processor.getDataStreamDataset(), equalTo(List.of())); assertThat(processor.getDataStreamNamespace(), equalTo(List.of())); - assertThat(processor.getDestination(), equalTo("foo")); + assertThat(processor.getDestination().newInstance(Map.of()).execute(), equalTo("foo")); } public void testDestinationAndDataset() { @@ -51,7 +53,6 @@ public void testDestinationAndDataset() { assertThat(e.getMessage(), Matchers.equalTo("[destination] can only be set if dataset and namespace are not set")); } - private static RerouteProcessor create(String dataset, String namespace) throws Exception { Map config = new HashMap<>(); if (dataset != null) { @@ -64,6 +65,6 @@ private static RerouteProcessor create(String dataset, String namespace) throws } private static RerouteProcessor create(Map config) throws Exception { - return new RerouteProcessor.Factory().create(null, null, null, new HashMap<>(config)); + return new RerouteProcessor.Factory(mock(ScriptService.class)).create(null, null, null, new HashMap<>(config)); } } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java index 865a2bc75b5a9..fde40efa46e11 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java @@ -13,10 +13,13 @@ import org.elasticsearch.ingest.Processor; import org.elasticsearch.ingest.RandomDocumentPicks; import org.elasticsearch.ingest.TestProcessor; 
+import org.elasticsearch.ingest.TestTemplateService; import org.elasticsearch.ingest.WrappingProcessor; +import org.elasticsearch.script.TemplateScript; import org.elasticsearch.test.ESTestCase; import java.util.List; +import java.util.stream.Collectors; import static org.hamcrest.Matchers.equalTo; @@ -25,7 +28,7 @@ public class RerouteProcessorTests extends ESTestCase { public void testDefaults() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor processor = new RerouteProcessor(List.of(), List.of()); + RerouteProcessor processor = createRerouteProcessor(List.of(), List.of()); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "generic", "default"); } @@ -33,8 +36,8 @@ public void testDefaults() throws Exception { public void testSkipFirstProcessor() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor skippedProcessor = new RerouteProcessor(List.of("skip"), List.of()); - RerouteProcessor executedProcessor = new RerouteProcessor(List.of("executed"), List.of()); + RerouteProcessor skippedProcessor = createRerouteProcessor(List.of("skip"), List.of()); + RerouteProcessor executedProcessor = createRerouteProcessor(List.of("executed"), List.of()); CompoundProcessor processor = new CompoundProcessor(new SkipProcessor(skippedProcessor), executedProcessor); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "executed", "default"); @@ -43,8 +46,8 @@ public void testSkipFirstProcessor() throws Exception { public void testSkipLastProcessor() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor executedProcessor = new RerouteProcessor(List.of("executed"), List.of()); - RerouteProcessor skippedProcessor = new RerouteProcessor(List.of("skip"), List.of()); + RerouteProcessor executedProcessor = 
createRerouteProcessor(List.of("executed"), List.of()); + RerouteProcessor skippedProcessor = createRerouteProcessor(List.of("skip"), List.of()); CompoundProcessor processor = new CompoundProcessor(executedProcessor, skippedProcessor); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "executed", "default"); @@ -55,7 +58,7 @@ public void testDataStreamFieldsFromDocument() throws Exception { ingestDocument.setFieldValue("data_stream.dataset", "foo"); ingestDocument.setFieldValue("data_stream.namespace", "bar"); - RerouteProcessor processor = new RerouteProcessor(List.of(), List.of()); + RerouteProcessor processor = createRerouteProcessor(List.of(), List.of()); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "foo", "bar"); } @@ -65,7 +68,7 @@ public void testInvalidDataStreamFieldsFromDocument() throws Exception { ingestDocument.setFieldValue("data_stream.dataset", "foo-bar"); ingestDocument.setFieldValue("data_stream.namespace", "baz#qux"); - RerouteProcessor processor = new RerouteProcessor(List.of(), List.of()); + RerouteProcessor processor = createRerouteProcessor(List.of(), List.of()); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "foo_bar", "baz_qux"); } @@ -73,26 +76,16 @@ public void testInvalidDataStreamFieldsFromDocument() throws Exception { public void testDestination() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor processor = new RerouteProcessor("foo"); + RerouteProcessor processor = createRerouteProcessor("foo"); processor.execute(ingestDocument); assertFalse(ingestDocument.hasField("data_stream")); assertThat(ingestDocument.getFieldValue("_index", String.class), equalTo("foo")); } - public void testFieldReference() throws Exception { - IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - ingestDocument.setFieldValue("service.name", "opbeans-java"); - 
ingestDocument.setFieldValue("service.environment", "dev"); - - RerouteProcessor processor = new RerouteProcessor(List.of("{service.name}"), List.of("{service.environment}")); - processor.execute(ingestDocument); - assertDataSetFields(ingestDocument, "logs", "opbeans_java", "dev"); - } - public void testRerouteToCurrentTarget() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor reroute = new RerouteProcessor(List.of("generic"), List.of("default")); + RerouteProcessor reroute = createRerouteProcessor(List.of("generic"), List.of("default")); CompoundProcessor processor = new CompoundProcessor( reroute, new TestProcessor(doc -> doc.setFieldValue("pipeline_is_continued", true)) @@ -105,7 +98,7 @@ public void testRerouteToCurrentTarget() throws Exception { public void testFieldReferenceWithMissingReroutesToCurrentTarget() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor reroute = new RerouteProcessor(List.of("{service.name}"), List.of("{service.environment}")); + RerouteProcessor reroute = createRerouteProcessor(List.of(""), List.of("")); CompoundProcessor processor = new CompoundProcessor( reroute, new TestProcessor(doc -> doc.setFieldValue("pipeline_is_continued", true)) @@ -118,12 +111,10 @@ public void testFieldReferenceWithMissingReroutesToCurrentTarget() throws Except public void testDataStreamFieldReference() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - ingestDocument.setFieldValue("data_stream.dataset", "dataset_from_doc"); - ingestDocument.setFieldValue("data_stream.namespace", "namespace_from_doc"); - RerouteProcessor processor = new RerouteProcessor( - List.of("{data_stream.dataset}", "fallback"), - List.of("{data_stream.namespace}", "fallback") + RerouteProcessor processor = createRerouteProcessor( + List.of("dataset_from_doc", "fallback"), + List.of("namespace_from_doc", 
"fallback") ); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "dataset_from_doc", "namespace_from_doc"); @@ -132,9 +123,9 @@ public void testDataStreamFieldReference() throws Exception { public void testDatasetFieldReferenceMissingValue() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor processor = new RerouteProcessor( - List.of("{data_stream.dataset}", "fallback"), - List.of("{data_stream.namespace}", "fallback") + RerouteProcessor processor = createRerouteProcessor( + List.of("", "fallback"), + List.of("", "fallback") ); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "fallback", "fallback"); @@ -142,15 +133,13 @@ public void testDatasetFieldReferenceMissingValue() throws Exception { public void testDatasetFieldReference() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - ingestDocument.setFieldValue("data_stream.dataset", "generic"); - ingestDocument.setFieldValue("data_stream.namespace", "default"); - RerouteProcessor processor = new RerouteProcessor( - List.of("{data_stream.dataset}", "fallback"), - List.of("{data_stream.namespace}", "fallback") + RerouteProcessor processor = createRerouteProcessor( + List.of("generic", "fallback"), + List.of("default", "fallback") ); processor.execute(ingestDocument); - assertDataSetFields(ingestDocument, "logs", "fallback", "fallback"); + assertDataSetFields(ingestDocument, "logs", "generic", "default"); } private void assertDataSetFields(IngestDocument ingestDocument, String type, String dataset, String namespace) { @@ -166,7 +155,20 @@ private static IngestDocument createIngestDocument(String dataStream) { return ingestDocument; } + private RerouteProcessor createRerouteProcessor(String destination) { + return new RerouteProcessor(new TestTemplateService.MockTemplateScript.Factory(destination)); + } + + private RerouteProcessor 
createRerouteProcessor(List dataset, List namespace) { + return new RerouteProcessor(asTemplate(dataset), asTemplate(namespace)); + } + + private static List asTemplate(List dataset) { + return dataset.stream().map(TestTemplateService.MockTemplateScript.Factory::new).collect(Collectors.toList()); + } + private static class SkipProcessor implements WrappingProcessor { + private final Processor processor; SkipProcessor(Processor processor) { From 2d02830881093c2d78ec80910f872e3b584ea7b0 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Mon, 27 Mar 2023 15:34:41 +0200 Subject: [PATCH 13/30] Revert "Use mustache scripts instead of custom field reference syntax" This reverts commit 26b3c793416c9cf9b8a266f5e5226e2a111199dc. --- .../ingest/common/IngestCommonPlugin.java | 2 +- .../ingest/common/RerouteProcessor.java | 110 +++++++----------- .../common/RerouteProcessorFactoryTests.java | 7 +- .../ingest/common/RerouteProcessorTests.java | 70 ++++++----- 4 files changed, 82 insertions(+), 107 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java index 2da82606430e5..df5dad576dec0 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java @@ -89,7 +89,7 @@ public Map getProcessors(Processor.Parameters paramet entry(FingerprintProcessor.TYPE, new FingerprintProcessor.Factory()), entry(RegisteredDomainProcessor.TYPE, new RegisteredDomainProcessor.Factory()), entry(RedactProcessor.TYPE, new RedactProcessor.Factory(matcherWatchdog)), - entry(RerouteProcessor.TYPE, new RerouteProcessor.Factory(parameters.scriptService)) + entry(RerouteProcessor.TYPE, new RerouteProcessor.Factory()) ); } diff --git 
a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java index e9f0a0188c9bc..f55614164c7ca 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java @@ -8,14 +8,12 @@ package org.elasticsearch.ingest.common; -import org.elasticsearch.common.Strings; import org.elasticsearch.ingest.AbstractProcessor; import org.elasticsearch.ingest.ConfigurationUtils; import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.Processor; -import org.elasticsearch.script.ScriptService; -import org.elasticsearch.script.TemplateScript; +import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Map; @@ -35,25 +33,25 @@ public final class RerouteProcessor extends AbstractProcessor { private static final char[] DISALLOWED_IN_NAMESPACE = new char[] { '\\', '/', '*', '?', '\"', '<', '>', '|', ' ', ',', '#', ':' }; private static final int MAX_LENGTH = 100; private static final char REPLACEMENT_CHAR = '_'; - private final List dataset; - private final List namespace; - private final TemplateScript.Factory destination; + private final List dataset; + private final List namespace; + private final String destination; private final boolean skipIfTargetUnchanged; - RerouteProcessor(List dataset, List namespace) { + RerouteProcessor(List dataset, List namespace) { this(null, null, dataset, namespace, null, false); } - RerouteProcessor(TemplateScript.Factory destination) { + RerouteProcessor(String destination) { this(null, null, null, null, destination, false); } RerouteProcessor( String tag, String description, - List dataset, - List namespace, - TemplateScript.Factory destination, + List dataset, + List namespace, + String destination, boolean skipIfTargetUnchanged ) { 
super(tag, description); @@ -86,7 +84,7 @@ private static String sanitizeNamespace(String namespace) { @Override public IngestDocument execute(IngestDocument ingestDocument) throws Exception { if (destination != null) { - ingestDocument.reroute(ingestDocument.renderTemplate(destination)); + ingestDocument.reroute(destination); return ingestDocument; } final String currentTarget = ingestDocument.getFieldValue(IngestDocument.Metadata.INDEX.getFieldName(), String.class); @@ -155,25 +153,31 @@ private String determineNamespace(IngestDocument ingestDocument, String currentN private String determineDataStreamField( IngestDocument ingestDocument, - List valueSources, + List valueSources, String fromCurrentTarget, Function sanitization, String dataStreamFieldName ) { - String result = ""; - for (TemplateScript.Factory value : valueSources) { - result = ingestDocument.renderTemplate(value); - if (Strings.isNullOrEmpty(result) == false) { + String result = null; + for (Iterator iterator = valueSources.iterator(); iterator.hasNext(); ) { + String value = iterator.next(); + if (value.startsWith("{") && value.endsWith("}")) { + String fieldReference = value.substring(1, value.length() - 1); + result = sanitization.apply(ingestDocument.getFieldValue(fieldReference, String.class, true)); + } else { + result = value; + } + if (result != null) { break; } } - if (Strings.isNullOrEmpty(result)) { - result = ingestDocument.getFieldValue(dataStreamFieldName, String.class, true); + if (result == null) { + result = sanitization.apply(ingestDocument.getFieldValue(dataStreamFieldName, String.class, true)); } - if (Strings.isNullOrEmpty(result)) { + if (result == null) { result = fromCurrentTarget; } - return sanitization.apply(result); + return result; } @Override @@ -181,26 +185,20 @@ public String getType() { return TYPE; } - public List getDataStreamDataset() { + public List getDataStreamDataset() { return dataset; } - public List getDataStreamNamespace() { + public List 
getDataStreamNamespace() { return namespace; } - public TemplateScript.Factory getDestination() { + public String getDestination() { return destination; } public static final class Factory implements Processor.Factory { - private final ScriptService scriptService; - - public Factory(ScriptService scriptService) { - this.scriptService = scriptService; - } - @Override public RerouteProcessor create( Map processorFactories, @@ -209,50 +207,30 @@ public RerouteProcessor create( Map config ) throws Exception { List dataset = ConfigurationUtils.readOptionalListOrString(TYPE, tag, config, "dataset"); - validateDataStreamValue(tag, dataset, "dataset", RerouteProcessor::sanitizeDataset); - List datasetTemplate = dataset.stream() - .map(ds -> ConfigurationUtils.compileTemplate(TYPE, tag, "dataset", ds, scriptService)) - .toList(); + dataset.stream() + .filter(ds -> ds.startsWith("{") == false) + .filter(ds -> Objects.equals(sanitizeDataset(ds), ds) == false) + .findAny() + .ifPresent(ds -> { + throw newConfigurationException(TYPE, tag, "dataset", "'" + ds + "' contains disallowed characters"); + }); List namespace = ConfigurationUtils.readOptionalListOrString(TYPE, tag, config, "namespace"); - validateDataStreamValue(tag, namespace, "namespace", RerouteProcessor::sanitizeNamespace); - List namespaceTemplate = namespace.stream() - .map(nsp -> ConfigurationUtils.compileTemplate(TYPE, tag, "namespace", nsp, scriptService)) - .toList(); - String destination = ConfigurationUtils.readOptionalStringProperty(TYPE, tag, config, "destination"); - TemplateScript.Factory destinationTemplate = null; - if (destination != null) { - destinationTemplate = ConfigurationUtils.compileTemplate( - TYPE, - tag, - "destination", - destination, - scriptService - ); - } + namespace.stream() + .filter(ns -> ns.startsWith("{") == false) + .filter(ns -> Objects.equals(sanitizeNamespace(ns), ns) == false) + .findAny() + .ifPresent(ns -> { + throw newConfigurationException(TYPE, tag, "namespace", "'" + ns 
+ "' contains disallowed characters"); + }); + String destination = ConfigurationUtils.readOptionalStringProperty(TYPE, tag, config, "destination"); if (destination != null && (dataset.isEmpty() == false || namespace.isEmpty() == false)) { throw newConfigurationException(TYPE, tag, "destination", "can only be set if dataset and namespace are not set"); } boolean skipIfTargetUnchanged = ConfigurationUtils.readBooleanProperty(TYPE, tag, config, "skip_if_target_unchanged", false); - return new RerouteProcessor(tag, description, datasetTemplate, namespaceTemplate, destinationTemplate, skipIfTargetUnchanged); - } - private static void validateDataStreamValue( - String tag, - List dataset, - String dataStreamComponent, - Function sanitizer - ) { - dataset.stream() - .filter(ds -> ds.contains("{{") == false) - .filter(ds -> Objects.equals(sanitizer.apply(ds), ds) == false) - .findAny() - .ifPresent( - ds -> { - throw newConfigurationException(TYPE, tag, dataStreamComponent, "'" + ds + "' contains disallowed characters"); - } - ); + return new RerouteProcessor(tag, description, dataset, namespace, destination, skipIfTargetUnchanged); } } } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java index 97b0e6562990a..d887ff0da3cf2 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java @@ -9,7 +9,6 @@ package org.elasticsearch.ingest.common; import org.elasticsearch.ElasticsearchParseException; -import org.elasticsearch.script.ScriptService; import org.elasticsearch.test.ESTestCase; import org.hamcrest.Matchers; @@ -18,7 +17,6 @@ import java.util.Map; import static org.hamcrest.CoreMatchers.equalTo; -import static org.mockito.Mockito.mock; public 
class RerouteProcessorFactoryTests extends ESTestCase { @@ -42,7 +40,7 @@ public void testDestinationSuccess() throws Exception { RerouteProcessor processor = create(Map.of("destination", "foo")); assertThat(processor.getDataStreamDataset(), equalTo(List.of())); assertThat(processor.getDataStreamNamespace(), equalTo(List.of())); - assertThat(processor.getDestination().newInstance(Map.of()).execute(), equalTo("foo")); + assertThat(processor.getDestination(), equalTo("foo")); } public void testDestinationAndDataset() { @@ -53,6 +51,7 @@ public void testDestinationAndDataset() { assertThat(e.getMessage(), Matchers.equalTo("[destination] can only be set if dataset and namespace are not set")); } + private static RerouteProcessor create(String dataset, String namespace) throws Exception { Map config = new HashMap<>(); if (dataset != null) { @@ -65,6 +64,6 @@ private static RerouteProcessor create(String dataset, String namespace) throws } private static RerouteProcessor create(Map config) throws Exception { - return new RerouteProcessor.Factory(mock(ScriptService.class)).create(null, null, null, new HashMap<>(config)); + return new RerouteProcessor.Factory().create(null, null, null, new HashMap<>(config)); } } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java index fde40efa46e11..865a2bc75b5a9 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java @@ -13,13 +13,10 @@ import org.elasticsearch.ingest.Processor; import org.elasticsearch.ingest.RandomDocumentPicks; import org.elasticsearch.ingest.TestProcessor; -import org.elasticsearch.ingest.TestTemplateService; import org.elasticsearch.ingest.WrappingProcessor; -import org.elasticsearch.script.TemplateScript; import 
org.elasticsearch.test.ESTestCase; import java.util.List; -import java.util.stream.Collectors; import static org.hamcrest.Matchers.equalTo; @@ -28,7 +25,7 @@ public class RerouteProcessorTests extends ESTestCase { public void testDefaults() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor processor = createRerouteProcessor(List.of(), List.of()); + RerouteProcessor processor = new RerouteProcessor(List.of(), List.of()); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "generic", "default"); } @@ -36,8 +33,8 @@ public void testDefaults() throws Exception { public void testSkipFirstProcessor() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor skippedProcessor = createRerouteProcessor(List.of("skip"), List.of()); - RerouteProcessor executedProcessor = createRerouteProcessor(List.of("executed"), List.of()); + RerouteProcessor skippedProcessor = new RerouteProcessor(List.of("skip"), List.of()); + RerouteProcessor executedProcessor = new RerouteProcessor(List.of("executed"), List.of()); CompoundProcessor processor = new CompoundProcessor(new SkipProcessor(skippedProcessor), executedProcessor); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "executed", "default"); @@ -46,8 +43,8 @@ public void testSkipFirstProcessor() throws Exception { public void testSkipLastProcessor() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor executedProcessor = createRerouteProcessor(List.of("executed"), List.of()); - RerouteProcessor skippedProcessor = createRerouteProcessor(List.of("skip"), List.of()); + RerouteProcessor executedProcessor = new RerouteProcessor(List.of("executed"), List.of()); + RerouteProcessor skippedProcessor = new RerouteProcessor(List.of("skip"), List.of()); CompoundProcessor processor = new 
CompoundProcessor(executedProcessor, skippedProcessor); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "executed", "default"); @@ -58,7 +55,7 @@ public void testDataStreamFieldsFromDocument() throws Exception { ingestDocument.setFieldValue("data_stream.dataset", "foo"); ingestDocument.setFieldValue("data_stream.namespace", "bar"); - RerouteProcessor processor = createRerouteProcessor(List.of(), List.of()); + RerouteProcessor processor = new RerouteProcessor(List.of(), List.of()); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "foo", "bar"); } @@ -68,7 +65,7 @@ public void testInvalidDataStreamFieldsFromDocument() throws Exception { ingestDocument.setFieldValue("data_stream.dataset", "foo-bar"); ingestDocument.setFieldValue("data_stream.namespace", "baz#qux"); - RerouteProcessor processor = createRerouteProcessor(List.of(), List.of()); + RerouteProcessor processor = new RerouteProcessor(List.of(), List.of()); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "foo_bar", "baz_qux"); } @@ -76,16 +73,26 @@ public void testInvalidDataStreamFieldsFromDocument() throws Exception { public void testDestination() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor processor = createRerouteProcessor("foo"); + RerouteProcessor processor = new RerouteProcessor("foo"); processor.execute(ingestDocument); assertFalse(ingestDocument.hasField("data_stream")); assertThat(ingestDocument.getFieldValue("_index", String.class), equalTo("foo")); } + public void testFieldReference() throws Exception { + IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); + ingestDocument.setFieldValue("service.name", "opbeans-java"); + ingestDocument.setFieldValue("service.environment", "dev"); + + RerouteProcessor processor = new RerouteProcessor(List.of("{service.name}"), List.of("{service.environment}")); + 
processor.execute(ingestDocument); + assertDataSetFields(ingestDocument, "logs", "opbeans_java", "dev"); + } + public void testRerouteToCurrentTarget() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor reroute = createRerouteProcessor(List.of("generic"), List.of("default")); + RerouteProcessor reroute = new RerouteProcessor(List.of("generic"), List.of("default")); CompoundProcessor processor = new CompoundProcessor( reroute, new TestProcessor(doc -> doc.setFieldValue("pipeline_is_continued", true)) @@ -98,7 +105,7 @@ public void testRerouteToCurrentTarget() throws Exception { public void testFieldReferenceWithMissingReroutesToCurrentTarget() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor reroute = createRerouteProcessor(List.of(""), List.of("")); + RerouteProcessor reroute = new RerouteProcessor(List.of("{service.name}"), List.of("{service.environment}")); CompoundProcessor processor = new CompoundProcessor( reroute, new TestProcessor(doc -> doc.setFieldValue("pipeline_is_continued", true)) @@ -111,10 +118,12 @@ public void testFieldReferenceWithMissingReroutesToCurrentTarget() throws Except public void testDataStreamFieldReference() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); + ingestDocument.setFieldValue("data_stream.dataset", "dataset_from_doc"); + ingestDocument.setFieldValue("data_stream.namespace", "namespace_from_doc"); - RerouteProcessor processor = createRerouteProcessor( - List.of("dataset_from_doc", "fallback"), - List.of("namespace_from_doc", "fallback") + RerouteProcessor processor = new RerouteProcessor( + List.of("{data_stream.dataset}", "fallback"), + List.of("{data_stream.namespace}", "fallback") ); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "dataset_from_doc", "namespace_from_doc"); @@ -123,9 +132,9 @@ public void 
testDataStreamFieldReference() throws Exception { public void testDatasetFieldReferenceMissingValue() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor processor = createRerouteProcessor( - List.of("", "fallback"), - List.of("", "fallback") + RerouteProcessor processor = new RerouteProcessor( + List.of("{data_stream.dataset}", "fallback"), + List.of("{data_stream.namespace}", "fallback") ); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "fallback", "fallback"); @@ -133,13 +142,15 @@ public void testDatasetFieldReferenceMissingValue() throws Exception { public void testDatasetFieldReference() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); + ingestDocument.setFieldValue("data_stream.dataset", "generic"); + ingestDocument.setFieldValue("data_stream.namespace", "default"); - RerouteProcessor processor = createRerouteProcessor( - List.of("generic", "fallback"), - List.of("default", "fallback") + RerouteProcessor processor = new RerouteProcessor( + List.of("{data_stream.dataset}", "fallback"), + List.of("{data_stream.namespace}", "fallback") ); processor.execute(ingestDocument); - assertDataSetFields(ingestDocument, "logs", "generic", "default"); + assertDataSetFields(ingestDocument, "logs", "fallback", "fallback"); } private void assertDataSetFields(IngestDocument ingestDocument, String type, String dataset, String namespace) { @@ -155,20 +166,7 @@ private static IngestDocument createIngestDocument(String dataStream) { return ingestDocument; } - private RerouteProcessor createRerouteProcessor(String destination) { - return new RerouteProcessor(new TestTemplateService.MockTemplateScript.Factory(destination)); - } - - private RerouteProcessor createRerouteProcessor(List dataset, List namespace) { - return new RerouteProcessor(asTemplate(dataset), asTemplate(namespace)); - } - - private static List asTemplate(List dataset) { - 
return dataset.stream().map(TestTemplateService.MockTemplateScript.Factory::new).collect(Collectors.toList()); - } - private static class SkipProcessor implements WrappingProcessor { - private final Processor processor; SkipProcessor(Processor processor) { From 584f8c53d138b25896d2922e81df07dac8b0ead9 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Mon, 27 Mar 2023 18:36:06 +0200 Subject: [PATCH 14/30] Set event.dataset to be consistent with data_stream.dataset --- .../ingest/common/RerouteProcessor.java | 6 +++++ .../ingest/common/RerouteProcessorTests.java | 26 +++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java index f55614164c7ca..c8c108c467645 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java @@ -29,6 +29,7 @@ public final class RerouteProcessor extends AbstractProcessor { private static final String DATA_STREAM_TYPE = DATA_STREAM_PREFIX + "type"; private static final String DATA_STREAM_DATASET = DATA_STREAM_PREFIX + "dataset"; private static final String DATA_STREAM_NAMESPACE = DATA_STREAM_PREFIX + "namespace"; + private static final String EVENT_DATASET = "event.dataset"; private static final char[] DISALLOWED_IN_DATASET = new char[] { '\\', '/', '*', '?', '\"', '<', '>', '|', ' ', ',', '#', ':', '-' }; private static final char[] DISALLOWED_IN_NAMESPACE = new char[] { '\\', '/', '*', '?', '\"', '<', '>', '|', ' ', ',', '#', ':' }; private static final int MAX_LENGTH = 100; @@ -116,6 +117,11 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { ingestDocument.setFieldValue(DATA_STREAM_TYPE, type); ingestDocument.setFieldValue(DATA_STREAM_DATASET, dataset); 
ingestDocument.setFieldValue(DATA_STREAM_NAMESPACE, namespace); + if (ingestDocument.hasField(EVENT_DATASET)) { + // ECS specifies that "event.dataset should have the same value as data_stream.dataset" + // not eagerly set event.dataset but only if the doc contains it already to ensure it's consistent with data_stream.dataset + ingestDocument.setFieldValue(EVENT_DATASET, dataset); + } return ingestDocument; } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java index 865a2bc75b5a9..16397c933428f 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java @@ -30,6 +30,26 @@ public void testDefaults() throws Exception { assertDataSetFields(ingestDocument, "logs", "generic", "default"); } + public void testEventDataset() throws Exception { + IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); + ingestDocument.setFieldValue("event.dataset", "foo"); + + RerouteProcessor processor = new RerouteProcessor(List.of("{event.dataset}"), List.of()); + processor.execute(ingestDocument); + assertDataSetFields(ingestDocument, "logs", "foo", "default"); + assertThat(ingestDocument.getFieldValue("event.dataset", String.class), equalTo("foo")); + } + + public void testNoDataset() throws Exception { + IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); + ingestDocument.setFieldValue("ds", "foo"); + + RerouteProcessor processor = new RerouteProcessor(List.of("{ds}"), List.of()); + processor.execute(ingestDocument); + assertDataSetFields(ingestDocument, "logs", "foo", "default"); + assertFalse(ingestDocument.hasField("event.dataset")); + } + public void testSkipFirstProcessor() throws Exception { IngestDocument ingestDocument = 
createIngestDocument("logs-generic-default"); @@ -158,6 +178,12 @@ private void assertDataSetFields(IngestDocument ingestDocument, String type, Str assertThat(ingestDocument.getFieldValue("data_stream.dataset", String.class), equalTo(dataset)); assertThat(ingestDocument.getFieldValue("data_stream.namespace", String.class), equalTo(namespace)); assertThat(ingestDocument.getFieldValue("_index", String.class), equalTo(type + "-" + dataset + "-" + namespace)); + if (ingestDocument.hasField("event.dataset")) { + assertThat( + ingestDocument.getFieldValue("event.dataset", String.class), + equalTo(ingestDocument.getFieldValue("data_stream.dataset", String.class)) + ); + } } private static IngestDocument createIngestDocument(String dataStream) { From 60df140086484c3a06f2da69a75ffe3a98942688 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Mon, 27 Mar 2023 18:36:56 +0200 Subject: [PATCH 15/30] Revert "Add skip_if_target_unchanged option" This reverts commit 4888843c845983811a7aee345efedf322ba26994. 
--- .../ingest/common/RerouteProcessor.java | 41 +++++++------------ 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java index c8c108c467645..7bde2d700b2b0 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java @@ -37,29 +37,20 @@ public final class RerouteProcessor extends AbstractProcessor { private final List dataset; private final List namespace; private final String destination; - private final boolean skipIfTargetUnchanged; RerouteProcessor(List dataset, List namespace) { - this(null, null, dataset, namespace, null, false); + this(null, null, dataset, namespace, null); } RerouteProcessor(String destination) { - this(null, null, null, null, destination, false); + this(null, null, null, null, destination); } - RerouteProcessor( - String tag, - String description, - List dataset, - List namespace, - String destination, - boolean skipIfTargetUnchanged - ) { + RerouteProcessor(String tag, String description, List dataset, List namespace, String destination) { super(tag, description); this.dataset = dataset; this.namespace = namespace; this.destination = destination; - this.skipIfTargetUnchanged = skipIfTargetUnchanged; } private static String sanitizeDataStreamField(String s, char[] disallowedInDataset) { @@ -88,21 +79,21 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { ingestDocument.reroute(destination); return ingestDocument; } - final String currentTarget = ingestDocument.getFieldValue(IngestDocument.Metadata.INDEX.getFieldName(), String.class); + final String indexName = ingestDocument.getFieldValue(IngestDocument.Metadata.INDEX.getFieldName(), String.class); final String type; final String 
currentDataset; final String currentNamespace; - int indexOfFirstDash = currentTarget.indexOf('-'); + int indexOfFirstDash = indexName.indexOf('-'); if (indexOfFirstDash < 0) { - throw createInvalidDataStreamNameException(currentTarget); + throw createInvalidDataStreamNameException(indexName); } - int indexOfSecondDash = currentTarget.indexOf('-', indexOfFirstDash + 1); + int indexOfSecondDash = indexName.indexOf('-', indexOfFirstDash + 1); if (indexOfSecondDash < 0) { - throw createInvalidDataStreamNameException(currentTarget); + throw createInvalidDataStreamNameException(indexName); } - type = parseDataStreamType(currentTarget, indexOfFirstDash); - currentDataset = parseDataStreamDataset(currentTarget, indexOfFirstDash, indexOfSecondDash); - currentNamespace = parseDataStreamNamespace(currentTarget, indexOfSecondDash); + type = parseDataStreamType(indexName, indexOfFirstDash); + currentDataset = parseDataStreamDataset(indexName, indexOfFirstDash, indexOfSecondDash); + currentNamespace = parseDataStreamNamespace(indexName, indexOfSecondDash); String dataset = determineDataset(ingestDocument, currentDataset); String namespace = determineNamespace(ingestDocument, currentNamespace); @@ -110,9 +101,6 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { return ingestDocument; } String newTarget = type + "-" + dataset + "-" + namespace; - if (newTarget.equals(currentTarget) && skipIfTargetUnchanged) { - return ingestDocument; - } ingestDocument.reroute(newTarget); ingestDocument.setFieldValue(DATA_STREAM_TYPE, type); ingestDocument.setFieldValue(DATA_STREAM_DATASET, dataset); @@ -170,6 +158,9 @@ private String determineDataStreamField( if (value.startsWith("{") && value.endsWith("}")) { String fieldReference = value.substring(1, value.length() - 1); result = sanitization.apply(ingestDocument.getFieldValue(fieldReference, String.class, true)); + if (fieldReference.equals(dataStreamFieldName) && fromCurrentTarget.equals(result)) { + result = 
null; + } } else { result = value; } @@ -233,10 +224,8 @@ public RerouteProcessor create( if (destination != null && (dataset.isEmpty() == false || namespace.isEmpty() == false)) { throw newConfigurationException(TYPE, tag, "destination", "can only be set if dataset and namespace are not set"); } - boolean skipIfTargetUnchanged = ConfigurationUtils.readBooleanProperty(TYPE, tag, config, "skip_if_target_unchanged", false); - - return new RerouteProcessor(tag, description, dataset, namespace, destination, skipIfTargetUnchanged); + return new RerouteProcessor(tag, description, dataset, namespace, destination); } } } From a3124321a286bf4a265f644c04faa95916d7a0e3 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Tue, 28 Mar 2023 18:34:49 +0200 Subject: [PATCH 16/30] Field references to use same syntax as mustache templates --- .../ingest/common/RerouteProcessor.java | 184 ++++++++++-------- .../common/RerouteProcessorFactoryTests.java | 11 ++ .../ingest/common/RerouteProcessorTests.java | 65 ++++--- 3 files changed, 158 insertions(+), 102 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java index 7bde2d700b2b0..ae9bfa1a06164 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java @@ -13,40 +13,40 @@ import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.Processor; -import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Objects; import java.util.function.Function; +import java.util.stream.Collectors; import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException; public final class RerouteProcessor extends AbstractProcessor { public static final String TYPE = "reroute"; 
+ private static final char[] DISALLOWED_IN_DATASET = new char[] { '\\', '/', '*', '?', '\"', '<', '>', '|', ' ', ',', '#', ':', '-' }; + private static final char[] DISALLOWED_IN_NAMESPACE = new char[] { '\\', '/', '*', '?', '\"', '<', '>', '|', ' ', ',', '#', ':' }; private static final String DATA_STREAM_PREFIX = "data_stream."; private static final String DATA_STREAM_TYPE = DATA_STREAM_PREFIX + "type"; private static final String DATA_STREAM_DATASET = DATA_STREAM_PREFIX + "dataset"; + private static final DataStreamValueSource DATASET_VALUE_SOURCE = DataStreamValueSource.dataset("{{" + DATA_STREAM_DATASET + "}}"); private static final String DATA_STREAM_NAMESPACE = DATA_STREAM_PREFIX + "namespace"; + private static final DataStreamValueSource NAMESPACE_VALUE_SOURCE = DataStreamValueSource.namespace("{{" + DATA_STREAM_NAMESPACE + "}}"); private static final String EVENT_DATASET = "event.dataset"; - private static final char[] DISALLOWED_IN_DATASET = new char[] { '\\', '/', '*', '?', '\"', '<', '>', '|', ' ', ',', '#', ':', '-' }; - private static final char[] DISALLOWED_IN_NAMESPACE = new char[] { '\\', '/', '*', '?', '\"', '<', '>', '|', ' ', ',', '#', ':' }; private static final int MAX_LENGTH = 100; private static final char REPLACEMENT_CHAR = '_'; - private final List dataset; - private final List namespace; + private final List dataset; + private final List namespace; private final String destination; - RerouteProcessor(List dataset, List namespace) { - this(null, null, dataset, namespace, null); - } - - RerouteProcessor(String destination) { - this(null, null, null, null, destination); - } - - RerouteProcessor(String tag, String description, List dataset, List namespace, String destination) { + RerouteProcessor( + String tag, + String description, + List dataset, + List namespace, + String destination + ) { super(tag, description); this.dataset = dataset; this.namespace = namespace; @@ -65,14 +65,6 @@ private static String sanitizeDataStreamField(String s, 
char[] disallowedInDatas return s; } - private static String sanitizeDataset(String dataset) { - return sanitizeDataStreamField(dataset, DISALLOWED_IN_DATASET); - } - - private static String sanitizeNamespace(String namespace) { - return sanitizeDataStreamField(namespace, DISALLOWED_IN_NAMESPACE); - } - @Override public IngestDocument execute(IngestDocument ingestDocument) throws Exception { if (destination != null) { @@ -95,8 +87,8 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { currentDataset = parseDataStreamDataset(indexName, indexOfFirstDash, indexOfSecondDash); currentNamespace = parseDataStreamNamespace(indexName, indexOfSecondDash); - String dataset = determineDataset(ingestDocument, currentDataset); - String namespace = determineNamespace(ingestDocument, currentNamespace); + String dataset = determineDataStreamField(ingestDocument, this.dataset, DATASET_VALUE_SOURCE, currentDataset); + String namespace = determineDataStreamField(ingestDocument, this.namespace, NAMESPACE_VALUE_SOURCE, currentNamespace); if (dataset == null || namespace == null) { return ingestDocument; } @@ -131,50 +123,23 @@ private static String parseDataStreamNamespace(String dataStreamName, int indexO return dataStreamName.substring(indexOfSecondDash + 1); } - private String determineDataset(IngestDocument ingestDocument, String currentDataset) { - return determineDataStreamField(ingestDocument, dataset, currentDataset, RerouteProcessor::sanitizeDataset, DATA_STREAM_DATASET); - } - - private String determineNamespace(IngestDocument ingestDocument, String currentNamespace) { - return determineDataStreamField( - ingestDocument, - namespace, - currentNamespace, - RerouteProcessor::sanitizeNamespace, - DATA_STREAM_NAMESPACE - ); - } - private String determineDataStreamField( IngestDocument ingestDocument, - List valueSources, - String fromCurrentTarget, - Function sanitization, - String dataStreamFieldName + List valueSources, + DataStreamValueSource 
dataStreamFieldReference, + String fromCurrentTarget ) { - String result = null; - for (Iterator iterator = valueSources.iterator(); iterator.hasNext(); ) { - String value = iterator.next(); - if (value.startsWith("{") && value.endsWith("}")) { - String fieldReference = value.substring(1, value.length() - 1); - result = sanitization.apply(ingestDocument.getFieldValue(fieldReference, String.class, true)); - if (fieldReference.equals(dataStreamFieldName) && fromCurrentTarget.equals(result)) { - result = null; - } - } else { - result = value; - } + for (DataStreamValueSource value : valueSources) { + String result = value.resolve(ingestDocument); if (result != null) { - break; + return result; } } - if (result == null) { - result = sanitization.apply(ingestDocument.getFieldValue(dataStreamFieldName, String.class, true)); + String fromDataStreamField = dataStreamFieldReference.resolve(ingestDocument); + if (fromDataStreamField != null) { + return fromDataStreamField; } - if (result == null) { - result = fromCurrentTarget; - } - return result; + return fromCurrentTarget; } @Override @@ -182,18 +147,75 @@ public String getType() { return TYPE; } - public List getDataStreamDataset() { + List getDataStreamDataset() { return dataset; } - public List getDataStreamNamespace() { + List getDataStreamNamespace() { return namespace; } - public String getDestination() { + String getDestination() { return destination; } + public static final class DataStreamValueSource { + private final String value; + private final String fieldReference; + private final Function sanitizer; + + public static DataStreamValueSource dataset(String dataset) { + return new DataStreamValueSource(dataset, ds -> sanitizeDataStreamField(ds, DISALLOWED_IN_DATASET)); + } + + public static DataStreamValueSource namespace(String namespace) { + return new DataStreamValueSource(namespace, nsp -> sanitizeDataStreamField(nsp, DISALLOWED_IN_NAMESPACE)); + } + + private DataStreamValueSource(String value, Function 
sanitizer) { + this.sanitizer = sanitizer; + this.value = value; + if (value.contains("{{") || value.contains("}}")) { + if (value.startsWith("{{") == false || value.endsWith("}}") == false) { + throw new IllegalArgumentException("'" + value + "' is not a valid field reference"); + } + String fieldReference = value.substring(2, value.length() - 2); + // field references may have two or three curly braces + if (fieldReference.startsWith("{") && fieldReference.endsWith("}")) { + fieldReference = fieldReference.substring(1, fieldReference.length() - 1); + } + // only a single field reference is allowed + // so something like this is disallowed: {{foo}}-{{bar}} + if (fieldReference.contains("{") || fieldReference.contains("}")) { + throw new IllegalArgumentException("'" + value + "' is not a valid field reference"); + } + this.fieldReference = fieldReference; + } else { + this.fieldReference = null; + if (Objects.equals(sanitizer.apply(value), value) == false) { + throw new IllegalArgumentException("'" + value + "' contains disallowed characters"); + } + } + } + + public String resolve(IngestDocument ingestDocument) { + if (fieldReference != null) { + try { + return sanitizer.apply(ingestDocument.getFieldValue(fieldReference, String.class, true)); + } catch (IllegalArgumentException e) { + return null; + } + } else { + return value; + } + } + + @Override + public String toString() { + return value; + } + } + public static final class Factory implements Processor.Factory { @Override @@ -203,22 +225,24 @@ public RerouteProcessor create( String description, Map config ) throws Exception { - List dataset = ConfigurationUtils.readOptionalListOrString(TYPE, tag, config, "dataset"); - dataset.stream() - .filter(ds -> ds.startsWith("{") == false) - .filter(ds -> Objects.equals(sanitizeDataset(ds), ds) == false) - .findAny() - .ifPresent(ds -> { - throw newConfigurationException(TYPE, tag, "dataset", "'" + ds + "' contains disallowed characters"); - }); - List namespace = 
ConfigurationUtils.readOptionalListOrString(TYPE, tag, config, "namespace"); - namespace.stream() - .filter(ns -> ns.startsWith("{") == false) - .filter(ns -> Objects.equals(sanitizeNamespace(ns), ns) == false) - .findAny() - .ifPresent(ns -> { - throw newConfigurationException(TYPE, tag, "namespace", "'" + ns + "' contains disallowed characters"); - }); + List dataset; + try { + dataset = ConfigurationUtils.readOptionalListOrString(TYPE, tag, config, "dataset") + .stream() + .map(ds -> DataStreamValueSource.dataset(ds)) + .collect(Collectors.toList()); + } catch (IllegalArgumentException e) { + throw newConfigurationException(TYPE, tag, "dataset", e.getMessage()); + } + List namespace; + try { + namespace = ConfigurationUtils.readOptionalListOrString(TYPE, tag, config, "namespace") + .stream() + .map(ds -> DataStreamValueSource.namespace(ds)) + .collect(Collectors.toList()); + } catch (IllegalArgumentException e) { + throw newConfigurationException(TYPE, tag, "namespace", e.getMessage()); + } String destination = ConfigurationUtils.readOptionalStringProperty(TYPE, tag, config, "destination"); if (destination != null && (dataset.isEmpty() == false || namespace.isEmpty() == false)) { diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java index d887ff0da3cf2..ae4043918c8b9 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java @@ -51,6 +51,17 @@ public void testDestinationAndDataset() { assertThat(e.getMessage(), Matchers.equalTo("[destination] can only be set if dataset and namespace are not set")); } + public void testFieldReference() throws Exception { + create("{{foo}}", "{{{bar}}}"); + } + + public void testInvalidFieldReference() 
throws Exception { + ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> create("{{foo}}-{{bar}}", "foo")); + assertThat(e.getMessage(), Matchers.equalTo("[dataset] '{{foo}}-{{bar}}' is not a valid field reference")); + + e = expectThrows(ElasticsearchParseException.class, () -> create("{{{{foo}}}}", "foo")); + assertThat(e.getMessage(), Matchers.equalTo("[dataset] '{{{{foo}}}}' is not a valid field reference")); + } private static RerouteProcessor create(String dataset, String namespace) throws Exception { Map config = new HashMap<>(); diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java index 16397c933428f..1c21576873e09 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java @@ -17,6 +17,7 @@ import org.elasticsearch.test.ESTestCase; import java.util.List; +import java.util.stream.Collectors; import static org.hamcrest.Matchers.equalTo; @@ -25,7 +26,7 @@ public class RerouteProcessorTests extends ESTestCase { public void testDefaults() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor processor = new RerouteProcessor(List.of(), List.of()); + RerouteProcessor processor = createRerouteProcessor(List.of(), List.of()); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "generic", "default"); } @@ -34,7 +35,7 @@ public void testEventDataset() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); ingestDocument.setFieldValue("event.dataset", "foo"); - RerouteProcessor processor = new RerouteProcessor(List.of("{event.dataset}"), List.of()); + RerouteProcessor processor = 
createRerouteProcessor(List.of("{{event.dataset}}"), List.of()); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "foo", "default"); assertThat(ingestDocument.getFieldValue("event.dataset", String.class), equalTo("foo")); @@ -44,7 +45,7 @@ public void testNoDataset() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); ingestDocument.setFieldValue("ds", "foo"); - RerouteProcessor processor = new RerouteProcessor(List.of("{ds}"), List.of()); + RerouteProcessor processor = createRerouteProcessor(List.of("{{ds}}"), List.of()); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "foo", "default"); assertFalse(ingestDocument.hasField("event.dataset")); @@ -53,8 +54,8 @@ public void testNoDataset() throws Exception { public void testSkipFirstProcessor() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor skippedProcessor = new RerouteProcessor(List.of("skip"), List.of()); - RerouteProcessor executedProcessor = new RerouteProcessor(List.of("executed"), List.of()); + RerouteProcessor skippedProcessor = createRerouteProcessor(List.of("skip"), List.of()); + RerouteProcessor executedProcessor = createRerouteProcessor(List.of("executed"), List.of()); CompoundProcessor processor = new CompoundProcessor(new SkipProcessor(skippedProcessor), executedProcessor); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "executed", "default"); @@ -63,8 +64,8 @@ public void testSkipFirstProcessor() throws Exception { public void testSkipLastProcessor() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor executedProcessor = new RerouteProcessor(List.of("executed"), List.of()); - RerouteProcessor skippedProcessor = new RerouteProcessor(List.of("skip"), List.of()); + RerouteProcessor executedProcessor = 
createRerouteProcessor(List.of("executed"), List.of()); + RerouteProcessor skippedProcessor = createRerouteProcessor(List.of("skip"), List.of()); CompoundProcessor processor = new CompoundProcessor(executedProcessor, skippedProcessor); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "executed", "default"); @@ -75,7 +76,7 @@ public void testDataStreamFieldsFromDocument() throws Exception { ingestDocument.setFieldValue("data_stream.dataset", "foo"); ingestDocument.setFieldValue("data_stream.namespace", "bar"); - RerouteProcessor processor = new RerouteProcessor(List.of(), List.of()); + RerouteProcessor processor = createRerouteProcessor(List.of(), List.of()); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "foo", "bar"); } @@ -85,7 +86,7 @@ public void testInvalidDataStreamFieldsFromDocument() throws Exception { ingestDocument.setFieldValue("data_stream.dataset", "foo-bar"); ingestDocument.setFieldValue("data_stream.namespace", "baz#qux"); - RerouteProcessor processor = new RerouteProcessor(List.of(), List.of()); + RerouteProcessor processor = createRerouteProcessor(List.of(), List.of()); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "foo_bar", "baz_qux"); } @@ -93,7 +94,7 @@ public void testInvalidDataStreamFieldsFromDocument() throws Exception { public void testDestination() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor processor = new RerouteProcessor("foo"); + RerouteProcessor processor = createRerouteProcessor("foo"); processor.execute(ingestDocument); assertFalse(ingestDocument.hasField("data_stream")); assertThat(ingestDocument.getFieldValue("_index", String.class), equalTo("foo")); @@ -104,7 +105,7 @@ public void testFieldReference() throws Exception { ingestDocument.setFieldValue("service.name", "opbeans-java"); ingestDocument.setFieldValue("service.environment", "dev"); - RerouteProcessor 
processor = new RerouteProcessor(List.of("{service.name}"), List.of("{service.environment}")); + RerouteProcessor processor = createRerouteProcessor(List.of("{{service.name}}"), List.of("{{service.environment}}")); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "opbeans_java", "dev"); } @@ -112,7 +113,7 @@ public void testFieldReference() throws Exception { public void testRerouteToCurrentTarget() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor reroute = new RerouteProcessor(List.of("generic"), List.of("default")); + RerouteProcessor reroute = createRerouteProcessor(List.of("generic"), List.of("default")); CompoundProcessor processor = new CompoundProcessor( reroute, new TestProcessor(doc -> doc.setFieldValue("pipeline_is_continued", true)) @@ -125,7 +126,7 @@ public void testRerouteToCurrentTarget() throws Exception { public void testFieldReferenceWithMissingReroutesToCurrentTarget() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor reroute = new RerouteProcessor(List.of("{service.name}"), List.of("{service.environment}")); + RerouteProcessor reroute = createRerouteProcessor(List.of("{{service.name}}"), List.of("{{service.environment}}")); CompoundProcessor processor = new CompoundProcessor( reroute, new TestProcessor(doc -> doc.setFieldValue("pipeline_is_continued", true)) @@ -141,9 +142,9 @@ public void testDataStreamFieldReference() throws Exception { ingestDocument.setFieldValue("data_stream.dataset", "dataset_from_doc"); ingestDocument.setFieldValue("data_stream.namespace", "namespace_from_doc"); - RerouteProcessor processor = new RerouteProcessor( - List.of("{data_stream.dataset}", "fallback"), - List.of("{data_stream.namespace}", "fallback") + RerouteProcessor processor = createRerouteProcessor( + List.of("{{{data_stream.dataset}}}", "fallback"), + List.of("{{data_stream.namespace}}", 
"fallback") ); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "dataset_from_doc", "namespace_from_doc"); @@ -152,9 +153,9 @@ public void testDataStreamFieldReference() throws Exception { public void testDatasetFieldReferenceMissingValue() throws Exception { IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); - RerouteProcessor processor = new RerouteProcessor( - List.of("{data_stream.dataset}", "fallback"), - List.of("{data_stream.namespace}", "fallback") + RerouteProcessor processor = createRerouteProcessor( + List.of("{{data_stream.dataset}}", "fallback"), + List.of("{{data_stream.namespace}}", "fallback") ); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "fallback", "fallback"); @@ -165,14 +166,34 @@ public void testDatasetFieldReference() throws Exception { ingestDocument.setFieldValue("data_stream.dataset", "generic"); ingestDocument.setFieldValue("data_stream.namespace", "default"); - RerouteProcessor processor = new RerouteProcessor( - List.of("{data_stream.dataset}", "fallback"), - List.of("{data_stream.namespace}", "fallback") + RerouteProcessor processor = createRerouteProcessor( + List.of("{{data_stream.dataset}}", "fallback"), + List.of("{{{data_stream.namespace}}}", "fallback") ); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "fallback", "fallback"); } + private RerouteProcessor createRerouteProcessor(List dataset, List namespace) { + return new RerouteProcessor( + null, + null, + dataset.stream().map(RerouteProcessor.DataStreamValueSource::dataset).collect(Collectors.toList()), + namespace.stream().map(RerouteProcessor.DataStreamValueSource::namespace).collect(Collectors.toList()), + null + ); + } + + private RerouteProcessor createRerouteProcessor(String destination) { + return new RerouteProcessor( + null, + null, + null, + null, + destination + ); + } + private void assertDataSetFields(IngestDocument ingestDocument, String 
type, String dataset, String namespace) { assertThat(ingestDocument.getFieldValue("data_stream.type", String.class), equalTo(type)); assertThat(ingestDocument.getFieldValue("data_stream.dataset", String.class), equalTo(dataset)); From 7dbd47ea6d4888de42f286842679b778bdc27c65 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 30 Mar 2023 12:05:20 +0200 Subject: [PATCH 17/30] Add comments to RerouteProcessor --- .../ingest/common/RerouteProcessor.java | 131 ++++++++++-------- 1 file changed, 76 insertions(+), 55 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java index ae9bfa1a06164..6c337b95626d6 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java @@ -8,6 +8,7 @@ package org.elasticsearch.ingest.common; +import org.elasticsearch.core.Nullable; import org.elasticsearch.ingest.AbstractProcessor; import org.elasticsearch.ingest.ConfigurationUtils; import org.elasticsearch.ingest.IngestDocument; @@ -21,18 +22,15 @@ import java.util.stream.Collectors; import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException; +import static org.elasticsearch.ingest.common.RerouteProcessor.DataStreamValueSource.DATASET_VALUE_SOURCE; +import static org.elasticsearch.ingest.common.RerouteProcessor.DataStreamValueSource.NAMESPACE_VALUE_SOURCE; public final class RerouteProcessor extends AbstractProcessor { public static final String TYPE = "reroute"; - - private static final char[] DISALLOWED_IN_DATASET = new char[] { '\\', '/', '*', '?', '\"', '<', '>', '|', ' ', ',', '#', ':', '-' }; - private static final char[] DISALLOWED_IN_NAMESPACE = new char[] { '\\', '/', '*', '?', '\"', '<', '>', '|', ' ', ',', '#', ':' }; private static final String 
DATA_STREAM_PREFIX = "data_stream."; private static final String DATA_STREAM_TYPE = DATA_STREAM_PREFIX + "type"; private static final String DATA_STREAM_DATASET = DATA_STREAM_PREFIX + "dataset"; - private static final DataStreamValueSource DATASET_VALUE_SOURCE = DataStreamValueSource.dataset("{{" + DATA_STREAM_DATASET + "}}"); private static final String DATA_STREAM_NAMESPACE = DATA_STREAM_PREFIX + "namespace"; - private static final DataStreamValueSource NAMESPACE_VALUE_SOURCE = DataStreamValueSource.namespace("{{" + DATA_STREAM_NAMESPACE + "}}"); private static final String EVENT_DATASET = "event.dataset"; private static final int MAX_LENGTH = 100; private static final char REPLACEMENT_CHAR = '_'; @@ -53,18 +51,6 @@ public final class RerouteProcessor extends AbstractProcessor { this.destination = destination; } - private static String sanitizeDataStreamField(String s, char[] disallowedInDataset) { - if (s == null) { - return null; - } - s = s.toLowerCase(Locale.ROOT); - s = s.substring(0, Math.min(s.length(), MAX_LENGTH)); - for (char c : disallowedInDataset) { - s = s.replace(c, REPLACEMENT_CHAR); - } - return s; - } - @Override public IngestDocument execute(IngestDocument ingestDocument) throws Exception { if (destination != null) { @@ -75,6 +61,8 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { final String type; final String currentDataset; final String currentNamespace; + + // parse out the -- components from _index int indexOfFirstDash = indexName.indexOf('-'); if (indexOfFirstDash < 0) { throw createInvalidDataStreamNameException(indexName); @@ -129,16 +117,21 @@ private String determineDataStreamField( DataStreamValueSource dataStreamFieldReference, String fromCurrentTarget ) { + // first try to get value from the configured dataset/namespace field references + // if the user has configured a static value rather than a field reference, this is guaranteed to return for (DataStreamValueSource value : valueSources) { String 
result = value.resolve(ingestDocument); if (result != null) { return result; } } + // if all field references have evaluated to null or missing, + // try to get the value from the data_stream. value from the doc String fromDataStreamField = dataStreamFieldReference.resolve(ingestDocument); if (fromDataStreamField != null) { return fromDataStreamField; } + // as a last resort, use the dataset/namespace value we parsed out from _index return fromCurrentTarget; } @@ -159,7 +152,53 @@ String getDestination() { return destination; } - public static final class DataStreamValueSource { + public static final class Factory implements Processor.Factory { + + @Override + public RerouteProcessor create( + Map processorFactories, + String tag, + String description, + Map config + ) throws Exception { + List dataset; + try { + dataset = ConfigurationUtils.readOptionalListOrString(TYPE, tag, config, "dataset") + .stream() + .map(DataStreamValueSource::dataset) + .collect(Collectors.toList()); + } catch (IllegalArgumentException e) { + throw newConfigurationException(TYPE, tag, "dataset", e.getMessage()); + } + List namespace; + try { + namespace = ConfigurationUtils.readOptionalListOrString(TYPE, tag, config, "namespace") + .stream() + .map(DataStreamValueSource::namespace) + .collect(Collectors.toList()); + } catch (IllegalArgumentException e) { + throw newConfigurationException(TYPE, tag, "namespace", e.getMessage()); + } + + String destination = ConfigurationUtils.readOptionalStringProperty(TYPE, tag, config, "destination"); + if (destination != null && (dataset.isEmpty() == false || namespace.isEmpty() == false)) { + throw newConfigurationException(TYPE, tag, "destination", "can only be set if dataset and namespace are not set"); + } + + return new RerouteProcessor(tag, description, dataset, namespace, destination); + } + } + + /** + * Contains either a {{field reference}} or a static value for a dataset or a namespace field + */ + static final class DataStreamValueSource { + 
+ private static final char[] DISALLOWED_IN_DATASET = new char[] { '\\', '/', '*', '?', '"', '<', '>', '|', ' ', ',', '#', ':', '-' }; + private static final char[] DISALLOWED_IN_NAMESPACE = new char[] { '\\', '/', '*', '?', '"', '<', '>', '|', ' ', ',', '#', ':' }; + static final DataStreamValueSource DATASET_VALUE_SOURCE = dataset("{{" + DATA_STREAM_DATASET + "}}"); + static final DataStreamValueSource NAMESPACE_VALUE_SOURCE = namespace("{{" + DATA_STREAM_NAMESPACE + "}}"); + private final String value; private final String fieldReference; private final Function sanitizer; @@ -172,6 +211,18 @@ public static DataStreamValueSource namespace(String namespace) { return new DataStreamValueSource(namespace, nsp -> sanitizeDataStreamField(nsp, DISALLOWED_IN_NAMESPACE)); } + private static String sanitizeDataStreamField(String s, char[] disallowedInDataset) { + if (s == null) { + return null; + } + s = s.toLowerCase(Locale.ROOT); + s = s.substring(0, Math.min(s.length(), MAX_LENGTH)); + for (char c : disallowedInDataset) { + s = s.replace(c, REPLACEMENT_CHAR); + } + return s; + } + private DataStreamValueSource(String value, Function sanitizer) { this.sanitizer = sanitizer; this.value = value; @@ -198,6 +249,13 @@ private DataStreamValueSource(String value, Function sanitizer) } } + /** + * Resolves the field reference from the provided ingest document or returns the static value if this value source doesn't represent + * a field reference. 
+ * @param ingestDocument + * @return the resolved field reference or static value + */ + @Nullable public String resolve(IngestDocument ingestDocument) { if (fieldReference != null) { try { @@ -215,41 +273,4 @@ public String toString() { return value; } } - - public static final class Factory implements Processor.Factory { - - @Override - public RerouteProcessor create( - Map processorFactories, - String tag, - String description, - Map config - ) throws Exception { - List dataset; - try { - dataset = ConfigurationUtils.readOptionalListOrString(TYPE, tag, config, "dataset") - .stream() - .map(ds -> DataStreamValueSource.dataset(ds)) - .collect(Collectors.toList()); - } catch (IllegalArgumentException e) { - throw newConfigurationException(TYPE, tag, "dataset", e.getMessage()); - } - List namespace; - try { - namespace = ConfigurationUtils.readOptionalListOrString(TYPE, tag, config, "namespace") - .stream() - .map(ds -> DataStreamValueSource.namespace(ds)) - .collect(Collectors.toList()); - } catch (IllegalArgumentException e) { - throw newConfigurationException(TYPE, tag, "namespace", e.getMessage()); - } - - String destination = ConfigurationUtils.readOptionalStringProperty(TYPE, tag, config, "destination"); - if (destination != null && (dataset.isEmpty() == false || namespace.isEmpty() == false)) { - throw newConfigurationException(TYPE, tag, "destination", "can only be set if dataset and namespace are not set"); - } - - return new RerouteProcessor(tag, description, dataset, namespace, destination); - } - } } From c8cdaaecee839a0559acb647fe40835c6e87955c Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 30 Mar 2023 14:53:21 +0200 Subject: [PATCH 18/30] Use {{data_stream.dataset}} and {{data_sream.namespace}} as default values --- .../ingest/common/RerouteProcessor.java | 34 +++++++++---------- .../common/RerouteProcessorFactoryTests.java | 15 +++++--- .../ingest/common/RerouteProcessorTests.java | 19 +++++++++-- 3 files changed, 42 insertions(+), 26 
deletions(-) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java index 6c337b95626d6..ca70596b339c3 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java @@ -46,8 +46,16 @@ public final class RerouteProcessor extends AbstractProcessor { String destination ) { super(tag, description); - this.dataset = dataset; - this.namespace = namespace; + if (dataset.isEmpty()) { + this.dataset = List.of(DATASET_VALUE_SOURCE); + } else { + this.dataset = dataset; + } + if (namespace.isEmpty()) { + this.namespace = List.of(NAMESPACE_VALUE_SOURCE); + } else { + this.namespace = namespace; + } this.destination = destination; } @@ -75,11 +83,8 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { currentDataset = parseDataStreamDataset(indexName, indexOfFirstDash, indexOfSecondDash); currentNamespace = parseDataStreamNamespace(indexName, indexOfSecondDash); - String dataset = determineDataStreamField(ingestDocument, this.dataset, DATASET_VALUE_SOURCE, currentDataset); - String namespace = determineDataStreamField(ingestDocument, this.namespace, NAMESPACE_VALUE_SOURCE, currentNamespace); - if (dataset == null || namespace == null) { - return ingestDocument; - } + String dataset = determineDataStreamField(ingestDocument, this.dataset, currentDataset); + String namespace = determineDataStreamField(ingestDocument, this.namespace, currentNamespace); String newTarget = type + "-" + dataset + "-" + namespace; ingestDocument.reroute(newTarget); ingestDocument.setFieldValue(DATA_STREAM_TYPE, type); @@ -114,25 +119,18 @@ private static String parseDataStreamNamespace(String dataStreamName, int indexO private String determineDataStreamField( IngestDocument ingestDocument, List 
valueSources, - DataStreamValueSource dataStreamFieldReference, - String fromCurrentTarget + String fallbackFromCurrentTarget ) { // first try to get value from the configured dataset/namespace field references - // if the user has configured a static value rather than a field reference, this is guaranteed to return + // if this contains a static value rather than a field reference, this is guaranteed to return for (DataStreamValueSource value : valueSources) { String result = value.resolve(ingestDocument); if (result != null) { return result; } } - // if all field references have evaluated to null or missing, - // try to get the value from the data_stream. value from the doc - String fromDataStreamField = dataStreamFieldReference.resolve(ingestDocument); - if (fromDataStreamField != null) { - return fromDataStreamField; - } - // as a last resort, use the dataset/namespace value we parsed out from _index - return fromCurrentTarget; + // use the dataset/namespace value we parsed out from the current target (_index) as a fallback + return fallbackFromCurrentTarget; } @Override diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java index ae4043918c8b9..12d8126515dea 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java @@ -15,15 +15,22 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import static org.hamcrest.CoreMatchers.equalTo; public class RerouteProcessorFactoryTests extends ESTestCase { - public void testSuccess() throws Exception { + public void testDefaults() throws Exception { RerouteProcessor processor = create(null, null); - assertThat(processor.getDataStreamDataset(), 
equalTo(List.of())); - assertThat(processor.getDataStreamNamespace(), equalTo(List.of())); + assertThat( + processor.getDataStreamDataset().stream().map(RerouteProcessor.DataStreamValueSource::toString).collect(Collectors.toList()), + equalTo(List.of("{{data_stream.dataset}}")) + ); + assertThat( + processor.getDataStreamNamespace().stream().map(RerouteProcessor.DataStreamValueSource::toString).collect(Collectors.toList()), + equalTo(List.of("{{data_stream.namespace}}")) + ); } public void testInvalidDataset() throws Exception { @@ -38,8 +45,6 @@ public void testInvalidNamespace() throws Exception { public void testDestinationSuccess() throws Exception { RerouteProcessor processor = create(Map.of("destination", "foo")); - assertThat(processor.getDataStreamDataset(), equalTo(List.of())); - assertThat(processor.getDataStreamNamespace(), equalTo(List.of())); assertThat(processor.getDestination(), equalTo("foo")); } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java index 1c21576873e09..e952dbf03cd33 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java @@ -171,7 +171,20 @@ public void testDatasetFieldReference() throws Exception { List.of("{{{data_stream.namespace}}}", "fallback") ); processor.execute(ingestDocument); - assertDataSetFields(ingestDocument, "logs", "fallback", "fallback"); + assertDataSetFields(ingestDocument, "logs", "generic", "default"); + } + + public void testFallbackToValuesFrom_index() throws Exception { + IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); + ingestDocument.setFieldValue("data_stream.dataset", "foo"); + ingestDocument.setFieldValue("data_stream.namespace", "bar"); + + RerouteProcessor processor = 
createRerouteProcessor( + List.of("{{foo}}"), + List.of("{{bar}}") + ); + processor.execute(ingestDocument); + assertDataSetFields(ingestDocument, "logs", "generic", "default"); } private RerouteProcessor createRerouteProcessor(List dataset, List namespace) { @@ -188,8 +201,8 @@ private RerouteProcessor createRerouteProcessor(String destination) { return new RerouteProcessor( null, null, - null, - null, + List.of(), + List.of(), destination ); } From 24aaab93c5de5342a37d64e8b99de5964db0b24d Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 30 Mar 2023 15:14:29 +0200 Subject: [PATCH 19/30] Update docs --- docs/reference/ingest/processors.asciidoc | 2 +- .../ingest/processors/reroute.asciidoc | 47 ++++++++++++++++--- 2 files changed, 41 insertions(+), 8 deletions(-) diff --git a/docs/reference/ingest/processors.asciidoc b/docs/reference/ingest/processors.asciidoc index d9c3ce1f858d7..4132773e3d427 100644 --- a/docs/reference/ingest/processors.asciidoc +++ b/docs/reference/ingest/processors.asciidoc @@ -39,7 +39,6 @@ include::processors/circle.asciidoc[] include::processors/community-id.asciidoc[] include::processors/convert.asciidoc[] include::processors/csv.asciidoc[] -include::processors/reroute.asciidoc[] include::processors/date.asciidoc[] include::processors/date-index-name.asciidoc[] include::processors/dissect.asciidoc[] @@ -65,6 +64,7 @@ include::processors/redact.asciidoc[] include::processors/registered-domain.asciidoc[] include::processors/remove.asciidoc[] include::processors/rename.asciidoc[] +include::processors/reroute.asciidoc[] include::processors/script.asciidoc[] include::processors/set.asciidoc[] include::processors/set-security-user.asciidoc[] diff --git a/docs/reference/ingest/processors/reroute.asciidoc b/docs/reference/ingest/processors/reroute.asciidoc index cd6a233933999..6928ee8220e6c 100644 --- a/docs/reference/ingest/processors/reroute.asciidoc +++ b/docs/reference/ingest/processors/reroute.asciidoc @@ -4,6 +4,8 @@ Reroute ++++ 
+experimental::[] + The `reroute` processor allows to route a document to another target index or data stream. It has two main modes: @@ -13,18 +15,23 @@ When the `destination` option is not set, this processor is in a data stream mod Note that in this mode, the `reroute` processor can only be used on data streams that follow the {fleet-guide}/data-streams.html#data-streams-naming-scheme[data stream naming scheme]. Trying to use this processor on a data stream with a non-compliant name will raise an exception. -The name of a data stream is comprised of three parts and looks like this: `<type>-<dataset>-<namespace>`. +The name of a data stream consists of three parts: `<type>-<dataset>-<namespace>`. See the {fleet-guide}/data-streams.html#data-streams-naming-scheme[data stream naming scheme] documentation for more details. -It can use both static values or values from the document to determine the target. +This processor can use either static values or reference fields from the document to determine the `dataset` and `namespace` components of the new target. See <> for more details. +NOTE: It's not possible to change the `type` of the data stream with the `reroute` processor. + After a `reroute` processor has been executed, all the other processors of the current pipeline are skipped. If the current pipeline is executed in the context of a <>, the calling pipeline will be skipped, too. This means that at most one `reroute` processor is ever executed within a pipeline, allowing to define mutually exclusive routing conditions, similar to a if, else-if, else-if, … condition. +The reroute processor ensures that the `data_stream.` fields are set according to the new target. +If the document contains an `event.dataset` value, it will be updated to reflect the same value as `data_stream.dataset`. + Note that the client needs to have permissions to the final target.
Otherwise, the document will be rejected with a security exception which looks like this: @@ -38,14 +45,18 @@ Otherwise, the document will be rejected with a security exception which looks l .Reroute options [options="header"] |====== -| Name | Required | Default | Description -| `destination` | no | - | A static value for the target. Can't be set when the `dataset` or `namespace` option is set. -| `dataset` | no | - | A static value for the dataset part of the data stream name. In addition to the criteria for <>, cannot contain `-` and must be no longer than 100 characters. Example values are `nginx.access` and `nginx.error`. If not set, gets the value of the field `data_stream.dataset` from the document. When using values from the document, the processor replaces invalid characters with `_`. If the option is not set and the document also doesn't contain a corresponding field, it uses the `` part of the index name as a fallback. -| `namespace` | no | - | A static value for the namespace part of the data stream name. See the criteria for <> for allowed characters. Must be no longer than 100 characters. If not set, gets the value of the field `data_stream.namespace` from the document. When using values from the document, the processor replaces invalid characters with `_`. If the option is not set and the document also doesn't contain a corresponding field, it uses the `` part of the index name as a fallback. +| Name | Required | Default | Description +| `destination` | no | - | A static value for the target. Can't be set when the `dataset` or `namespace` option is set. +| `dataset` | no | `{{data_stream.dataset}}` a| Field references or a static value for the dataset part of the data stream name. In addition to the criteria for <>, cannot contain `-` and must be no longer than 100 characters. Example values are `nginx.access` and `nginx.error`. + +Supports field references with a mustache-like syntax (denoted as `{{double}}` or `{{{triple}}}` curly braces). 
When resolving field references, the processor replaces invalid characters with `_`. Uses the `<dataset>` part of the index name as a fallback if all field references resolve to a `null`, missing, or non-string value. +| `namespace` | no | `{{data_stream.namespace}}` a| Field references or a static value for the namespace part of the data stream name. See the criteria for <> for allowed characters. Must be no longer than 100 characters. + +Supports field references with a mustache-like syntax (denoted as `{{double}}` or `{{{triple}}}` curly braces). When resolving field references, the processor replaces invalid characters with `_`. Uses the `<namespace>` part of the index name as a fallback if all field references resolve to a `null`, missing, or non-string value. include::common-options.asciidoc[] |====== -NOTE: It's not possible to change the `type` of the data stream by setting the `data_stream.type` in the document. +The `if` option can be used to define the condition in which the document should be rerouted to a new target. [source,js] -------------------------------------------------- @@ -58,3 +69,25 @@ NOTE: It's not possible to change the `type` of the data stream by setting the ` } -------------------------------------------------- // NOTCONSOLE + +The dataset and namespace options can contain either a single value or a list of values that are used as a fallback. +If a field reference evaluates to `null`, is not present in the document, or if the value is not a `String`, the next value or field reference is used. + +In the following example, the processor would first try to resolve the value for the `service.name` field to determine the value for `dataset`. +If that field resolves to `null`, is missing, or is a non-string value, it would try the next element in the list. +In this case, this is the static value `"generic"`. +The `namespace` option is configured with just a single static value.
+ +[source,js] +-------------------------------------------------- +{ + "reroute": { + "dataset": [ + "{{service.name}}", + "generic" + ], + "namespace": "default" + } +} +-------------------------------------------------- +// NOTCONSOLE From 1d454b6b94622b379f59e52f4062c3f30370bfdc Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 30 Mar 2023 15:28:50 +0200 Subject: [PATCH 20/30] Apply spotless suggestions --- .../ingest/common/RerouteProcessorTests.java | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java index e952dbf03cd33..7179308bc94d7 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java @@ -179,10 +179,7 @@ public void testFallbackToValuesFrom_index() throws Exception { ingestDocument.setFieldValue("data_stream.dataset", "foo"); ingestDocument.setFieldValue("data_stream.namespace", "bar"); - RerouteProcessor processor = createRerouteProcessor( - List.of("{{foo}}"), - List.of("{{bar}}") - ); + RerouteProcessor processor = createRerouteProcessor(List.of("{{foo}}"), List.of("{{bar}}")); processor.execute(ingestDocument); assertDataSetFields(ingestDocument, "logs", "generic", "default"); } @@ -198,13 +195,7 @@ private RerouteProcessor createRerouteProcessor(List dataset, List Date: Tue, 11 Apr 2023 09:23:04 +0200 Subject: [PATCH 21/30] Update docs/reference/ingest/processors/reroute.asciidoc Co-authored-by: Nicolas Ruflin --- docs/reference/ingest/processors/reroute.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/ingest/processors/reroute.asciidoc b/docs/reference/ingest/processors/reroute.asciidoc index 6928ee8220e6c..946da03218c52 100644 
--- a/docs/reference/ingest/processors/reroute.asciidoc +++ b/docs/reference/ingest/processors/reroute.asciidoc @@ -23,7 +23,7 @@ See <> for more details. NOTE: It's not possible to change the `type` of the data stream with the `reroute` processor. -After a `reroute` processor has been executed, all the other processors of the current pipeline are skipped. +After a `reroute` processor has been executed, all the other processors of the current pipeline are skipped, including the final pipeline. If the current pipeline is executed in the context of a <>, the calling pipeline will be skipped, too. This means that at most one `reroute` processor is ever executed within a pipeline, allowing to define mutually exclusive routing conditions, From d5491deef24618fbdbfeed6fbfd968f9c199bc0a Mon Sep 17 00:00:00 2001 From: Joe Gallo Date: Tue, 11 Apr 2023 14:22:41 -0400 Subject: [PATCH 22/30] Tidy up imports --- .../ingest/common/RerouteProcessorFactoryTests.java | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java index 12d8126515dea..164e3d37eb692 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java @@ -10,14 +10,13 @@ import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.test.ESTestCase; -import org.hamcrest.Matchers; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; -import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.Matchers.equalTo; public class RerouteProcessorFactoryTests extends ESTestCase { @@ -35,12 +34,12 @@ public void testDefaults() throws Exception { public void 
testInvalidDataset() throws Exception { ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> create("my-service", null)); - assertThat(e.getMessage(), Matchers.equalTo("[dataset] 'my-service' contains disallowed characters")); + assertThat(e.getMessage(), equalTo("[dataset] 'my-service' contains disallowed characters")); } public void testInvalidNamespace() throws Exception { ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> create("generic", "foo:bar")); - assertThat(e.getMessage(), Matchers.equalTo("[namespace] 'foo:bar' contains disallowed characters")); + assertThat(e.getMessage(), equalTo("[namespace] 'foo:bar' contains disallowed characters")); } public void testDestinationSuccess() throws Exception { @@ -53,7 +52,7 @@ public void testDestinationAndDataset() { ElasticsearchParseException.class, () -> create(Map.of("destination", "foo", "dataset", "bar")) ); - assertThat(e.getMessage(), Matchers.equalTo("[destination] can only be set if dataset and namespace are not set")); + assertThat(e.getMessage(), equalTo("[destination] can only be set if dataset and namespace are not set")); } public void testFieldReference() throws Exception { @@ -62,10 +61,10 @@ public void testFieldReference() throws Exception { public void testInvalidFieldReference() throws Exception { ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> create("{{foo}}-{{bar}}", "foo")); - assertThat(e.getMessage(), Matchers.equalTo("[dataset] '{{foo}}-{{bar}}' is not a valid field reference")); + assertThat(e.getMessage(), equalTo("[dataset] '{{foo}}-{{bar}}' is not a valid field reference")); e = expectThrows(ElasticsearchParseException.class, () -> create("{{{{foo}}}}", "foo")); - assertThat(e.getMessage(), Matchers.equalTo("[dataset] '{{{{foo}}}}' is not a valid field reference")); + assertThat(e.getMessage(), equalTo("[dataset] '{{{{foo}}}}' is not a valid field reference")); } private static 
RerouteProcessor create(String dataset, String namespace) throws Exception { From e3c3ad1870a5d429bb12d657422154a6e1e5766d Mon Sep 17 00:00:00 2001 From: Joe Gallo Date: Tue, 11 Apr 2023 14:24:35 -0400 Subject: [PATCH 23/30] Prefer toList --- .../org/elasticsearch/ingest/common/RerouteProcessor.java | 5 ++--- .../ingest/common/RerouteProcessorFactoryTests.java | 5 ++--- .../elasticsearch/ingest/common/RerouteProcessorTests.java | 5 ++--- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java index ca70596b339c3..6abf3ade20d9d 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java @@ -19,7 +19,6 @@ import java.util.Map; import java.util.Objects; import java.util.function.Function; -import java.util.stream.Collectors; import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException; import static org.elasticsearch.ingest.common.RerouteProcessor.DataStreamValueSource.DATASET_VALUE_SOURCE; @@ -164,7 +163,7 @@ public RerouteProcessor create( dataset = ConfigurationUtils.readOptionalListOrString(TYPE, tag, config, "dataset") .stream() .map(DataStreamValueSource::dataset) - .collect(Collectors.toList()); + .toList(); } catch (IllegalArgumentException e) { throw newConfigurationException(TYPE, tag, "dataset", e.getMessage()); } @@ -173,7 +172,7 @@ public RerouteProcessor create( namespace = ConfigurationUtils.readOptionalListOrString(TYPE, tag, config, "namespace") .stream() .map(DataStreamValueSource::namespace) - .collect(Collectors.toList()); + .toList(); } catch (IllegalArgumentException e) { throw newConfigurationException(TYPE, tag, "namespace", e.getMessage()); } diff --git 
a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java index 164e3d37eb692..87c9d9dfffc60 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java @@ -14,7 +14,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; import static org.hamcrest.Matchers.equalTo; @@ -23,11 +22,11 @@ public class RerouteProcessorFactoryTests extends ESTestCase { public void testDefaults() throws Exception { RerouteProcessor processor = create(null, null); assertThat( - processor.getDataStreamDataset().stream().map(RerouteProcessor.DataStreamValueSource::toString).collect(Collectors.toList()), + processor.getDataStreamDataset().stream().map(RerouteProcessor.DataStreamValueSource::toString).toList(), equalTo(List.of("{{data_stream.dataset}}")) ); assertThat( - processor.getDataStreamNamespace().stream().map(RerouteProcessor.DataStreamValueSource::toString).collect(Collectors.toList()), + processor.getDataStreamNamespace().stream().map(RerouteProcessor.DataStreamValueSource::toString).toList(), equalTo(List.of("{{data_stream.namespace}}")) ); } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java index 7179308bc94d7..254272274af4f 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java @@ -17,7 +17,6 @@ import org.elasticsearch.test.ESTestCase; import java.util.List; -import java.util.stream.Collectors; import static 
org.hamcrest.Matchers.equalTo; @@ -188,8 +187,8 @@ private RerouteProcessor createRerouteProcessor(List dataset, List Date: Tue, 11 Apr 2023 17:24:10 -0400 Subject: [PATCH 24/30] Add a comment --- .../java/org/elasticsearch/ingest/common/RerouteProcessor.java | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java index 6abf3ade20d9d..5c5e869e7c68e 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java @@ -258,6 +258,7 @@ public String resolve(IngestDocument ingestDocument) { try { return sanitizer.apply(ingestDocument.getFieldValue(fieldReference, String.class, true)); } catch (IllegalArgumentException e) { + // thrown if fieldReference refers to something that isn't a String return null; } } else { From 7647f9ef24d9f6cb50c25936aab01fc56a4bef5d Mon Sep 17 00:00:00 2001 From: Joe Gallo Date: Tue, 11 Apr 2023 17:15:58 -0400 Subject: [PATCH 25/30] Drop a utility method, add a test --- .../ingest/common/RerouteProcessor.java | 16 ++++++++-------- .../ingest/common/RerouteProcessorTests.java | 17 +++++++++++++++++ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java index 5c5e869e7c68e..3deb8daf6fa0d 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java @@ -20,12 +20,18 @@ import java.util.Objects; import java.util.function.Function; +import static org.elasticsearch.core.Strings.format; import static 
org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException; import static org.elasticsearch.ingest.common.RerouteProcessor.DataStreamValueSource.DATASET_VALUE_SOURCE; import static org.elasticsearch.ingest.common.RerouteProcessor.DataStreamValueSource.NAMESPACE_VALUE_SOURCE; public final class RerouteProcessor extends AbstractProcessor { + public static final String TYPE = "reroute"; + + private static final String NAMING_SCHEME_ERROR_MESSAGE = + "invalid data stream name: [%s]; must follow naming scheme --"; + private static final String DATA_STREAM_PREFIX = "data_stream."; private static final String DATA_STREAM_TYPE = DATA_STREAM_PREFIX + "type"; private static final String DATA_STREAM_DATASET = DATA_STREAM_PREFIX + "dataset"; @@ -72,11 +78,11 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { // parse out the -- components from _index int indexOfFirstDash = indexName.indexOf('-'); if (indexOfFirstDash < 0) { - throw createInvalidDataStreamNameException(indexName); + throw new IllegalArgumentException(format(NAMING_SCHEME_ERROR_MESSAGE, indexName)); } int indexOfSecondDash = indexName.indexOf('-', indexOfFirstDash + 1); if (indexOfSecondDash < 0) { - throw createInvalidDataStreamNameException(indexName); + throw new IllegalArgumentException(format(NAMING_SCHEME_ERROR_MESSAGE, indexName)); } type = parseDataStreamType(indexName, indexOfFirstDash); currentDataset = parseDataStreamDataset(indexName, indexOfFirstDash, indexOfSecondDash); @@ -97,12 +103,6 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { return ingestDocument; } - private static IllegalArgumentException createInvalidDataStreamNameException(String indexName) { - return new IllegalArgumentException( - "invalid data stream name: [" + indexName + "]; must follow naming scheme --" - ); - } - private static String parseDataStreamType(String dataStreamName, int indexOfFirstDash) { return dataStreamName.substring(0, 
indexOfFirstDash); } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java index 254272274af4f..811237328656f 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java @@ -183,6 +183,23 @@ public void testFallbackToValuesFrom_index() throws Exception { assertDataSetFields(ingestDocument, "logs", "generic", "default"); } + public void testInvalidDataStreamName() throws Exception { + { + IngestDocument ingestDocument = createIngestDocument("foo"); + RerouteProcessor processor = createRerouteProcessor(List.of(), List.of()); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> processor.execute(ingestDocument)); + assertThat(e.getMessage(), equalTo("invalid data stream name: [foo]; must follow naming scheme --")); + } + + { + // naturally, though, a plain destination doesn't have to match the data stream naming convention + IngestDocument ingestDocument = createIngestDocument("foo"); + RerouteProcessor processor = createRerouteProcessor("bar"); + processor.execute(ingestDocument); + assertThat(ingestDocument.getFieldValue("_index", String.class), equalTo("bar")); + } + } + private RerouteProcessor createRerouteProcessor(List dataset, List namespace) { return new RerouteProcessor( null, From 0ef9503e9cc2231e058a9c1e6377db3a967c65a6 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Wed, 12 Apr 2023 19:24:11 +0200 Subject: [PATCH 26/30] Add sanitization tests --- .../ingest/common/RerouteProcessorTests.java | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java 
b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java index 811237328656f..b0b7f61c482df 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java @@ -17,6 +17,7 @@ import org.elasticsearch.test.ESTestCase; import java.util.List; +import java.util.Map; import static org.hamcrest.Matchers.equalTo; @@ -200,6 +201,32 @@ public void testInvalidDataStreamName() throws Exception { } } + public void testDatasetSanitization() { + assertDatasetSanitization("\\/*?\"<>| ,#:-", "_____________"); + assertDatasetSanitization("foo*bar", "foo_bar"); + } + + public void testNamespaceSanitization() { + assertNamespaceSanitization("\\/*?\"<>| ,#:-", "____________-"); + assertNamespaceSanitization("foo*bar", "foo_bar"); + } + + private static void assertDatasetSanitization(String dataset, String sanitizedDataset) { + assertThat( + RerouteProcessor.DataStreamValueSource.dataset("{{foo}}") + .resolve(RandomDocumentPicks.randomIngestDocument(random(), Map.of("foo", dataset))), + equalTo(sanitizedDataset) + ); + } + + private static void assertNamespaceSanitization(String namespace, String sanitizedNamespace) { + assertThat( + RerouteProcessor.DataStreamValueSource.namespace("{{foo}}") + .resolve(RandomDocumentPicks.randomIngestDocument(random(), Map.of("foo", namespace))), + equalTo(sanitizedNamespace) + ); + } + private RerouteProcessor createRerouteProcessor(List dataset, List namespace) { return new RerouteProcessor( null, From 755363860211be181d863af825f5b26e40dc1907 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 13 Apr 2023 11:28:47 +0200 Subject: [PATCH 27/30] Move sanitization constants --- .../org/elasticsearch/ingest/common/RerouteProcessor.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java index 3deb8daf6fa0d..4402c4606241e 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java @@ -37,8 +37,6 @@ public final class RerouteProcessor extends AbstractProcessor { private static final String DATA_STREAM_DATASET = DATA_STREAM_PREFIX + "dataset"; private static final String DATA_STREAM_NAMESPACE = DATA_STREAM_PREFIX + "namespace"; private static final String EVENT_DATASET = "event.dataset"; - private static final int MAX_LENGTH = 100; - private static final char REPLACEMENT_CHAR = '_'; private final List dataset; private final List namespace; private final String destination; @@ -191,6 +189,8 @@ public RerouteProcessor create( */ static final class DataStreamValueSource { + private static final int MAX_LENGTH = 100; + private static final char REPLACEMENT_CHAR = '_'; private static final char[] DISALLOWED_IN_DATASET = new char[] { '\\', '/', '*', '?', '"', '<', '>', '|', ' ', ',', '#', ':', '-' }; private static final char[] DISALLOWED_IN_NAMESPACE = new char[] { '\\', '/', '*', '?', '"', '<', '>', '|', ' ', ',', '#', ':' }; static final DataStreamValueSource DATASET_VALUE_SOURCE = dataset("{{" + DATA_STREAM_DATASET + "}}"); From c49e70b53eb0937d353ad4a44e71a6ea700b2573 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 13 Apr 2023 11:40:21 +0200 Subject: [PATCH 28/30] Use regex for sanitization --- .../ingest/common/RerouteProcessor.java | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java index 
4402c4606241e..3b5d67c192c4f 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java @@ -19,6 +19,7 @@ import java.util.Map; import java.util.Objects; import java.util.function.Function; +import java.util.regex.Pattern; import static org.elasticsearch.core.Strings.format; import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException; @@ -190,9 +191,9 @@ public RerouteProcessor create( static final class DataStreamValueSource { private static final int MAX_LENGTH = 100; - private static final char REPLACEMENT_CHAR = '_'; - private static final char[] DISALLOWED_IN_DATASET = new char[] { '\\', '/', '*', '?', '"', '<', '>', '|', ' ', ',', '#', ':', '-' }; - private static final char[] DISALLOWED_IN_NAMESPACE = new char[] { '\\', '/', '*', '?', '"', '<', '>', '|', ' ', ',', '#', ':' }; + private static final String REPLACEMENT = "_"; + private static final Pattern DISALLOWED_IN_DATASET = Pattern.compile("[\\\\/*?\"<>| ,#:-]"); + private static final Pattern DISALLOWED_IN_NAMESPACE = Pattern.compile("[\\\\/*?\"<>| ,#:]"); static final DataStreamValueSource DATASET_VALUE_SOURCE = dataset("{{" + DATA_STREAM_DATASET + "}}"); static final DataStreamValueSource NAMESPACE_VALUE_SOURCE = namespace("{{" + DATA_STREAM_NAMESPACE + "}}"); @@ -208,16 +209,13 @@ public static DataStreamValueSource namespace(String namespace) { return new DataStreamValueSource(namespace, nsp -> sanitizeDataStreamField(nsp, DISALLOWED_IN_NAMESPACE)); } - private static String sanitizeDataStreamField(String s, char[] disallowedInDataset) { + private static String sanitizeDataStreamField(String s, Pattern disallowedInDataset) { if (s == null) { return null; } s = s.toLowerCase(Locale.ROOT); s = s.substring(0, Math.min(s.length(), MAX_LENGTH)); - for (char c : disallowedInDataset) { - s = s.replace(c, REPLACEMENT_CHAR); - } - return s; + 
return disallowedInDataset.matcher(s).replaceAll(REPLACEMENT); } private DataStreamValueSource(String value, Function sanitizer) { From 0c29f7371547f0cc2095c9dc7b664337e19d936f Mon Sep 17 00:00:00 2001 From: Joe Gallo Date: Tue, 11 Apr 2023 17:36:50 -0400 Subject: [PATCH 29/30] Drop toString --- .../elasticsearch/ingest/common/RerouteProcessor.java | 5 ----- .../ingest/common/RerouteProcessorFactoryTests.java | 11 +++-------- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java index 3b5d67c192c4f..2f700750aaebd 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java @@ -263,10 +263,5 @@ public String resolve(IngestDocument ingestDocument) { return value; } } - - @Override - public String toString() { - return value; - } } } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java index 87c9d9dfffc60..580645a4ef46a 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorFactoryTests.java @@ -9,6 +9,7 @@ package org.elasticsearch.ingest.common; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.ingest.common.RerouteProcessor.DataStreamValueSource; import org.elasticsearch.test.ESTestCase; import java.util.HashMap; @@ -21,14 +22,8 @@ public class RerouteProcessorFactoryTests extends ESTestCase { public void testDefaults() throws Exception { RerouteProcessor processor = create(null, null); - assertThat( - 
processor.getDataStreamDataset().stream().map(RerouteProcessor.DataStreamValueSource::toString).toList(), - equalTo(List.of("{{data_stream.dataset}}")) - ); - assertThat( - processor.getDataStreamNamespace().stream().map(RerouteProcessor.DataStreamValueSource::toString).toList(), - equalTo(List.of("{{data_stream.namespace}}")) - ); + assertThat(processor.getDataStreamDataset(), equalTo(List.of(DataStreamValueSource.DATASET_VALUE_SOURCE))); + assertThat(processor.getDataStreamNamespace(), equalTo(List.of(DataStreamValueSource.NAMESPACE_VALUE_SOURCE))); } public void testInvalidDataset() throws Exception { From e13451ec3d71b7a09f65bcdd49b0fb942c1c564d Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Tue, 18 Apr 2023 17:51:33 +0200 Subject: [PATCH 30/30] Be more strict regarding non-string field references --- docs/reference/ingest/processors/reroute.asciidoc | 3 ++- .../org/elasticsearch/ingest/common/RerouteProcessor.java | 7 +------ .../ingest/common/RerouteProcessorTests.java | 8 ++++++++ 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/docs/reference/ingest/processors/reroute.asciidoc b/docs/reference/ingest/processors/reroute.asciidoc index 946da03218c52..eb7eb211cd62f 100644 --- a/docs/reference/ingest/processors/reroute.asciidoc +++ b/docs/reference/ingest/processors/reroute.asciidoc @@ -71,7 +71,8 @@ The `if` option can be used to define the condition in which the document should // NOTCONSOLE The dataset and namespace options can contain either a single value or a list of values that are used as a fallback. -If a field reference evaluates to `null`, is not present in the document, or if the value is not a `String`, the next value or field reference is used. +If a field reference evaluates to `null` or is not present in the document, the next value or field reference is used. +If a field reference evaluates to a non-`String` value, the processor fails. 
In the following example, the processor would first try to resolve the value for the `service.name` field to determine the value for `dataset`. If that field resolves to `null`, is missing, or is a non-string value, it would try the next element in the list. diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java index 2f700750aaebd..6c2b321112821 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RerouteProcessor.java @@ -253,12 +253,7 @@ private DataStreamValueSource(String value, Function sanitizer) @Nullable public String resolve(IngestDocument ingestDocument) { if (fieldReference != null) { - try { - return sanitizer.apply(ingestDocument.getFieldValue(fieldReference, String.class, true)); - } catch (IllegalArgumentException e) { - // thrown if fieldReference refers to something that isn't a String - return null; - } + return sanitizer.apply(ingestDocument.getFieldValue(fieldReference, String.class, true)); } else { return value; } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java index b0b7f61c482df..3da394575d625 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RerouteProcessorTests.java @@ -201,6 +201,14 @@ public void testInvalidDataStreamName() throws Exception { } } + public void testRouteOnNonStringFieldFails() { + IngestDocument ingestDocument = createIngestDocument("logs-generic-default"); + ingestDocument.setFieldValue("numeric_field", 42); + RerouteProcessor processor = 
createRerouteProcessor(List.of("{{numeric_field}}"), List.of()); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> processor.execute(ingestDocument)); + assertThat(e.getMessage(), equalTo("field [numeric_field] of type [java.lang.Integer] cannot be cast to [java.lang.String]")); + } + public void testDatasetSanitization() { assertDatasetSanitization("\\/*?\"<>| ,#:-", "_____________"); assertDatasetSanitization("foo*bar", "foo_bar");