diff --git a/docs/changelog/113757.yaml b/docs/changelog/113757.yaml new file mode 100644 index 0000000000000..30e173d80b2a7 --- /dev/null +++ b/docs/changelog/113757.yaml @@ -0,0 +1,5 @@ +pr: 113757 +summary: Store arrays offsets for keyword fields natively with synthetic source instead of falling back to ignored source. +area: Mapping +type: enhancement +issues: [] diff --git a/rest-api-spec/build.gradle b/rest-api-spec/build.gradle index 205b02a8936bb..1f5de727fbc0c 100644 --- a/rest-api-spec/build.gradle +++ b/rest-api-spec/build.gradle @@ -83,4 +83,8 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task -> "node_version warning is removed in 9.0" ) task.skipTest("tsdb/20_mapping/nested fields", "nested field support in tsdb indices is now supported") + task.skipTest("logsdb/10_settings/routing path allowed in logs mode with routing on sort fields", "Unknown feature routing.logsb_route_on_sort_fields") + task.skipTest("indices.create/21_synthetic_source_stored/index param - field ordering", "Synthetic source keep arrays now stores leaf arrays natively") + task.skipTest("indices.create/21_synthetic_source_stored/field param - keep nested array", "Synthetic source keep arrays now stores leaf arrays natively") + task.skipTest("indices.create/21_synthetic_source_stored/field param - keep root array", "Synthetic source keep arrays now stores leaf arrays natively") }) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml index dc476147c9601..b19915d179b04 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml @@ -922,7 +922,7 @@ subobjects auto: - match: { hits.hits.0._source.foo: 10 } - match: { hits.hits.0._source.foo\.bar: 100 } - match: { 
hits.hits.0._source.regular.span.id: "1" } - - match: { hits.hits.0._source.regular.trace.id: [ "a", "b" ] } + - match: { hits.hits.0._source.regular.trace.id: ["a", "b" ] } - match: { hits.hits.1._source.id: 2 } - match: { hits.hits.1._source.foo: 20 } - match: { hits.hits.1._source.foo\.bar: 200 } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml index e51d527593d45..c78ac4c493fe5 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml @@ -1024,7 +1024,7 @@ index param - field ordering: index: test - length: { hits.hits.0._source: 4 } - - match: { hits.hits.0._source: { "a": "2", "b": [ { "bb": 100, "aa": 200 }, { "aa": 300, "bb": 400 } ], "c": [30, 20, 10], "d": [ { "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 } ] } } + - match: { hits.hits.0._source: { "a": "2", "b": [ { "bb": 100, "aa": 200 }, { "aa": 300, "bb": 400 } ], "c": ["30", "20", "10"], "d": [ { "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 } ] } } --- diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index 6152f50fe297a..3e032894cf023 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -148,6 +148,7 @@ private static Version parseUnchecked(String version) { public static final IndexVersion USE_SYNTHETIC_SOURCE_FOR_RECOVERY_BY_DEFAULT = def(9_010_00_0, Version.LUCENE_10_1_0); public static final IndexVersion TIMESTAMP_DOC_VALUES_SPARSE_INDEX = def(9_011_0_00, Version.LUCENE_10_1_0); public static final IndexVersion TIME_SERIES_ID_DOC_VALUES_SPARSE_INDEX = def(9_012_0_00, 
Version.LUCENE_10_1_0); + public static final IndexVersion SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD = def(9_013_0_00, Version.LUCENE_10_1_0); /* * STOP! READ THIS FIRST! No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java index c12a350057694..06dec6a090352 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java @@ -154,6 +154,7 @@ private void internalParseDocument(MetadataFieldMapper[] metadataFieldsMappers, executeIndexTimeScripts(context); + context.processArrayOffsets(context); for (MetadataFieldMapper metadataMapper : metadataFieldsMappers) { metadataMapper.postParse(context); } @@ -519,6 +520,7 @@ private static void throwOnCopyToOnObject(Mapper mapper, List copyToFiel private static void parseObject(final DocumentParserContext context, String currentFieldName) throws IOException { assert currentFieldName != null; + context.setImmediateXContentParent(context.parser().currentToken()); Mapper objectMapper = context.getMapper(currentFieldName); if (objectMapper != null) { doParseObject(context, currentFieldName, objectMapper); @@ -611,6 +613,12 @@ private static void throwOnCreateDynamicNestedViaCopyTo(Mapper dynamicObjectMapp } private static void parseArray(DocumentParserContext context, String lastFieldName) throws IOException { + // Record previous immediate parent, so that it can be reset after array has been parsed. + // This is for recording array offset with synthetic source. Only if the immediate parent is an array, + // then the offsets can be accounted accurately. 
+ var prev = context.getImmediateXContentParent(); + context.setImmediateXContentParent(context.parser().currentToken()); + Mapper mapper = getLeafMapper(context, lastFieldName); if (mapper != null) { // There is a concrete mapper for this field already. Need to check if the mapper @@ -624,6 +632,8 @@ private static void parseArray(DocumentParserContext context, String lastFieldNa } else { parseArrayDynamic(context, lastFieldName); } + // Reset previous immediate parent + context.setImmediateXContentParent(prev); } private static void parseArrayDynamic(DocumentParserContext context, String currentFieldName) throws IOException { @@ -688,11 +698,12 @@ private static void parseNonDynamicArray( final String lastFieldName, String arrayFieldName ) throws IOException { + boolean supportStoringArrayOffsets = mapper != null && mapper.supportStoringArrayOffsets(); String fullPath = context.path().pathAsText(arrayFieldName); // Check if we need to record the array source. This only applies to synthetic source. 
boolean canRemoveSingleLeafElement = false; - if (context.canAddIgnoredField()) { + if (context.canAddIgnoredField() && supportStoringArrayOffsets == false) { Mapper.SourceKeepMode mode = Mapper.SourceKeepMode.NONE; boolean objectWithFallbackSyntheticSource = false; if (mapper instanceof ObjectMapper objectMapper) { @@ -736,6 +747,7 @@ private static void parseNonDynamicArray( XContentParser parser = context.parser(); XContentParser.Token token; + XContentParser.Token previousToken = parser.currentToken(); int elements = 0; while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { if (token == XContentParser.Token.START_OBJECT) { @@ -754,6 +766,14 @@ private static void parseNonDynamicArray( elements++; parseValue(context, lastFieldName); } + previousToken = token; + } + if (mapper != null + && context.canAddIgnoredField() + && mapper.supportStoringArrayOffsets() + && previousToken == XContentParser.Token.START_ARRAY + && context.isImmediateParentAnArray()) { + context.getOffSetContext().maybeRecordEmptyArray(mapper.getOffsetFieldName()); } if (elements <= 1 && canRemoveSingleLeafElement) { context.removeLastIgnoredField(fullPath); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java index 7b5e28dc1fbef..e20c592e46aab 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java @@ -91,6 +91,31 @@ public LuceneDocument doc() { protected void addDoc(LuceneDocument doc) { in.addDoc(doc); } + + @Override + public void processArrayOffsets(DocumentParserContext context) throws IOException { + in.processArrayOffsets(context); + } + + @Override + public FieldArrayContext getOffSetContext() { + return in.getOffSetContext(); + } + + @Override + public void setImmediateXContentParent(XContentParser.Token token) { + 
in.setImmediateXContentParent(token); + } + + @Override + public XContentParser.Token getImmediateXContentParent() { + return in.getImmediateXContentParent(); + } + + @Override + public boolean isImmediateParentAnArray() { + return in.isImmediateParentAnArray(); + } } /** @@ -141,6 +166,8 @@ private enum Scope { private final SeqNoFieldMapper.SequenceIDFields seqID; private final Set fieldsAppliedFromTemplates; + private FieldArrayContext fieldArrayContext; + /** * Fields that are copied from values of other fields via copy_to. * This per-document state is needed since it is possible @@ -460,6 +487,33 @@ public boolean isCopyToDestinationField(String name) { return copyToFields.contains(name); } + public void processArrayOffsets(DocumentParserContext context) throws IOException { + if (fieldArrayContext != null) { + fieldArrayContext.addToLuceneDocument(context); + } + } + + public FieldArrayContext getOffSetContext() { + if (fieldArrayContext == null) { + fieldArrayContext = new FieldArrayContext(); + } + return fieldArrayContext; + } + + private XContentParser.Token lastSetToken; + + public void setImmediateXContentParent(XContentParser.Token token) { + this.lastSetToken = token; + } + + public XContentParser.Token getImmediateXContentParent() { + return lastSetToken; + } + + public boolean isImmediateParentAnArray() { + return lastSetToken == XContentParser.Token.START_ARRAY; + } + /** * Add a new mapper dynamically created while parsing. * diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldArrayContext.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldArrayContext.java new file mode 100644 index 0000000000000..523ac19524ee2 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldArrayContext.java @@ -0,0 +1,93 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.util.BitUtil; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.io.stream.StreamInput; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +public class FieldArrayContext { + + private final Map offsetsPerField = new HashMap<>(); + + void recordOffset(String field, String value) { + Offsets arrayOffsets = offsetsPerField.computeIfAbsent(field, k -> new Offsets()); + int nextOffset = arrayOffsets.currentOffset++; + var offsets = arrayOffsets.valueToOffsets.computeIfAbsent(value, s -> new ArrayList<>(2)); + offsets.add(nextOffset); + } + + void recordNull(String field) { + Offsets arrayOffsets = offsetsPerField.computeIfAbsent(field, k -> new Offsets()); + int nextOffset = arrayOffsets.currentOffset++; + arrayOffsets.nullValueOffsets.add(nextOffset); + } + + void maybeRecordEmptyArray(String field) { + offsetsPerField.computeIfAbsent(field, k -> new Offsets()); + } + + void addToLuceneDocument(DocumentParserContext context) throws IOException { + for (var entry : offsetsPerField.entrySet()) { + var fieldName = entry.getKey(); + var offset = entry.getValue(); + + int currentOrd = 0; + // This array allows to retain the original ordering of elements in leaf arrays and retain duplicates. 
+ int[] offsetToOrd = new int[offset.currentOffset]; + for (var offsetEntry : offset.valueToOffsets.entrySet()) { + for (var offsetAndLevel : offsetEntry.getValue()) { + offsetToOrd[offsetAndLevel] = currentOrd; + } + currentOrd++; + } + for (var nullOffset : offset.nullValueOffsets) { + offsetToOrd[nullOffset] = -1; + } + + try (var streamOutput = new BytesStreamOutput()) { + // Could just use vint for array length, but this allows for decoding my_field: null as -1 + streamOutput.writeVInt(BitUtil.zigZagEncode(offsetToOrd.length)); + for (int ord : offsetToOrd) { + streamOutput.writeVInt(BitUtil.zigZagEncode(ord)); + } + context.doc().add(new SortedDocValuesField(fieldName, streamOutput.bytes().toBytesRef())); + } + } + } + + static int[] parseOffsetArray(StreamInput in) throws IOException { + int[] offsetToOrd = new int[BitUtil.zigZagDecode(in.readVInt())]; + for (int i = 0; i < offsetToOrd.length; i++) { + offsetToOrd[i] = BitUtil.zigZagDecode(in.readVInt()); + } + return offsetToOrd; + } + + private static class Offsets { + + int currentOffset; + // A TreeMap is required here: ordinals are assigned by iterating this map, so its keys must be visited in sorted order + // (not in the order in which the values were parsed from the document) for the ordinals to line up. This is meant to be + // the same order in which the values get stored in SortedSetDocValues.
+ final Map> valueToOffsets = new TreeMap<>(); + final List nullValueOffsets = new ArrayList<>(2); + + } + +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index 11db4d3d5aa64..03f463b25a967 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -200,7 +200,7 @@ public void parse(DocumentParserContext context) throws IOException { } } - private void doParseMultiFields(DocumentParserContext context) throws IOException { + protected void doParseMultiFields(DocumentParserContext context) throws IOException { context.path().add(leafName()); for (FieldMapper mapper : builderParams.multiFields.mappers) { mapper.parse(context); @@ -208,7 +208,7 @@ private void doParseMultiFields(DocumentParserContext context) throws IOExceptio context.path().remove(); } - private static void throwIndexingWithScriptParam() { + protected static void throwIndexingWithScriptParam() { throw new IllegalArgumentException("Cannot index data directly into a field with a [script] parameter"); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index df308a4bf983c..0b40d3aa4b474 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -95,6 +95,7 @@ public final class KeywordFieldMapper extends FieldMapper { public static final String CONTENT_TYPE = "keyword"; private static final String HOST_NAME = "host.name"; + public static final String OFFSETS_FIELD_NAME_SUFFIX = ".offsets"; public static class Defaults { public static final FieldType FIELD_TYPE; @@ -203,6 +204,7 @@ public static final class Builder extends FieldMapper.DimensionBuilder { private final 
ScriptCompiler scriptCompiler; private final IndexVersion indexCreatedVersion; private final boolean useDocValuesSkipper; + private final SourceKeepMode indexSourceKeepMode; public Builder(final String name, final MappingParserContext mappingParserContext) { this( @@ -213,7 +215,8 @@ public Builder(final String name, final MappingParserContext mappingParserContex mappingParserContext.getIndexSettings().getIndexVersionCreated(), mappingParserContext.getIndexSettings().getMode(), mappingParserContext.getIndexSettings().getIndexSortConfig(), - USE_DOC_VALUES_SKIPPER.get(mappingParserContext.getSettings()) + USE_DOC_VALUES_SKIPPER.get(mappingParserContext.getSettings()), + mappingParserContext.getIndexSettings().sourceKeepMode() ); } @@ -222,9 +225,20 @@ public Builder(final String name, final MappingParserContext mappingParserContex IndexAnalyzers indexAnalyzers, ScriptCompiler scriptCompiler, int ignoreAboveDefault, - IndexVersion indexCreatedVersion + IndexVersion indexCreatedVersion, + SourceKeepMode sourceKeepMode ) { - this(name, indexAnalyzers, scriptCompiler, ignoreAboveDefault, indexCreatedVersion, IndexMode.STANDARD, null, false); + this( + name, + indexAnalyzers, + scriptCompiler, + ignoreAboveDefault, + indexCreatedVersion, + IndexMode.STANDARD, + null, + false, + sourceKeepMode + ); } private Builder( @@ -235,7 +249,8 @@ private Builder( IndexVersion indexCreatedVersion, IndexMode indexMode, IndexSortConfig indexSortConfig, - boolean useDocValuesSkipper + boolean useDocValuesSkipper, + SourceKeepMode indexSourceKeepMode ) { super(name); this.indexAnalyzers = indexAnalyzers; @@ -273,10 +288,11 @@ private Builder( this.indexSortConfig = indexSortConfig; this.indexMode = indexMode; this.useDocValuesSkipper = useDocValuesSkipper; + this.indexSourceKeepMode = indexSourceKeepMode; } public Builder(String name, IndexVersion indexCreatedVersion) { - this(name, null, ScriptCompiler.NONE, Integer.MAX_VALUE, indexCreatedVersion); + this(name, null, 
ScriptCompiler.NONE, Integer.MAX_VALUE, indexCreatedVersion, SourceKeepMode.NONE); } public Builder ignoreAbove(int ignoreAbove) { @@ -422,6 +438,27 @@ public KeywordFieldMapper build(MapperBuilderContext context) { } super.hasScript = script.get() != null; super.onScriptError = onScriptError.getValue(); + + var sourceKeepMode = this.sourceKeepMode.orElse(indexSourceKeepMode); + String offsetsFieldName; + if (context.isSourceSynthetic() + && sourceKeepMode == SourceKeepMode.ARRAYS + && hasDocValues() + && fieldtype.stored() == false + && copyTo.copyToFields().isEmpty() + && multiFieldsBuilder.hasMultiFields() == false + && indexCreatedVersion.onOrAfter(IndexVersions.SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD)) { + // Skip stored, we will be synthesizing from stored fields, no point to keep track of the offsets + // Skip copy_to and multi fields, supporting that requires more work. However, copy_to usage is rare in metrics and + // logging use cases + + // keep track of value offsets so that we can reconstruct arrays from doc values in order as was specified during indexing + // (if field is stored then there is no point of doing this) + offsetsFieldName = context.buildFullName(leafName() + OFFSETS_FIELD_NAME_SUFFIX); + } else { + offsetsFieldName = null; + } + return new KeywordFieldMapper( leafName(), fieldtype, @@ -429,7 +466,9 @@ public KeywordFieldMapper build(MapperBuilderContext context) { builderParams(this, context), context.isSourceSynthetic(), useDocValuesSkipper, - this + this, + offsetsFieldName, + indexSourceKeepMode ); } @@ -1028,6 +1067,8 @@ public boolean hasDocValuesSkipper() { private final IndexMode indexMode; private final IndexSortConfig indexSortConfig; private final boolean useDocValuesSkipper; + private final String offsetsFieldName; + private final SourceKeepMode indexSourceKeepMode; private KeywordFieldMapper( String simpleName, @@ -1036,7 +1077,9 @@ private KeywordFieldMapper( BuilderParams builderParams, boolean isSyntheticSource, 
boolean useDocValuesSkipper, - Builder builder + Builder builder, + String offsetsFieldName, + SourceKeepMode indexSourceKeepMode ) { super(simpleName, mappedFieldType, builderParams); assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0; @@ -1055,6 +1098,8 @@ private KeywordFieldMapper( this.indexMode = builder.indexMode; this.indexSortConfig = builder.indexSortConfig; this.useDocValuesSkipper = useDocValuesSkipper; + this.offsetsFieldName = offsetsFieldName; + this.indexSourceKeepMode = indexSourceKeepMode; } @Override @@ -1063,9 +1108,27 @@ public KeywordFieldType fieldType() { } @Override + public String getOffsetFieldName() { + return offsetsFieldName; + } + + @Override protected void parseCreateField(DocumentParserContext context) throws IOException { - final String value = context.parser().textOrNull(); - indexValue(context, value == null ? fieldType().nullValue : value); + String value = context.parser().textOrNull(); + if (value == null) { + value = fieldType().nullValue; + } + + boolean indexed = indexValue(context, value); + // Track where this array element sits: indexed values are tracked by value and nulls separately. A non-null value + // that was not indexed (indexValue returned false) gets no offset entry. + if (offsetsFieldName != null && context.isImmediateParentAnArray() && context.getRecordedSource() == false) { + if (indexed) { + context.getOffSetContext().recordOffset(offsetsFieldName, value); + } else if (value == null) { + context.getOffSetContext().recordNull(offsetsFieldName); + } + } } @Override @@ -1078,13 +1138,13 @@ protected void indexScriptValues( this.fieldType().scriptValues.valuesForDoc(searchLookup, readerContext, doc, value -> indexValue(documentParserContext, value)); } - private void indexValue(DocumentParserContext context, String value) { + private boolean indexValue(DocumentParserContext context, String value) { if (value == null) { - return; + return false; } // if field is disabled, skip indexing if ((fieldType.indexOptions() == IndexOptions.NONE) && (fieldType.stored() == false) && (fieldType().hasDocValues() == false)) { - return; + return false; } if (value.length() > fieldType().ignoreAbove()) {
@@ -1093,7 +1153,7 @@ private void indexValue(DocumentParserContext context, String value) { // Save a copy of the field so synthetic source can load it context.doc().add(new StoredField(originalName(), new BytesRef(value))); } - return; + return false; } value = normalizeValue(fieldType().normalizer(), fullPath(), value); @@ -1131,6 +1191,8 @@ private void indexValue(DocumentParserContext context, String value) { if (fieldType().hasDocValues() == false && fieldType.omitNorms()) { context.addToFieldNames(fieldType().name()); } + + return true; } private static String normalizeValue(NamedAnalyzer normalizer, String field, String value) { @@ -1180,7 +1242,8 @@ public FieldMapper.Builder getMergeBuilder() { indexCreatedVersion, indexMode, indexSortConfig, - useDocValuesSkipper + useDocValuesSkipper, + indexSourceKeepMode ).dimension(fieldType().isDimension()).init(this); } @@ -1234,19 +1297,23 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException { } }); } else if (hasDocValues) { - layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) { + if (offsetsFieldName != null) { + layers.add(new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFieldName)); + } else { + layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) { - @Override - protected BytesRef convert(BytesRef value) { - return value; - } + @Override + protected BytesRef convert(BytesRef value) { + return value; + } - @Override - protected BytesRef preserve(BytesRef value) { - // Preserve must make a deep copy because convert gets a shallow copy from the iterator - return BytesRef.deepCopyOf(value); - } - }); + @Override + protected BytesRef preserve(BytesRef value) { + // Preserve must make a deep copy because convert gets a shallow copy from the iterator + return BytesRef.deepCopyOf(value); + } + }); + } } if (fieldType().ignoreAbove != Integer.MAX_VALUE) { diff --git 
a/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java b/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java index bafa74b662f00..cf3261d88bf10 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java @@ -212,4 +212,19 @@ public static FieldType freezeAndDeduplicateFieldType(FieldType fieldType) { * Defines how this mapper counts towards {@link MapperService#INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING}. */ public abstract int getTotalFieldsCount(); + + /** + * @return whether this mapper supports storing leaf array elements natively when synthetic source is enabled. + */ + public final boolean supportStoringArrayOffsets() { + return getOffsetFieldName() != null; + } + + /** + * @return the offset field name used to store offsets iff {@link #supportStoringArrayOffsets()} returns + * true. + */ + public String getOffsetFieldName() { + return null; + } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer.java b/server/src/main/java/org/elasticsearch/index/mapper/SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer.java new file mode 100644 index 0000000000000..09a63eb6ab4a7 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer.java @@ -0,0 +1,167 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.io.stream.ByteArrayStreamInput; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Objects; + +/** + * Load {@code _source} fields from {@link SortedSetDocValues} and an associated offsets field, which is read as {@link SortedDocValues} + * (not {@link BinaryDocValues}). The former contains the unique values in sorted order and the latter the offsets for each instance of + * the values. This allows synthesizing array elements in the order specified at index time. Note that this works only for leaf arrays. + */ +final class SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer implements CompositeSyntheticFieldLoader.DocValuesLayer { + + private final String name; + private final String offsetsFieldName; + private DocValuesWithOffsetsLoader docValues; + + SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(String name, String offsetsFieldName) { + this.name = Objects.requireNonNull(name); + this.offsetsFieldName = Objects.requireNonNull(offsetsFieldName); + } + + @Override + public String fieldName() { + return name; + } + + @Override + public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { + SortedSetDocValues valueDocValues = DocValues.getSortedSet(leafReader, name); + SortedDocValues offsetDocValues = DocValues.getSorted(leafReader, offsetsFieldName); + + return docValues = new DocValuesWithOffsetsLoader(valueDocValues, offsetDocValues); + } + + @Override + public boolean hasValue() { + if (docValues != null) { + return docValues.count() > 0; + } else { + return false; + } + } + + @Override + public long valueCount() { + if (docValues != null) { + return
docValues.count(); + } else { + return 0; + } + } + + @Override + public void write(XContentBuilder b) throws IOException { + if (docValues != null) { + docValues.write(b); + } + } + + static final class DocValuesWithOffsetsLoader implements DocValuesLoader { + private final SortedDocValues offsetDocValues; + private final SortedSetDocValues valueDocValues; + private final ByteArrayStreamInput scratch = new ByteArrayStreamInput(); + + private boolean hasValue; + private boolean hasOffset; + private int[] offsetToOrd; + + DocValuesWithOffsetsLoader(SortedSetDocValues valueDocValues, SortedDocValues offsetDocValues) { + this.valueDocValues = valueDocValues; + this.offsetDocValues = offsetDocValues; + } + + @Override + public boolean advanceToDoc(int docId) throws IOException { + hasValue = valueDocValues.advanceExact(docId); + hasOffset = offsetDocValues.advanceExact(docId); + if (hasValue || hasOffset) { + if (hasOffset) { + int offsetOrd = offsetDocValues.ordValue(); + var encodedValue = offsetDocValues.lookupOrd(offsetOrd); + scratch.reset(encodedValue.bytes, encodedValue.offset, encodedValue.length); + offsetToOrd = FieldArrayContext.parseOffsetArray(scratch); + } else { + offsetToOrd = null; + } + return true; + } else { + offsetToOrd = null; + return false; + } + } + + public int count() { + if (hasValue) { + if (offsetToOrd != null) { + // HACK: trick CompositeSyntheticFieldLoader to serialize this layer as array. + // (if offsetToOrd is not null, then at index time an array was always specified even if there is just one value) + return offsetToOrd.length + 1; + } else { + return valueDocValues.docValueCount(); + } + } else { + if (hasOffset) { + // trick CompositeSyntheticFieldLoader to serialize this layer as empty array. 
+ return 2; + } else { + return 0; + } + } + } + + public void write(XContentBuilder b) throws IOException { + if (hasValue == false && hasOffset == false) { + return; + } + if (offsetToOrd != null && hasValue) { + long[] ords = new long[valueDocValues.docValueCount()]; + for (int i = 0; i < valueDocValues.docValueCount(); i++) { + ords[i] = valueDocValues.nextOrd(); + } + + for (int offset : offsetToOrd) { + if (offset == -1) { + b.nullValue(); + continue; + } + + long ord = ords[offset]; + BytesRef c = valueDocValues.lookupOrd(ord); + // This is keyword specific and needs to be updated once support is added for other field types: + b.utf8Value(c.bytes, c.offset, c.length); + } + } else if (offsetToOrd != null) { + // in case all values are NULLs + for (int offset : offsetToOrd) { + assert offset == -1; + b.nullValue(); + } + } else { + for (int i = 0; i < valueDocValues.docValueCount(); i++) { + BytesRef c = valueDocValues.lookupOrd(valueDocValues.nextOrd()); + b.utf8Value(c.bytes, c.offset, c.length); + } + } + } + } + +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/FieldArrayContextTests.java b/server/src/test/java/org/elasticsearch/index/mapper/FieldArrayContextTests.java new file mode 100644 index 0000000000000..a1fa3024d7973 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/FieldArrayContextTests.java @@ -0,0 +1,67 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */
+
+package org.elasticsearch.index.mapper;
+
+import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
+import org.elasticsearch.test.ESTestCase;
+
+import java.io.IOException;
+
+import static org.elasticsearch.index.mapper.FieldArrayContext.parseOffsetArray;
+
+public class FieldArrayContextTests extends ESTestCase {
+
+    public void testOffsets() throws IOException {
+        var arrayContext = new FieldArrayContext();
+        arrayContext.recordOffset("field", "a");
+        arrayContext.recordOffset("field", "a");
+        arrayContext.recordOffset("field", "b");
+        arrayContext.recordOffset("field", "z");
+        arrayContext.recordOffset("field", "a");
+        arrayContext.recordOffset("field", "b");
+
+        var documentContext = new TestDocumentParserContext();
+        arrayContext.addToLuceneDocument(documentContext);
+
+        var offsetsField = documentContext.doc().getField("field");
+        int[] ordinals = parseOffsetArray(new ByteArrayStreamInput(offsetsField.binaryValue().bytes));
+        assertArrayEquals(new int[] { 0, 0, 1, 2, 0, 1 }, ordinals); // equal values are expected to share one ordinal
+    }
+
+    public void testOffsetsWithNull() throws IOException {
+        var arrayContext = new FieldArrayContext();
+        arrayContext.recordNull("field");
+        arrayContext.recordOffset("field", "a");
+        arrayContext.recordOffset("field", "b");
+        arrayContext.recordOffset("field", "z");
+        arrayContext.recordNull("field");
+        arrayContext.recordOffset("field", "b");
+
+        var documentContext = new TestDocumentParserContext();
+        arrayContext.addToLuceneDocument(documentContext);
+
+        var offsetsField = documentContext.doc().getField("field");
+        int[] ordinals = parseOffsetArray(new ByteArrayStreamInput(offsetsField.binaryValue().bytes));
+        assertArrayEquals(new int[] { -1, 0, 1, 2, -1, 1 }, ordinals); // recorded nulls are expected to come back as -1
+    }
+
+    public void testEmptyOffset() throws IOException {
+        var arrayContext = new FieldArrayContext();
+        arrayContext.maybeRecordEmptyArray("field");
+
+        var documentContext = new TestDocumentParserContext();
+        arrayContext.addToLuceneDocument(documentContext);
+
+        var offsetsField = documentContext.doc().getField("field");
+        int[] ordinals = parseOffsetArray(new ByteArrayStreamInput(offsetsField.binaryValue().bytes));
+        assertArrayEquals(new int[] {}, ordinals);
+    }
+
+}
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java
index 198988832cb55..af9fb63d77f27 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java
@@ -971,4 +971,10 @@ public void testFieldTypeDefault_IndexedFalseDocValuesFalse() throws IOException
         assertFalse(mapper.fieldType().isIndexed());
         assertFalse(mapper.fieldType().hasDocValuesSkipper());
     }
+
+    @Override
+    protected String randomSyntheticSourceKeep() {
+        // For keyword fields only the "all" option still keeps the array source in ignored source.
+        return randomFrom("all");
+    }
 }
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java
index e3bdb3d45818f..092d9a1210815 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java
@@ -244,7 +244,8 @@ public void testFetchSourceValue() throws IOException {
             createIndexAnalyzers(),
             ScriptCompiler.NONE,
             Integer.MAX_VALUE,
-            IndexVersion.current()
+            IndexVersion.current(),
+            randomFrom(Mapper.SourceKeepMode.values())
         ).normalizer("lowercase").build(MapperBuilderContext.root(false, false)).fieldType();
         assertEquals(List.of("value"), fetchSourceValue(normalizerMapper, "VALUE"));
         assertEquals(List.of("42"), fetchSourceValue(normalizerMapper, 42L));
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordOffsetDocValuesLoaderTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordOffsetDocValuesLoaderTests.java
new file mode 100644
index 0000000000000..8300e8e8e4614
--- /dev/null
+++
b/server/src/test/java/org/elasticsearch/index/mapper/KeywordOffsetDocValuesLoaderTests.java
@@ -0,0 +1,244 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.index.mapper;
+
+import org.apache.lucene.index.DirectoryReader;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.bytes.BytesArray;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.mapper.SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer.DocValuesWithOffsetsLoader;
+import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.XContentType;
+
+import java.io.IOException;
+
+import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
+import static org.hamcrest.Matchers.nullValue;
+
+/**
+ * Checks which keyword mappings get an offset field and that arrays written by the offsets-aware loader match the indexed source.
+ */
+public class KeywordOffsetDocValuesLoaderTests extends MapperServiceTestCase {
+
+    @Override
+    protected Settings getIndexSettings() {
+        return Settings.builder()
+            .put("index.mapping.source.mode", "synthetic")
+            .put("index.mapping.synthetic_source_keep", "arrays")
+            .build();
+    }
+
+    public void testOffsetArrayNoDocValues() throws Exception {
+        String mapping = """
+            {
+                "_doc": {
+                    "properties": {
+                        "field": {
+                            "type": "keyword",
+                            "doc_values": false
+                        }
+                    }
+                }
+            }
+            """;
+        try (var mapperService = createMapperService(mapping)) {
+            var fieldMapper = mapperService.mappingLookup().getMapper("field");
+            assertThat(fieldMapper.getOffsetFieldName(), nullValue());
+        }
+    }
+
+    public void testOffsetArrayStored() throws Exception {
+        String mapping = """
+            {
+                "_doc": {
+                    "properties": {
+                        "field": {
+                            "type": "keyword",
+                            "store": true
+                        }
+                    }
+                }
+            }
+            """;
+        try (var mapperService = createMapperService(mapping)) {
+            var fieldMapper = mapperService.mappingLookup().getMapper("field");
+            assertThat(fieldMapper.getOffsetFieldName(), nullValue());
+        }
+    }
+
+    public void testOffsetMultiFields() throws Exception {
+        String mapping = """
+            {
+                "_doc": {
+                    "properties": {
+                        "field": {
+                            "type": "keyword",
+                            "fields": {
+                                "sub": {
+                                    "type": "text"
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            """;
+        try (var mapperService = createMapperService(mapping)) {
+            var fieldMapper = mapperService.mappingLookup().getMapper("field");
+            assertThat(fieldMapper.getOffsetFieldName(), nullValue());
+        }
+    }
+
+    public void testOffsetArrayNoSyntheticSource() throws Exception {
+        String mapping = """
+            {
+                "_doc": {
+                    "properties": {
+                        "field": {
+                            "type": "keyword"
+                        }
+                    }
+                }
+            }
+            """;
+        try (var mapperService = createMapperService(Settings.EMPTY, mapping)) {
+            var fieldMapper = mapperService.mappingLookup().getMapper("field");
+            assertThat(fieldMapper.getOffsetFieldName(), nullValue());
+        }
+    }
+
+    public void testOffsetArrayNoSourceArrayKeep() throws Exception {
+        var settingsBuilder = Settings.builder().put("index.mapping.source.mode", "synthetic");
+        String mapping;
+        if (randomBoolean()) {
+            mapping = """
+                {
+                    "_doc": {
+                        "properties": {
+                            "field": {
+                                "type": "keyword",
+                                "synthetic_source_keep": "{{synthetic_source_keep}}"
+                            }
+                        }
+                    }
+                }
+                """.replace("{{synthetic_source_keep}}", randomBoolean() ? "none" : "all");
+        } else {
+            mapping = """
+                {
+                    "_doc": {
+                        "properties": {
+                            "field": {
+                                "type": "keyword"
+                            }
+                        }
+                    }
+                }
+                """;
+            if (randomBoolean()) {
+                settingsBuilder.put("index.mapping.synthetic_source_keep", "none");
+            }
+        }
+        try (var mapperService = createMapperService(settingsBuilder.build(), mapping)) {
+            var fieldMapper = mapperService.mappingLookup().getMapper("field");
+            assertThat(fieldMapper.getOffsetFieldName(), nullValue());
+        }
+    }
+
+    public void testOffsetArray() throws Exception {
+        verifyOffsets("{\"field\":[\"z\",\"x\",\"y\",\"c\",\"b\",\"a\"]}");
+        verifyOffsets("{\"field\":[\"z\",null,\"y\",\"c\",null,\"a\"]}");
+    }
+
+    public void testOffsetNestedArray() throws Exception {
+        verifyOffsets("{\"field\":[\"z\",[\"y\"],[\"c\"],null,\"a\"]}", "{\"field\":[\"z\",\"y\",\"c\",null,\"a\"]}");
+        verifyOffsets(
+            "{\"field\":[\"z\",[\"y\", [\"k\"]],[\"c\", [\"l\"]],null,\"a\"]}",
+            "{\"field\":[\"z\",\"y\",\"k\",\"c\",\"l\",null,\"a\"]}"
+        );
+    }
+
+    public void testOffsetEmptyArray() throws Exception {
+        verifyOffsets("{\"field\":[]}");
+    }
+
+    public void testOffsetArrayWithNulls() throws Exception {
+        verifyOffsets("{\"field\":[null,null,null]}");
+    }
+
+    public void testOffsetArrayRandom() throws Exception {
+        StringBuilder values = new StringBuilder();
+        int numValues = randomIntBetween(0, 256);
+        for (int i = 0; i < numValues; i++) {
+            if (randomInt(10) == 1) {
+                values.append("null");
+            } else {
+                values.append('"').append(randomAlphanumericOfLength(2)).append('"');
+            }
+            if (i != (numValues - 1)) {
+                values.append(',');
+            }
+        }
+        verifyOffsets("{\"field\":[" + values + "]}");
+    }
+
+    private void verifyOffsets(String source) throws IOException {
+        verifyOffsets(source, source);
+    }
+
+    private void verifyOffsets(String source, String expectedSource) throws IOException {
+        String mapping = """
+            {
+                "_doc": {
+                    "properties": {
+                        "field": {
+                            "type": "keyword"
+                        }
+                    }
+                }
+            }
+            """;
+        verifyOffsets(mapping, source, expectedSource);
+    }
+
+    /**
+     * Indexes {@code source} as one document under {@code mapping} and asserts that the offsets-aware loader writes {@code expectedSource}.
+     */
+    private void verifyOffsets(String mapping, String source, String expectedSource) throws IOException {
+        try (var mapperService = createMapperService(mapping)) {
+            var mapper = mapperService.documentMapper();
+
+            try (var directory = newDirectory()) {
+                // The writer is closed before the reader is opened, and also when parsing or indexing throws.
+                try (var iw = indexWriterForSyntheticSource(directory)) {
+                    var doc = mapper.parse(new SourceToParse("_id", new BytesArray(source), XContentType.JSON));
+                    doc.updateSeqID(0, 0);
+                    doc.version().setLongValue(0);
+                    iw.addDocuments(doc.docs());
+                }
+                try (var indexReader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) {
+                    var layer = new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer("field", "field.offsets");
+                    var leafReader = indexReader.leaves().getFirst().reader();
+                    var loader = (DocValuesWithOffsetsLoader) layer.docValuesLoader(leafReader, new int[] { 0 });
+                    assertTrue(loader.advanceToDoc(0));
+                    assertTrue(loader.count() > 0);
+                    XContentBuilder builder = jsonBuilder().startObject();
+                    builder.startArray("field");
+                    loader.write(builder);
+                    builder.endArray().endObject();
+
+                    var actual = Strings.toString(builder);
+                    assertEquals(expectedSource, actual);
+                }
+            }
+        }
+    }
+
+}
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordSyntheticSourceNativeArrayIntegrationTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordSyntheticSourceNativeArrayIntegrationTests.java
new file mode 100644
index 0000000000000..8ebcfb4845c8c
--- /dev/null
+++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordSyntheticSourceNativeArrayIntegrationTests.java
@@ -0,0 +1,331 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements.
Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.index.mapper;
+
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.LeafReader;
+import org.elasticsearch.action.admin.indices.forcemerge.ForceMergeRequest;
+import org.elasticsearch.action.index.IndexRequest;
+import org.elasticsearch.action.search.SearchRequest;
+import org.elasticsearch.action.support.WriteRequest;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.query.IdsQueryBuilder;
+import org.elasticsearch.test.ESSingleNodeTestCase;
+import org.elasticsearch.xcontent.XContentBuilder;
+import org.hamcrest.Matchers;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
+import static org.hamcrest.Matchers.contains;
+import static org.hamcrest.Matchers.empty;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.hasKey;
+import static org.hamcrest.Matchers.nullValue;
+
+public class KeywordSyntheticSourceNativeArrayIntegrationTests extends ESSingleNodeTestCase {
+
+    public void testSynthesizeArray() throws Exception {
+        var arrayValues = new Object[][] {
+            new Object[] { "z", "y", null, "x", null, "v" },
+            new Object[] { null, "b", null, "a" },
+            new Object[] { null },
+            new Object[] { null, null, null },
+            new Object[] { "c", "b", "a" } };
+        verifySyntheticArray(arrayValues);
+    }
+
+    public void testSynthesizeEmptyArray() throws Exception {
+        var arrayValues = new Object[][] { new Object[] {} };
+        verifySyntheticArray(arrayValues);
+    }
+
+    public void testSynthesizeArrayRandom() throws Exception {
+        var arrayValues = new Object[][] { generateRandomStringArray(64, 8, false, true) };
+        verifySyntheticArray(arrayValues);
+    }
+
+    public void testSynthesizeArrayIgnoreAbove() throws Exception {
+        var mapping = jsonBuilder().startObject()
+            .startObject("properties")
+            .startObject("field")
+            .field("type", "keyword")
+            .field("ignore_above", 4)
+            .endObject()
+            .endObject()
+            .endObject();
+        // Note values that would be ignored are added at the end of arrays,
+        // this makes testing easier as ignored values are always synthesized after regular values:
+        var arrayValues = new Object[][] {
+            new Object[] { null, "a", "ab", "abc", "abcd", null, "abcde" },
+            new Object[] { "12345", "12345", "12345" },
+            new Object[] { "123", "1234", "12345" },
+            new Object[] { null, null, null, "blabla" },
+            new Object[] { "1", "2", "3", "blabla" } };
+        verifySyntheticArray(arrayValues, mapping, 4, "_id", "field._original");
+    }
+
+    public void testSynthesizeObjectArray() throws Exception {
+        List<List<Object[]>> documents = new ArrayList<>();
+        {
+            List<Object[]> document = new ArrayList<>();
+            document.add(new Object[] { "z", "y", "x" });
+            document.add(new Object[] { "m", "l", "m" });
+            document.add(new Object[] { "c", "b", "a" });
+            documents.add(document);
+        }
+        {
+            List<Object[]> document = new ArrayList<>();
+            document.add(new Object[] { "9", "7", "5" });
+            document.add(new Object[] { "2", "4", "6" });
+            document.add(new Object[] { "7", "6", "5" });
+            documents.add(document);
+        }
+        verifySyntheticObjectArray(documents);
+    }
+
+    public void testSynthesizeArrayInObjectField() throws Exception {
+        List<Object[]> documents = new ArrayList<>();
+        documents.add(new Object[] { "z", "y", "x" });
+        documents.add(new Object[] { "m", "l", "m" });
+        documents.add(new Object[] { "c", "b", "a" });
+        documents.add(new Object[] { "9", "7", "5" });
+        documents.add(new Object[] { "2", "4", "6" });
+        documents.add(new Object[] { "7", "6", "5" });
+        verifySyntheticArrayInObject(documents);
+    }
+
+    public void testSynthesizeArrayInObjectFieldRandom() throws Exception {
+        List<Object[]> documents = new ArrayList<>();
+        int numDocs = randomIntBetween(8, 256);
+        for (int i = 0; i < numDocs; i++) {
+            documents.add(generateRandomStringArray(64, 8, false, true));
+        }
+        verifySyntheticArrayInObject(documents);
+    }
+
+    private void verifySyntheticArray(Object[][] arrays) throws IOException {
+        var mapping = jsonBuilder().startObject()
+            .startObject("properties")
+            .startObject("field")
+            .field("type", "keyword")
+            .endObject()
+            .endObject()
+            .endObject();
+        verifySyntheticArray(arrays, mapping, null, "_id");
+    }
+
+    private void verifySyntheticArray(Object[][] arrays, XContentBuilder mapping, Integer ignoreAbove, String... expectedStoredFields)
+        throws IOException {
+        var indexService = createIndex(
+            "test-index",
+            Settings.builder().put("index.mapping.source.mode", "synthetic").put("index.mapping.synthetic_source_keep", "arrays").build(),
+            mapping
+        );
+        for (int i = 0; i < arrays.length; i++) {
+            var array = arrays[i];
+
+            var indexRequest = new IndexRequest("test-index");
+            indexRequest.id("my-id-" + i);
+            var source = jsonBuilder().startObject();
+            if (array != null) {
+                source.startArray("field");
+                for (Object arrayValue : array) {
+                    source.value(arrayValue);
+                }
+                source.endArray();
+            } else {
+                source.field("field").nullValue();
+            }
+            indexRequest.source(source.endObject());
+            indexRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
+            client().index(indexRequest).actionGet();
+
+            var searchRequest = new SearchRequest("test-index");
+            searchRequest.source().query(new IdsQueryBuilder().addIds("my-id-" + i));
+            var searchResponse = client().search(searchRequest).actionGet();
+            try {
+                var hit = searchResponse.getHits().getHits()[0];
+                assertThat(hit.getId(), equalTo("my-id-" + i));
+                var sourceAsMap = hit.getSourceAsMap();
+                assertThat(sourceAsMap, hasKey("field"));
+                var actualArray = (List<?>) sourceAsMap.get("field");
+                if (array == null) {
+                    assertThat(actualArray, nullValue());
+                } else if (array.length == 0) {
+                    assertThat(actualArray, empty());
+                } else {
+                    assertThat(actualArray, Matchers.contains(array));
+                }
+            } finally {
+                searchResponse.decRef();
+            }
+        }
+
+        try (var searcher = indexService.getShard(0).acquireSearcher(getTestName())) {
+            var reader = searcher.getDirectoryReader();
+            for (int i = 0; i < arrays.length; i++) {
+                var document = reader.storedFields().document(i);
+                // Verify that there is no ignored source:
+                Set<String> storedFieldNames = new LinkedHashSet<>(document.getFields().stream().map(IndexableField::name).toList());
+                assertThat(storedFieldNames, contains(expectedStoredFields));
+            }
+            var fieldInfo = FieldInfos.getMergedFieldInfos(reader).fieldInfo("field.offsets");
+            assertThat(fieldInfo.getDocValuesType(), equalTo(DocValuesType.SORTED));
+        }
+    }
+
+    private void verifySyntheticObjectArray(List<List<Object[]>> documents) throws IOException {
+        var indexService = createIndex(
+            "test-index",
+            Settings.builder().put("index.mapping.source.mode", "synthetic").put("index.mapping.synthetic_source_keep", "arrays").build(),
+            jsonBuilder().startObject()
+                .startObject("properties")
+                .startObject("object")
+                .startObject("properties")
+                .startObject("field")
+                .field("type", "keyword")
+                .endObject()
+                .endObject()
+                .endObject()
+                .endObject()
+                .endObject()
+        );
+        for (int i = 0; i < documents.size(); i++) {
+            var document = documents.get(i);
+
+            var indexRequest = new IndexRequest("test-index");
+            indexRequest.id("my-id-" + i);
+            var source = jsonBuilder().startObject();
+            source.startArray("object");
+            for (Object[] arrayValue : document) {
+                source.startObject();
+                source.array("field", arrayValue);
+                source.endObject();
+            }
+            source.endArray();
+            indexRequest.source(source.endObject());
+            indexRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
+            client().index(indexRequest).actionGet();
+
+            var searchRequest = new SearchRequest("test-index");
+            searchRequest.source().query(new IdsQueryBuilder().addIds("my-id-" + i));
+            var searchResponse = client().search(searchRequest).actionGet();
+            try {
+                var hit = searchResponse.getHits().getHits()[0];
+                assertThat(hit.getId(), equalTo("my-id-" + i));
+                var sourceAsMap = hit.getSourceAsMap();
+                var objectArray = (List<?>) sourceAsMap.get("object");
+                for (int j = 0; j < document.size(); j++) {
+                    var expected = document.get(j);
+                    List<?> actual = (List<?>) ((Map<?, ?>) objectArray.get(j)).get("field");
+                    assertThat(actual, Matchers.contains(expected));
+                }
+            } finally {
+                searchResponse.decRef();
+            }
+        }
+
+        indexService.getShard(0).forceMerge(new ForceMergeRequest("test-index").maxNumSegments(1));
+        try (var searcher = indexService.getShard(0).acquireSearcher(getTestName())) {
+            var reader = searcher.getDirectoryReader();
+            for (int i = 0; i < documents.size(); i++) {
+                var document = reader.storedFields().document(i);
+                // Verify that there is ignored source because of leaf array being wrapped by object array:
+                List<String> storedFieldNames = document.getFields().stream().map(IndexableField::name).toList();
+                assertThat(storedFieldNames, contains("_id", "_ignored_source"));
+
+                // Verify that there is no offset field:
+                LeafReader leafReader = reader.leaves().get(0).reader();
+                for (FieldInfo fieldInfo : leafReader.getFieldInfos()) {
+                    String name = fieldInfo.getName();
+                    assertFalse("expected no field that contains [offsets] in name, but found [" + name + "]", name.contains("offsets"));
+                }
+
+                var binaryDocValues = leafReader.getBinaryDocValues("object.field.offsets");
+                assertThat(binaryDocValues, nullValue());
+            }
+        }
+    }
+
+    private void verifySyntheticArrayInObject(List<Object[]> documents) throws IOException {
+        var indexService = createIndex(
+            "test-index",
+            Settings.builder().put("index.mapping.source.mode", "synthetic").put("index.mapping.synthetic_source_keep", "arrays").build(),
+            jsonBuilder().startObject()
+                .startObject("properties")
+                .startObject("object")
+                .startObject("properties")
+                .startObject("field")
+                .field("type", "keyword")
+                .endObject()
+                .endObject()
+                .endObject()
+                .endObject()
+                .endObject()
+        );
+        for (int i = 0; i < documents.size(); i++) {
+            var arrayValue = documents.get(i);
+
+            var indexRequest = new IndexRequest("test-index");
+            indexRequest.id("my-id-" + i);
+            var source = jsonBuilder().startObject();
+            source.startObject("object");
+            source.array("field", arrayValue);
+            source.endObject();
+            indexRequest.source(source.endObject());
+            indexRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
+            client().index(indexRequest).actionGet();
+
+            var searchRequest = new SearchRequest("test-index");
+            searchRequest.source().query(new IdsQueryBuilder().addIds("my-id-" + i));
+            var searchResponse = client().search(searchRequest).actionGet();
+            try {
+                var hit = searchResponse.getHits().getHits()[0];
+                assertThat(hit.getId(), equalTo("my-id-" + i));
+                var sourceAsMap = hit.getSourceAsMap();
+                var objectArray = (Map<?, ?>) sourceAsMap.get("object");
+
+                List<?> actual = (List<?>) objectArray.get("field");
+                if (arrayValue == null) {
+                    assertThat(actual, nullValue());
+                } else if (arrayValue.length == 0) {
+                    assertThat(actual, empty());
+                } else {
+                    assertThat(actual, Matchers.contains(arrayValue));
+                }
+            } finally {
+                searchResponse.decRef();
+            }
+        }
+
+        indexService.getShard(0).forceMerge(new ForceMergeRequest("test-index").maxNumSegments(1));
+        try (var searcher = indexService.getShard(0).acquireSearcher(getTestName())) {
+            var reader = searcher.getDirectoryReader();
+            for (int i = 0; i < documents.size(); i++) {
+                var document = reader.storedFields().document(i);
+                // Verify that there is no ignored source:
+                Set<String> storedFieldNames = new LinkedHashSet<>(document.getFields().stream().map(IndexableField::name).toList());
+                assertThat(storedFieldNames, contains("_id"));
+            }
+            var fieldInfo = FieldInfos.getMergedFieldInfos(reader).fieldInfo("object.field.offsets");
+            assertThat(fieldInfo.getDocValuesType(), equalTo(DocValuesType.SORTED));
+        }
+    }
+
+}
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldsTests.java b/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldsTests.java
index fd024c5d23e28..4c5bfeb66b075 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldsTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldsTests.java
@@ -64,7 +64,8 @@ private KeywordFieldMapper.Builder getKeywordFieldMapperBuilder(boolean isStored
             IndexAnalyzers.of(Map.of(), Map.of("normalizer", Lucene.STANDARD_ANALYZER), Map.of()),
             ScriptCompiler.NONE,
             Integer.MAX_VALUE,
-            IndexVersion.current()
+            IndexVersion.current(),
+            Mapper.SourceKeepMode.NONE
         );
         if (isStored) {
             keywordFieldMapperBuilder.stored(true);
diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java
index 809660c5e9af8..7bf3ed1de5b66 100644
--- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java
+++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java
@@ -1707,7 +1707,7 @@ public void testSyntheticSourceKeepArrays() throws IOException {
         SyntheticSourceExample example = syntheticSourceSupportForKeepTests(shouldUseIgnoreMalformed()).example(1);
         DocumentMapper mapperAll = createSytheticSourceMapperService(mapping(b -> {
             b.startObject("field");
-            b.field("synthetic_source_keep", randomFrom("arrays", "all")); // Both options keep array source.
+            b.field("synthetic_source_keep", randomSyntheticSourceKeep());
             example.mapping().accept(b);
             b.endObject();
         })).documentMapper();
@@ -1726,6 +1726,13 @@ public void testSyntheticSourceKeepArrays() throws IOException {
         assertThat(actual, equalTo(expected));
     }
 
+    /**
+     * Picks the {@code synthetic_source_keep} value used by {@code testSyntheticSourceKeepArrays}; subclasses may override it.
+     */
+    protected String randomSyntheticSourceKeep() {
+        return randomFrom("all", "arrays");
+    }
+
     @Override
     protected final <T> T compileScript(Script script, ScriptContext<T> context) {
         return ingestScriptSupport().compileScript(script, context);