diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldArrayContext.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldArrayContext.java index 523ac19524ee2..4d088b13e91e3 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldArrayContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldArrayContext.java @@ -23,9 +23,10 @@ public class FieldArrayContext { + public static final String OFFSETS_FIELD_NAME_SUFFIX = ".offsets"; private final Map offsetsPerField = new HashMap<>(); - void recordOffset(String field, String value) { + void recordOffset(String field, Comparable value) { Offsets arrayOffsets = offsetsPerField.computeIfAbsent(field, k -> new Offsets()); int nextOffset = arrayOffsets.currentOffset++; var offsets = arrayOffsets.valueToOffsets.computeIfAbsent(value, s -> new ArrayList<>(2)); @@ -85,7 +86,7 @@ private static class Offsets { // Need to use TreeMap here, so that we maintain the order in which each value (with offset) stored inserted, // (which is in the same order the document gets parsed) so we store offsets in right order. This is the same // order in what the values get stored in SortedSetDocValues. 
- final Map> valueToOffsets = new TreeMap<>(); + final Map, List> valueToOffsets = new TreeMap<>(); final List nullValueOffsets = new ArrayList<>(2); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java index 3ded3d2699b21..f4e8d05cea54d 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java @@ -92,8 +92,15 @@ public static final class Builder extends FieldMapper.DimensionBuilder { private final boolean ignoreMalformedByDefault; private final IndexVersion indexCreatedVersion; private final ScriptCompiler scriptCompiler; + private final SourceKeepMode indexSourceKeepMode; - public Builder(String name, ScriptCompiler scriptCompiler, boolean ignoreMalformedByDefault, IndexVersion indexCreatedVersion) { + public Builder( + String name, + ScriptCompiler scriptCompiler, + boolean ignoreMalformedByDefault, + IndexVersion indexCreatedVersion, + SourceKeepMode indexSourceKeepMode + ) { super(name); this.scriptCompiler = Objects.requireNonNull(scriptCompiler); this.ignoreMalformedByDefault = ignoreMalformedByDefault; @@ -114,6 +121,7 @@ public Builder(String name, ScriptCompiler scriptCompiler, boolean ignoreMalform ); } }); + this.indexSourceKeepMode = indexSourceKeepMode; } Builder nullValue(String nullValue) { @@ -184,6 +192,27 @@ public IpFieldMapper build(MapperBuilderContext context) { } hasScript = script.get() != null; onScriptError = onScriptErrorParam.getValue(); + + var sourceKeepMode = this.sourceKeepMode.orElse(indexSourceKeepMode); + String offsetsFieldName; + if (context.isSourceSynthetic() + && sourceKeepMode == SourceKeepMode.ARRAYS + && hasDocValues.get() + && stored.get() == false + && copyTo.copyToFields().isEmpty() + && multiFieldsBuilder.hasMultiFields() == false + && 
indexCreatedVersion.onOrAfter(IndexVersions.SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD)) { + // Skip stored, we will be synthesizing from stored fields, no point to keep track of the offsets + // Skip copy_to and multi fields, supporting that requires more work. However, copy_to usage is rare in metrics and + // logging use cases + + // keep track of value offsets so that we can reconstruct arrays from doc values in order as was specified during indexing + // (if field is stored then there is no point of doing this) + offsetsFieldName = context.buildFullName(leafName() + FieldArrayContext.OFFSETS_FIELD_NAME_SUFFIX); + } else { + offsetsFieldName = null; + } + return new IpFieldMapper( leafName(), new IpFieldType( @@ -198,7 +227,8 @@ public IpFieldMapper build(MapperBuilderContext context) { ), builderParams(this, context), context.isSourceSynthetic(), - this + this, + offsetsFieldName ); } @@ -206,7 +236,7 @@ public IpFieldMapper build(MapperBuilderContext context) { public static final TypeParser PARSER = createTypeParserWithLegacySupport((n, c) -> { boolean ignoreMalformedByDefault = IGNORE_MALFORMED_SETTING.get(c.getSettings()); - return new Builder(n, c.scriptCompiler(), ignoreMalformedByDefault, c.indexVersionCreated()); + return new Builder(n, c.scriptCompiler(), ignoreMalformedByDefault, c.indexVersionCreated(), c.getIndexSettings().sourceKeepMode()); }); public static final class IpFieldType extends SimpleMappedFieldType { @@ -501,13 +531,16 @@ public TermsEnum getTerms(IndexReader reader, String prefix, boolean caseInsensi private final Script script; private final FieldValues scriptValues; private final ScriptCompiler scriptCompiler; + private final SourceKeepMode indexSourceKeepMode; + private final String offsetsFieldName; private IpFieldMapper( String simpleName, MappedFieldType mappedFieldType, BuilderParams builderParams, boolean storeIgnored, - Builder builder + Builder builder, + String offsetsFieldName ) { super(simpleName, mappedFieldType, 
builderParams); this.ignoreMalformedByDefault = builder.ignoreMalformedByDefault; @@ -523,6 +556,8 @@ private IpFieldMapper( this.scriptCompiler = builder.scriptCompiler; this.dimension = builder.dimension.getValue(); this.storeIgnored = storeIgnored; + this.indexSourceKeepMode = builder.indexSourceKeepMode; + this.offsetsFieldName = offsetsFieldName; } @Override @@ -561,6 +596,14 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio if (address != null) { indexValue(context, address); } + if (offsetsFieldName != null && context.isImmediateParentAnArray() && context.getRecordedSource() == false) { + if (address != null) { + BytesRef sortableValue = new BytesRef(InetAddressPoint.encode(address)); + context.getOffSetContext().recordOffset(offsetsFieldName, sortableValue); + } else { + context.getOffSetContext().recordNull(offsetsFieldName); + } + } } private void indexValue(DocumentParserContext context, InetAddress address) { @@ -593,7 +636,9 @@ protected void indexScriptValues( @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), scriptCompiler, ignoreMalformedByDefault, indexCreatedVersion).dimension(dimension).init(this); + return new Builder(leafName(), scriptCompiler, ignoreMalformedByDefault, indexCreatedVersion, indexSourceKeepMode).dimension( + dimension + ).init(this); } @Override @@ -610,19 +655,24 @@ protected SyntheticSourceSupport syntheticSourceSupport() { if (hasDocValues) { return new SyntheticSourceSupport.Native(() -> { var layers = new ArrayList(); - layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) { - @Override - protected BytesRef convert(BytesRef value) { - byte[] bytes = Arrays.copyOfRange(value.bytes, value.offset, value.offset + value.length); - return new BytesRef(NetworkAddress.format(InetAddressPoint.decode(bytes))); - } - - @Override - protected BytesRef preserve(BytesRef value) { - // No need to copy because convert has made a deep copy - return 
value; - } - }); + if (offsetsFieldName != null) { + layers.add( + new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFieldName, IpFieldMapper::convert) + ); + } else { + layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) { + @Override + protected BytesRef convert(BytesRef value) { + return IpFieldMapper.convert(value); + } + + @Override + protected BytesRef preserve(BytesRef value) { + // No need to copy because convert has made a deep copy + return value; + } + }); + } if (ignoreMalformed) { layers.add(new CompositeSyntheticFieldLoader.MalformedValuesLayer(fullPath())); @@ -633,4 +683,14 @@ protected BytesRef preserve(BytesRef value) { return super.syntheticSourceSupport(); } + + static BytesRef convert(BytesRef value) { + byte[] bytes = Arrays.copyOfRange(value.bytes, value.offset, value.offset + value.length); + return new BytesRef(NetworkAddress.format(InetAddressPoint.decode(bytes))); + } + + @Override + public String getOffsetFieldName() { + return offsetsFieldName; + } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 195ec5a27a72c..fc03ffcb044b6 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -95,7 +95,6 @@ public final class KeywordFieldMapper extends FieldMapper { public static final String CONTENT_TYPE = "keyword"; private static final String HOST_NAME = "host.name"; - public static final String OFFSETS_FIELD_NAME_SUFFIX = ".offsets"; public static class Defaults { public static final FieldType FIELD_TYPE; @@ -454,7 +453,7 @@ && indexVersionSupportStoringArraysNatively()) { // keep track of value offsets so that we can reconstruct arrays from doc values in order as was specified during indexing // (if field is stored then there is no point of doing this) - 
offsetsFieldName = context.buildFullName(leafName() + OFFSETS_FIELD_NAME_SUFFIX); + offsetsFieldName = context.buildFullName(leafName() + FieldArrayContext.OFFSETS_FIELD_NAME_SUFFIX); } else { offsetsFieldName = null; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer.java b/server/src/main/java/org/elasticsearch/index/mapper/SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer.java index 09a63eb6ab4a7..5407321ffd8a9 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.Objects; +import java.util.function.Function; /** * Load {@code _source} fields from {@link SortedSetDocValues} and associated {@link BinaryDocValues}. The former contains the unique values @@ -30,11 +31,17 @@ final class SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer implements Co private final String name; private final String offsetsFieldName; + private final Function converter; private DocValuesWithOffsetsLoader docValues; SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(String name, String offsetsFieldName) { + this(name, offsetsFieldName, Function.identity()); + } + + SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(String name, String offsetsFieldName, Function converter) { this.name = Objects.requireNonNull(name); this.offsetsFieldName = Objects.requireNonNull(offsetsFieldName); + this.converter = Objects.requireNonNull(converter); } @Override @@ -47,7 +54,7 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf SortedSetDocValues valueDocValues = DocValues.getSortedSet(leafReader, name); SortedDocValues offsetDocValues = DocValues.getSorted(leafReader, offsetsFieldName); - return docValues = new 
DocValuesWithOffsetsLoader(valueDocValues, offsetDocValues); + return docValues = new DocValuesWithOffsetsLoader(valueDocValues, offsetDocValues, converter); } @Override @@ -78,15 +85,21 @@ public void write(XContentBuilder b) throws IOException { static final class DocValuesWithOffsetsLoader implements DocValuesLoader { private final SortedDocValues offsetDocValues; private final SortedSetDocValues valueDocValues; + private final Function converter; private final ByteArrayStreamInput scratch = new ByteArrayStreamInput(); private boolean hasValue; private boolean hasOffset; private int[] offsetToOrd; - DocValuesWithOffsetsLoader(SortedSetDocValues valueDocValues, SortedDocValues offsetDocValues) { + DocValuesWithOffsetsLoader( + SortedSetDocValues valueDocValues, + SortedDocValues offsetDocValues, + Function converter + ) { this.valueDocValues = valueDocValues; this.offsetDocValues = offsetDocValues; + this.converter = converter; } @Override @@ -146,7 +159,7 @@ public void write(XContentBuilder b) throws IOException { long ord = ords[offset]; BytesRef c = valueDocValues.lookupOrd(ord); - // This is keyword specific and needs to be updated once support is added for other field types: + c = converter.apply(c); b.utf8Value(c.bytes, c.offset, c.length); } } else if (offsetToOrd != null) { @@ -158,6 +171,7 @@ public void write(XContentBuilder b) throws IOException { } else { for (int i = 0; i < valueDocValues.docValueCount(); i++) { BytesRef c = valueDocValues.lookupOrd(valueDocValues.nextOrd()); + c = converter.apply(c); b.utf8Value(c.bytes, c.offset, c.length); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IPSyntheticSourceNativeArrayIntegrationTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IPSyntheticSourceNativeArrayIntegrationTests.java new file mode 100644 index 0000000000000..2ad08ebb10aae --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/IPSyntheticSourceNativeArrayIntegrationTests.java @@ -0,0 +1,91 
@@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper; + +import org.elasticsearch.common.network.NetworkAddress; + +import java.util.ArrayList; +import java.util.List; + +import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; + +public class IPSyntheticSourceNativeArrayIntegrationTests extends NativeArrayIntegrationTestCase { + + @Override + protected String getFieldTypeName() { + return "ip"; + } + + @Override + protected String getRandomValue() { + return NetworkAddress.format(randomIp(true)); + } + + public void testSynthesizeArray() throws Exception { + var arrayValues = new Object[][] { + new Object[] { "192.168.1.4", "192.168.1.3", null, "192.168.1.2", null, "192.168.1.1" }, + new Object[] { null, "192.168.1.2", null, "192.168.1.1" }, + new Object[] { null }, + new Object[] { null, null, null }, + new Object[] { "192.168.1.3", "192.168.1.2", "192.168.1.1" } }; + verifySyntheticArray(arrayValues); + } + + public void testSynthesizeArrayIgnoreMalformed() throws Exception { + var mapping = jsonBuilder().startObject() + .startObject("properties") + .startObject("field") + .field("type", "ip") + .field("ignore_malformed", true) + .endObject() + .endObject() + .endObject(); + // Note values that would be ignored are added at the end of arrays, + // this makes testing easier as ignored values are always synthesized after regular values: + var arrayValues = new Object[][] { + new Object[] { null, "192.168.1.1", "192.168.1.2", "192.168.1.3", "192.168.1.4", null, "malformed" }, + new Object[] { 
"192.168.1.1", "192.168.1.2", "malformed" }, + new Object[] { "192.168.1.1", "192.168.1.1", "malformed" }, + new Object[] { null, null, null, "malformed" }, + new Object[] { "192.168.1.3", "192.168.1.3", "192.168.1.1", "malformed" } }; + verifySyntheticArray(arrayValues, mapping, "_id", "field._ignore_malformed"); + } + + public void testSynthesizeObjectArray() throws Exception { + List> documents = new ArrayList<>(); + { + List document = new ArrayList<>(); + document.add(new Object[] { "192.168.1.3", "192.168.1.2", "192.168.1.1" }); + document.add(new Object[] { "192.168.1.110", "192.168.1.109", "192.168.1.111" }); + document.add(new Object[] { "192.168.1.2", "192.168.1.2", "192.168.1.1" }); + documents.add(document); + } + { + List document = new ArrayList<>(); + document.add(new Object[] { "192.168.1.9", "192.168.1.7", "192.168.1.5" }); + document.add(new Object[] { "192.168.1.2", "192.168.1.4", "192.168.1.6" }); + document.add(new Object[] { "192.168.1.7", "192.168.1.6", "192.168.1.5" }); + documents.add(document); + } + verifySyntheticObjectArray(documents); + } + + public void testSynthesizeArrayInObjectField() throws Exception { + List documents = new ArrayList<>(); + documents.add(new Object[] { "192.168.1.254", "192.168.1.253", "192.168.1.252" }); + documents.add(new Object[] { "192.168.1.112", "192.168.1.113", "192.168.1.114" }); + documents.add(new Object[] { "192.168.1.3", "192.168.1.2", "192.168.1.1" }); + documents.add(new Object[] { "192.168.1.9", "192.168.1.7", "192.168.1.5" }); + documents.add(new Object[] { "192.168.1.2", "192.168.1.4", "192.168.1.6" }); + documents.add(new Object[] { "192.168.1.7", "192.168.1.6", "192.168.1.5" }); + verifySyntheticArrayInObject(documents); + } + +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IpFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IpFieldMapperTests.java index 1b8a2d68cd930..17ecc2c22db28 100644 --- 
a/server/src/test/java/org/elasticsearch/index/mapper/IpFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IpFieldMapperTests.java @@ -439,4 +439,9 @@ public void execute() { protected Function loadBlockExpected() { return v -> InetAddresses.toAddrString(InetAddressPoint.decode(BytesRef.deepCopyOf((BytesRef) v).bytes)); } + + @Override + protected String randomSyntheticSourceKeep() { + return "all"; + } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IpFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IpFieldTypeTests.java index b3064810e5ca3..7dec761691ce6 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IpFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IpFieldTypeTests.java @@ -349,16 +349,24 @@ public void testRangeQuery() { } public void testFetchSourceValue() throws IOException { - MappedFieldType mapper = new IpFieldMapper.Builder("field", ScriptCompiler.NONE, true, IndexVersion.current()).build( - MapperBuilderContext.root(false, false) - ).fieldType(); + MappedFieldType mapper = new IpFieldMapper.Builder( + "field", + ScriptCompiler.NONE, + true, + IndexVersion.current(), + Mapper.SourceKeepMode.NONE + ).build(MapperBuilderContext.root(false, false)).fieldType(); assertEquals(List.of("2001:db8::2:1"), fetchSourceValue(mapper, "2001:db8::2:1")); assertEquals(List.of("2001:db8::2:1"), fetchSourceValue(mapper, "2001:db8:0:0:0:0:2:1")); assertEquals(List.of("::1"), fetchSourceValue(mapper, "0:0:0:0:0:0:0:1")); - MappedFieldType nullValueMapper = new IpFieldMapper.Builder("field", ScriptCompiler.NONE, true, IndexVersion.current()).nullValue( - "2001:db8:0:0:0:0:2:7" - ).build(MapperBuilderContext.root(false, false)).fieldType(); + MappedFieldType nullValueMapper = new IpFieldMapper.Builder( + "field", + ScriptCompiler.NONE, + true, + IndexVersion.current(), + Mapper.SourceKeepMode.NONE + 
).nullValue("2001:db8:0:0:0:0:2:7").build(MapperBuilderContext.root(false, false)).fieldType(); assertEquals(List.of("2001:db8::2:7"), fetchSourceValue(nullValueMapper, null)); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IpOffsetDocValuesLoaderTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IpOffsetDocValuesLoaderTests.java new file mode 100644 index 0000000000000..dadfd22199aec --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/IpOffsetDocValuesLoaderTests.java @@ -0,0 +1,41 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.index.mapper; + +import org.elasticsearch.common.network.NetworkAddress; + +public class IpOffsetDocValuesLoaderTests extends OffsetDocValuesLoaderTestCase { + + public void testOffsetArray() throws Exception { + verifyOffsets("{\"field\":[\"192.168.1.1\",\"192.168.1.3\",\"192.168.1.2\",\"192.168.1.1\",\"192.168.1.9\",\"192.168.1.3\"]}"); + verifyOffsets("{\"field\":[\"192.168.1.4\",null,\"192.168.1.3\",\"192.168.1.2\",null,\"192.168.1.1\"]}"); + } + + public void testOffsetNestedArray() throws Exception { + verifyOffsets( + "{\"field\":[\"192.168.1.2\",[\"192.168.1.1\"],[\"192.168.1.0\"],null,\"192.168.1.0\"]}", + "{\"field\":[\"192.168.1.2\",\"192.168.1.1\",\"192.168.1.0\",null,\"192.168.1.0\"]}" + ); + verifyOffsets( + "{\"field\":[\"192.168.1.6\",[\"192.168.1.5\", [\"192.168.1.4\"]],[\"192.168.1.3\", [\"192.168.1.2\"]],null,\"192.168.1.1\"]}", + "{\"field\":[\"192.168.1.6\",\"192.168.1.5\",\"192.168.1.4\",\"192.168.1.3\",\"192.168.1.2\",null,\"192.168.1.1\"]}" + ); + } + + @Override + protected String getFieldTypeName() { + return "ip"; + } + + @Override + protected String randomValue() { + return NetworkAddress.format(randomIp(true)); + } +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordOffsetDocValuesLoaderTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordOffsetDocValuesLoaderTests.java index 8300e8e8e4614..55e935e11996c 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordOffsetDocValuesLoaderTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordOffsetDocValuesLoaderTests.java @@ -9,145 +9,7 @@ package org.elasticsearch.index.mapper; -import org.apache.lucene.index.DirectoryReader; -import org.elasticsearch.common.Strings; -import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.common.settings.Settings; -import 
org.elasticsearch.index.mapper.SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer.DocValuesWithOffsetsLoader; -import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentType; - -import java.io.IOException; - -import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; -import static org.hamcrest.Matchers.nullValue; - -public class KeywordOffsetDocValuesLoaderTests extends MapperServiceTestCase { - - @Override - protected Settings getIndexSettings() { - return Settings.builder() - .put("index.mapping.source.mode", "synthetic") - .put("index.mapping.synthetic_source_keep", "arrays") - .build(); - } - - public void testOffsetArrayNoDocValues() throws Exception { - String mapping = """ - { - "_doc": { - "properties": { - "field": { - "type": "keyword", - "doc_values": false - } - } - } - } - """; - try (var mapperService = createMapperService(mapping)) { - var fieldMapper = mapperService.mappingLookup().getMapper("field"); - assertThat(fieldMapper.getOffsetFieldName(), nullValue()); - } - } - - public void testOffsetArrayStored() throws Exception { - String mapping = """ - { - "_doc": { - "properties": { - "field": { - "type": "keyword", - "store": true - } - } - } - } - """; - try (var mapperService = createMapperService(mapping)) { - var fieldMapper = mapperService.mappingLookup().getMapper("field"); - assertThat(fieldMapper.getOffsetFieldName(), nullValue()); - } - } - - public void testOffsetMultiFields() throws Exception { - String mapping = """ - { - "_doc": { - "properties": { - "field": { - "type": "keyword", - "fields": { - "sub": { - "type": "text" - } - } - } - } - } - } - """; - try (var mapperService = createMapperService(mapping)) { - var fieldMapper = mapperService.mappingLookup().getMapper("field"); - assertThat(fieldMapper.getOffsetFieldName(), nullValue()); - } - } - - public void testOffsetArrayNoSyntheticSource() throws Exception { - String mapping = """ - { - "_doc": { - "properties": { - "field": { 
- "type": "keyword" - } - } - } - } - """; - try (var mapperService = createMapperService(Settings.EMPTY, mapping)) { - var fieldMapper = mapperService.mappingLookup().getMapper("field"); - assertThat(fieldMapper.getOffsetFieldName(), nullValue()); - } - } - - public void testOffsetArrayNoSourceArrayKeep() throws Exception { - var settingsBuilder = Settings.builder().put("index.mapping.source.mode", "synthetic"); - String mapping; - if (randomBoolean()) { - mapping = """ - { - "_doc": { - "properties": { - "field": { - "type": "keyword", - "synthetic_source_keep": "{{synthetic_source_keep}}" - } - } - } - } - """.replace("{{synthetic_source_keep}}", randomBoolean() ? "none" : "all"); - } else { - mapping = """ - { - "_doc": { - "properties": { - "field": { - "type": "keyword" - } - } - } - } - """; - if (randomBoolean()) { - settingsBuilder.put("index.mapping.synthetic_source_keep", "none"); - } - } - try (var mapperService = createMapperService(settingsBuilder.build(), mapping)) { - var fieldMapper = mapperService.mappingLookup().getMapper("field"); - assertThat(fieldMapper.getOffsetFieldName(), nullValue()); - } - } +public class KeywordOffsetDocValuesLoaderTests extends OffsetDocValuesLoaderTestCase { public void testOffsetArray() throws Exception { verifyOffsets("{\"field\":[\"z\",\"x\",\"y\",\"c\",\"b\",\"a\"]}"); @@ -162,76 +24,13 @@ public void testOffsetNestedArray() throws Exception { ); } - public void testOffsetEmptyArray() throws Exception { - verifyOffsets("{\"field\":[]}"); - } - - public void testOffsetArrayWithNulls() throws Exception { - verifyOffsets("{\"field\":[null,null,null]}"); - } - - public void testOffsetArrayRandom() throws Exception { - StringBuilder values = new StringBuilder(); - int numValues = randomIntBetween(0, 256); - for (int i = 0; i < numValues; i++) { - if (randomInt(10) == 1) { - values.append("null"); - } else { - values.append('"').append(randomAlphanumericOfLength(2)).append('"'); - } - if (i != (numValues - 1)) { - 
values.append(','); - } - } - verifyOffsets("{\"field\":[" + values + "]}"); - } - - private void verifyOffsets(String source) throws IOException { - verifyOffsets(source, source); - } - - private void verifyOffsets(String source, String expectedSource) throws IOException { - String mapping = """ - { - "_doc": { - "properties": { - "field": { - "type": "keyword" - } - } - } - } - """; - verifyOffsets(mapping, source, expectedSource); + @Override + protected String getFieldTypeName() { + return "keyword"; } - private void verifyOffsets(String mapping, String source, String expectedSource) throws IOException { - try (var mapperService = createMapperService(mapping)) { - var mapper = mapperService.documentMapper(); - - try (var directory = newDirectory()) { - var iw = indexWriterForSyntheticSource(directory); - var doc = mapper.parse(new SourceToParse("_id", new BytesArray(source), XContentType.JSON)); - doc.updateSeqID(0, 0); - doc.version().setLongValue(0); - iw.addDocuments(doc.docs()); - iw.close(); - try (var indexReader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) { - var layer = new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer("field", "field.offsets"); - var leafReader = indexReader.leaves().getFirst().reader(); - var loader = (DocValuesWithOffsetsLoader) layer.docValuesLoader(leafReader, new int[] { 0 }); - assertTrue(loader.advanceToDoc(0)); - assertTrue(loader.count() > 0); - XContentBuilder builder = jsonBuilder().startObject(); - builder.startArray("field"); - loader.write(builder); - builder.endArray().endObject(); - - var actual = Strings.toString(builder); - assertEquals(expectedSource, actual); - } - } - } + @Override + protected String randomValue() { + return randomAlphanumericOfLength(2); } - } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordSyntheticSourceNativeArrayIntegrationTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordSyntheticSourceNativeArrayIntegrationTests.java 
index f0853c34b2097..6f59f617ba259 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordSyntheticSourceNativeArrayIntegrationTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordSyntheticSourceNativeArrayIntegrationTests.java @@ -9,38 +9,24 @@ package org.elasticsearch.index.mapper; -import org.apache.lucene.index.DocValuesType; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfos; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.LeafReader; -import org.elasticsearch.action.admin.indices.forcemerge.ForceMergeRequest; -import org.elasticsearch.action.index.IndexRequest; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.support.WriteRequest; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.query.IdsQueryBuilder; -import org.elasticsearch.test.ESSingleNodeTestCase; -import org.elasticsearch.xcontent.XContentBuilder; -import org.hamcrest.Matchers; +import com.carrotsearch.randomizedtesting.generators.RandomStrings; -import java.io.IOException; import java.util.ArrayList; -import java.util.LinkedHashSet; import java.util.List; -import java.util.Map; -import java.util.Set; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; -import static org.hamcrest.Matchers.contains; -import static org.hamcrest.Matchers.containsInAnyOrder; -import static org.hamcrest.Matchers.empty; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.hasKey; -import static org.hamcrest.Matchers.nullValue; -public class KeywordSyntheticSourceNativeArrayIntegrationTests extends ESSingleNodeTestCase { +public class KeywordSyntheticSourceNativeArrayIntegrationTests extends NativeArrayIntegrationTestCase { + + @Override + protected String getFieldTypeName() { + return "keyword"; + } + + @Override + protected String getRandomValue() { + 
return RandomStrings.randomAsciiOfLength(random(), 8); + } public void testSynthesizeArray() throws Exception { var arrayValues = new Object[][] { @@ -52,16 +38,6 @@ public void testSynthesizeArray() throws Exception { verifySyntheticArray(arrayValues); } - public void testSynthesizeEmptyArray() throws Exception { - var arrayValues = new Object[][] { new Object[] {} }; - verifySyntheticArray(arrayValues); - } - - public void testSynthesizeArrayRandom() throws Exception { - var arrayValues = new Object[][] { generateRandomStringArray(64, 8, false, true) }; - verifySyntheticArray(arrayValues); - } - public void testSynthesizeArrayIgnoreAbove() throws Exception { var mapping = jsonBuilder().startObject() .startObject("properties") @@ -79,7 +55,7 @@ public void testSynthesizeArrayIgnoreAbove() throws Exception { new Object[] { "123", "1234", "12345" }, new Object[] { null, null, null, "blabla" }, new Object[] { "1", "2", "3", "blabla" } }; - verifySyntheticArray(arrayValues, mapping, 4, "_id", "field._original"); + verifySyntheticArray(arrayValues, mapping, "_id", "field._original"); } public void testSynthesizeObjectArray() throws Exception { @@ -112,237 +88,4 @@ public void testSynthesizeArrayInObjectField() throws Exception { verifySyntheticArrayInObject(documents); } - public void testSynthesizeArrayInObjectFieldRandom() throws Exception { - List documents = new ArrayList<>(); - int numDocs = randomIntBetween(8, 256); - for (int i = 0; i < numDocs; i++) { - documents.add(generateRandomStringArray(64, 8, false, true)); - } - verifySyntheticArrayInObject(documents); - } - - private void verifySyntheticArray(Object[][] arrays) throws IOException { - var mapping = jsonBuilder().startObject() - .startObject("properties") - .startObject("field") - .field("type", "keyword") - .endObject() - .endObject() - .endObject(); - verifySyntheticArray(arrays, mapping, null, "_id"); - } - - private void verifySyntheticArray(Object[][] arrays, XContentBuilder mapping, Integer 
ignoreAbove, String... expectedStoredFields) - throws IOException { - var indexService = createIndex( - "test-index", - Settings.builder().put("index.mapping.source.mode", "synthetic").put("index.mapping.synthetic_source_keep", "arrays").build(), - mapping - ); - for (int i = 0; i < arrays.length; i++) { - var array = arrays[i]; - - var indexRequest = new IndexRequest("test-index"); - indexRequest.id("my-id-" + i); - var source = jsonBuilder().startObject(); - if (array != null) { - source.startArray("field"); - for (Object arrayValue : array) { - source.value(arrayValue); - } - source.endArray(); - } else { - source.field("field").nullValue(); - } - indexRequest.source(source.endObject()); - indexRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); - client().index(indexRequest).actionGet(); - - var searchRequest = new SearchRequest("test-index"); - searchRequest.source().query(new IdsQueryBuilder().addIds("my-id-" + i)); - var searchResponse = client().search(searchRequest).actionGet(); - try { - var hit = searchResponse.getHits().getHits()[0]; - assertThat(hit.getId(), equalTo("my-id-" + i)); - var sourceAsMap = hit.getSourceAsMap(); - assertThat(sourceAsMap, hasKey("field")); - var actualArray = (List) sourceAsMap.get("field"); - if (array == null) { - assertThat(actualArray, nullValue()); - } else if (array.length == 0) { - assertThat(actualArray, empty()); - } else { - assertThat(actualArray, Matchers.contains(array)); - } - } finally { - searchResponse.decRef(); - } - } - - try (var searcher = indexService.getShard(0).acquireSearcher(getTestName())) { - var reader = searcher.getDirectoryReader(); - for (int i = 0; i < arrays.length; i++) { - var document = reader.storedFields().document(i); - // Verify that there is no ignored source: - Set storedFieldNames = new LinkedHashSet<>(document.getFields().stream().map(IndexableField::name).toList()); - if (IndexSettings.RECOVERY_USE_SYNTHETIC_SOURCE.isEnabled()) { - assertThat(storedFieldNames, 
contains(expectedStoredFields)); - } else { - String[] copyExpectedStoredFields = new String[expectedStoredFields.length + 1]; - System.arraycopy(expectedStoredFields, 0, copyExpectedStoredFields, 0, expectedStoredFields.length); - copyExpectedStoredFields[copyExpectedStoredFields.length - 1] = "_recovery_source"; - assertThat(storedFieldNames, containsInAnyOrder(copyExpectedStoredFields)); - } - } - var fieldInfo = FieldInfos.getMergedFieldInfos(reader).fieldInfo("field.offsets"); - assertThat(fieldInfo.getDocValuesType(), equalTo(DocValuesType.SORTED)); - } - } - - private void verifySyntheticObjectArray(List> documents) throws IOException { - var indexService = createIndex( - "test-index", - Settings.builder().put("index.mapping.source.mode", "synthetic").put("index.mapping.synthetic_source_keep", "arrays").build(), - jsonBuilder().startObject() - .startObject("properties") - .startObject("object") - .startObject("properties") - .startObject("field") - .field("type", "keyword") - .endObject() - .endObject() - .endObject() - .endObject() - .endObject() - ); - for (int i = 0; i < documents.size(); i++) { - var document = documents.get(i); - - var indexRequest = new IndexRequest("test-index"); - indexRequest.id("my-id-" + i); - var source = jsonBuilder().startObject(); - source.startArray("object"); - for (Object[] arrayValue : document) { - source.startObject(); - source.array("field", arrayValue); - source.endObject(); - } - source.endArray(); - indexRequest.source(source.endObject()); - indexRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); - client().index(indexRequest).actionGet(); - - var searchRequest = new SearchRequest("test-index"); - searchRequest.source().query(new IdsQueryBuilder().addIds("my-id-" + i)); - var searchResponse = client().search(searchRequest).actionGet(); - try { - var hit = searchResponse.getHits().getHits()[0]; - assertThat(hit.getId(), equalTo("my-id-" + i)); - var sourceAsMap = hit.getSourceAsMap(); - var objectArray = 
(List) sourceAsMap.get("object"); - for (int j = 0; j < document.size(); j++) { - var expected = document.get(j); - List actual = (List) ((Map) objectArray.get(j)).get("field"); - assertThat(actual, Matchers.contains(expected)); - } - } finally { - searchResponse.decRef(); - } - } - - indexService.getShard(0).forceMerge(new ForceMergeRequest("test-index").maxNumSegments(1)); - try (var searcher = indexService.getShard(0).acquireSearcher(getTestName())) { - var reader = searcher.getDirectoryReader(); - for (int i = 0; i < documents.size(); i++) { - var document = reader.storedFields().document(i); - // Verify that there is ignored source because of leaf array being wrapped by object array: - List storedFieldNames = document.getFields().stream().map(IndexableField::name).toList(); - if (IndexSettings.RECOVERY_USE_SYNTHETIC_SOURCE.isEnabled()) { - assertThat(storedFieldNames, contains("_id", "_ignored_source")); - } else { - assertThat(storedFieldNames, containsInAnyOrder("_id", "_ignored_source", "_recovery_source")); - } - - // Verify that there is no offset field: - LeafReader leafReader = reader.leaves().get(0).reader(); - for (FieldInfo fieldInfo : leafReader.getFieldInfos()) { - String name = fieldInfo.getName(); - assertFalse("expected no field that contains [offsets] in name, but found [" + name + "]", name.contains("offsets")); - } - - var binaryDocValues = leafReader.getBinaryDocValues("object.field.offsets"); - assertThat(binaryDocValues, nullValue()); - } - } - } - - private void verifySyntheticArrayInObject(List documents) throws IOException { - var indexService = createIndex( - "test-index", - Settings.builder().put("index.mapping.source.mode", "synthetic").put("index.mapping.synthetic_source_keep", "arrays").build(), - jsonBuilder().startObject() - .startObject("properties") - .startObject("object") - .startObject("properties") - .startObject("field") - .field("type", "keyword") - .endObject() - .endObject() - .endObject() - .endObject() - .endObject() 
- ); - for (int i = 0; i < documents.size(); i++) { - var arrayValue = documents.get(i); - - var indexRequest = new IndexRequest("test-index"); - indexRequest.id("my-id-" + i); - var source = jsonBuilder().startObject(); - source.startObject("object"); - source.array("field", arrayValue); - source.endObject(); - indexRequest.source(source.endObject()); - indexRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); - client().index(indexRequest).actionGet(); - - var searchRequest = new SearchRequest("test-index"); - searchRequest.source().query(new IdsQueryBuilder().addIds("my-id-" + i)); - var searchResponse = client().search(searchRequest).actionGet(); - try { - var hit = searchResponse.getHits().getHits()[0]; - assertThat(hit.getId(), equalTo("my-id-" + i)); - var sourceAsMap = hit.getSourceAsMap(); - var objectArray = (Map) sourceAsMap.get("object"); - - List actual = (List) objectArray.get("field"); - if (arrayValue == null) { - assertThat(actual, nullValue()); - } else if (arrayValue.length == 0) { - assertThat(actual, empty()); - } else { - assertThat(actual, Matchers.contains(arrayValue)); - } - } finally { - searchResponse.decRef(); - } - } - - indexService.getShard(0).forceMerge(new ForceMergeRequest("test-index").maxNumSegments(1)); - try (var searcher = indexService.getShard(0).acquireSearcher(getTestName())) { - var reader = searcher.getDirectoryReader(); - for (int i = 0; i < documents.size(); i++) { - var document = reader.storedFields().document(i); - // Verify that there is no ignored source: - Set storedFieldNames = new LinkedHashSet<>(document.getFields().stream().map(IndexableField::name).toList()); - if (IndexSettings.RECOVERY_USE_SYNTHETIC_SOURCE.isEnabled()) { - assertThat(storedFieldNames, contains("_id")); - } else { - assertThat(storedFieldNames, containsInAnyOrder("_id", "_recovery_source")); - } - } - var fieldInfo = FieldInfos.getMergedFieldInfos(reader).fieldInfo("object.field.offsets"); - 
assertThat(fieldInfo.getDocValuesType(), equalTo(DocValuesType.SORTED)); - } - } - } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java b/server/src/test/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java new file mode 100644 index 0000000000000..e074bf883ae16 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java @@ -0,0 +1,299 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafReader; +import org.elasticsearch.action.admin.indices.forcemerge.ForceMergeRequest; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.support.WriteRequest; +import org.elasticsearch.common.network.NetworkAddress; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.query.IdsQueryBuilder; +import org.elasticsearch.test.ESSingleNodeTestCase; +import org.elasticsearch.xcontent.XContentBuilder; +import org.hamcrest.Matchers; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static 
org.elasticsearch.xcontent.XContentFactory.jsonBuilder; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasKey; +import static org.hamcrest.Matchers.nullValue; + +public abstract class NativeArrayIntegrationTestCase extends ESSingleNodeTestCase { + + public void testSynthesizeEmptyArray() throws Exception { + var arrayValues = new Object[][] { new Object[] {} }; + verifySyntheticArray(arrayValues); + } + + public void testSynthesizeArrayRandom() throws Exception { + var arrayValues = new Object[randomInt(64)]; + for (int j = 0; j < arrayValues.length; j++) { + arrayValues[j] = getRandomValue(); + } + verifySyntheticArray(new Object[][] { arrayValues }); + } + + public void testSynthesizeArrayInObjectFieldRandom() throws Exception { + List documents = new ArrayList<>(); + int numDocs = randomIntBetween(8, 256); + for (int i = 0; i < numDocs; i++) { + Object[] document = new Object[randomInt(64)]; + for (int j = 0; j < document.length; j++) { + document[j] = getRandomValue(); + } + documents.add(document); + } + verifySyntheticArrayInObject(documents); + } + + protected abstract String getFieldTypeName(); + + protected abstract String getRandomValue(); + + protected void verifySyntheticArray(Object[][] arrays) throws IOException { + var mapping = jsonBuilder().startObject() + .startObject("properties") + .startObject("field") + .field("type", getFieldTypeName()) + .endObject() + .endObject() + .endObject(); + verifySyntheticArray(arrays, mapping, "_id"); + } + + protected void verifySyntheticArray(Object[][] arrays, XContentBuilder mapping, String... 
expectedStoredFields) throws IOException { + var indexService = createIndex( + "test-index", + Settings.builder().put("index.mapping.source.mode", "synthetic").put("index.mapping.synthetic_source_keep", "arrays").build(), + mapping + ); + for (int i = 0; i < arrays.length; i++) { + var array = arrays[i]; + + var indexRequest = new IndexRequest("test-index"); + indexRequest.id("my-id-" + i); + var source = jsonBuilder().startObject(); + if (array != null) { + source.startArray("field"); + for (Object arrayValue : array) { + source.value(arrayValue); + } + source.endArray(); + } else { + source.field("field").nullValue(); + } + indexRequest.source(source.endObject()); + indexRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); + client().index(indexRequest).actionGet(); + + var searchRequest = new SearchRequest("test-index"); + searchRequest.source().query(new IdsQueryBuilder().addIds("my-id-" + i)); + var searchResponse = client().search(searchRequest).actionGet(); + try { + var hit = searchResponse.getHits().getHits()[0]; + assertThat(hit.getId(), equalTo("my-id-" + i)); + var sourceAsMap = hit.getSourceAsMap(); + assertThat(sourceAsMap, hasKey("field")); + var actualArray = (List) sourceAsMap.get("field"); + if (array == null) { + assertThat(actualArray, nullValue()); + } else if (array.length == 0) { + assertThat(actualArray, empty()); + } else { + assertThat(actualArray, Matchers.contains(array)); + } + } finally { + searchResponse.decRef(); + } + } + + try (var searcher = indexService.getShard(0).acquireSearcher(getTestName())) { + var reader = searcher.getDirectoryReader(); + for (int i = 0; i < arrays.length; i++) { + var document = reader.storedFields().document(i); + // Verify that there is no ignored source: + Set storedFieldNames = new LinkedHashSet<>(document.getFields().stream().map(IndexableField::name).toList()); + if (IndexSettings.RECOVERY_USE_SYNTHETIC_SOURCE.isEnabled()) { + assertThat(storedFieldNames, contains(expectedStoredFields)); 
+ } else { + var copyExpectedStoredFields = new String[expectedStoredFields.length + 1]; + System.arraycopy(expectedStoredFields, 0, copyExpectedStoredFields, 0, expectedStoredFields.length); + copyExpectedStoredFields[copyExpectedStoredFields.length - 1] = "_recovery_source"; + assertThat(storedFieldNames, containsInAnyOrder(copyExpectedStoredFields)); + } + } + var fieldInfo = FieldInfos.getMergedFieldInfos(reader).fieldInfo("field.offsets"); + assertThat(fieldInfo.getDocValuesType(), equalTo(DocValuesType.SORTED)); + } + } + + protected void verifySyntheticObjectArray(List> documents) throws IOException { + var indexService = createIndex( + "test-index", + Settings.builder().put("index.mapping.source.mode", "synthetic").put("index.mapping.synthetic_source_keep", "arrays").build(), + jsonBuilder().startObject() + .startObject("properties") + .startObject("object") + .startObject("properties") + .startObject("field") + .field("type", getFieldTypeName()) + .endObject() + .endObject() + .endObject() + .endObject() + .endObject() + ); + for (int i = 0; i < documents.size(); i++) { + var document = documents.get(i); + + var indexRequest = new IndexRequest("test-index"); + indexRequest.id("my-id-" + i); + var source = jsonBuilder().startObject(); + source.startArray("object"); + for (Object[] arrayValue : document) { + source.startObject(); + source.array("field", arrayValue); + source.endObject(); + } + source.endArray(); + indexRequest.source(source.endObject()); + indexRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); + client().index(indexRequest).actionGet(); + + var searchRequest = new SearchRequest("test-index"); + searchRequest.source().query(new IdsQueryBuilder().addIds("my-id-" + i)); + var searchResponse = client().search(searchRequest).actionGet(); + try { + var hit = searchResponse.getHits().getHits()[0]; + assertThat(hit.getId(), equalTo("my-id-" + i)); + var sourceAsMap = hit.getSourceAsMap(); + var objectArray = (List) 
sourceAsMap.get("object"); + for (int j = 0; j < document.size(); j++) { + var expected = document.get(j); + List actual = (List) ((Map) objectArray.get(j)).get("field"); + assertThat(actual, Matchers.contains(expected)); + } + } finally { + searchResponse.decRef(); + } + } + + indexService.getShard(0).forceMerge(new ForceMergeRequest("test-index").maxNumSegments(1)); + try (var searcher = indexService.getShard(0).acquireSearcher(getTestName())) { + var reader = searcher.getDirectoryReader(); + for (int i = 0; i < documents.size(); i++) { + var document = reader.storedFields().document(i); + // Verify that there is ignored source because of leaf array being wrapped by object array: + List storedFieldNames = document.getFields().stream().map(IndexableField::name).toList(); + if (IndexSettings.RECOVERY_USE_SYNTHETIC_SOURCE.isEnabled()) { + assertThat(storedFieldNames, contains("_id", "_ignored_source")); + } else { + assertThat(storedFieldNames, containsInAnyOrder("_id", "_ignored_source", "_recovery_source")); + } + + // Verify that there is no offset field: + LeafReader leafReader = reader.leaves().get(0).reader(); + for (FieldInfo fieldInfo : leafReader.getFieldInfos()) { + String name = fieldInfo.getName(); + assertFalse("expected no field that contains [offsets] in name, but found [" + name + "]", name.contains("offsets")); + } + + var binaryDocValues = leafReader.getBinaryDocValues("object.field.offsets"); + assertThat(binaryDocValues, nullValue()); + } + } + } + + protected void verifySyntheticArrayInObject(List documents) throws IOException { + var indexService = createIndex( + "test-index", + Settings.builder().put("index.mapping.source.mode", "synthetic").put("index.mapping.synthetic_source_keep", "arrays").build(), + jsonBuilder().startObject() + .startObject("properties") + .startObject("object") + .startObject("properties") + .startObject("field") + .field("type", getFieldTypeName()) + .endObject() + .endObject() + .endObject() + .endObject() + .endObject() + ); 
+ for (int i = 0; i < documents.size(); i++) { + var arrayValue = documents.get(i); + + var indexRequest = new IndexRequest("test-index"); + indexRequest.id("my-id-" + i); + var source = jsonBuilder().startObject(); + source.startObject("object"); + source.array("field", arrayValue); + source.endObject(); + indexRequest.source(source.endObject()); + indexRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); + client().index(indexRequest).actionGet(); + + var searchRequest = new SearchRequest("test-index"); + searchRequest.source().query(new IdsQueryBuilder().addIds("my-id-" + i)); + var searchResponse = client().search(searchRequest).actionGet(); + try { + var hit = searchResponse.getHits().getHits()[0]; + assertThat(hit.getId(), equalTo("my-id-" + i)); + var sourceAsMap = hit.getSourceAsMap(); + var objectArray = (Map) sourceAsMap.get("object"); + + List actual = (List) objectArray.get("field"); + if (arrayValue == null) { + assertThat(actual, nullValue()); + } else if (arrayValue.length == 0) { + assertThat(actual, empty()); + } else { + assertThat(actual, Matchers.contains(arrayValue)); + } + } finally { + searchResponse.decRef(); + } + } + + indexService.getShard(0).forceMerge(new ForceMergeRequest("test-index").maxNumSegments(1)); + try (var searcher = indexService.getShard(0).acquireSearcher(getTestName())) { + var reader = searcher.getDirectoryReader(); + for (int i = 0; i < documents.size(); i++) { + var document = reader.storedFields().document(i); + // Verify that there is no ignored source: + Set storedFieldNames = new LinkedHashSet<>(document.getFields().stream().map(IndexableField::name).toList()); + if (IndexSettings.RECOVERY_USE_SYNTHETIC_SOURCE.isEnabled()) { + assertThat(storedFieldNames, contains("_id")); + } else { + assertThat(storedFieldNames, containsInAnyOrder("_id", "_recovery_source")); + } + } + var fieldInfo = FieldInfos.getMergedFieldInfos(reader).fieldInfo("object.field.offsets"); + assertThat(fieldInfo.getDocValuesType(), 
equalTo(DocValuesType.SORTED)); + } + } + +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/OffsetDocValuesLoaderTestCase.java b/server/src/test/java/org/elasticsearch/index/mapper/OffsetDocValuesLoaderTestCase.java new file mode 100644 index 0000000000000..bb12067aefd18 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/OffsetDocValuesLoaderTestCase.java @@ -0,0 +1,230 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.index.DirectoryReader; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentType; + +import java.io.IOException; + +import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; +import static org.hamcrest.Matchers.nullValue; + +public abstract class OffsetDocValuesLoaderTestCase extends MapperServiceTestCase { + + @Override + protected Settings getIndexSettings() { + return Settings.builder() + .put("index.mapping.source.mode", "synthetic") + .put("index.mapping.synthetic_source_keep", "arrays") + .build(); + } + + public void testOffsetArrayNoDocValues() throws Exception { + String mapping = """ + { + "_doc": { + "properties": { + "field": { + "type": "{{type}}", + "doc_values": false + } + } + } + } + """.replace("{{type}}", getFieldTypeName()); + try (var mapperService = createMapperService(mapping)) { + var fieldMapper = 
mapperService.mappingLookup().getMapper("field"); + assertThat(fieldMapper.getOffsetFieldName(), nullValue()); + } + } + + public void testOffsetArrayStored() throws Exception { + String mapping = """ + { + "_doc": { + "properties": { + "field": { + "type": "{{type}}", + "store": true + } + } + } + } + """.replace("{{type}}", getFieldTypeName()); + ; + try (var mapperService = createMapperService(mapping)) { + var fieldMapper = mapperService.mappingLookup().getMapper("field"); + assertThat(fieldMapper.getOffsetFieldName(), nullValue()); + } + } + + public void testOffsetMultiFields() throws Exception { + String mapping = """ + { + "_doc": { + "properties": { + "field": { + "type": "{{type}}", + "fields": { + "sub": { + "type": "text" + } + } + } + } + } + } + """.replace("{{type}}", getFieldTypeName()); + try (var mapperService = createMapperService(mapping)) { + var fieldMapper = mapperService.mappingLookup().getMapper("field"); + assertThat(fieldMapper.getOffsetFieldName(), nullValue()); + } + } + + public void testOffsetArrayNoSyntheticSource() throws Exception { + String mapping = """ + { + "_doc": { + "properties": { + "field": { + "type": "{{type}}" + } + } + } + } + """.replace("{{type}}", getFieldTypeName()); + try (var mapperService = createMapperService(Settings.EMPTY, mapping)) { + var fieldMapper = mapperService.mappingLookup().getMapper("field"); + assertThat(fieldMapper.getOffsetFieldName(), nullValue()); + } + } + + public void testOffsetArrayNoSourceArrayKeep() throws Exception { + var settingsBuilder = Settings.builder().put("index.mapping.source.mode", "synthetic"); + String mapping; + if (randomBoolean()) { + mapping = """ + { + "_doc": { + "properties": { + "field": { + "type": "{{type}}", + "synthetic_source_keep": "{{synthetic_source_keep}}" + } + } + } + } + """.replace("{{synthetic_source_keep}}", randomBoolean() ? 
"none" : "all").replace("{{type}}", getFieldTypeName()); + } else { + mapping = """ + { + "_doc": { + "properties": { + "field": { + "type": "{{type}}" + } + } + } + } + """.replace("{{type}}", getFieldTypeName()); + if (randomBoolean()) { + settingsBuilder.put("index.mapping.synthetic_source_keep", "none"); + } + } + try (var mapperService = createMapperService(settingsBuilder.build(), mapping)) { + var fieldMapper = mapperService.mappingLookup().getMapper("field"); + assertThat(fieldMapper.getOffsetFieldName(), nullValue()); + } + } + + public void testOffsetEmptyArray() throws Exception { + verifyOffsets("{\"field\":[]}"); + } + + public void testOffsetArrayWithNulls() throws Exception { + verifyOffsets("{\"field\":[null,null,null]}"); + verifyOffsets("{\"field\":[null,[null],null]}", "{\"field\":[null,null,null]}"); + } + + public void testOffsetArrayRandom() throws Exception { + StringBuilder values = new StringBuilder(); + int numValues = randomIntBetween(0, 256); + for (int i = 0; i < numValues; i++) { + if (randomInt(10) == 1) { + values.append("null"); + } else { + String randomValue = randomValue(); + values.append('"').append(randomValue).append('"'); + } + if (i != (numValues - 1)) { + values.append(','); + } + } + verifyOffsets("{\"field\":[" + values + "]}"); + } + + protected abstract String getFieldTypeName(); + + protected abstract String randomValue(); + + protected void verifyOffsets(String source) throws IOException { + verifyOffsets(source, source); + } + + protected void verifyOffsets(String source, String expectedSource) throws IOException { + String mapping = """ + { + "_doc": { + "properties": { + "field": { + "type": "{{type}}" + } + } + } + } + """.replace("{{type}}", getFieldTypeName()); + verifyOffsets(mapping, source, expectedSource); + } + + private void verifyOffsets(String mapping, String source, String expectedSource) throws IOException { + try (var mapperService = createMapperService(mapping)) { + var mapper = 
mapperService.documentMapper(); + + try (var directory = newDirectory()) { + var iw = indexWriterForSyntheticSource(directory); + var doc = mapper.parse(new SourceToParse("_id", new BytesArray(source), XContentType.JSON)); + doc.updateSeqID(0, 0); + doc.version().setLongValue(0); + iw.addDocuments(doc.docs()); + iw.close(); + try (var indexReader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) { + FieldMapper fieldMapper = (FieldMapper) mapper.mappers().getMapper("field"); + var syntheticSourceLoader = fieldMapper.syntheticFieldLoader(); + var leafReader = indexReader.leaves().getFirst().reader(); + var docValueLoader = syntheticSourceLoader.docValuesLoader(leafReader, new int[] { 0 }); + assertTrue(docValueLoader.advanceToDoc(0)); + assertTrue(syntheticSourceLoader.hasValue()); + XContentBuilder builder = jsonBuilder().startObject(); + syntheticSourceLoader.write(builder); + builder.endObject(); + + var actual = Strings.toString(builder); + assertEquals(expectedSource, actual); + } + } + } + } + +}