Skip to content

Commit

Permalink
Store arrays offsets for ip fields natively with synthetic source
Browse files Browse the repository at this point in the history
Follow-up of elastic#113757; adds support for natively storing array offsets for ip fields instead of falling back to ignored source.
  • Loading branch information
martijnvg committed Feb 21, 2025
1 parent 45836c8 commit 62a3b50
Show file tree
Hide file tree
Showing 12 changed files with 799 additions and 509 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@

public class FieldArrayContext {

public static final String OFFSETS_FIELD_NAME_SUFFIX = ".offsets";
private final Map<String, Offsets> offsetsPerField = new HashMap<>();

void recordOffset(String field, String value) {
void recordOffset(String field, Comparable<?> value) {
Offsets arrayOffsets = offsetsPerField.computeIfAbsent(field, k -> new Offsets());
int nextOffset = arrayOffsets.currentOffset++;
var offsets = arrayOffsets.valueToOffsets.computeIfAbsent(value, s -> new ArrayList<>(2));
Expand Down Expand Up @@ -85,7 +86,7 @@ private static class Offsets {
// Need to use TreeMap here, so that we maintain the order in which each value (with its offset) was inserted
// (which is the same order in which the document gets parsed), so we store offsets in the right order. This is
// the same order in which the values get stored in SortedSetDocValues.
final Map<String, List<Integer>> valueToOffsets = new TreeMap<>();
final Map<Comparable<?>, List<Integer>> valueToOffsets = new TreeMap<>();
final List<Integer> nullValueOffsets = new ArrayList<>(2);

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,15 @@ public static final class Builder extends FieldMapper.DimensionBuilder {
private final boolean ignoreMalformedByDefault;
private final IndexVersion indexCreatedVersion;
private final ScriptCompiler scriptCompiler;
private final SourceKeepMode indexSourceKeepMode;

public Builder(String name, ScriptCompiler scriptCompiler, boolean ignoreMalformedByDefault, IndexVersion indexCreatedVersion) {
public Builder(
String name,
ScriptCompiler scriptCompiler,
boolean ignoreMalformedByDefault,
IndexVersion indexCreatedVersion,
SourceKeepMode indexSourceKeepMode
) {
super(name);
this.scriptCompiler = Objects.requireNonNull(scriptCompiler);
this.ignoreMalformedByDefault = ignoreMalformedByDefault;
Expand All @@ -114,6 +121,7 @@ public Builder(String name, ScriptCompiler scriptCompiler, boolean ignoreMalform
);
}
});
this.indexSourceKeepMode = indexSourceKeepMode;
}

Builder nullValue(String nullValue) {
Expand Down Expand Up @@ -184,6 +192,27 @@ public IpFieldMapper build(MapperBuilderContext context) {
}
hasScript = script.get() != null;
onScriptError = onScriptErrorParam.getValue();

var sourceKeepMode = this.sourceKeepMode.orElse(indexSourceKeepMode);
String offsetsFieldName;
if (context.isSourceSynthetic()
&& sourceKeepMode == SourceKeepMode.ARRAYS
&& hasDocValues.get()
&& stored.get() == false
&& copyTo.copyToFields().isEmpty()
&& multiFieldsBuilder.hasMultiFields() == false
&& indexCreatedVersion.onOrAfter(IndexVersions.SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD)) {
// Skip when stored: we will be synthesizing from stored fields, so there is no point in keeping track of the offsets.
// Skip copy_to and multi-fields: supporting those requires more work. However, copy_to usage is rare in metrics
// and logging use cases.

// keep track of value offsets so that we can reconstruct arrays from doc values in order as was specified during indexing
// (if field is stored then there is no point of doing this)
offsetsFieldName = context.buildFullName(leafName() + FieldArrayContext.OFFSETS_FIELD_NAME_SUFFIX);
} else {
offsetsFieldName = null;
}

return new IpFieldMapper(
leafName(),
new IpFieldType(
Expand All @@ -198,15 +227,16 @@ public IpFieldMapper build(MapperBuilderContext context) {
),
builderParams(this, context),
context.isSourceSynthetic(),
this
this,
offsetsFieldName
);
}

}

public static final TypeParser PARSER = createTypeParserWithLegacySupport((n, c) -> {
boolean ignoreMalformedByDefault = IGNORE_MALFORMED_SETTING.get(c.getSettings());
return new Builder(n, c.scriptCompiler(), ignoreMalformedByDefault, c.indexVersionCreated());
return new Builder(n, c.scriptCompiler(), ignoreMalformedByDefault, c.indexVersionCreated(), c.getIndexSettings().sourceKeepMode());
});

public static final class IpFieldType extends SimpleMappedFieldType {
Expand Down Expand Up @@ -501,13 +531,16 @@ public TermsEnum getTerms(IndexReader reader, String prefix, boolean caseInsensi
private final Script script;
private final FieldValues<InetAddress> scriptValues;
private final ScriptCompiler scriptCompiler;
private final SourceKeepMode indexSourceKeepMode;
private final String offsetsFieldName;

private IpFieldMapper(
String simpleName,
MappedFieldType mappedFieldType,
BuilderParams builderParams,
boolean storeIgnored,
Builder builder
Builder builder,
String offsetsFieldName
) {
super(simpleName, mappedFieldType, builderParams);
this.ignoreMalformedByDefault = builder.ignoreMalformedByDefault;
Expand All @@ -523,6 +556,8 @@ private IpFieldMapper(
this.scriptCompiler = builder.scriptCompiler;
this.dimension = builder.dimension.getValue();
this.storeIgnored = storeIgnored;
this.indexSourceKeepMode = builder.indexSourceKeepMode;
this.offsetsFieldName = offsetsFieldName;
}

@Override
Expand Down Expand Up @@ -561,6 +596,14 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
if (address != null) {
indexValue(context, address);
}
if (offsetsFieldName != null && context.isImmediateParentAnArray() && context.getRecordedSource() == false) {
if (address != null) {
BytesRef sortableValue = new BytesRef(InetAddressPoint.encode(address));
context.getOffSetContext().recordOffset(offsetsFieldName, sortableValue);
} else {
context.getOffSetContext().recordNull(offsetsFieldName);
}
}
}

private void indexValue(DocumentParserContext context, InetAddress address) {
Expand Down Expand Up @@ -593,7 +636,9 @@ protected void indexScriptValues(

@Override
public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName(), scriptCompiler, ignoreMalformedByDefault, indexCreatedVersion).dimension(dimension).init(this);
return new Builder(leafName(), scriptCompiler, ignoreMalformedByDefault, indexCreatedVersion, indexSourceKeepMode).dimension(
dimension
).init(this);
}

@Override
Expand All @@ -610,19 +655,24 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
if (hasDocValues) {
return new SyntheticSourceSupport.Native(() -> {
var layers = new ArrayList<CompositeSyntheticFieldLoader.Layer>();
layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) {
@Override
protected BytesRef convert(BytesRef value) {
byte[] bytes = Arrays.copyOfRange(value.bytes, value.offset, value.offset + value.length);
return new BytesRef(NetworkAddress.format(InetAddressPoint.decode(bytes)));
}

@Override
protected BytesRef preserve(BytesRef value) {
// No need to copy because convert has made a deep copy
return value;
}
});
if (offsetsFieldName != null) {
layers.add(
new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFieldName, IpFieldMapper::convert)
);
} else {
layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) {
@Override
protected BytesRef convert(BytesRef value) {
return IpFieldMapper.convert(value);
}

@Override
protected BytesRef preserve(BytesRef value) {
// No need to copy because convert has made a deep copy
return value;
}
});
}

if (ignoreMalformed) {
layers.add(new CompositeSyntheticFieldLoader.MalformedValuesLayer(fullPath()));
Expand All @@ -633,4 +683,14 @@ protected BytesRef preserve(BytesRef value) {

return super.syntheticSourceSupport();
}

/**
 * Converts an encoded IP value (the raw byte encoding stored in doc values, as produced by
 * {@code InetAddressPoint.encode}) into its formatted address string, wrapped in a new
 * {@code BytesRef}. Always returns a deep copy; the input is not retained.
 */
static BytesRef convert(BytesRef value) {
    byte[] encoded = Arrays.copyOfRange(value.bytes, value.offset, value.offset + value.length);
    return new BytesRef(NetworkAddress.format(InetAddressPoint.decode(encoded)));
}

@Override
public String getOffsetFieldName() {
// Name of the hidden ".offsets" companion field, or null when offsets are not tracked
// (e.g. non-synthetic source, stored field, copy_to targets or multi-fields present).
return offsetsFieldName;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ public final class KeywordFieldMapper extends FieldMapper {

public static final String CONTENT_TYPE = "keyword";
private static final String HOST_NAME = "host.name";
public static final String OFFSETS_FIELD_NAME_SUFFIX = ".offsets";

public static class Defaults {
public static final FieldType FIELD_TYPE;
Expand Down Expand Up @@ -454,7 +453,7 @@ && indexVersionSupportStoringArraysNatively()) {

// keep track of value offsets so that we can reconstruct arrays from doc values in order as was specified during indexing
// (if field is stored then there is no point of doing this)
offsetsFieldName = context.buildFullName(leafName() + OFFSETS_FIELD_NAME_SUFFIX);
offsetsFieldName = context.buildFullName(leafName() + FieldArrayContext.OFFSETS_FIELD_NAME_SUFFIX);
} else {
offsetsFieldName = null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import java.io.IOException;
import java.util.Objects;
import java.util.function.Function;

/**
* Load {@code _source} fields from {@link SortedSetDocValues} and associated {@link BinaryDocValues}. The former contains the unique values
Expand All @@ -30,11 +31,17 @@ final class SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer implements Co

private final String name;
private final String offsetsFieldName;
private final Function<BytesRef, BytesRef> converter;
private DocValuesWithOffsetsLoader docValues;

SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(String name, String offsetsFieldName) {
this(name, offsetsFieldName, Function.identity());
}

SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(String name, String offsetsFieldName, Function<BytesRef, BytesRef> converter) {
this.name = Objects.requireNonNull(name);
this.offsetsFieldName = Objects.requireNonNull(offsetsFieldName);
this.converter = Objects.requireNonNull(converter);
}

@Override
Expand All @@ -47,7 +54,7 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
SortedSetDocValues valueDocValues = DocValues.getSortedSet(leafReader, name);
SortedDocValues offsetDocValues = DocValues.getSorted(leafReader, offsetsFieldName);

return docValues = new DocValuesWithOffsetsLoader(valueDocValues, offsetDocValues);
return docValues = new DocValuesWithOffsetsLoader(valueDocValues, offsetDocValues, converter);
}

@Override
Expand Down Expand Up @@ -78,15 +85,21 @@ public void write(XContentBuilder b) throws IOException {
static final class DocValuesWithOffsetsLoader implements DocValuesLoader {
private final SortedDocValues offsetDocValues;
private final SortedSetDocValues valueDocValues;
private final Function<BytesRef, BytesRef> converter;
private final ByteArrayStreamInput scratch = new ByteArrayStreamInput();

private boolean hasValue;
private boolean hasOffset;
private int[] offsetToOrd;

DocValuesWithOffsetsLoader(SortedSetDocValues valueDocValues, SortedDocValues offsetDocValues) {
DocValuesWithOffsetsLoader(
SortedSetDocValues valueDocValues,
SortedDocValues offsetDocValues,
Function<BytesRef, BytesRef> converter
) {
this.valueDocValues = valueDocValues;
this.offsetDocValues = offsetDocValues;
this.converter = converter;
}

@Override
Expand Down Expand Up @@ -146,7 +159,7 @@ public void write(XContentBuilder b) throws IOException {

long ord = ords[offset];
BytesRef c = valueDocValues.lookupOrd(ord);
// This is keyword specific and needs to be updated once support is added for other field types:
c = converter.apply(c);
b.utf8Value(c.bytes, c.offset, c.length);
}
} else if (offsetToOrd != null) {
Expand All @@ -158,6 +171,7 @@ public void write(XContentBuilder b) throws IOException {
} else {
for (int i = 0; i < valueDocValues.docValueCount(); i++) {
BytesRef c = valueDocValues.lookupOrd(valueDocValues.nextOrd());
c = converter.apply(c);
b.utf8Value(c.bytes, c.offset, c.length);
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.mapper;

import org.elasticsearch.common.network.NetworkAddress;

import java.util.ArrayList;
import java.util.List;

import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;

public class IPSyntheticSourceNativeArrayIntegrationTests extends NativeArrayIntegrationTestCase {

    /** Field type under test: {@code ip}. */
    @Override
    protected String getFieldTypeName() {
        return "ip";
    }

    /** Returns a random IP address rendered in its canonical string form. */
    @Override
    protected String getRandomValue() {
        return NetworkAddress.format(randomIp(true));
    }

    public void testSynthesizeArray() throws Exception {
        // Arrays mixing regular values and nulls in varying order, to exercise offset bookkeeping.
        Object[][] rows = {
            { "192.168.1.4", "192.168.1.3", null, "192.168.1.2", null, "192.168.1.1" },
            { null, "192.168.1.2", null, "192.168.1.1" },
            { null },
            { null, null, null },
            { "192.168.1.3", "192.168.1.2", "192.168.1.1" } };
        verifySyntheticArray(rows);
    }

    public void testSynthesizeArrayIgnoreMalformed() throws Exception {
        var fieldMapping = jsonBuilder().startObject()
            .startObject("properties")
            .startObject("field")
            .field("type", "ip")
            .field("ignore_malformed", true)
            .endObject()
            .endObject()
            .endObject();
        // Malformed entries are deliberately placed at the end of each array: ignored values are
        // always synthesized after the regular ones, which keeps the expected output simple.
        Object[][] rows = {
            { null, "192.168.1.1", "192.168.1.2", "192.168.1.3", "192.168.1.4", null, "malformed" },
            { "192.168.1.1", "192.168.1.2", "malformed" },
            { "192.168.1.1", "192.168.1.1", "malformed" },
            { null, null, null, "malformed" },
            { "192.168.1.3", "192.168.1.3", "192.168.1.1", "malformed" } };
        verifySyntheticArray(rows, fieldMapping, "_id", "field._ignore_malformed");
    }

    public void testSynthesizeObjectArray() throws Exception {
        // Each document holds several object-array entries; order within each array must survive synthesis.
        List<List<Object[]>> docs = new ArrayList<>();
        docs.add(
            new ArrayList<>(
                List.of(
                    new Object[] { "192.168.1.3", "192.168.1.2", "192.168.1.1" },
                    new Object[] { "192.168.1.110", "192.168.1.109", "192.168.1.111" },
                    new Object[] { "192.168.1.2", "192.168.1.2", "192.168.1.1" }
                )
            )
        );
        docs.add(
            new ArrayList<>(
                List.of(
                    new Object[] { "192.168.1.9", "192.168.1.7", "192.168.1.5" },
                    new Object[] { "192.168.1.2", "192.168.1.4", "192.168.1.6" },
                    new Object[] { "192.168.1.7", "192.168.1.6", "192.168.1.5" }
                )
            )
        );
        verifySyntheticObjectArray(docs);
    }

    public void testSynthesizeArrayInObjectField() throws Exception {
        // One array-valued leaf nested inside an object field, one array per document.
        List<Object[]> docs = new ArrayList<>(
            List.of(
                new Object[] { "192.168.1.254", "192.168.1.253", "192.168.1.252" },
                new Object[] { "192.168.1.112", "192.168.1.113", "192.168.1.114" },
                new Object[] { "192.168.1.3", "192.168.1.2", "192.168.1.1" },
                new Object[] { "192.168.1.9", "192.168.1.7", "192.168.1.5" },
                new Object[] { "192.168.1.2", "192.168.1.4", "192.168.1.6" },
                new Object[] { "192.168.1.7", "192.168.1.6", "192.168.1.5" }
            )
        );
        verifySyntheticArrayInObject(docs);
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -439,4 +439,9 @@ public void execute() {
protected Function<Object, Object> loadBlockExpected() {
    // Decode the doc-values byte representation back into the canonical address string.
    // deepCopyOf gives an array holding exactly the value's bytes, as decode expects.
    return value -> {
        byte[] encoded = BytesRef.deepCopyOf((BytesRef) value).bytes;
        return InetAddresses.toAddrString(InetAddressPoint.decode(encoded));
    };
}

@Override
protected String randomSyntheticSourceKeep() {
// Pin synthetic_source_keep to "all" rather than picking randomly; presumably "arrays"
// is excluded because ip fields now store array offsets natively (no ignored-source
// fallback), which would change what this test exercises — TODO confirm intent.
return "all";
}
}
Loading

0 comments on commit 62a3b50

Please sign in to comment.