Skip to content

Commit

Permalink
Fix counted_keyword support in arrays of objects (elastic#121558)
Browse files Browse the repository at this point in the history
As a result of the randomized testing enabled in elastic#121462, we found that
we currently fail to parse documents with arrays of objects containing
counted_keyword fields. This PR fixes this issue by using a custom
docvalues field to store the count instead of the built-in lucene
BinaryDocValues. This custom CountsBinaryDocValuesField has logic to
handle multiple values for the same field.
  • Loading branch information
jordan-powers authored Feb 4, 2025
1 parent 9b6af6a commit 13b743c
Show file tree
Hide file tree
Showing 3 changed files with 257 additions and 166 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

package org.elasticsearch.xpack.countedkeyword;

import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
Expand All @@ -19,6 +18,7 @@
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
Expand All @@ -31,6 +31,7 @@
import org.elasticsearch.index.fielddata.plain.AbstractIndexOrdinalsFieldData;
import org.elasticsearch.index.fielddata.plain.AbstractLeafOrdinalsFieldData;
import org.elasticsearch.index.mapper.BinaryFieldMapper;
import org.elasticsearch.index.mapper.CustomDocValuesField;
import org.elasticsearch.index.mapper.DocumentParserContext;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
Expand Down Expand Up @@ -434,15 +435,17 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
return;
}

int i = 0;
int[] counts = new int[values.size()];
for (Map.Entry<String, Integer> value : values.entrySet()) {
context.doc().add(new KeywordFieldMapper.KeywordField(fullPath(), new BytesRef(value.getKey()), fieldType));
counts[i++] = value.getValue();
for (String value : values.keySet()) {
context.doc().add(new KeywordFieldMapper.KeywordField(fullPath(), new BytesRef(value), fieldType));
}
CountsBinaryDocValuesField field = (CountsBinaryDocValuesField) context.doc().getByKey(countFieldMapper.fieldType().name());
if (field == null) {
field = new CountsBinaryDocValuesField(countFieldMapper.fieldType().name());
field.add(values);
context.doc().addWithKey(countFieldMapper.fieldType().name(), field);
} else {
field.add(values);
}
BytesStreamOutput streamOutput = new BytesStreamOutput();
streamOutput.writeVIntArray(counts);
context.doc().add(new BinaryDocValuesField(countFieldMapper.fullPath(), streamOutput.bytes().toBytesRef()));
}

private void parseArray(DocumentParserContext context, SortedMap<String, Integer> values) throws IOException {
Expand Down Expand Up @@ -509,4 +512,37 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
);
}

private class CountsBinaryDocValuesField extends CustomDocValuesField {
private final SortedMap<String, Integer> counts;

CountsBinaryDocValuesField(String name) {
super(name);
counts = new TreeMap<>();
}

public void add(SortedMap<String, Integer> newCounts) {
for (Map.Entry<String, Integer> currCount : newCounts.entrySet()) {
this.counts.put(currCount.getKey(), this.counts.getOrDefault(currCount.getKey(), 0) + currCount.getValue());
}
}

@Override
public BytesRef binaryValue() {
try {
int maxBytesPerVInt = 5;
int bytesSize = (counts.size() + 1) * maxBytesPerVInt;
BytesStreamOutput out = new BytesStreamOutput(bytesSize);
int countsArr[] = new int[counts.size()];
int i = 0;
for (Integer currCount : counts.values()) {
countsArr[i++] = currCount;
}
out.writeVIntArray(countsArr);
return out.bytes().toBytesRef();
} catch (IOException e) {
throw new ElasticsearchException("Failed to get binary value", e);
}
}
}

}
Loading

0 comments on commit 13b743c

Please sign in to comment.