Skip to content

Commit

Permalink
tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
martijnvg committed Feb 27, 2025
1 parent b1e6908 commit fd3e74f
Show file tree
Hide file tree
Showing 15 changed files with 424 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,7 @@ public Builder(String name, boolean ignoreMalformedByDefault, boolean coerceByDe
this.indexMode = indexMode;
this.indexed = Parameter.indexParam(m -> toType(m).indexed, () -> {
if (indexMode == IndexMode.TIME_SERIES) {
var metricType = getMetric().getValue();
return metricType != TimeSeriesParams.MetricType.COUNTER && metricType != TimeSeriesParams.MetricType.GAUGE;
return false;
} else {
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,19 @@ private <DV extends DocIdSetIterator> DV iterateDocValues(
return dv;
}

private <DV extends DocIdSetIterator> DV iterateDocValues2(
    int maxDocs,
    CheckedSupplier<DV, IOException> dvReader,
    CheckedConsumer<DV, IOException> valueAccessor
) throws IOException {
    // Open the doc-values iterator and visit every document it covers, invoking
    // valueAccessor on each so the underlying bytes are actually read.
    final DV docValues = dvReader.get();
    int docId = docValues.nextDoc();
    // nextDoc() returns NO_MORE_DOCS (Integer.MAX_VALUE) when exhausted, which
    // always fails the < maxDocs bound and terminates the loop.
    while (docId < maxDocs) {
        // Record one event per document so a long-running scan can be cancelled.
        cancellationChecker.logEvent();
        valueAccessor.accept(docValues);
        docId = docValues.nextDoc();
    }
    // Hand the (now advanced) iterator back so callers can inspect it further.
    return docValues;
}

void analyzeDocValues(SegmentReader reader, IndexDiskUsageStats stats) throws IOException {
if (reader.getDocValuesReader() == null) {
return;
Expand All @@ -258,37 +271,51 @@ void analyzeDocValues(SegmentReader reader, IndexDiskUsageStats stats) throws IO
cancellationChecker.checkForCancellation();
directory.resetBytesRead();
switch (dvType) {
case NUMERIC -> iterateDocValues(maxDocs, () -> docValuesReader.getNumeric(field), NumericDocValues::longValue);
case SORTED_NUMERIC -> iterateDocValues(maxDocs, () -> docValuesReader.getSortedNumeric(field), dv -> {
case NUMERIC -> iterateDocValues2(maxDocs, () -> docValuesReader.getNumeric(field), NumericDocValues::longValue);
case SORTED_NUMERIC -> iterateDocValues2(maxDocs, () -> docValuesReader.getSortedNumeric(field), dv -> {
for (int i = 0; i < dv.docValueCount(); i++) {
cancellationChecker.logEvent();
dv.nextValue();
}
});
case BINARY -> iterateDocValues(maxDocs, () -> docValuesReader.getBinary(field), BinaryDocValues::binaryValue);
case BINARY -> iterateDocValues2(maxDocs, () -> docValuesReader.getBinary(field), BinaryDocValues::binaryValue);
case SORTED -> {
SortedDocValues sorted = iterateDocValues(maxDocs, () -> docValuesReader.getSorted(field), SortedDocValues::ordValue);
SortedDocValues sorted = iterateDocValues2(maxDocs, () -> docValuesReader.getSorted(field), SortedDocValues::ordValue);
if (sorted.getValueCount() > 0) {
sorted.lookupOrd(0);
sorted.lookupOrd(sorted.getValueCount() - 1);
for (int ord = 0; ord < sorted.getValueCount(); ord++) {
cancellationChecker.logEvent();
sorted.lookupOrd(ord);
}
}
}
case SORTED_SET -> {
SortedSetDocValues sortedSet = iterateDocValues(maxDocs, () -> docValuesReader.getSortedSet(field), dv -> {
SortedSetDocValues sortedSet = iterateDocValues2(maxDocs, () -> docValuesReader.getSortedSet(field), dv -> {
for (int i = 0; i < dv.docValueCount(); i++) {
cancellationChecker.logEvent();
}
});
if (sortedSet.getValueCount() > 0) {
sortedSet.lookupOrd(0);
sortedSet.lookupOrd(sortedSet.getValueCount() - 1);
for (long ord = 0; ord < sortedSet.getValueCount(); ord++) {
cancellationChecker.logEvent();
sortedSet.lookupOrd(ord);
}
}
}
default -> {
assert false : "Unknown docValues type [" + dvType + "]";
throw new IllegalStateException("Unknown docValues type [" + dvType + "]");
}
}
switch (field.docValuesSkipIndexType()) {
case NONE -> {}
case RANGE -> {
var skipper = docValuesReader.getSkipper(field);
while(skipper.maxDocID(0) != DocIdSetIterator.NO_MORE_DOCS) {
skipper.advance(skipper.maxDocID(0) + 1);
}
}
default -> throw new IllegalStateException("Unknown skipper [" + field.docValuesSkipIndexType() + "]");
}
stats.addDocValues(field.name, directory.getBytesRead());
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
package org.elasticsearch.common.lucene.uid;

import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
Expand All @@ -24,6 +26,7 @@
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndSeqNo;
import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndVersion;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
import org.elasticsearch.index.mapper.VersionFieldMapper;
Expand Down Expand Up @@ -95,10 +98,19 @@ final class PerThreadIDVersionAndSeqNoLookup {
// Also check for the existence of the timestamp field, because sometimes a segment can only contain tombstone documents,
// which don't have any mapped fields (also not the timestamp field) and just some meta fields like _id, _seq_no etc.
if (loadTimestampRange && reader.getFieldInfos().fieldInfo(DataStream.TIMESTAMP_FIELD_NAME) != null) {
PointValues tsPointValues = reader.getPointValues(DataStream.TIMESTAMP_FIELD_NAME);
assert tsPointValues != null : "no timestamp field for reader:" + reader + " and parent:" + reader.getContext().parent.reader();
minTimestamp = LongPoint.decodeDimension(tsPointValues.getMinPackedValue(), 0);
maxTimestamp = LongPoint.decodeDimension(tsPointValues.getMaxPackedValue(), 0);
var fieldInfo = reader.getFieldInfos().fieldInfo(DataStream.TIMESTAMP_FIELD_NAME);
if (fieldInfo.docValuesSkipIndexType() == DocValuesSkipIndexType.RANGE) {
DocValuesSkipper skipper = reader.getDocValuesSkipper(DataStream.TIMESTAMP_FIELD_NAME);
assert skipper != null : "no skipper for reader:" + reader + " and parent:" + reader.getContext().parent.reader();
minTimestamp = skipper.minValue();
maxTimestamp = skipper.maxValue();
} else {
PointValues tsPointValues = reader.getPointValues(DataStream.TIMESTAMP_FIELD_NAME);
assert tsPointValues != null
: "no timestamp field for reader:" + reader + " and parent:" + reader.getContext().parent.reader();
minTimestamp = LongPoint.decodeDimension(tsPointValues.getMinPackedValue(), 0);
maxTimestamp = LongPoint.decodeDimension(tsPointValues.getMaxPackedValue(), 0);
}
} else {
minTimestamp = 0;
maxTimestamp = Long.MAX_VALUE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@ public boolean isES87TSDBCodecEnabled() {
public static final FeatureFlag DOC_VALUES_SKIPPER = new FeatureFlag("doc_values_skipper");
public static final Setting<Boolean> USE_DOC_VALUES_SKIPPER = Setting.boolSetting(
"index.mapping.use_doc_values_skipper",
IndexSettings.DOC_VALUES_SKIPPER.isEnabled(),
false,
Property.IndexScope,
Property.Final
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.apache.lucene.util.compress.LZ4;
import org.apache.lucene.util.packed.DirectMonotonicReader;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.core.IOUtils;

import java.io.IOException;
Expand Down Expand Up @@ -826,6 +827,7 @@ private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
if (info == null) {
throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
}
// System.out.println("name: " + info.name);
byte type = meta.readByte();
if (info.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
skippers.put(info.name, readDocValueSkipperMeta(meta));
Expand Down Expand Up @@ -885,6 +887,14 @@ private static void readNumeric(IndexInput meta, NumericEntry entry) throws IOEx
entry.valuesOffset = meta.readLong();
entry.valuesLength = meta.readLong();
}

// System.out.printf(
// "docsWithFieldLength=%s, numValues=%d, indexLength=%s, valuesLength=%s\n",
// ByteSizeValue.ofBytes(entry.docsWithFieldLength),
// entry.numValues,
// ByteSizeValue.ofBytes(entry.indexLength),
// ByteSizeValue.ofBytes(entry.valuesLength)
// );
}

private BinaryEntry readBinary(IndexInput meta) throws IOException {
Expand Down Expand Up @@ -926,6 +936,14 @@ private static SortedNumericEntry readSortedNumeric(IndexInput meta, SortedNumer
entry.addressesMeta = DirectMonotonicReader.loadMeta(meta, entry.numDocsWithField + 1, blockShift);
entry.addressesLength = meta.readLong();
}
// System.out.printf(
// "docsWithFieldLength=%s, numValues=%d, indexLength=%s, valuesLength=%s, addressesLength=%s\n",
// ByteSizeValue.ofBytes(entry.docsWithFieldLength),
// entry.numValues,
// ByteSizeValue.ofBytes(entry.indexLength),
// ByteSizeValue.ofBytes(entry.valuesLength),
// ByteSizeValue.ofBytes(entry.addressesLength)
// );
return entry;
}

Expand All @@ -935,6 +953,17 @@ private SortedEntry readSorted(IndexInput meta) throws IOException {
readNumeric(meta, entry.ordsEntry);
entry.termsDictEntry = new TermsDictEntry();
readTermDict(meta, entry.termsDictEntry);
// System.out.printf(
// "docsWithFieldLength=%s, numValues=%d, indexLength=%s, valuesLength=%s, termsDictSize=%d, termsAddressesLength%s, termsIndexLength=%s, termsDataLength=%s\n",
// ByteSizeValue.ofBytes(entry.ordsEntry.valuesLength),
// entry.ordsEntry.numValues,
// ByteSizeValue.ofBytes(entry.ordsEntry.indexLength),
// ByteSizeValue.ofBytes(entry.ordsEntry.valuesLength),
// entry.termsDictEntry.termsDictSize,
// ByteSizeValue.ofBytes(entry.termsDictEntry.termsAddressesLength),
// ByteSizeValue.ofBytes(entry.termsDictEntry.termsIndexLength),
// ByteSizeValue.ofBytes(entry.termsDictEntry.termsDataLength)
// );
return entry;
}

Expand All @@ -944,6 +973,17 @@ private SortedSetEntry readSortedSet(IndexInput meta) throws IOException {
switch (multiValued) {
case 0: // singlevalued
entry.singleValueEntry = readSorted(meta);
// System.out.printf(
// "docsWithFieldLength=%s, numValues=%d, indexLength=%s, valuesLength=%s, termsDictSize=%d, termsAddressesLength%s, termsIndexLength=%s, termsDataLength=%s\n",
// ByteSizeValue.ofBytes(entry.singleValueEntry.ordsEntry.docsWithFieldLength),
// entry.singleValueEntry.ordsEntry.numValues,
// ByteSizeValue.ofBytes(entry.singleValueEntry.ordsEntry.indexLength),
// ByteSizeValue.ofBytes(entry.singleValueEntry.ordsEntry.valuesLength),
// entry.singleValueEntry.termsDictEntry.termsDictSize,
// ByteSizeValue.ofBytes(entry.singleValueEntry.termsDictEntry.termsAddressesLength),
// ByteSizeValue.ofBytes(entry.singleValueEntry.termsDictEntry.termsIndexLength),
// ByteSizeValue.ofBytes(entry.singleValueEntry.termsDictEntry.termsDataLength)
// );
return entry;
case 1: // multivalued
break;
Expand All @@ -954,6 +994,17 @@ private SortedSetEntry readSortedSet(IndexInput meta) throws IOException {
readSortedNumeric(meta, entry.ordsEntry);
entry.termsDictEntry = new TermsDictEntry();
readTermDict(meta, entry.termsDictEntry);
// System.out.printf(
// "docsWithFieldLength=%s, numValues=%d, indexLength=%s, valuesLength=%s, termsDictSize=%d, termsAddressesLength%s, termsIndexLength=%s, termsDataLength=%s\n",
// ByteSizeValue.ofBytes(entry.ordsEntry.valuesLength),
// entry.ordsEntry.numValues,
// ByteSizeValue.ofBytes(entry.ordsEntry.indexLength),
// ByteSizeValue.ofBytes(entry.ordsEntry.valuesLength),
// entry.termsDictEntry.termsDictSize,
// ByteSizeValue.ofBytes(entry.termsDictEntry.termsAddressesLength),
// ByteSizeValue.ofBytes(entry.termsDictEntry.termsIndexLength),
// ByteSizeValue.ofBytes(entry.termsDictEntry.termsDataLength)
// );
return entry;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ public DateFieldMapper build(MapperBuilderContext context) {
c.getIndexSettings().getMode(),
c.getIndexSettings().getIndexSortConfig(),
c.indexVersionCreated(),
IndexSettings.USE_DOC_VALUES_SKIPPER.get(c.getSettings())
true //IndexSettings.USE_DOC_VALUES_SKIPPER.get(c.getSettings())
);
});

Expand All @@ -471,7 +471,7 @@ public DateFieldMapper build(MapperBuilderContext context) {
c.getIndexSettings().getMode(),
c.getIndexSettings().getIndexSortConfig(),
c.indexVersionCreated(),
IndexSettings.USE_DOC_VALUES_SKIPPER.get(c.getSettings())
true //IndexSettings.USE_DOC_VALUES_SKIPPER.get(c.getSettings())
);
});

Expand Down Expand Up @@ -1055,7 +1055,7 @@ private static boolean shouldUseDocValuesSkipper(
return indexCreatedVersion.onOrAfter(IndexVersions.TIMESTAMP_DOC_VALUES_SPARSE_INDEX)
&& useDocValuesSkipper
&& hasDocValues
&& IndexMode.LOGSDB.equals(indexMode)
&& (indexMode == IndexMode.LOGSDB || indexMode == IndexMode.TIME_SERIES)
&& indexSortConfig != null
&& indexSortConfig.hasSortOnField(fullFieldName)
&& DataStreamTimestampFieldMapper.DEFAULT_PATH.equals(fullFieldName);
Expand Down
Loading

0 comments on commit fd3e74f

Please sign in to comment.