-
Notifications
You must be signed in to change notification settings - Fork 25.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Allow efficient can_match phases on frozen indices (#35431)
This change adds a special caching reader that caches all relevant values for a range query to rewrite correctly in a can_match phase without actually opening the underlying directory reader. This allows frozen indices to be filtered with can_match and in turn searched with wildcards in an efficient way, since it allows us to exclude shards that won't match based on their date ranges without opening their directory readers. Relates to #34352 Depends on #34357
- Loading branch information
Showing
6 changed files
with
506 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
267 changes: 267 additions & 0 deletions
267
...ugin/core/src/main/java/org/elasticsearch/index/engine/RewriteCachingDirectoryReader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,267 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License; | ||
* you may not use this file except in compliance with the Elastic License. | ||
*/ | ||
package org.elasticsearch.index.engine; | ||
|
||
import org.apache.lucene.index.BinaryDocValues; | ||
import org.apache.lucene.index.DirectoryReader; | ||
import org.apache.lucene.index.FieldInfo; | ||
import org.apache.lucene.index.FieldInfos; | ||
import org.apache.lucene.index.Fields; | ||
import org.apache.lucene.index.IndexCommit; | ||
import org.apache.lucene.index.IndexWriter; | ||
import org.apache.lucene.index.LeafMetaData; | ||
import org.apache.lucene.index.LeafReader; | ||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.apache.lucene.index.NumericDocValues; | ||
import org.apache.lucene.index.PointValues; | ||
import org.apache.lucene.index.SortedDocValues; | ||
import org.apache.lucene.index.SortedNumericDocValues; | ||
import org.apache.lucene.index.SortedSetDocValues; | ||
import org.apache.lucene.index.StoredFieldVisitor; | ||
import org.apache.lucene.index.Terms; | ||
import org.apache.lucene.store.Directory; | ||
import org.apache.lucene.util.Bits; | ||
|
||
import java.io.IOException; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
/** | ||
* This special DirectoryReader is used to handle can_match requests against frozen indices. | ||
* It' caches all relevant point value data for every point value field ie. min/max packed values etc. | ||
* to hold enough information to rewrite a date range query and make a decisions if an index can match or not. | ||
* This allows frozen indices to be searched with wildcards in a very efficient way without opening a reader on them. | ||
*/ | ||
final class RewriteCachingDirectoryReader extends DirectoryReader { | ||
|
||
RewriteCachingDirectoryReader(Directory directory, List<LeafReaderContext> segmentReaders) throws | ||
IOException { | ||
super(directory, wrap(segmentReaders)); | ||
} | ||
|
||
private static LeafReader[] wrap(List<LeafReaderContext> readers) throws IOException { | ||
LeafReader[] wrapped = new LeafReader[readers.size()]; | ||
int i = 0; | ||
for (LeafReaderContext ctx : readers) { | ||
LeafReader wrap = new RewriteCachingLeafReader(ctx.reader()); | ||
wrapped[i++] = wrap; | ||
} | ||
return wrapped; | ||
} | ||
|
||
@Override | ||
protected DirectoryReader doOpenIfChanged() { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
protected DirectoryReader doOpenIfChanged(IndexCommit commit) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
protected DirectoryReader doOpenIfChanged(IndexWriter writer, boolean applyAllDeletes) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public long getVersion() { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public boolean isCurrent() { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public IndexCommit getIndexCommit() { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
protected void doClose() { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public CacheHelper getReaderCacheHelper() { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
// except of a couple of selected methods everything else will | ||
// throw a UOE which causes a can_match phase to just move to the actual phase | ||
// later such that we never false exclude a shard if something else is used to rewrite. | ||
private static final class RewriteCachingLeafReader extends LeafReader { | ||
|
||
private final int maxDoc; | ||
private final int numDocs; | ||
private final Map<String, PointValues> pointValuesMap; | ||
private final FieldInfos fieldInfos; | ||
|
||
private RewriteCachingLeafReader(LeafReader original) throws IOException { | ||
this.maxDoc = original.maxDoc(); | ||
this.numDocs = original.numDocs(); | ||
fieldInfos = original.getFieldInfos(); | ||
Map<String, PointValues> valuesMap = new HashMap<>(); | ||
for (FieldInfo info : fieldInfos) { | ||
if (info.getPointIndexDimensionCount() != 0) { | ||
PointValues pointValues = original.getPointValues(info.name); | ||
if (pointValues != null) { // might not be in this reader | ||
byte[] minPackedValue = pointValues.getMinPackedValue(); | ||
byte[] maxPackedValue = pointValues.getMaxPackedValue(); | ||
int numDimensions = pointValues.getNumIndexDimensions(); | ||
int bytesPerDimension = pointValues.getBytesPerDimension(); | ||
int numDataDimensions = pointValues.getNumDataDimensions(); | ||
long size = pointValues.size(); | ||
int docCount = pointValues.getDocCount(); | ||
valuesMap.put(info.name, new PointValues() { | ||
@Override | ||
public void intersect(IntersectVisitor visitor) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public long estimatePointCount(IntersectVisitor visitor) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public byte[] getMinPackedValue() { | ||
return minPackedValue; | ||
} | ||
|
||
@Override | ||
public byte[] getMaxPackedValue() { | ||
return maxPackedValue; | ||
} | ||
|
||
@Override | ||
public int getNumDataDimensions() { | ||
return numDataDimensions; | ||
} | ||
|
||
@Override | ||
public int getNumIndexDimensions() { | ||
return numDimensions; | ||
} | ||
|
||
@Override | ||
public int getBytesPerDimension() { | ||
return bytesPerDimension; | ||
} | ||
|
||
@Override | ||
public long size() { | ||
return size; | ||
} | ||
|
||
@Override | ||
public int getDocCount() { | ||
return docCount; | ||
} | ||
}); | ||
} | ||
} | ||
} | ||
pointValuesMap = valuesMap; | ||
} | ||
|
||
@Override | ||
public CacheHelper getCoreCacheHelper() { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public Terms terms(String field) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public NumericDocValues getNumericDocValues(String field) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public BinaryDocValues getBinaryDocValues(String field) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public SortedDocValues getSortedDocValues(String field) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public SortedNumericDocValues getSortedNumericDocValues(String field) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public SortedSetDocValues getSortedSetDocValues(String field) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public NumericDocValues getNormValues(String field) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public FieldInfos getFieldInfos() { | ||
return fieldInfos; | ||
} | ||
|
||
@Override | ||
public Bits getLiveDocs() { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public PointValues getPointValues(String field) { | ||
return pointValuesMap.get(field); | ||
} | ||
|
||
@Override | ||
public void checkIntegrity() { | ||
} | ||
|
||
@Override | ||
public LeafMetaData getMetaData() { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public Fields getTermVectors(int docID) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public int numDocs() { | ||
return numDocs; | ||
} | ||
|
||
@Override | ||
public int maxDoc() { | ||
return maxDoc; | ||
} | ||
|
||
@Override | ||
public void document(int docID, StoredFieldVisitor visitor) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
protected void doClose() { | ||
} | ||
|
||
@Override | ||
public CacheHelper getReaderCacheHelper() { | ||
return null; | ||
} | ||
} | ||
} |
Oops, something went wrong.