Skip to content

Commit

Permalink
Allow efficient can_match phases on frozen indices (#35431)
Browse files Browse the repository at this point in the history
This change adds a special caching reader that caches all relevant
values for a range query to rewrite correctly in a can_match phase
without actually opening the underlying directory reader. This
allows frozen indices to be filtered with can_match and in-turn
searched with wildcards in an efficient way since it allows us to
exclude shards that won't match based on their date-ranges without
opening their directory readers.

Relates to #34352
Depends on #34357
  • Loading branch information
s1monw committed Nov 13, 2018
1 parent eb0616b commit e09c28e
Show file tree
Hide file tree
Showing 6 changed files with 505 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -647,16 +647,16 @@ final SearchContext createContext(ShardSearchRequest request) throws IOException

public DefaultSearchContext createSearchContext(ShardSearchRequest request, TimeValue timeout)
throws IOException {
return createSearchContext(request, timeout, true);
return createSearchContext(request, timeout, true, "search");
}
private DefaultSearchContext createSearchContext(ShardSearchRequest request, TimeValue timeout,
boolean assertAsyncActions)
boolean assertAsyncActions, String source)
throws IOException {
IndexService indexService = indicesService.indexServiceSafe(request.shardId().getIndex());
IndexShard indexShard = indexService.getShard(request.shardId().getId());
SearchShardTarget shardTarget = new SearchShardTarget(clusterService.localNode().getId(),
indexShard.shardId(), request.getClusterAlias(), OriginalIndices.NONE);
Engine.Searcher engineSearcher = indexShard.acquireSearcher("search");
Engine.Searcher engineSearcher = indexShard.acquireSearcher(source);

final DefaultSearchContext searchContext = new DefaultSearchContext(idGenerator.incrementAndGet(), request, shardTarget,
engineSearcher, clusterService, indexService, indexShard, bigArrays, threadPool.estimatedTimeInMillisCounter(), timeout,
Expand Down Expand Up @@ -1023,7 +1023,7 @@ public AliasFilter buildAliasFilter(ClusterState state, String index, String...
*/
public boolean canMatch(ShardSearchRequest request) throws IOException {
assert request.searchType() == SearchType.QUERY_THEN_FETCH : "unexpected search type: " + request.searchType();
try (DefaultSearchContext context = createSearchContext(request, defaultSearchTimeout, false)) {
try (DefaultSearchContext context = createSearchContext(request, defaultSearchTimeout, false, "can_match")) {
SearchSourceBuilder source = context.request().source();
if (canRewriteToMatchNone(source)) {
QueryBuilder queryBuilder = source.query();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ReferenceManager;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.SuppressForbidden;
import org.elasticsearch.common.lucene.Lucene;
Expand All @@ -40,6 +41,7 @@
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.function.Function;

/**
Expand All @@ -66,9 +68,23 @@ public final class FrozenEngine extends ReadOnlyEngine {
public static final Setting<Boolean> INDEX_FROZEN = Setting.boolSetting("index.frozen", false, Setting.Property.IndexScope,
Setting.Property.PrivateIndex);
private volatile DirectoryReader lastOpenedReader;
private final DirectoryReader canMatchReader;

public FrozenEngine(EngineConfig config) {
    super(config, null, null, true, Function.identity());

    // Open the directory reader once up-front and snapshot its per-segment point
    // value metadata into a RewriteCachingDirectoryReader. That cached reader is
    // later handed out for "can_match" searches so range queries can be rewritten
    // (matched / excluded) without re-opening the real directory reader.
    boolean success = false;
    Directory directory = store.directory();
    try (DirectoryReader reader = DirectoryReader.open(directory)) {
        canMatchReader = new RewriteCachingDirectoryReader(directory, reader.leaves());
        success = true;
    } catch (IOException e) {
        // Constructors cannot declare IOException here; surface it unchecked.
        throw new UncheckedIOException(e);
    } finally {
        if (success == false) {
            // super() already ran, so the engine holds resources that must be
            // released before the failed constructor propagates its exception.
            // NOTE(review): the fresh CountDownLatch(1) is never awaited in this
            // frame — presumably closeNoLock only counts it down; confirm against
            // the superclass implementation.
            closeNoLock("failed on construction", new CountDownLatch(1));
        }
    }
}

@Override
Expand Down Expand Up @@ -193,6 +209,7 @@ public Searcher acquireSearcher(String source, SearcherScope scope) throws Engin
case "segments_stats":
case "completion_stats":
case "refresh_needed":
case "can_match": // special case for can_match phase - we use the cached point values reader
maybeOpenReader = false;
break;
default:
Expand All @@ -205,6 +222,10 @@ public Searcher acquireSearcher(String source, SearcherScope scope) throws Engin
// we just hand out a searcher on top of an empty reader that we opened for the ReadOnlyEngine in the #open(IndexCommit)
method. this is the case when we don't have a reader open right now and we get a stats call, or any other call that falls in
// the category that doesn't trigger a reopen
if ("can_match".equals(source)) {
canMatchReader.incRef();
return new Searcher(source, new IndexSearcher(canMatchReader), canMatchReader::decRef);
}
return super.acquireSearcher(source, scope);
} else {
try {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.index.engine;

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafMetaData;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Terms;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;

import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
* This special DirectoryReader is used to handle can_match requests against frozen indices.
It caches all relevant point value data for every point value field, i.e. min/max packed values etc.,
to hold enough information to rewrite a date range query and make a decision if an index can match or not.
* This allows frozen indices to be searched with wildcards in a very efficient way without opening a reader on them.
*/
final class RewriteCachingDirectoryReader extends DirectoryReader {

    /**
     * Builds a snapshot reader over the given directory.
     *
     * @param directory      the directory the original reader was opened on; only used as an identifier,
     *                       this reader never touches it again after construction
     * @param segmentReaders the leaves of a real, open DirectoryReader whose point value metadata is
     *                       copied onto the heap; the real reader can be closed afterwards
     * @throws IOException if reading the point value metadata from a leaf fails
     */
    RewriteCachingDirectoryReader(Directory directory, List<LeafReaderContext> segmentReaders) throws
        IOException {
        super(directory, wrap(segmentReaders));
    }

    // Eagerly wraps every incoming leaf into a RewriteCachingLeafReader, which copies the
    // per-field point value statistics (min/max packed values, sizes, dimensions) up-front.
    private static LeafReader[] wrap(List<LeafReaderContext> readers) throws IOException {
        LeafReader[] wrapped = new LeafReader[readers.size()];
        int i = 0;
        for (LeafReaderContext ctx : readers) {
            LeafReader wrap = new RewriteCachingLeafReader(ctx.reader());
            wrapped[i++] = wrap;
        }
        return wrapped;
    }

    // All of the methods below are deliberately unsupported: this reader is an immutable
    // in-memory snapshot and is never reopened, versioned, committed, or used as a cache key.

    @Override
    protected DirectoryReader doOpenIfChanged() {
        throw new UnsupportedOperationException();
    }

    @Override
    protected DirectoryReader doOpenIfChanged(IndexCommit commit) {
        throw new UnsupportedOperationException();
    }

    @Override
    protected DirectoryReader doOpenIfChanged(IndexWriter writer, boolean applyAllDeletes) {
        throw new UnsupportedOperationException();
    }

    @Override
    public long getVersion() {
        throw new UnsupportedOperationException();
    }

    @Override
    public boolean isCurrent() {
        throw new UnsupportedOperationException();
    }

    @Override
    public IndexCommit getIndexCommit() {
        throw new UnsupportedOperationException();
    }

    // NOTE(review): closing is unsupported, so decRef-ing the last reference would throw —
    // presumably the owning engine always retains a reference for the snapshot's lifetime; confirm.
    @Override
    protected void doClose() {
        throw new UnsupportedOperationException();
    }

    @Override
    public CacheHelper getReaderCacheHelper() {
        throw new UnsupportedOperationException();
    }

    // Except for a couple of selected methods, everything else will
    // throw a UOE which causes a can_match phase to just move to the actual phase
    // later, such that we never falsely exclude a shard if something else is used to rewrite.
    private static final class RewriteCachingLeafReader extends LeafReader {

        private final int maxDoc;
        private final int numDocs;
        // per-field snapshot of point value statistics; a field may be absent if the
        // original leaf had no point values for it
        private final Map<String, PointValues> pointValuesMap;
        private final FieldInfos fieldInfos;

        // Copies doc counts, field infos and — for every field with indexed points — the
        // point value statistics out of the original leaf so it can be closed afterwards.
        private RewriteCachingLeafReader(LeafReader original) throws IOException {
            this.maxDoc = original.maxDoc();
            this.numDocs = original.numDocs();
            fieldInfos = original.getFieldInfos();
            Map<String, PointValues> valuesMap = new HashMap<>();
            for (FieldInfo info : fieldInfos) {
                if (info.getPointIndexDimensionCount() != 0) {
                    PointValues pointValues = original.getPointValues(info.name);
                    if (pointValues != null) { // might not be in this reader
                        // capture everything a range-query rewrite needs into effectively-final locals
                        byte[] minPackedValue = pointValues.getMinPackedValue();
                        byte[] maxPackedValue = pointValues.getMaxPackedValue();
                        int numDimensions = pointValues.getNumIndexDimensions();
                        int bytesPerDimension = pointValues.getBytesPerDimension();
                        int numDataDimensions = pointValues.getNumDataDimensions();
                        long size = pointValues.size();
                        int docCount = pointValues.getDocCount();
                        // a PointValues view that only serves the cached statistics; actual
                        // traversal (intersect/estimate) is unsupported on purpose
                        valuesMap.put(info.name, new PointValues() {
                            @Override
                            public void intersect(IntersectVisitor visitor) {
                                throw new UnsupportedOperationException();
                            }

                            @Override
                            public long estimatePointCount(IntersectVisitor visitor) {
                                throw new UnsupportedOperationException();
                            }

                            @Override
                            public byte[] getMinPackedValue() {
                                return minPackedValue;
                            }

                            @Override
                            public byte[] getMaxPackedValue() {
                                return maxPackedValue;
                            }

                            @Override
                            public int getNumDataDimensions() {
                                return numDataDimensions;
                            }

                            @Override
                            public int getNumIndexDimensions() {
                                return numDimensions;
                            }

                            @Override
                            public int getBytesPerDimension() {
                                return bytesPerDimension;
                            }

                            @Override
                            public long size() {
                                return size;
                            }

                            @Override
                            public int getDocCount() {
                                return docCount;
                            }
                        });
                    }
                }
            }
            pointValuesMap = valuesMap;
        }

        @Override
        public CacheHelper getCoreCacheHelper() {
            throw new UnsupportedOperationException();
        }

        @Override
        public Terms terms(String field) {
            throw new UnsupportedOperationException();
        }

        @Override
        public NumericDocValues getNumericDocValues(String field) {
            throw new UnsupportedOperationException();
        }

        @Override
        public BinaryDocValues getBinaryDocValues(String field) {
            throw new UnsupportedOperationException();
        }

        @Override
        public SortedDocValues getSortedDocValues(String field) {
            throw new UnsupportedOperationException();
        }

        @Override
        public SortedNumericDocValues getSortedNumericDocValues(String field) {
            throw new UnsupportedOperationException();
        }

        @Override
        public SortedSetDocValues getSortedSetDocValues(String field) {
            throw new UnsupportedOperationException();
        }

        @Override
        public NumericDocValues getNormValues(String field) {
            throw new UnsupportedOperationException();
        }

        @Override
        public FieldInfos getFieldInfos() {
            return fieldInfos;
        }

        @Override
        public Bits getLiveDocs() {
            throw new UnsupportedOperationException();
        }

        // returns the cached statistics snapshot, or null if the field had no points
        @Override
        public PointValues getPointValues(String field) {
            return pointValuesMap.get(field);
        }

        // nothing to verify — all data this reader serves lives on the heap
        @Override
        public void checkIntegrity() {
        }

        @Override
        public LeafMetaData getMetaData() {
            throw new UnsupportedOperationException();
        }

        @Override
        public Fields getTermVectors(int docID) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int numDocs() {
            return numDocs;
        }

        @Override
        public int maxDoc() {
            return maxDoc;
        }

        @Override
        public void document(int docID, StoredFieldVisitor visitor) {
            throw new UnsupportedOperationException();
        }

        // no underlying resources to release
        @Override
        protected void doClose() {
        }

        // null: this leaf must not be used as a cache key
        @Override
        public CacheHelper getReaderCacheHelper() {
            return null;
        }
    }
}
Loading

0 comments on commit e09c28e

Please sign in to comment.