Skip to content

Commit

Permalink
Allow efficient can_match phases on frozen indices (#35431)
Browse files Browse the repository at this point in the history
This change adds a special caching reader that caches all relevant
values for a range query to rewrite correctly in a can_match phase
without actually opening the underlying directory reader. This
allows frozen indices to be filtered with can_match and in-turn
searched with wildcards in an efficient way since it allows us to
exclude shards that won't match based on their date-ranges without
opening their directory readers.

Relates to #34352
Depends on #34357
  • Loading branch information
s1monw committed Nov 13, 2018
1 parent eb0616b commit e09c28e
Show file tree
Hide file tree
Showing 6 changed files with 505 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -647,16 +647,16 @@ final SearchContext createContext(ShardSearchRequest request) throws IOException

public DefaultSearchContext createSearchContext(ShardSearchRequest request, TimeValue timeout)
throws IOException {
return createSearchContext(request, timeout, true);
return createSearchContext(request, timeout, true, "search");
}
private DefaultSearchContext createSearchContext(ShardSearchRequest request, TimeValue timeout,
boolean assertAsyncActions)
boolean assertAsyncActions, String source)
throws IOException {
IndexService indexService = indicesService.indexServiceSafe(request.shardId().getIndex());
IndexShard indexShard = indexService.getShard(request.shardId().getId());
SearchShardTarget shardTarget = new SearchShardTarget(clusterService.localNode().getId(),
indexShard.shardId(), request.getClusterAlias(), OriginalIndices.NONE);
Engine.Searcher engineSearcher = indexShard.acquireSearcher("search");
Engine.Searcher engineSearcher = indexShard.acquireSearcher(source);

final DefaultSearchContext searchContext = new DefaultSearchContext(idGenerator.incrementAndGet(), request, shardTarget,
engineSearcher, clusterService, indexService, indexShard, bigArrays, threadPool.estimatedTimeInMillisCounter(), timeout,
Expand Down Expand Up @@ -1023,7 +1023,7 @@ public AliasFilter buildAliasFilter(ClusterState state, String index, String...
*/
public boolean canMatch(ShardSearchRequest request) throws IOException {
assert request.searchType() == SearchType.QUERY_THEN_FETCH : "unexpected search type: " + request.searchType();
try (DefaultSearchContext context = createSearchContext(request, defaultSearchTimeout, false)) {
try (DefaultSearchContext context = createSearchContext(request, defaultSearchTimeout, false, "can_match")) {
SearchSourceBuilder source = context.request().source();
if (canRewriteToMatchNone(source)) {
QueryBuilder queryBuilder = source.query();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ReferenceManager;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.SuppressForbidden;
import org.elasticsearch.common.lucene.Lucene;
Expand All @@ -40,6 +41,7 @@
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.function.Function;

/**
Expand All @@ -66,9 +68,23 @@ public final class FrozenEngine extends ReadOnlyEngine {
public static final Setting<Boolean> INDEX_FROZEN = Setting.boolSetting("index.frozen", false, Setting.Property.IndexScope,
Setting.Property.PrivateIndex);
private volatile DirectoryReader lastOpenedReader;
private final DirectoryReader canMatchReader;

public FrozenEngine(EngineConfig config) {
    super(config, null, null, true, Function.identity());

    // Open the directory reader once up-front and snapshot its per-segment point
    // value metadata into a RewriteCachingDirectoryReader. That cached reader is
    // later handed out for "can_match" searches so range queries can be rewritten
    // (matched / excluded) without re-opening the real directory reader.
    boolean success = false;
    Directory directory = store.directory();
    try (DirectoryReader reader = DirectoryReader.open(directory)) {
        canMatchReader = new RewriteCachingDirectoryReader(directory, reader.leaves());
        success = true;
    } catch (IOException e) {
        // Constructors cannot declare IOException here; surface it unchecked.
        throw new UncheckedIOException(e);
    } finally {
        if (success == false) {
            // super() already ran, so the engine holds resources that must be
            // released before the failed constructor propagates its exception.
            // NOTE(review): the fresh CountDownLatch(1) is never awaited in this
            // frame — presumably closeNoLock only counts it down; confirm against
            // the superclass implementation.
            closeNoLock("failed on construction", new CountDownLatch(1));
        }
    }
}

@Override
Expand Down Expand Up @@ -193,6 +209,7 @@ public Searcher acquireSearcher(String source, SearcherScope scope) throws Engin
case "segments_stats":
case "completion_stats":
case "refresh_needed":
case "can_match": // special case for can_match phase - we use the cached point values reader
maybeOpenReader = false;
break;
default:
Expand All @@ -205,6 +222,10 @@ public Searcher acquireSearcher(String source, SearcherScope scope) throws Engin
// we just hand out a searcher on top of an empty reader that we opened for the ReadOnlyEngine in the #open(IndexCommit)
method. this is the case when we don't have a reader open right now and we get a stats call, or any other call that falls in
// the category that doesn't trigger a reopen
if ("can_match".equals(source)) {
canMatchReader.incRef();
return new Searcher(source, new IndexSearcher(canMatchReader), canMatchReader::decRef);
}
return super.acquireSearcher(source, scope);
} else {
try {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.index.engine;

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafMetaData;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Terms;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;

import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
* This special DirectoryReader is used to handle can_match requests against frozen indices.
It caches all relevant point value data for every point value field, i.e. min/max packed values etc.,
to hold enough information to rewrite a date range query and make a decision if an index can match or not.
* This allows frozen indices to be searched with wildcards in a very efficient way without opening a reader on them.
*/
final class RewriteCachingDirectoryReader extends DirectoryReader {

    /**
     * Builds a snapshot reader over the given directory.
     *
     * @param directory      the directory the original reader was opened on; only used as an identifier,
     *                       this reader never touches it again after construction
     * @param segmentReaders the leaves of a real, open DirectoryReader whose point value metadata is
     *                       copied onto the heap; the real reader can be closed afterwards
     * @throws IOException if reading the point value metadata from a leaf fails
     */
    RewriteCachingDirectoryReader(Directory directory, List<LeafReaderContext> segmentReaders) throws
        IOException {
        super(directory, wrap(segmentReaders));
    }

    // Eagerly wraps every incoming leaf into a RewriteCachingLeafReader, which copies the
    // per-field point value statistics (min/max packed values, sizes, dimensions) up-front.
    private static LeafReader[] wrap(List<LeafReaderContext> readers) throws IOException {
        LeafReader[] wrapped = new LeafReader[readers.size()];
        int i = 0;
        for (LeafReaderContext ctx : readers) {
            LeafReader wrap = new RewriteCachingLeafReader(ctx.reader());
            wrapped[i++] = wrap;
        }
        return wrapped;
    }

    // All of the methods below are deliberately unsupported: this reader is an immutable
    // in-memory snapshot and is never reopened, versioned, committed, or used as a cache key.

    @Override
    protected DirectoryReader doOpenIfChanged() {
        throw new UnsupportedOperationException();
    }

    @Override
    protected DirectoryReader doOpenIfChanged(IndexCommit commit) {
        throw new UnsupportedOperationException();
    }

    @Override
    protected DirectoryReader doOpenIfChanged(IndexWriter writer, boolean applyAllDeletes) {
        throw new UnsupportedOperationException();
    }

    @Override
    public long getVersion() {
        throw new UnsupportedOperationException();
    }

    @Override
    public boolean isCurrent() {
        throw new UnsupportedOperationException();
    }

    @Override
    public IndexCommit getIndexCommit() {
        throw new UnsupportedOperationException();
    }

    // NOTE(review): closing is unsupported, so decRef-ing the last reference would throw —
    // presumably the owning engine always retains a reference for the snapshot's lifetime; confirm.
    @Override
    protected void doClose() {
        throw new UnsupportedOperationException();
    }

    @Override
    public CacheHelper getReaderCacheHelper() {
        throw new UnsupportedOperationException();
    }

    // Except for a couple of selected methods, everything else will
    // throw a UOE which causes a can_match phase to just move to the actual phase
    // later, such that we never falsely exclude a shard if something else is used to rewrite.
    private static final class RewriteCachingLeafReader extends LeafReader {

        private final int maxDoc;
        private final int numDocs;
        // per-field snapshot of point value statistics; a field may be absent if the
        // original leaf had no point values for it
        private final Map<String, PointValues> pointValuesMap;
        private final FieldInfos fieldInfos;

        // Copies doc counts, field infos and — for every field with indexed points — the
        // point value statistics out of the original leaf so it can be closed afterwards.
        private RewriteCachingLeafReader(LeafReader original) throws IOException {
            this.maxDoc = original.maxDoc();
            this.numDocs = original.numDocs();
            fieldInfos = original.getFieldInfos();
            Map<String, PointValues> valuesMap = new HashMap<>();
            for (FieldInfo info : fieldInfos) {
                if (info.getPointIndexDimensionCount() != 0) {
                    PointValues pointValues = original.getPointValues(info.name);
                    if (pointValues != null) { // might not be in this reader
                        // capture everything a range-query rewrite needs into effectively-final locals
                        byte[] minPackedValue = pointValues.getMinPackedValue();
                        byte[] maxPackedValue = pointValues.getMaxPackedValue();
                        int numDimensions = pointValues.getNumIndexDimensions();
                        int bytesPerDimension = pointValues.getBytesPerDimension();
                        int numDataDimensions = pointValues.getNumDataDimensions();
                        long size = pointValues.size();
                        int docCount = pointValues.getDocCount();
                        // a PointValues view that only serves the cached statistics; actual
                        // traversal (intersect/estimate) is unsupported on purpose
                        valuesMap.put(info.name, new PointValues() {
                            @Override
                            public void intersect(IntersectVisitor visitor) {
                                throw new UnsupportedOperationException();
                            }

                            @Override
                            public long estimatePointCount(IntersectVisitor visitor) {
                                throw new UnsupportedOperationException();
                            }

                            @Override
                            public byte[] getMinPackedValue() {
                                return minPackedValue;
                            }

                            @Override
                            public byte[] getMaxPackedValue() {
                                return maxPackedValue;
                            }

                            @Override
                            public int getNumDataDimensions() {
                                return numDataDimensions;
                            }

                            @Override
                            public int getNumIndexDimensions() {
                                return numDimensions;
                            }

                            @Override
                            public int getBytesPerDimension() {
                                return bytesPerDimension;
                            }

                            @Override
                            public long size() {
                                return size;
                            }

                            @Override
                            public int getDocCount() {
                                return docCount;
                            }
                        });
                    }
                }
            }
            pointValuesMap = valuesMap;
        }

        @Override
        public CacheHelper getCoreCacheHelper() {
            throw new UnsupportedOperationException();
        }

        @Override
        public Terms terms(String field) {
            throw new UnsupportedOperationException();
        }

        @Override
        public NumericDocValues getNumericDocValues(String field) {
            throw new UnsupportedOperationException();
        }

        @Override
        public BinaryDocValues getBinaryDocValues(String field) {
            throw new UnsupportedOperationException();
        }

        @Override
        public SortedDocValues getSortedDocValues(String field) {
            throw new UnsupportedOperationException();
        }

        @Override
        public SortedNumericDocValues getSortedNumericDocValues(String field) {
            throw new UnsupportedOperationException();
        }

        @Override
        public SortedSetDocValues getSortedSetDocValues(String field) {
            throw new UnsupportedOperationException();
        }

        @Override
        public NumericDocValues getNormValues(String field) {
            throw new UnsupportedOperationException();
        }

        @Override
        public FieldInfos getFieldInfos() {
            return fieldInfos;
        }

        @Override
        public Bits getLiveDocs() {
            throw new UnsupportedOperationException();
        }

        // returns the cached statistics snapshot, or null if the field had no points
        @Override
        public PointValues getPointValues(String field) {
            return pointValuesMap.get(field);
        }

        // nothing to verify — all data this reader serves lives on the heap
        @Override
        public void checkIntegrity() {
        }

        @Override
        public LeafMetaData getMetaData() {
            throw new UnsupportedOperationException();
        }

        @Override
        public Fields getTermVectors(int docID) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int numDocs() {
            return numDocs;
        }

        @Override
        public int maxDoc() {
            return maxDoc;
        }

        @Override
        public void document(int docID, StoredFieldVisitor visitor) {
            throw new UnsupportedOperationException();
        }

        // no underlying resources to release
        @Override
        protected void doClose() {
        }

        // null: this leaf must not be used as a cache key
        @Override
        public CacheHelper getReaderCacheHelper() {
            return null;
        }
    }
}
Loading

0 comments on commit e09c28e

Please sign in to comment.