Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up sorted scroll when the index sort matches the search sort #25138

Merged
merged 4 commits into from
Jun 12, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 43 additions & 35 deletions core/src/main/java/org/apache/lucene/queries/MinDocQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -66,46 +66,54 @@ public Scorer scorer(LeafReaderContext context) throws IOException {
return null;
}
final int segmentMinDoc = Math.max(0, minDoc - context.docBase);
final DocIdSetIterator disi = new DocIdSetIterator() {

int doc = -1;

@Override
public int docID() {
return doc;
}

@Override
public int nextDoc() throws IOException {
return advance(doc + 1);
}

@Override
public int advance(int target) throws IOException {
assert target > doc;
if (doc == -1) {
// skip directly to minDoc
doc = Math.max(target, segmentMinDoc);
} else {
doc = target;
}
if (doc >= maxDoc) {
doc = NO_MORE_DOCS;
}
return doc;
}

@Override
public long cost() {
return maxDoc - segmentMinDoc;
}

};
final DocIdSetIterator disi = new MinDocIterator(segmentMinDoc, maxDoc);
return new ConstantScoreScorer(this, score(), disi);
}
};
}

static class MinDocIterator extends DocIdSetIterator {
final int segmentMinDoc;
final int maxDoc;
int doc = -1;

MinDocIterator(int segmentMinDoc, int maxDoc) {
this.segmentMinDoc = segmentMinDoc;
this.maxDoc = maxDoc;
}

@Override
public int docID() {
return doc;
}

@Override
public int nextDoc() throws IOException {
return advance(doc + 1);
}

@Override
public int advance(int target) throws IOException {
assert target > doc;
if (doc == -1) {
// skip directly to minDoc
doc = Math.max(target, segmentMinDoc);
} else {
doc = target;
}
if (doc >= maxDoc) {
doc = NO_MORE_DOCS;
}
return doc;
}

@Override
public long cost() {
return maxDoc - segmentMinDoc;
}
}


@Override
public String toString(String field) {
return "MinDocQuery(minDoc=" + minDoc + ")";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.lucene.queries;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.EarlyTerminatingSortingCollector;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.Weight;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;

/**
* A {@link Query} that only matches documents that are greater than the provided {@link FieldDoc}.
* This works only if the index is sorted according to the given search {@link Sort}.
*/
public class SearchAfterSortedDocQuery extends Query {
private final Sort sort;
private final FieldDoc after;
private final FieldComparator<?>[] fieldComparators;
private final int[] reverseMuls;

public SearchAfterSortedDocQuery(Sort sort, FieldDoc after) {
if (sort.getSort().length != after.fields.length) {
throw new IllegalArgumentException("after doc has " + after.fields.length + " value(s) but sort has "
+ sort.getSort().length + ".");
}
this.sort = sort;
this.after = after;
int numFields = sort.getSort().length;
this.fieldComparators = new FieldComparator[numFields];
this.reverseMuls = new int[numFields];
for (int i = 0; i < numFields; i++) {
SortField sortField = sort.getSort()[i];
FieldComparator<?> fieldComparator = sortField.getComparator(1, i);
@SuppressWarnings("unchecked")
FieldComparator<Object> comparator = (FieldComparator<Object>) fieldComparator;
comparator.setTopValue(after.fields[i]);
fieldComparators[i] = fieldComparator;
reverseMuls[i] = sortField.getReverse() ? -1 : 1;
}
}

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return new ConstantScoreWeight(this, 1.0f) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
Sort segmentSort = context.reader().getMetaData().getSort();
if (EarlyTerminatingSortingCollector.canEarlyTerminate(sort, segmentSort) == false) {
throw new IOException("search sort :[" + sort.getSort() + "] does not match the index sort:[" + segmentSort + "]");
}
final int afterDoc = after.doc - context.docBase;
TopComparator comparator= getTopComparator(fieldComparators, reverseMuls, context, afterDoc);
final int maxDoc = context.reader().maxDoc();
final int firstDoc = searchAfterDoc(comparator, 0, context.reader().maxDoc());
if (firstDoc >= maxDoc) {
return null;
}
final DocIdSetIterator disi = new MinDocQuery.MinDocIterator(firstDoc, maxDoc);
return new ConstantScoreScorer(this, score(), disi);
}
};
}

@Override
public String toString(String field) {
return "SearchAfterSortedDocQuery(sort=" + sort + ", afterDoc=" + after.toString() + ")";
}

@Override
public boolean equals(Object other) {
return sameClassAs(other) &&
equalsTo(getClass().cast(other));
}

private boolean equalsTo(SearchAfterSortedDocQuery other) {
return sort.equals(other.sort) &&
after.doc == other.after.doc &&
Double.compare(after.score, other.after.score) == 0 &&
Arrays.equals(after.fields, other.after.fields);
}

@Override
public int hashCode() {
return Objects.hash(classHash(), sort, after.doc, after.score, Arrays.hashCode(after.fields));
}

interface TopComparator {
boolean lessThanTop(int doc) throws IOException;
}

static TopComparator getTopComparator(FieldComparator<?>[] fieldComparators,
int[] reverseMuls,
LeafReaderContext leafReaderContext,
int topDoc) {
return doc -> {
// DVs use forward iterators so we recreate the iterator for each sort field
// every time we need to compare a document with the <code>after<code> doc.
// We could reuse the iterators when the comparison goes forward but
// this should only be called a few time per segment (binary search).
for (int i = 0; i < fieldComparators.length; i++) {
LeafFieldComparator comparator = fieldComparators[i].getLeafComparator(leafReaderContext);
int value = reverseMuls[i] * comparator.compareTop(doc);
if (value != 0) {
return value < 0;
}
}

if (topDoc <= doc) {
return false;
}
return true;
};
}

/**
* Returns the first doc id greater than the provided <code>after</code> doc.
*/
static int searchAfterDoc(TopComparator comparator, int from, int to) throws IOException {
int low = from;
int high = to - 1;

while (low <= high) {
int mid = (low + high) >>> 1;
if (comparator.lessThanTop(mid)) {
high = mid - 1;
} else {
low = mid + 1;
}
}
return low;
}

}
36 changes: 26 additions & 10 deletions core/src/main/java/org/elasticsearch/search/query/QueryPhase.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queries.MinDocQuery;
import org.apache.lucene.queries.SearchAfterSortedDocQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.EarlyTerminatingSortingCollector;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
Expand All @@ -50,7 +52,6 @@
import org.elasticsearch.search.suggest.SuggestPhase;

import java.util.LinkedList;
import java.util.List;

import static org.elasticsearch.search.query.QueryCollectorContext.createCancellableCollectorContext;
import static org.elasticsearch.search.query.QueryCollectorContext.createEarlySortingTerminationCollectorContext;
Expand Down Expand Up @@ -130,16 +131,17 @@ static boolean execute(SearchContext searchContext, final IndexSearcher searcher

final ScrollContext scrollContext = searchContext.scrollContext();
if (scrollContext != null) {
if (returnsDocsInOrder(query, searchContext.sort())) {
if (scrollContext.totalHits == -1) {
// first round
assert scrollContext.lastEmittedDoc == null;
// there is not much that we can optimize here since we want to collect all
// documents in order to get the total number of hits
} else {
if (scrollContext.totalHits == -1) {
// first round
assert scrollContext.lastEmittedDoc == null;
// there is not much that we can optimize here since we want to collect all
// documents in order to get the total number of hits

} else {
final ScoreDoc after = scrollContext.lastEmittedDoc;
if (returnsDocsInOrder(query, searchContext.sort())) {
// now this gets interesting: since we sort in index-order, we can directly
// skip to the desired doc
final ScoreDoc after = scrollContext.lastEmittedDoc;
if (after != null) {
BooleanQuery bq = new BooleanQuery.Builder()
.add(query, BooleanClause.Occur.MUST)
Expand All @@ -150,6 +152,17 @@ static boolean execute(SearchContext searchContext, final IndexSearcher searcher
// ... and stop collecting after ${size} matches
searchContext.terminateAfter(searchContext.size());
searchContext.trackTotalHits(false);
} else if (canEarlyTerminate(indexSort, searchContext)) {
// now this gets interesting: since the index sort matches the search sort, we can directly
// skip to the desired doc
if (after != null) {
BooleanQuery bq = new BooleanQuery.Builder()
.add(query, BooleanClause.Occur.MUST)
.add(new SearchAfterSortedDocQuery(indexSort, (FieldDoc) after), BooleanClause.Occur.FILTER)
.build();
query = bq;
}
searchContext.trackTotalHits(false);
}
}
}
Expand Down Expand Up @@ -189,7 +202,10 @@ static boolean execute(SearchContext searchContext, final IndexSearcher searcher
final TopDocsCollectorContext topDocsFactory = createTopDocsCollectorContext(searchContext, reader,
collectors.stream().anyMatch(QueryCollectorContext::shouldCollect));
final boolean shouldCollect = topDocsFactory.shouldCollect();
if (scrollContext == null && topDocsFactory.numHits() > 0 && canEarlyTerminate(indexSort, searchContext)) {

if (topDocsFactory.numHits() > 0 &&
(scrollContext == null || scrollContext.totalHits != -1) &&
canEarlyTerminate(indexSort, searchContext)) {
// top docs collection can be early terminated based on index sort
// add the collector context first so we don't early terminate aggs but only top docs
collectors.addFirst(createEarlySortingTerminationCollectorContext(reader, searchContext.query(), indexSort,
Expand Down
Loading