Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include docIds in Projection and Transform block #8262

Merged
merged 1 commit into from
Feb 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,15 @@ public int getNumDocs() {
return _length;
}

/**
* Returns the document ids within the current block.
* <p>The {@code _docIds} array is returned directly, without a defensive copy.
* NOTE(review): presumably only the first {@link #getNumDocs()} entries are valid for the current block and callers
* are expected to treat the array as read-only -- confirm against the code that fills {@code _docIds}.
*
* @return Document ids within the current block.
*/
public int[] getDocIds() {
return _docIds;
}

/**
* SINGLE-VALUED COLUMN API
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ public int getNumDocs() {
return _dataBlockCache.getNumDocs();
}

/**
* Returns the document ids of this block by delegating to the underlying {@code _dataBlockCache}.
*
* @return Document ids as returned by {@code _dataBlockCache.getDocIds()}.
*/
public int[] getDocIds() {
return _dataBlockCache.getDocIds();
}

/**
* Creates the value set for the given column within this block.
* <p>A new {@code ProjectionBlockValSet} is constructed on every call from {@code _dataBlockCache}, the column name
* and the entry looked up for that column in {@code _dataSourceMap}.
*
* @param column Name of the column, also used as the key into {@code _dataSourceMap}.
* @return Newly created value set for the column.
*/
public BlockValSet getBlockValueSet(String column) {
return new ProjectionBlockValSet(_dataBlockCache, column, _dataSourceMap.get(column));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ public int getNumDocs() {
return _projectionBlock.getNumDocs();
}

/**
* Returns the document ids of this block by delegating to the wrapped {@code _projectionBlock}.
*
* @return Document ids as returned by {@code _projectionBlock.getDocIds()}.
*/
public int[] getDocIds() {
return _projectionBlock.getDocIds();
}

public BlockValSet getBlockValueSet(ExpressionContext expression) {
if (expression.getType() == ExpressionContext.Type.IDENTIFIER) {
return _projectionBlock.getBlockValueSet(expression.getIdentifier());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
import org.apache.pinot.segment.spi.datasource.DataSource;
import org.apache.pinot.spi.data.FieldSpec.DataType;
import org.apache.pinot.spi.utils.ByteArray;
import org.apache.pinot.spi.utils.CommonConstants.Segment.BuiltInVirtualColumn;
import org.roaringbitmap.RoaringBitmap;


Expand Down Expand Up @@ -210,7 +209,7 @@ private IntermediateResultsBlock computeAllPreSorted() {
_numDocsScanned++;
}
}
_numEntriesScannedPostFilter += _numDocsScanned * numColumnsProjected;
_numEntriesScannedPostFilter = (long) _numDocsScanned * numColumnsProjected;

// Create the data schema
String[] columnNames = new String[numExpressions];
Expand Down Expand Up @@ -249,8 +248,8 @@ private IntermediateResultsBlock computeAllOrdered() {
SelectionOperatorUtils.addToPriorityQueue(blockValueFetcher.getRow(i), _rows, _numRowsToKeep);
}
_numDocsScanned += numDocsFetched;
_numEntriesScannedPostFilter += numDocsFetched * numColumnsProjected;
}
_numEntriesScannedPostFilter = (long) _numDocsScanned * numColumnsProjected;

// Create the data schema
String[] columnNames = new String[numExpressions];
Expand All @@ -274,28 +273,29 @@ private IntermediateResultsBlock computePartiallyOrdered() {
int numOrderByExpressions = _orderByExpressions.size();

// Fetch the order-by expressions and docIds and insert them into the priority queue
BlockValSet[] blockValSets = new BlockValSet[numOrderByExpressions + 1];
BlockValSet[] blockValSets = new BlockValSet[numOrderByExpressions];
int numColumnsProjected = _transformOperator.getNumColumnsProjected();
TransformBlock transformBlock;
while ((transformBlock = _transformOperator.nextBlock()) != null) {
for (int i = 0; i < numOrderByExpressions; i++) {
ExpressionContext expression = _orderByExpressions.get(i).getExpression();
blockValSets[i] = transformBlock.getBlockValueSet(expression);
}
blockValSets[numOrderByExpressions] = transformBlock.getBlockValueSet(BuiltInVirtualColumn.DOCID);
RowBasedBlockValueFetcher blockValueFetcher = new RowBasedBlockValueFetcher(blockValSets);
int numDocsFetched = transformBlock.getNumDocs();
int[] docIds = transformBlock.getDocIds();
for (int i = 0; i < numDocsFetched; i++) {
// NOTE: We pre-allocate the complete row so that we can fill up the non-order-by output expression values later
// without creating extra rows or re-constructing the priority queue. We can change the values in-place
// because the comparator only compares the values for the order-by expressions.
Object[] row = new Object[numExpressions];
blockValueFetcher.getRow(i, row, 0);
row[numOrderByExpressions] = docIds[i];
SelectionOperatorUtils.addToPriorityQueue(row, _rows, _numRowsToKeep);
}
_numDocsScanned += numDocsFetched;
_numEntriesScannedPostFilter += numDocsFetched * numColumnsProjected;
}
_numEntriesScannedPostFilter = (long) _numDocsScanned * numColumnsProjected;

// Copy the rows (shallow copy so that any modification will also be reflected to the priority queue) into a list,
// and store the document ids into a bitmap
Expand Down Expand Up @@ -340,7 +340,7 @@ private IntermediateResultsBlock computePartiallyOrdered() {
for (int i = 0; i < numDocsFetched; i++) {
blockValueFetcher.getRow(i, rowList.get(rowBaseId + i), numOrderByExpressions);
}
_numEntriesScannedPostFilter += numDocsFetched * numColumns;
_numEntriesScannedPostFilter += (long) numDocsFetched * numColumns;
rowBaseId += numDocsFetched;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import org.apache.pinot.core.query.request.context.QueryContext;
import org.apache.pinot.core.query.selection.SelectionOperatorUtils;
import org.apache.pinot.segment.spi.IndexSegment;
import org.apache.pinot.spi.utils.CommonConstants.Segment.BuiltInVirtualColumn;


/**
Expand Down Expand Up @@ -73,11 +72,10 @@ public Operator<IntermediateResultsBlock> run() {
} else {
// Not all output expressions are ordered, only fetch the order-by expressions and docId to avoid the
// unnecessary data fetch
List<ExpressionContext> expressionsToTransform = new ArrayList<>(orderByExpressions.size() + 1);
List<ExpressionContext> expressionsToTransform = new ArrayList<>(orderByExpressions.size());
for (OrderByExpressionContext orderByExpression : orderByExpressions) {
expressionsToTransform.add(orderByExpression.getExpression());
}
expressionsToTransform.add(ExpressionContext.forIdentifier(BuiltInVirtualColumn.DOCID));
TransformOperator transformOperator =
new TransformPlanNode(_indexSegment, _queryContext, expressionsToTransform,
DocIdSetPlanNode.MAX_DOC_PER_CALL).run();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,8 @@ public void testSelectionOrderBy() {
ExecutionStatistics executionStatistics = selectionOrderByOperator.getExecutionStatistics();
Assert.assertEquals(executionStatistics.getNumDocsScanned(), 100000L);
Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 0L);
// 100000 * (2 order-by columns + 1 docId column) + 10 * (2 non-order-by columns)
Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 300020L);
// 100000 * (2 order-by columns) + 10 * (2 non-order-by columns)
Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 200020L);
Assert.assertEquals(executionStatistics.getNumTotalDocs(), 100000L);
DataSchema selectionDataSchema = resultsBlock.getDataSchema();
Map<String, Integer> columnIndexMap = computeColumnNameToIndexMap(selectionDataSchema);
Expand All @@ -226,8 +226,8 @@ public void testSelectionOrderBy() {
executionStatistics = selectionOrderByOperator.getExecutionStatistics();
Assert.assertEquals(executionStatistics.getNumDocsScanned(), 15620L);
Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 275416L);
// 15620 * (2 order-by columns + 1 docId column) + 10 * (2 non-order-by columns)
Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 46880L);
// 15620 * (2 order-by columns) + 10 * (2 non-order-by columns)
Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 31260L);
Assert.assertEquals(executionStatistics.getNumTotalDocs(), 100000L);
selectionDataSchema = resultsBlock.getDataSchema();
columnIndexMap = computeColumnNameToIndexMap(selectionDataSchema);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,8 @@ public void testSelectionOrderBy() {
ExecutionStatistics executionStatistics = selectionOrderByOperator.getExecutionStatistics();
Assert.assertEquals(executionStatistics.getNumDocsScanned(), 30000L);
Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 0L);
// 30000 * (2 order-by columns + 1 docId column) + 10 * (2 non-order-by columns)
Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 90020L);
// 30000 * (2 order-by columns) + 10 * (2 non-order-by columns)
Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 60020L);
Assert.assertEquals(executionStatistics.getNumTotalDocs(), 30000L);
DataSchema selectionDataSchema = resultsBlock.getDataSchema();
Map<String, Integer> columnIndexMap = computeColumnNameToIndexMap(selectionDataSchema);
Expand All @@ -259,8 +259,8 @@ public void testSelectionOrderBy() {
executionStatistics = selectionOrderByOperator.getExecutionStatistics();
Assert.assertEquals(executionStatistics.getNumDocsScanned(), 6129L);
Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 84134L);
// 6129 * (2 order-by columns + 1 docId column) + 10 * (2 non-order-by columns)
Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 18407L);
// 6129 * (2 order-by columns) + 10 * (2 non-order-by columns)
Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 12278L);
Assert.assertEquals(executionStatistics.getNumTotalDocs(), 30000L);
selectionDataSchema = resultsBlock.getDataSchema();
columnIndexMap = computeColumnNameToIndexMap(selectionDataSchema);
Expand Down Expand Up @@ -290,8 +290,8 @@ public void testSelectStarOrderBy() {
ExecutionStatistics executionStatistics = selectionOrderByOperator.getExecutionStatistics();
Assert.assertEquals(executionStatistics.getNumDocsScanned(), 30000L);
Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 0L);
// 30000 * (2 order-by columns + 1 docId column) + 10 * (9 non-order-by columns)
Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 90090L);
// 30000 * (2 order-by columns) + 10 * (9 non-order-by columns)
Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 60090L);
Assert.assertEquals(executionStatistics.getNumTotalDocs(), 30000L);
DataSchema selectionDataSchema = resultsBlock.getDataSchema();
Map<String, Integer> columnIndexMap = computeColumnNameToIndexMap(selectionDataSchema);
Expand All @@ -317,8 +317,8 @@ public void testSelectStarOrderBy() {
executionStatistics = selectionOrderByOperator.getExecutionStatistics();
Assert.assertEquals(executionStatistics.getNumDocsScanned(), 6129L);
Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 84134L);
// 6129 * (2 order-by columns + 1 docId column) + 10 * (9 non-order-by columns)
Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 18477L);
// 6129 * (2 order-by columns) + 10 * (9 non-order-by columns)
Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 12348L);
Assert.assertEquals(executionStatistics.getNumTotalDocs(), 30000L);
selectionDataSchema = resultsBlock.getDataSchema();
columnIndexMap = computeColumnNameToIndexMap(selectionDataSchema);
Expand Down Expand Up @@ -400,8 +400,8 @@ public void testSelectStarOrderByLargeOffsetLimit() {
ExecutionStatistics executionStatistics = selectionOrderByOperator.getExecutionStatistics();
Assert.assertEquals(executionStatistics.getNumDocsScanned(), 30000L);
Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 0L);
// 30000 * (2 order-by columns + 1 docId column) + 12000 * (9 non-order-by columns)
Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 198000L);
// 30000 * (2 order-by columns) + 12000 * (9 non-order-by columns)
Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 168000L);
Assert.assertEquals(executionStatistics.getNumTotalDocs(), 30000L);
DataSchema selectionDataSchema = resultsBlock.getDataSchema();
Map<String, Integer> columnIndexMap = computeColumnNameToIndexMap(selectionDataSchema);
Expand All @@ -427,8 +427,8 @@ public void testSelectStarOrderByLargeOffsetLimit() {
executionStatistics = selectionOrderByOperator.getExecutionStatistics();
Assert.assertEquals(executionStatistics.getNumDocsScanned(), 6129L);
Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 84134L);
// 6129 * (2 order-by columns + 1 docId column) + 6129 * (9 non-order-by columns)
Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 73548L);
// 6129 * (2 order-by columns) + 6129 * (9 non-order-by columns)
Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 67419L);
Assert.assertEquals(executionStatistics.getNumTotalDocs(), 30000L);
selectionDataSchema = resultsBlock.getDataSchema();
columnIndexMap = computeColumnNameToIndexMap(selectionDataSchema);
Expand Down