-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
prune unselected THEN statements in CaseTransformFunction #8138
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,7 +25,6 @@ | |
import java.util.Map; | ||
import org.apache.pinot.core.operator.blocks.ProjectionBlock; | ||
import org.apache.pinot.core.operator.transform.TransformResultMetadata; | ||
import org.apache.pinot.core.plan.DocIdSetPlanNode; | ||
import org.apache.pinot.segment.spi.datasource.DataSource; | ||
import org.apache.pinot.spi.data.FieldSpec.DataType; | ||
|
||
|
@@ -58,6 +57,8 @@ public class CaseTransformFunction extends BaseTransformFunction { | |
|
||
private List<TransformFunction> _whenStatements = new ArrayList<>(); | ||
private List<TransformFunction> _elseThenStatements = new ArrayList<>(); | ||
private boolean[] _selections; | ||
private int _numSelections; | ||
private TransformResultMetadata _resultMetadata; | ||
private int[] _selectedResults; | ||
private int[] _intResults; | ||
|
@@ -89,6 +90,7 @@ public void init(List<TransformFunction> arguments, Map<String, DataSource> data | |
for (int i = numWhenStatements; i < numWhenStatements * 2; i++) { | ||
_elseThenStatements.add(arguments.get(i)); | ||
} | ||
_selections = new boolean[_elseThenStatements.size()]; | ||
_resultMetadata = calculateResultMetadata(); | ||
} | ||
|
||
|
@@ -102,8 +104,9 @@ private TransformResultMetadata calculateResultMetadata() { | |
for (int i = 0; i < numThenStatements; i++) { | ||
TransformFunction thenStatement = _elseThenStatements.get(i + 1); | ||
TransformResultMetadata thenStatementResultMetadata = thenStatement.getResultMetadata(); | ||
Preconditions.checkState(thenStatementResultMetadata.isSingleValue(), | ||
String.format("Unsupported multi-value expression in the THEN clause of index: %d", i)); | ||
if (!thenStatementResultMetadata.isSingleValue()) { | ||
Review comment: Why do this instead of `Preconditions.checkState(...)`? [inline code lost in extraction; inferred from the removed lines above] — Reply: Probably to avoid `String.format(...)`. [inline code lost in extraction; inferred] — Reply: Otherwise you call `String.format(...)` [inline code inferred; the remainder of this comment was lost in extraction] |
||
throw new IllegalStateException("Unsupported multi-value expression in the THEN clause of index: " + i); | ||
} | ||
DataType thenStatementDataType = thenStatementResultMetadata.getDataType(); | ||
|
||
// Upcast the data type to cover all the data types in THEN and ELSE clauses if they don't match | ||
|
@@ -185,21 +188,29 @@ public TransformResultMetadata getResultMetadata() { | |
* index(1 to N) of matched WHEN clause, 0 means nothing matched, so go to ELSE. | ||
*/ | ||
private int[] getSelectedArray(ProjectionBlock projectionBlock) { | ||
if (_selectedResults == null) { | ||
_selectedResults = new int[DocIdSetPlanNode.MAX_DOC_PER_CALL]; | ||
int numDocs = projectionBlock.getNumDocs(); | ||
if (_selectedResults == null || _selectedResults.length < numDocs) { | ||
_selectedResults = new int[numDocs]; | ||
} else { | ||
Arrays.fill(_selectedResults, 0); | ||
Arrays.fill(_selectedResults, 0, numDocs, 0); | ||
Arrays.fill(_selections, false); | ||
} | ||
int numWhenStatements = _whenStatements.size(); | ||
for (int i = 0; i < numWhenStatements; i++) { | ||
for (int i = numWhenStatements - 1; i >= 0; i--) { | ||
Review comment: Why does the loop need to be reversed? — Reply: It allows branch-free setting of the highest-priority case below (note that the statement numbers increase). |
||
TransformFunction whenStatement = _whenStatements.get(i); | ||
int[] conditions = whenStatement.transformToIntValuesSV(projectionBlock); | ||
for (int j = 0; j < conditions.length; j++) { | ||
if (_selectedResults[j] == 0 && conditions[j] == 1) { | ||
_selectedResults[j] = i + 1; | ||
} | ||
for (int j = 0; j < numDocs & j < conditions.length; j++) { | ||
_selectedResults[j] = Math.max(conditions[j] * (i + 1), _selectedResults[j]); | ||
_selections[_selectedResults[j]] = true; | ||
} | ||
} | ||
int numSelections = 0; | ||
for (boolean selection : _selections) { | ||
if (selection) { | ||
numSelections++; | ||
} | ||
} | ||
_numSelections = numSelections; | ||
Review comment: Should we use a bitmap instead of a boolean array? — Reply: Assuming you have fewer than 64 cases (a large case statement), all updates to the bitmap would be to the same word, which creates a data dependency in the loop and slows the loop down. |
||
return _selectedResults; | ||
} | ||
|
||
|
@@ -209,17 +220,23 @@ public int[] transformToIntValuesSV(ProjectionBlock projectionBlock) { | |
return super.transformToIntValuesSV(projectionBlock); | ||
} | ||
int[] selected = getSelectedArray(projectionBlock); | ||
if (_intResults == null) { | ||
_intResults = new int[DocIdSetPlanNode.MAX_DOC_PER_CALL]; | ||
int numDocs = projectionBlock.getNumDocs(); | ||
if (_intResults == null || _intResults.length < numDocs) { | ||
_intResults = new int[numDocs]; | ||
} | ||
int numElseThenStatements = _elseThenStatements.size(); | ||
for (int i = 0; i < numElseThenStatements; i++) { | ||
TransformFunction transformFunction = _elseThenStatements.get(i); | ||
int[] intValues = transformFunction.transformToIntValuesSV(projectionBlock); | ||
int numDocs = projectionBlock.getNumDocs(); | ||
for (int j = 0; j < numDocs; j++) { | ||
if (selected[j] == i) { | ||
_intResults[j] = intValues[j]; | ||
if (_selections[i]) { | ||
TransformFunction transformFunction = _elseThenStatements.get(i); | ||
int[] intValues = transformFunction.transformToIntValuesSV(projectionBlock); | ||
if (_numSelections == 1) { | ||
System.arraycopy(intValues, 0, _intResults, 0, numDocs); | ||
} else { | ||
Review comment: Is checking for `_numSelections == 1` [inline code lost in extraction; inferred from the condition above, and the rest of the question was also lost] — Reply: The alternative is the loop below, which handles the generic case and is a lot slower. |
||
for (int j = 0; j < numDocs; j++) { | ||
if (selected[j] == i) { | ||
_intResults[j] = intValues[j]; | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
@@ -232,17 +249,23 @@ public long[] transformToLongValuesSV(ProjectionBlock projectionBlock) { | |
return super.transformToLongValuesSV(projectionBlock); | ||
} | ||
int[] selected = getSelectedArray(projectionBlock); | ||
if (_longResults == null) { | ||
_longResults = new long[DocIdSetPlanNode.MAX_DOC_PER_CALL]; | ||
int numDocs = projectionBlock.getNumDocs(); | ||
if (_longResults == null || _longResults.length < numDocs) { | ||
_longResults = new long[numDocs]; | ||
} | ||
int numElseThenStatements = _elseThenStatements.size(); | ||
for (int i = 0; i < numElseThenStatements; i++) { | ||
TransformFunction transformFunction = _elseThenStatements.get(i); | ||
long[] longValues = transformFunction.transformToLongValuesSV(projectionBlock); | ||
int numDocs = projectionBlock.getNumDocs(); | ||
for (int j = 0; j < numDocs; j++) { | ||
if (selected[j] == i) { | ||
_longResults[j] = longValues[j]; | ||
if (_selections[i]) { | ||
TransformFunction transformFunction = _elseThenStatements.get(i); | ||
long[] longValues = transformFunction.transformToLongValuesSV(projectionBlock); | ||
if (_numSelections == 1) { | ||
System.arraycopy(longValues, 0, _longResults, 0, numDocs); | ||
} else { | ||
for (int j = 0; j < numDocs; j++) { | ||
if (selected[j] == i) { | ||
_longResults[j] = longValues[j]; | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
@@ -255,17 +278,23 @@ public float[] transformToFloatValuesSV(ProjectionBlock projectionBlock) { | |
return super.transformToFloatValuesSV(projectionBlock); | ||
} | ||
int[] selected = getSelectedArray(projectionBlock); | ||
if (_floatResults == null) { | ||
_floatResults = new float[DocIdSetPlanNode.MAX_DOC_PER_CALL]; | ||
int numDocs = projectionBlock.getNumDocs(); | ||
if (_floatResults == null || _floatResults.length < numDocs) { | ||
_floatResults = new float[numDocs]; | ||
} | ||
int numElseThenStatements = _elseThenStatements.size(); | ||
for (int i = 0; i < numElseThenStatements; i++) { | ||
TransformFunction transformFunction = _elseThenStatements.get(i); | ||
float[] floatValues = transformFunction.transformToFloatValuesSV(projectionBlock); | ||
int numDocs = projectionBlock.getNumDocs(); | ||
for (int j = 0; j < numDocs; j++) { | ||
if (selected[j] == i) { | ||
_floatResults[j] = floatValues[j]; | ||
if (_selections[i]) { | ||
TransformFunction transformFunction = _elseThenStatements.get(i); | ||
float[] floatValues = transformFunction.transformToFloatValuesSV(projectionBlock); | ||
if (_numSelections == 1) { | ||
System.arraycopy(floatValues, 0, _floatResults, 0, numDocs); | ||
} else { | ||
for (int j = 0; j < numDocs; j++) { | ||
if (selected[j] == i) { | ||
_floatResults[j] = floatValues[j]; | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
@@ -278,17 +307,23 @@ public double[] transformToDoubleValuesSV(ProjectionBlock projectionBlock) { | |
return super.transformToDoubleValuesSV(projectionBlock); | ||
} | ||
int[] selected = getSelectedArray(projectionBlock); | ||
if (_doubleResults == null) { | ||
_doubleResults = new double[DocIdSetPlanNode.MAX_DOC_PER_CALL]; | ||
int numDocs = projectionBlock.getNumDocs(); | ||
if (_doubleResults == null || _doubleResults.length < numDocs) { | ||
_doubleResults = new double[numDocs]; | ||
} | ||
int numElseThenStatements = _elseThenStatements.size(); | ||
for (int i = 0; i < numElseThenStatements; i++) { | ||
TransformFunction transformFunction = _elseThenStatements.get(i); | ||
double[] doubleValues = transformFunction.transformToDoubleValuesSV(projectionBlock); | ||
int numDocs = projectionBlock.getNumDocs(); | ||
for (int j = 0; j < numDocs; j++) { | ||
if (selected[j] == i) { | ||
_doubleResults[j] = doubleValues[j]; | ||
if (_selections[i]) { | ||
TransformFunction transformFunction = _elseThenStatements.get(i); | ||
double[] doubleValues = transformFunction.transformToDoubleValuesSV(projectionBlock); | ||
if (_numSelections == 1) { | ||
System.arraycopy(doubleValues, 0, _doubleResults, 0, numDocs); | ||
} else { | ||
for (int j = 0; j < numDocs; j++) { | ||
if (selected[j] == i) { | ||
_doubleResults[j] = doubleValues[j]; | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
@@ -301,17 +336,23 @@ public String[] transformToStringValuesSV(ProjectionBlock projectionBlock) { | |
return super.transformToStringValuesSV(projectionBlock); | ||
} | ||
int[] selected = getSelectedArray(projectionBlock); | ||
if (_stringResults == null) { | ||
_stringResults = new String[DocIdSetPlanNode.MAX_DOC_PER_CALL]; | ||
int numDocs = projectionBlock.getNumDocs(); | ||
if (_stringResults == null || _selectedResults.length < numDocs) { | ||
_stringResults = new String[numDocs]; | ||
} | ||
int numElseThenStatements = _elseThenStatements.size(); | ||
for (int i = 0; i < numElseThenStatements; i++) { | ||
TransformFunction transformFunction = _elseThenStatements.get(i); | ||
String[] stringValues = transformFunction.transformToStringValuesSV(projectionBlock); | ||
int numDocs = projectionBlock.getNumDocs(); | ||
for (int j = 0; j < numDocs; j++) { | ||
if (selected[j] == i) { | ||
_stringResults[j] = stringValues[j]; | ||
if (_selections[i]) { | ||
TransformFunction transformFunction = _elseThenStatements.get(i); | ||
String[] stringValues = transformFunction.transformToStringValuesSV(projectionBlock); | ||
if (_numSelections == 1) { | ||
System.arraycopy(stringValues, 0, _stringResults, 0, numDocs); | ||
} else { | ||
for (int j = 0; j < numDocs; j++) { | ||
if (selected[j] == i) { | ||
_stringResults[j] = stringValues[j]; | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
@@ -324,17 +365,23 @@ public byte[][] transformToBytesValuesSV(ProjectionBlock projectionBlock) { | |
return super.transformToBytesValuesSV(projectionBlock); | ||
} | ||
int[] selected = getSelectedArray(projectionBlock); | ||
if (_bytesResults == null) { | ||
_bytesResults = new byte[DocIdSetPlanNode.MAX_DOC_PER_CALL][]; | ||
int numDocs = projectionBlock.getNumDocs(); | ||
if (_bytesResults == null || _bytesResults.length < numDocs) { | ||
_bytesResults = new byte[numDocs][]; | ||
} | ||
int numElseThenStatements = _elseThenStatements.size(); | ||
for (int i = 0; i < numElseThenStatements; i++) { | ||
TransformFunction transformFunction = _elseThenStatements.get(i); | ||
byte[][] bytesValues = transformFunction.transformToBytesValuesSV(projectionBlock); | ||
int numDocs = projectionBlock.getNumDocs(); | ||
for (int j = 0; j < numDocs; j++) { | ||
if (selected[j] == i) { | ||
_bytesResults[j] = bytesValues[j]; | ||
if (_selections[i]) { | ||
TransformFunction transformFunction = _elseThenStatements.get(i); | ||
byte[][] bytesValues = transformFunction.transformToBytesValuesSV(projectionBlock); | ||
if (_numSelections == 1) { | ||
System.arraycopy(bytesValues, 0, _byteValuesSV, 0, numDocs); | ||
} else { | ||
for (int j = 0; j < numDocs; j++) { | ||
if (selected[j] == i) { | ||
_bytesResults[j] = bytesValues[j]; | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
Review comment: (nit) Add a brief comment explaining what `_selections` is. I am guessing it tracks whether a statement is selected or not?