diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MaxAggregationFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MaxAggregationFunction.java index 2d0bac280385..79c08bd2bec3 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MaxAggregationFunction.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MaxAggregationFunction.java @@ -72,11 +72,13 @@ public void aggregate(int length, AggregationResultHolder aggregationResultHolde Map blockValSetMap) { BlockValSet blockValSet = blockValSetMap.get(_expression); if (_nullHandlingEnabled) { + // TODO: avoid the null bitmap check when it is null or empty for better performance. RoaringBitmap nullBitmap = blockValSet.getNullBitmap(); - if (nullBitmap != null && !nullBitmap.isEmpty()) { - aggregateNullHandlingEnabled(length, aggregationResultHolder, blockValSet, nullBitmap); - return; + if (nullBitmap == null) { + nullBitmap = new RoaringBitmap(); } + aggregateNullHandlingEnabled(length, aggregationResultHolder, blockValSet, nullBitmap); + return; } switch (blockValSet.getValueType().getStoredType()) { @@ -219,20 +221,21 @@ public void aggregateGroupBySV(int length, int[] groupKeyArray, GroupByResultHol BlockValSet blockValSet = blockValSetMap.get(_expression); if (_nullHandlingEnabled) { RoaringBitmap nullBitmap = blockValSet.getNullBitmap(); - if (nullBitmap != null && !nullBitmap.isEmpty()) { - if (nullBitmap.getCardinality() < length) { - double[] valueArray = blockValSet.getDoubleValuesSV(); - for (int i = 0; i < length; i++) { - double value = valueArray[i]; - int groupKey = groupKeyArray[i]; - Double result = groupByResultHolder.getResult(groupKey); - if (!nullBitmap.contains(i) && (result == null || value > result)) { - groupByResultHolder.setValueForKey(groupKey, value); - } + if (nullBitmap == null) { + nullBitmap = new RoaringBitmap(); + } + if (nullBitmap.getCardinality() < length) { + double[] valueArray = blockValSet.getDoubleValuesSV(); + for (int i = 0; i < length; i++) { + double value = valueArray[i]; + int groupKey = groupKeyArray[i]; + Double result = groupByResultHolder.getResult(groupKey); + if (!nullBitmap.contains(i) && (result == null || value > result)) { + groupByResultHolder.setValueForKey(groupKey, value); } } - return; } + return; } double[] valueArray = blockValSet.getDoubleValuesSV(); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MinAggregationFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MinAggregationFunction.java index 2e80387cd202..b2ed9390d330 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MinAggregationFunction.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MinAggregationFunction.java @@ -73,10 +73,11 @@ public void aggregate(int length, AggregationResultHolder aggregationResultHolde BlockValSet blockValSet = blockValSetMap.get(_expression); if (_nullHandlingEnabled) { RoaringBitmap nullBitmap = blockValSet.getNullBitmap(); - if (nullBitmap != null && !nullBitmap.isEmpty()) { - aggregateNullHandlingEnabled(length, aggregationResultHolder, blockValSet, nullBitmap); - return; + if (nullBitmap == null) { + nullBitmap = new RoaringBitmap(); } + aggregateNullHandlingEnabled(length, aggregationResultHolder, blockValSet, nullBitmap); + return; } switch (blockValSet.getValueType().getStoredType()) { @@ -219,20 +220,21 @@ public void aggregateGroupBySV(int length, int[] groupKeyArray, GroupByResultHol BlockValSet blockValSet = blockValSetMap.get(_expression); if (_nullHandlingEnabled) { RoaringBitmap nullBitmap = blockValSet.getNullBitmap(); - if (nullBitmap != null && !nullBitmap.isEmpty()) { - if (nullBitmap.getCardinality() < length) { - double[] valueArray = blockValSet.getDoubleValuesSV(); - for (int i = 0; i < length; i++) { - double value = valueArray[i]; - int groupKey = groupKeyArray[i]; - Double result = groupByResultHolder.getResult(groupKey); - if (!nullBitmap.contains(i) && (result == null || value < result)) { - groupByResultHolder.setValueForKey(groupKey, value); - } + if (nullBitmap == null) { + nullBitmap = new RoaringBitmap(); + } + if (nullBitmap.getCardinality() < length) { + double[] valueArray = blockValSet.getDoubleValuesSV(); + for (int i = 0; i < length; i++) { + double value = valueArray[i]; + int groupKey = groupKeyArray[i]; + Double result = groupByResultHolder.getResult(groupKey); + if (!nullBitmap.contains(i) && (result == null || value < result)) { + groupByResultHolder.setValueForKey(groupKey, value); } } - return; } + return; } double[] valueArray = blockValSet.getDoubleValuesSV(); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/SumAggregationFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/SumAggregationFunction.java index 8456eda43641..04f63bd4e274 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/SumAggregationFunction.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/SumAggregationFunction.java @@ -73,10 +73,11 @@ public void aggregate(int length, AggregationResultHolder aggregationResultHolde BlockValSet blockValSet = blockValSetMap.get(_expression); if (_nullHandlingEnabled) { RoaringBitmap nullBitmap = blockValSet.getNullBitmap(); - if (nullBitmap != null && !nullBitmap.isEmpty()) { - aggregateNullHandlingEnabled(length, aggregationResultHolder, blockValSet, nullBitmap); - return; + if (nullBitmap == null) { + nullBitmap = new RoaringBitmap(); } + aggregateNullHandlingEnabled(length, aggregationResultHolder, blockValSet, nullBitmap); + return; } double sum = aggregationResultHolder.getDoubleResult(); @@ -207,26 +208,27 @@ public void aggregateGroupBySV(int length, int[] groupKeyArray, GroupByResultHol BlockValSet blockValSet = blockValSetMap.get(_expression); if (_nullHandlingEnabled) { RoaringBitmap nullBitmap = blockValSet.getNullBitmap(); - if (nullBitmap != null && !nullBitmap.isEmpty()) { - if (nullBitmap.getCardinality() < length) { - double[] valueArray = blockValSet.getDoubleValuesSV(); - for (int i = 0; i < length; i++) { - if (!nullBitmap.contains(i)) { - int groupKey = groupKeyArray[i]; - Double result = groupByResultHolder.getResult(groupKey); - groupByResultHolder.setValueForKey(groupKey, result == null ? valueArray[i] : result + valueArray[i]); - // In presto: - // SELECT sum (cast(id AS DOUBLE)) as sum, min(id) as min, max(id) as max, key FROM (VALUES (null, 1), - // (null, 2)) AS t(id, key) GROUP BY key ORDER BY max DESC; - // sum | min | max | key - //------+------+------+----- - // NULL | NULL | NULL | 2 - // NULL | NULL | NULL | 1 - } + if (nullBitmap == null) { + nullBitmap = new RoaringBitmap(); + } + if (nullBitmap.getCardinality() < length) { + double[] valueArray = blockValSet.getDoubleValuesSV(); + for (int i = 0; i < length; i++) { + if (!nullBitmap.contains(i)) { + int groupKey = groupKeyArray[i]; + Double result = groupByResultHolder.getResult(groupKey); + groupByResultHolder.setValueForKey(groupKey, result == null ? valueArray[i] : result + valueArray[i]); + // In presto: + // SELECT sum (cast(id AS DOUBLE)) as sum, min(id) as min, max(id) as max, key FROM (VALUES (null, 1), + // (null, 2)) AS t(id, key) GROUP BY key ORDER BY max DESC; + // sum | min | max | key + //------+------+------+----- + // NULL | NULL | NULL | 2 + // NULL | NULL | NULL | 1 } } - return; } + return; } double[] valueArray = blockValSet.getDoubleValuesSV(); diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/NullEnabledQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/NullEnabledQueriesTest.java index e4ba37268c86..a9ba3aa65414 100644 --- a/pinot-core/src/test/java/org/apache/pinot/queries/NullEnabledQueriesTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/queries/NullEnabledQueriesTest.java @@ -92,7 +92,7 @@ protected List getIndexSegments() { return _indexSegments; } - public void createRecords(Number baseValue) + public void createRecords(Number baseValue, boolean generateNulls) throws Exception { FileUtils.deleteDirectory(INDEX_DIR); @@ -115,11 +115,12 @@ public void createRecords(Number baseValue) record.putValue(KEY_COLUMN, 2); _sumKey2 += value; } - } else { + _records.add(record); + } else if (generateNulls) { // Key column value here is null. record.putValue(COLUMN_NAME, null); + _records.add(record); } - _records.add(record); } } @@ -160,12 +161,13 @@ public void testQueriesWithDictFloatColumn() throws Exception { ColumnDataType columnDataType = ColumnDataType.FLOAT; float baseValue = RANDOM.nextFloat(); - createRecords(baseValue); + boolean generateNulls = true; + createRecords(baseValue, generateNulls); TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE) .setTableName(RAW_TABLE_NAME) .build(); setUp(tableConfig, columnDataType.toDataType()); - testQueries(baseValue, columnDataType); + testQueries(baseValue, columnDataType, generateNulls); } @Test(priority = 1) @@ -173,7 +175,8 @@ public void testQueriesWithNoDictFloatColumn() throws Exception { ColumnDataType columnDataType = ColumnDataType.FLOAT; float baseValue = RANDOM.nextFloat(); - createRecords(baseValue); + boolean generateNulls = true; + createRecords(baseValue, generateNulls); List noDictionaryColumns = new ArrayList(); noDictionaryColumns.add(COLUMN_NAME); TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE) @@ -181,7 +184,7 @@ public void testQueriesWithNoDictFloatColumn() .setNoDictionaryColumns(noDictionaryColumns) .build(); setUp(tableConfig, columnDataType.toDataType()); - testQueries(baseValue, columnDataType); + testQueries(baseValue, columnDataType, generateNulls); } @Test(priority = 2) @@ -189,12 +192,13 @@ public void testQueriesWithDictDoubleColumn() throws Exception { ColumnDataType columnDataType = ColumnDataType.DOUBLE; double baseValue = RANDOM.nextDouble(); - createRecords(baseValue); + boolean generateNulls = true; + createRecords(baseValue, generateNulls); TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE) .setTableName(RAW_TABLE_NAME) .build(); setUp(tableConfig, columnDataType.toDataType()); - testQueries(baseValue, columnDataType); + testQueries(baseValue, columnDataType, generateNulls); } @Test(priority = 3) @@ -202,7 +206,8 @@ public void testQueriesWithNoDictDoubleColumn() throws Exception { ColumnDataType columnDataType = ColumnDataType.DOUBLE; double baseValue = RANDOM.nextDouble(); - createRecords(baseValue); + boolean generateNulls = true; + createRecords(baseValue, generateNulls); List noDictionaryColumns = new ArrayList(); noDictionaryColumns.add(COLUMN_NAME); TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE) @@ -210,10 +215,72 @@ public void testQueriesWithNoDictDoubleColumn() .setNoDictionaryColumns(noDictionaryColumns) .build(); setUp(tableConfig, columnDataType.toDataType()); - testQueries(baseValue, columnDataType); + testQueries(baseValue, columnDataType, generateNulls); } - public void testQueries(Number baseValue, ColumnDataType dataType) { + @Test(priority = 4) + public void testQueriesWithDictFloatColumnNoNullValues() + throws Exception { + ColumnDataType columnDataType = ColumnDataType.FLOAT; + float baseValue = RANDOM.nextFloat(); + boolean generateNulls = false; + createRecords(baseValue, generateNulls); + TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE) + .setTableName(RAW_TABLE_NAME) + .build(); + setUp(tableConfig, columnDataType.toDataType()); + testQueries(baseValue, columnDataType, generateNulls); + } + + @Test(priority = 5) + public void testQueriesWithNoDictFloatColumnNoNullValues() + throws Exception { + ColumnDataType columnDataType = ColumnDataType.FLOAT; + float baseValue = RANDOM.nextFloat(); + boolean generateNulls = false; + createRecords(baseValue, generateNulls); + List noDictionaryColumns = new ArrayList(); + noDictionaryColumns.add(COLUMN_NAME); + TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE) + .setTableName(RAW_TABLE_NAME) + .setNoDictionaryColumns(noDictionaryColumns) + .build(); + setUp(tableConfig, columnDataType.toDataType()); + testQueries(baseValue, columnDataType, generateNulls); + } + + @Test(priority = 6) + public void testQueriesWithDictDoubleColumnNoNullValues() + throws Exception { + ColumnDataType columnDataType = ColumnDataType.DOUBLE; + double baseValue = RANDOM.nextDouble(); + boolean generateNulls = false; + createRecords(baseValue, generateNulls); + TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE) + .setTableName(RAW_TABLE_NAME) + .build(); + setUp(tableConfig, columnDataType.toDataType()); + testQueries(baseValue, columnDataType, generateNulls); + } + + @Test(priority = 7) + public void testQueriesWithNoDictDoubleColumnNoNullValues() + throws Exception { + ColumnDataType columnDataType = ColumnDataType.DOUBLE; + double baseValue = RANDOM.nextDouble(); + boolean generateNulls = false; + createRecords(baseValue, generateNulls); + List noDictionaryColumns = new ArrayList(); + noDictionaryColumns.add(COLUMN_NAME); + TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE) + .setTableName(RAW_TABLE_NAME) + .setNoDictionaryColumns(noDictionaryColumns) + .build(); + setUp(tableConfig, columnDataType.toDataType()); + testQueries(baseValue, columnDataType, generateNulls); + } + + public void testQueries(Number baseValue, ColumnDataType dataType, boolean nullValuesExist) { DataTableBuilderFactory.setDataTableVersion(DataTableFactory.VERSION_4); Map queryOptions = new HashMap<>(); queryOptions.put("enableNullHandling", "true"); @@ -228,8 +295,9 @@ public void testQueries(Number baseValue, ColumnDataType dataType) { ColumnDataType.DOUBLE, ColumnDataType.DOUBLE, ColumnDataType.DOUBLE, ColumnDataType.LONG, ColumnDataType.INT })); List rows = resultTable.getRows(); - assertEquals(rows.size(), 3); - for (int index = 0; index < 3; index++) { + int resultCount = nullValuesExist ? 3 : 2; + assertEquals(rows.size(), resultCount); + for (int index = 0; index < resultCount; index++) { Object[] row = rows.get(index); assertEquals(row.length, 5); int keyColumnIdx = 4; @@ -277,7 +345,8 @@ public void testQueries(Number baseValue, ColumnDataType dataType) { Object[] row = rows.get(0); assertEquals(row.length, 4); // Note: count(*) returns total number of docs (nullable and non-nullable). - assertEquals((long) row[0], 1000 * 4); + int totalDocs = nullValuesExist ? 1000 : 500; + assertEquals((long) row[0], totalDocs * 4); // count(col) returns the count of non-nullable docs. assertEquals((long) row[1], 500 * 4); assertEquals(row[2], baseValue.doubleValue()); @@ -296,7 +365,8 @@ public void testQueries(Number baseValue, ColumnDataType dataType) { Object[] row = rows.get(i); assertEquals(row.length, 2); if (row[0] != null) { - assertTrue(Math.abs(((Number) row[0]).doubleValue() - (baseValue.doubleValue() + i)) < 1e-1); + int incValue = nullValuesExist ? i : i * 2; + assertTrue(Math.abs(((Number) row[0]).doubleValue() - (baseValue.doubleValue() + incValue)) < 1e-1); assertEquals(row[1], 1); } else { assertNull(row[1]); @@ -313,7 +383,8 @@ public void testQueries(Number baseValue, ColumnDataType dataType) { assertEquals(dataSchema, new DataSchema(new String[]{COLUMN_NAME, KEY_COLUMN}, new ColumnDataType[]{dataType, ColumnDataType.INT})); List rows = resultTable.getRows(); - assertEquals(rows.size(), 4000); + int rowsCount = nullValuesExist ? 4000 : 2000; + assertEquals(rows.size(), rowsCount); int k = 0; for (int i = 0; i < 2000; i += 4) { // Null values are inserted at indices where: index % 2 equals 1. Skip null values. @@ -331,7 +402,7 @@ public void testQueries(Number baseValue, ColumnDataType dataType) { // Note 1: we inserted 500 nulls in _records, and since we query 4 identical index segments, the number of null // values is: 500 * 4 = 2000. // Note 2: The default null ordering is 'NULLS LAST', regardless of the ordering direction. - for (int i = 2000; i < 4000; i++) { + for (int i = 2000; i < rowsCount; i++) { Object[] values = rows.get(i); assertEquals(values.length, 2); assertNull(values[0]); @@ -361,7 +432,9 @@ public void testQueries(Number baseValue, ColumnDataType dataType) { index++; } // The default null ordering is 'NULLS LAST'. Therefore, null will appear as the last record. - assertNull(rows.get(rows.size() - 1)[0]); + if (nullValuesExist) { + assertNull(rows.get(rows.size() - 1)[0]); + } } { int limit = 40; @@ -388,7 +461,9 @@ public void testQueries(Number baseValue, ColumnDataType dataType) { index++; } // The default null ordering is 'NULLS LAST'. Therefore, null will appear as the last record. - assertNull(rows.get(rows.size() - 1)[0]); + if (nullValuesExist) { + assertNull(rows.get(rows.size() - 1)[0]); + } } { // This test case was added to validate path-code for distinct w/o order by. @@ -434,8 +509,10 @@ public void testQueries(Number baseValue, ColumnDataType dataType) { List rows = resultTable.getRows(); assertEquals(rows.size(), 10); // The default null ordering is 'NULLS LAST'. Therefore, null will appear as the last record. - assertNull(rows.get(0)[0]); - int index = 1; + if (nullValuesExist) { + assertNull(rows.get(0)[0]); + } + int index = nullValuesExist ? 1 : 0; int i = 0; while (index < rows.size()) { if ((NUM_RECORDS - i - 1) % 2 == 1) { @@ -459,7 +536,8 @@ public void testQueries(Number baseValue, ColumnDataType dataType) { assertEquals(dataSchema, new DataSchema(new String[]{"count", COLUMN_NAME}, new ColumnDataType[]{ColumnDataType.LONG, dataType})); List rows = resultTable.getRows(); - assertEquals(rows.size(), 501); + int rowsCount = nullValuesExist ? 501 : 500; + assertEquals(rows.size(), rowsCount); int i = 0; for (int index = 0; index < 500; index++) { Object[] row = rows.get(index); @@ -474,9 +552,11 @@ public void testQueries(Number baseValue, ColumnDataType dataType) { i++; } // The default null ordering is 'NULLS LAST'. - Object[] row = rows.get(500); - assertEquals(row[0], 2000L); - assertNull(row[1]); + if (nullValuesExist) { + Object[] row = rows.get(500); + assertEquals(row[0], 2000L); + assertNull(row[1]); + } } { String query = String.format("SELECT SUMPRECISION(%s) AS sum FROM testTable", COLUMN_NAME); @@ -638,9 +718,10 @@ public void testQueries(Number baseValue, ColumnDataType dataType) { assertEquals(dataSchema, new DataSchema(new String[]{"max", COLUMN_NAME}, new ColumnDataType[]{ColumnDataType.DOUBLE, dataType})); List rows = resultTable.getRows(); - assertEquals(rows.size(), 501); + int rowsCount = 500; + assertEquals(rows.size(), rowsCount + (nullValuesExist ? 1 : 0)); int i = 0; - for (int index = 0; index < 500; index++) { + for (int index = 0; index < rowsCount; index++) { if (i % 2 == 1) { // Null values are inserted at: index % 2 == 1. i++; @@ -651,7 +732,9 @@ public void testQueries(Number baseValue, ColumnDataType dataType) { assertTrue(Math.abs(((Number) row[1]).doubleValue() - (baseValue.doubleValue() + i)) < 1e-1); i++; } - assertNull(rows.get(rows.size() - 1)[0]); + if (nullValuesExist) { + assertNull(rows.get(rows.size() - 1)[0]); + } } DataTableBuilderFactory.setDataTableVersion(DataTableBuilderFactory.DEFAULT_VERSION); }