From 86fe730de0464ba4179b2f06259b6b9821f92f39 Mon Sep 17 00:00:00 2001 From: Sonam Mandal Date: Mon, 12 Dec 2022 08:20:44 -0800 Subject: [PATCH] Default column handling of noForwardIndex and regeneration of forward index on reload path (#9810) * Add code to handle default column and add a utility to construct forward index from inverted index and dictionary * Empty-Commit * Fix compilation error * Address review comments * Empty-Commit * Empty-Commit * Empty-Commit * Address review comments * Empty-Commit * Address latest review comments --- ...sabledMultiValueQueriesWithReloadTest.java | 300 +++++++++ ...rdIndexDisabledSingleValueQueriesTest.java | 338 +++++++++- .../index/loader/BaseIndexHandler.java | 98 +++ .../index/loader/ForwardIndexHandler.java | 103 +-- .../segment/index/loader/IndexHandler.java | 5 +- .../index/loader/IndexHandlerFactory.java | 4 + ...AndDictionaryBasedForwardIndexCreator.java | 615 +++++++++++++++++ .../segment/index/loader/LoaderUtils.java | 12 + .../index/loader/SegmentPreProcessor.java | 8 +- .../bloomfilter/BloomFilterHandler.java | 24 +- .../BaseDefaultColumnHandler.java | 50 +- .../defaultcolumn/V3DefaultColumnHandler.java | 18 +- .../loader/invertedindex/FSTIndexHandler.java | 12 +- .../loader/invertedindex/H3IndexHandler.java | 18 +- .../invertedindex/InvertedIndexHandler.java | 12 +- .../invertedindex/JsonIndexHandler.java | 18 +- .../invertedindex/RangeIndexHandler.java | 18 +- .../invertedindex/TextIndexHandler.java | 17 +- .../index/loader/ForwardIndexHandlerTest.java | 543 +++++++++++++-- .../index/loader/SegmentPreProcessorTest.java | 630 ++++++++++++++++-- 20 files changed, 2608 insertions(+), 235 deletions(-) create mode 100644 pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/BaseIndexHandler.java create mode 100644 pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/InvertedIndexAndDictionaryBasedForwardIndexCreator.java diff --git 
a/pinot-core/src/test/java/org/apache/pinot/queries/ForwardIndexDisabledMultiValueQueriesWithReloadTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/ForwardIndexDisabledMultiValueQueriesWithReloadTest.java index faa1125edd28..0181575637fc 100644 --- a/pinot-core/src/test/java/org/apache/pinot/queries/ForwardIndexDisabledMultiValueQueriesWithReloadTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/queries/ForwardIndexDisabledMultiValueQueriesWithReloadTest.java @@ -290,6 +290,141 @@ public void testSelectQueriesWithReload() brokerResponseNative = getBrokerResponse(query); assertTrue(brokerResponseNative.getProcessingExceptions() != null && brokerResponseNative.getProcessingExceptions().size() > 0); + + // Re-enable forward index for column7 and column6 + reenableForwardIndexForSomeColumns(); + + // Selection query without filters including column7 + query = "SELECT column1, column5, column7, column6 FROM testTable WHERE column7 != 201 ORDER BY column1"; + brokerResponseNative = getBrokerResponse(query); + assertTrue(brokerResponseNative.getProcessingExceptions() == null + || brokerResponseNative.getProcessingExceptions().size() == 0); + resultTable = brokerResponseNative.getResultTable(); + assertEquals(brokerResponseNative.getNumRowsResultSet(), 10); + assertEquals(brokerResponseNative.getTotalDocs(), 400_000L); + assertEquals(brokerResponseNative.getNumDocsScanned(), 399_896L); + assertEquals(brokerResponseNative.getNumSegmentsProcessed(), 4L); + assertEquals(brokerResponseNative.getNumSegmentsMatched(), 4L); + assertEquals(brokerResponseNative.getNumEntriesScannedPostFilter(), 400_016L); + assertEquals(brokerResponseNative.getNumEntriesScannedInFilter(), 536_360L); + dataSchema = new DataSchema(new String[]{"column1", "column5", "column7", "column6"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.INT, DataSchema.ColumnDataType.STRING, + DataSchema.ColumnDataType.INT_ARRAY, DataSchema.ColumnDataType.INT_ARRAY}); + 
assertEquals(resultTable.getDataSchema(), dataSchema); + resultRows = resultTable.getRows(); + previousColumn1 = Integer.MIN_VALUE; + for (Object[] resultRow : resultRows) { + assertEquals(resultRow.length, 4); + // Column 1 + assertTrue((Integer) resultRow[0] >= previousColumn1); + previousColumn1 = (Integer) resultRow[0]; + } + firstRow = resultRows.get(0); + // Column 5 + assertEquals((String) firstRow[1], "AKXcXcIqsqOJFsdwxZ"); + + // Transform function on a filter clause for forwardIndexDisabled column in transform + query = "SELECT column1, column10 from testTable WHERE ARRAYLENGTH(column7) = 2 AND ARRAYLENGTH(column6) = 2 " + + "ORDER BY column1"; + brokerResponseNative = getBrokerResponse(query); + assertTrue(brokerResponseNative.getProcessingExceptions() == null + || brokerResponseNative.getProcessingExceptions().size() == 0); + resultTable = brokerResponseNative.getResultTable(); + assertEquals(brokerResponseNative.getNumRowsResultSet(), 10); + assertEquals(brokerResponseNative.getTotalDocs(), 400_000L); + assertEquals(brokerResponseNative.getNumDocsScanned(), 5388L); + assertEquals(brokerResponseNative.getNumSegmentsProcessed(), 4L); + assertEquals(brokerResponseNative.getNumSegmentsMatched(), 4L); + assertEquals(brokerResponseNative.getNumEntriesScannedPostFilter(), 5428L); + assertEquals(brokerResponseNative.getNumEntriesScannedInFilter(), 799_056L); + dataSchema = new DataSchema(new String[]{"column1", "column10"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.INT, DataSchema.ColumnDataType.INT}); + assertEquals(resultTable.getDataSchema(), dataSchema); + resultRows = resultTable.getRows(); + previousColumn1 = Integer.MIN_VALUE; + for (Object[] resultRow : resultRows) { + assertEquals(resultRow.length, 2); + // Column 1 + assertTrue((Integer) resultRow[0] >= previousColumn1); + previousColumn1 = (Integer) resultRow[0]; + } + } + + @Test + public void testSelectAllResultsQueryWithReload() + throws Exception { + // Select query with order 
by on column9 with limit == totalDocs + String query = "SELECT column7 FROM testTable ORDER BY column1 LIMIT 400000"; + BrokerResponseNative brokerResponseNative = getBrokerResponse(query); + assertTrue(brokerResponseNative.getProcessingExceptions() == null + || brokerResponseNative.getProcessingExceptions().size() == 0); + ResultTable resultTable = brokerResponseNative.getResultTable(); + assertEquals(brokerResponseNative.getNumRowsResultSet(), 400_000); + assertEquals(brokerResponseNative.getTotalDocs(), 400_000L); + assertEquals(brokerResponseNative.getNumDocsScanned(), 400_000L); + assertEquals(brokerResponseNative.getNumSegmentsProcessed(), 4L); + assertEquals(brokerResponseNative.getNumSegmentsMatched(), 4L); + assertEquals(brokerResponseNative.getNumEntriesScannedPostFilter(), 800_000L); + assertEquals(brokerResponseNative.getNumEntriesScannedInFilter(), 0L); + assertNotNull(brokerResponseNative.getProcessingExceptions()); + assertEquals(brokerResponseNative.getProcessingExceptions().size(), 0); + DataSchema dataSchema = new DataSchema(new String[]{"column7"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.INT_ARRAY}); + assertEquals(resultTable.getDataSchema(), dataSchema); + List resultRowsBeforeDisabling = resultTable.getRows(); + for (Object[] resultRow : resultRowsBeforeDisabling) { + assertEquals(resultRow.length, 1); + } + + // Disable forward index for column7 + disableForwardIndexForSomeColumns(); + + // Run the same query and validate that an exception is thrown since we are running select query on forward index + // disabled column + brokerResponseNative = getBrokerResponse(query); + assertTrue(brokerResponseNative.getProcessingExceptions() != null + && brokerResponseNative.getProcessingExceptions().size() > 0); + + // Re-enable forward index for column7 and column6 + reenableForwardIndexForSomeColumns(); + + // The first query should work now + brokerResponseNative = getBrokerResponse(query); + 
assertTrue(brokerResponseNative.getProcessingExceptions() == null + || brokerResponseNative.getProcessingExceptions().size() == 0); + resultTable = brokerResponseNative.getResultTable(); + assertEquals(brokerResponseNative.getNumRowsResultSet(), 400_000); + assertEquals(brokerResponseNative.getTotalDocs(), 400_000L); + assertEquals(brokerResponseNative.getNumDocsScanned(), 400_000L); + assertEquals(brokerResponseNative.getNumSegmentsProcessed(), 4L); + assertEquals(brokerResponseNative.getNumSegmentsMatched(), 4L); + assertEquals(brokerResponseNative.getNumEntriesScannedPostFilter(), 800_000L); + assertEquals(brokerResponseNative.getNumEntriesScannedInFilter(), 0L); + assertNotNull(brokerResponseNative.getProcessingExceptions()); + assertEquals(brokerResponseNative.getProcessingExceptions().size(), 0); + dataSchema = new DataSchema(new String[]{"column7"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.INT_ARRAY}); + assertEquals(resultTable.getDataSchema(), dataSchema); + List resultRowsAfterReenabling = resultTable.getRows(); + // Validate that the result row size before disabling the forward index matches the result row size after + // re-enabling the forward index + assertEquals(resultRowsAfterReenabling.size(), resultRowsBeforeDisabling.size()); + for (int i = 0; i < resultRowsAfterReenabling.size(); i++) { + Object[] resultRow = resultRowsAfterReenabling.get(i); + assertEquals(resultRow.length, 1); + int[] rowValuesAfterReenabling = (int[]) resultRow[0]; + int[] rowValuesBeforeDisabling = (int[]) resultRowsBeforeDisabling.get(i)[0]; + assertEquals(rowValuesAfterReenabling.length, rowValuesBeforeDisabling.length); + // Validate that the value of result row matches the value at this index before forward index was disabled + // Since ordering cannot be guaranteed for multi-value rows, validate all entries are present + Set rowValuesSetAfterReenabling = new HashSet<>(); + Set rowValuesSetBeforeDisabling = new HashSet<>(); + for (int j = 0; j < 
rowValuesAfterReenabling.length; j++) { + rowValuesSetAfterReenabling.add(rowValuesAfterReenabling[j]); + rowValuesSetBeforeDisabling.add(rowValuesBeforeDisabling[j]); + } + assertEquals(rowValuesSetAfterReenabling, rowValuesSetBeforeDisabling); + } } @Test @@ -331,6 +466,34 @@ public void testSelectWithDistinctQueriesWithReload() } catch (IllegalStateException e) { assertEquals(e.getMessage(), "Forward index disabled for column: column7, cannot create DataFetcher!"); } + + // Re-enable forward index for column7 and column6 + reenableForwardIndexForSomeColumns(); + + // Distinct query without filters including column7 + query = "SELECT DISTINCT column1, column7, column9, column6 FROM testTable ORDER BY column1 LIMIT 10"; + brokerResponseNative = getBrokerResponse(query); + assertTrue(brokerResponseNative.getProcessingExceptions() == null + || brokerResponseNative.getProcessingExceptions().size() == 0); + resultTable = brokerResponseNative.getResultTable(); + assertEquals(brokerResponseNative.getNumRowsResultSet(), 10); + assertEquals(brokerResponseNative.getTotalDocs(), 400_000L); + assertEquals(brokerResponseNative.getNumDocsScanned(), 400000L); + assertEquals(brokerResponseNative.getNumSegmentsProcessed(), 4L); + assertEquals(brokerResponseNative.getNumSegmentsMatched(), 4L); + assertEquals(brokerResponseNative.getNumEntriesScannedPostFilter(), 1600000L); + assertEquals(brokerResponseNative.getNumEntriesScannedInFilter(), 0L); + dataSchema = new DataSchema(new String[]{"column1", "column7", "column9", "column6"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.INT, DataSchema.ColumnDataType.INT, + DataSchema.ColumnDataType.INT, DataSchema.ColumnDataType.INT}); + assertEquals(resultTable.getDataSchema(), dataSchema); + resultRows = resultTable.getRows(); + previousColumn1 = Integer.MIN_VALUE; + for (Object[] resultRow : resultRows) { + assertEquals(resultRow.length, 4); + assertTrue(previousColumn1 <= (int) resultRow[0]); + previousColumn1 = (int) 
resultRow[0]; + } } @Test @@ -382,6 +545,58 @@ public void testSelectWithGroupByOrderByQueriesWithReload() assertTrue(e.getMessage().contains("Forward index disabled for column:") && e.getMessage().contains("cannot create DataFetcher!")); } + + // Re-enable forward index for column7 and column6 + reenableForwardIndexForSomeColumns(); + + // Select non-forwardIndexDisabled columns with group by order by + query = "SELECT column1, column7, column6 FROM testTable GROUP BY column1, column7, column6 ORDER BY column1, " + + "column7, column6 LIMIT 10"; + brokerResponseNative = getBrokerResponse(query); + assertTrue(brokerResponseNative.getProcessingExceptions() == null + || brokerResponseNative.getProcessingExceptions().size() == 0); + resultTable = brokerResponseNative.getResultTable(); + assertEquals(brokerResponseNative.getNumRowsResultSet(), 10); + assertEquals(brokerResponseNative.getTotalDocs(), 400_000L); + assertEquals(brokerResponseNative.getNumDocsScanned(), 400000L); + assertEquals(brokerResponseNative.getNumSegmentsProcessed(), 4L); + assertEquals(brokerResponseNative.getNumSegmentsMatched(), 4L); + assertEquals(brokerResponseNative.getNumEntriesScannedPostFilter(), 1200000L); + assertEquals(brokerResponseNative.getNumEntriesScannedInFilter(), 0L); + assertEquals(resultTable.getDataSchema(), new DataSchema(new String[]{"column1", "column7", "column6"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.INT, DataSchema.ColumnDataType.INT, + DataSchema.ColumnDataType.INT})); + resultRows = resultTable.getRows(); + previousVal = -1; + for (Object[] resultRow : resultRows) { + assertEquals(resultRow.length, 3); + assertTrue((int) resultRow[0] >= previousVal); + previousVal = (int) resultRow[0]; + } + + // Select forwardIndexDisabled columns using transform with group by order by + query = "SELECT ARRAYLENGTH(column7) FROM testTable GROUP BY ARRAYLENGTH(column7) ORDER BY " + + "ARRAYLENGTH(column7) LIMIT 10"; + brokerResponseNative = 
getBrokerResponse(query); + assertTrue(brokerResponseNative.getProcessingExceptions() == null + || brokerResponseNative.getProcessingExceptions().size() == 0); + resultTable = brokerResponseNative.getResultTable(); + assertEquals(brokerResponseNative.getNumRowsResultSet(), 10); + assertEquals(brokerResponseNative.getTotalDocs(), 400_000L); + assertEquals(brokerResponseNative.getNumDocsScanned(), 400000L); + assertEquals(brokerResponseNative.getNumSegmentsProcessed(), 4L); + assertEquals(brokerResponseNative.getNumSegmentsMatched(), 4L); + assertEquals(brokerResponseNative.getNumEntriesScannedPostFilter(), 400000L); + assertEquals(brokerResponseNative.getNumEntriesScannedInFilter(), 0L); + assertEquals(resultTable.getDataSchema(), new DataSchema(new String[]{"arraylength(column7)"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.INT})); + resultRows = resultTable.getRows(); + previousVal = -1; + for (Object[] resultRow : resultRows) { + assertEquals(resultRow.length, 1); + assertTrue((int) resultRow[0] >= previousVal); + previousVal = (int) resultRow[0]; + } } @Test @@ -478,6 +693,54 @@ public void testSelectWithAggregationQueriesWithReload() assertTrue(e.getMessage().contains("Forward index disabled for column:") && e.getMessage().contains("cannot create DataFetcher!")); } + + // Re-enable forward index for column7 and column6 + reenableForwardIndexForSomeColumns(); + + query = "SELECT MAX(ARRAYLENGTH(column7)) from testTable LIMIT 10"; + brokerResponseNative = getBrokerResponse(query); + assertTrue(brokerResponseNative.getProcessingExceptions() == null + || brokerResponseNative.getProcessingExceptions().size() == 0); + resultTable = brokerResponseNative.getResultTable(); + assertEquals(brokerResponseNative.getNumRowsResultSet(), 1); + assertEquals(brokerResponseNative.getTotalDocs(), 400_000L); + assertEquals(brokerResponseNative.getNumDocsScanned(), 400_000L); + assertEquals(brokerResponseNative.getNumSegmentsProcessed(), 4L); + 
assertEquals(brokerResponseNative.getNumSegmentsMatched(), 4L); + assertEquals(brokerResponseNative.getNumEntriesScannedPostFilter(), 400_000L); + assertEquals(brokerResponseNative.getNumEntriesScannedInFilter(), 0L); + assertEquals(resultTable.getDataSchema(), new DataSchema(new String[]{"max(arraylength(column7))"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.DOUBLE})); + resultRows = resultTable.getRows(); + for (Object[] resultRow : resultRows) { + assertEquals(resultRow.length, 1); + assertEquals(resultRow[0], 24.0); + } + + // Not allowed aggregation functions on non-forwardIndexDisabled columns + query = "SELECT summv(column7), avgmv(column7), summv(column6) from testTable"; + brokerResponseNative = getBrokerResponse(query); + assertTrue(brokerResponseNative.getProcessingExceptions() == null + || brokerResponseNative.getProcessingExceptions().size() == 0); + resultTable = brokerResponseNative.getResultTable(); + assertEquals(brokerResponseNative.getNumRowsResultSet(), 1); + assertEquals(brokerResponseNative.getTotalDocs(), 400_000L); + assertEquals(brokerResponseNative.getNumDocsScanned(), 400_000L); + assertEquals(brokerResponseNative.getNumSegmentsProcessed(), 4L); + assertEquals(brokerResponseNative.getNumSegmentsMatched(), 4L); + assertEquals(brokerResponseNative.getNumEntriesScannedPostFilter(), 800_000L); + assertEquals(brokerResponseNative.getNumEntriesScannedInFilter(), 0L); + assertEquals(resultTable.getDataSchema(), new DataSchema(new String[]{"summv(column7)", "avgmv(column7)", + "summv(column6)"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.DOUBLE, DataSchema.ColumnDataType.DOUBLE, + DataSchema.ColumnDataType.DOUBLE})); + resultRows = resultTable.getRows(); + for (Object[] resultRow : resultRows) { + assertEquals(resultRow.length, 3); + assertEquals(resultRow[0], 4.28972873682684E14); + assertEquals(resultRow[1], 7.997853562582668E8); + assertEquals(resultRow[2], 4.8432460181028E14); + } } private void 
disableForwardIndexForSomeColumns() @@ -510,4 +773,41 @@ private void disableForwardIndexForSomeColumns() assertNotNull(immutableSegment2.getInvertedIndex("column7")); assertNotNull(immutableSegment2.getDictionary("column7")); } + + private void reenableForwardIndexForSomeColumns() + throws Exception { + // Now re-enable forward index for column7 in the index loading config. + // Also re-enable forward index for column6 + IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(); + indexLoadingConfig.setTableConfig(_tableConfig); + Set invertedIndexEnabledColumns = new HashSet<>(_invertedIndexColumns); + indexLoadingConfig.setInvertedIndexColumns(invertedIndexEnabledColumns); + Set forwardIndexDisabledColumns = new HashSet<>(_forwardIndexDisabledColumns); + forwardIndexDisabledColumns.remove("column6"); + indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + indexLoadingConfig.getNoDictionaryColumns().add("column7"); + indexLoadingConfig.setReadMode(ReadMode.heap); + + // Reload the segments to pick up the new configs + File indexDir = new File(INDEX_DIR, SEGMENT_NAME_1); + ImmutableSegment immutableSegment1 = reloadSegment(indexDir, indexLoadingConfig, SCHEMA); + indexDir = new File(INDEX_DIR, SEGMENT_NAME_2); + ImmutableSegment immutableSegment2 = reloadSegment(indexDir, indexLoadingConfig, SCHEMA); + _indexSegment = immutableSegment1; + _indexSegments = Arrays.asList(immutableSegment1, immutableSegment2); + + assertNotNull(immutableSegment1.getForwardIndex("column7")); + assertNull(immutableSegment1.getInvertedIndex("column7")); + assertNull(immutableSegment1.getDictionary("column7")); + assertNotNull(immutableSegment1.getForwardIndex("column6")); + assertNotNull(immutableSegment1.getInvertedIndex("column6")); + assertNotNull(immutableSegment1.getDictionary("column6")); + + assertNotNull(immutableSegment2.getForwardIndex("column7")); + assertNull(immutableSegment2.getInvertedIndex("column7")); + 
assertNull(immutableSegment2.getDictionary("column7")); + assertNotNull(immutableSegment2.getForwardIndex("column6")); + assertNotNull(immutableSegment2.getInvertedIndex("column6")); + assertNotNull(immutableSegment2.getDictionary("column6")); + } } diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/ForwardIndexDisabledSingleValueQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/ForwardIndexDisabledSingleValueQueriesTest.java index b40ea15d79f2..51718d06e7ea 100644 --- a/pinot-core/src/test/java/org/apache/pinot/queries/ForwardIndexDisabledSingleValueQueriesTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/queries/ForwardIndexDisabledSingleValueQueriesTest.java @@ -845,6 +845,146 @@ public void testSelectQueriesWithReload() assertEquals(resultRow[0], 240528); assertEquals(resultRow[1], "gFuH"); } + + // Re-enable forward index for column9, column11, and column6 + reenableForwardIndexForSomeColumns(); + + // The first query should work now + query = "SELECT column1, column5, column9, column11, column6 FROM testTable WHERE column6 < 2147458029 AND " + + "column6 > 1699000 ORDER BY column1"; + brokerResponseNative = getBrokerResponse(query); + assertTrue(brokerResponseNative.getProcessingExceptions() == null + || brokerResponseNative.getProcessingExceptions().size() == 0); + resultTable = brokerResponseNative.getResultTable(); + assertEquals(brokerResponseNative.getNumRowsResultSet(), 10); + assertEquals(brokerResponseNative.getTotalDocs(), 120_000L); + assertEquals(brokerResponseNative.getNumDocsScanned(), 119_980L); + assertEquals(brokerResponseNative.getNumSegmentsProcessed(), 4L); + assertEquals(brokerResponseNative.getNumSegmentsMatched(), 4L); + assertEquals(brokerResponseNative.getNumEntriesScannedPostFilter(), 120_140L); + assertEquals(brokerResponseNative.getNumEntriesScannedInFilter(), 0L); + assertNotNull(brokerResponseNative.getProcessingExceptions()); + 
assertEquals(brokerResponseNative.getProcessingExceptions().size(), 0); + dataSchema = new DataSchema(new String[]{"column1", "column5", "column9", "column11", "column6"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.INT, DataSchema.ColumnDataType.STRING, + DataSchema.ColumnDataType.INT, DataSchema.ColumnDataType.STRING, DataSchema.ColumnDataType.INT}); + assertEquals(resultTable.getDataSchema(), dataSchema); + resultRows = resultTable.getRows(); + previousColumn1 = Integer.MIN_VALUE; + for (Object[] resultRow : resultRows) { + assertEquals(resultRow.length, 5); + assertEquals((String) resultRow[1], "gFuH"); + assertTrue((Integer) resultRow[0] >= previousColumn1); + previousColumn1 = (Integer) resultRow[0]; + } + firstRow = resultRows.get(0); + // Column 11 + assertEquals((String) firstRow[3], "o"); + + // Transform function on a filter clause for a non-forwardIndexDisabled column in transform + query = "SELECT column1, column11 from testTable WHERE CONCAT(column5, ADD(column9, column1), '-') = " + + "'gFuH-2.96708164E8' ORDER BY column1"; + brokerResponseNative = getBrokerResponse(query); + assertTrue(brokerResponseNative.getProcessingExceptions() == null + || brokerResponseNative.getProcessingExceptions().size() == 0); + resultTable = brokerResponseNative.getResultTable(); + assertEquals(brokerResponseNative.getNumRowsResultSet(), 10); + assertEquals(brokerResponseNative.getTotalDocs(), 120_000L); + assertEquals(brokerResponseNative.getNumDocsScanned(), 28L); + assertEquals(brokerResponseNative.getNumSegmentsProcessed(), 4L); + assertEquals(brokerResponseNative.getNumSegmentsMatched(), 4L); + assertEquals(brokerResponseNative.getNumEntriesScannedPostFilter(), 56L); + assertEquals(brokerResponseNative.getNumEntriesScannedInFilter(), 120000L); + assertNotNull(brokerResponseNative.getProcessingExceptions()); + assertEquals(brokerResponseNative.getProcessingExceptions().size(), 0); + assertEquals(resultTable.getDataSchema(), new DataSchema(new 
String[]{"column1", "column11"}, + new DataSchema.ColumnDataType[]{ + DataSchema.ColumnDataType.INT, DataSchema.ColumnDataType.STRING + })); + resultRows = resultTable.getRows(); + assertEquals(resultRows.size(), 10); + for (Object[] resultRow : resultRows) { + assertEquals(resultRow.length, 2); + assertEquals(resultRow[0], 240528); + assertEquals(resultRow[1], "o"); + } + } + + @Test + public void testSelectAllResultsQueryWithReload() + throws Exception { + // Select query with order by on column9 with limit == totalDocs + String query = "SELECT column9 FROM testTable ORDER BY column9 LIMIT 120000"; + BrokerResponseNative brokerResponseNative = getBrokerResponse(query); + assertTrue(brokerResponseNative.getProcessingExceptions() == null + || brokerResponseNative.getProcessingExceptions().size() == 0); + ResultTable resultTable = brokerResponseNative.getResultTable(); + assertEquals(brokerResponseNative.getNumRowsResultSet(), 120_000); + assertEquals(brokerResponseNative.getTotalDocs(), 120_000L); + assertEquals(brokerResponseNative.getNumDocsScanned(), 120_000L); + assertEquals(brokerResponseNative.getNumSegmentsProcessed(), 4L); + assertEquals(brokerResponseNative.getNumSegmentsMatched(), 4L); + assertEquals(brokerResponseNative.getNumEntriesScannedPostFilter(), 120_000L); + assertEquals(brokerResponseNative.getNumEntriesScannedInFilter(), 0L); + assertNotNull(brokerResponseNative.getProcessingExceptions()); + assertEquals(brokerResponseNative.getProcessingExceptions().size(), 0); + DataSchema dataSchema = new DataSchema(new String[]{"column9"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.INT}); + assertEquals(resultTable.getDataSchema(), dataSchema); + List resultRowsBeforeDisabling = resultTable.getRows(); + int previousColumn9 = Integer.MIN_VALUE; + for (Object[] resultRow : resultRowsBeforeDisabling) { + assertEquals(resultRow.length, 1); + assertTrue((Integer) resultRow[0] >= previousColumn9); + previousColumn9 = (Integer) resultRow[0]; + } + 
+ // Disable forward index for columns: column9 and column11 + disableForwardIndexForSomeColumns(); + + // Run the same query and validate that an exception is thrown since we are running select query on forward index + // disabled column + try { + getBrokerResponse(query); + Assert.fail("Query should fail since forwardIndexDisabled on column9 and is on select list"); + } catch (IllegalStateException e) { + assertTrue(e.getMessage().contains("Forward index disabled for column:") + && e.getMessage().contains("cannot create DataFetcher!")); + } + + // Re-enable forward index for column9, column11, and column6 + reenableForwardIndexForSomeColumns(); + + // The first query should work now + brokerResponseNative = getBrokerResponse(query); + assertTrue(brokerResponseNative.getProcessingExceptions() == null + || brokerResponseNative.getProcessingExceptions().size() == 0); + resultTable = brokerResponseNative.getResultTable(); + assertEquals(brokerResponseNative.getNumRowsResultSet(), 120_000); + assertEquals(brokerResponseNative.getTotalDocs(), 120_000L); + assertEquals(brokerResponseNative.getNumDocsScanned(), 120_000L); + assertEquals(brokerResponseNative.getNumSegmentsProcessed(), 4L); + assertEquals(brokerResponseNative.getNumSegmentsMatched(), 4L); + assertEquals(brokerResponseNative.getNumEntriesScannedPostFilter(), 120_000L); + assertEquals(brokerResponseNative.getNumEntriesScannedInFilter(), 0L); + assertNotNull(brokerResponseNative.getProcessingExceptions()); + assertEquals(brokerResponseNative.getProcessingExceptions().size(), 0); + dataSchema = new DataSchema(new String[]{"column9"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.INT}); + assertEquals(resultTable.getDataSchema(), dataSchema); + List resultRowsAfterReenabling = resultTable.getRows(); + // Validate that the result row size before disabling the forward index matches the result row size after + // re-enabling the forward index + assertEquals(resultRowsAfterReenabling.size(), 
resultRowsBeforeDisabling.size()); + previousColumn9 = Integer.MIN_VALUE; + for (int i = 0; i < resultRowsAfterReenabling.size(); i++) { + Object[] resultRow = resultRowsAfterReenabling.get(i); + assertEquals(resultRow.length, 1); + assertTrue((Integer) resultRow[0] >= previousColumn9); + previousColumn9 = (Integer) resultRow[0]; + // Validate that the value of result row matches the value at this index before forward index was disabled + assertEquals(resultRow[0], resultRowsBeforeDisabling.get(i)[0]); + } } @Test @@ -931,6 +1071,36 @@ public void testSelectWithDistinctQueriesWithReload() } catch (IllegalStateException e) { assertEquals(e.getMessage(), "Forward index disabled for column: column9, cannot create DataFetcher!"); } + + // Re-enable forward index for column9, column11, and column6 + reenableForwardIndexForSomeColumns(); + + // Select non-forwardIndexDisabled columns with distinct + query = "SELECT DISTINCT column6, column5, column9 FROM testTable ORDER BY column6 LIMIT 10"; + brokerResponseNative = getBrokerResponse(query); + assertTrue(brokerResponseNative.getProcessingExceptions() == null + || brokerResponseNative.getProcessingExceptions().size() == 0); + resultTable = brokerResponseNative.getResultTable(); + assertEquals(brokerResponseNative.getNumRowsResultSet(), 10); + assertEquals(brokerResponseNative.getTotalDocs(), 120_000L); + assertEquals(brokerResponseNative.getNumDocsScanned(), 120_000L); + assertEquals(brokerResponseNative.getNumSegmentsProcessed(), 4L); + assertEquals(brokerResponseNative.getNumSegmentsMatched(), 4L); + assertEquals(brokerResponseNative.getNumEntriesScannedPostFilter(), 360_000L); + assertEquals(brokerResponseNative.getNumEntriesScannedInFilter(), 0L); + assertNotNull(brokerResponseNative.getProcessingExceptions()); + assertEquals(brokerResponseNative.getProcessingExceptions().size(), 0); + assertEquals(resultTable.getDataSchema(), new DataSchema(new String[]{"column6", "column5", "column9"}, + new 
DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.INT, DataSchema.ColumnDataType.STRING, + DataSchema.ColumnDataType.INT})); + resultRows = resultTable.getRows(); + previousColumn1 = Integer.MIN_VALUE; + for (Object[] resultRow : resultRows) { + assertEquals(resultRow.length, 3); + assertEquals(resultRow[1], "gFuH"); + assertTrue((Integer) resultRow[0] >= previousColumn1); + previousColumn1 = (Integer) resultRow[0]; + } } @Test @@ -1164,6 +1334,61 @@ public void testSelectWithGroupByOrderByQueriesWithReload() } catch (IllegalStateException e) { assertEquals(e.getMessage(), "Forward index disabled for column: column9, cannot create DataFetcher!"); } + + // Re-enable forward index for column9, column11, and column6 + reenableForwardIndexForSomeColumns(); + + // Select non-forwardIndexDisabled columns using nested transform with group by order by + query = "SELECT CONCAT(ADD(column6, column9), column5, '-') FROM testTable GROUP BY " + + "CONCAT(ADD(column6, column9), column5, '-') ORDER BY CONCAT(ADD(column6, column9), column5, '-') LIMIT 10"; + brokerResponseNative = getBrokerResponse(query); + assertTrue(brokerResponseNative.getProcessingExceptions() == null + || brokerResponseNative.getProcessingExceptions().size() == 0); + resultTable = brokerResponseNative.getResultTable(); + assertEquals(brokerResponseNative.getNumRowsResultSet(), 10); + assertEquals(brokerResponseNative.getTotalDocs(), 120_000L); + assertEquals(brokerResponseNative.getNumDocsScanned(), 120000L); + assertEquals(brokerResponseNative.getNumSegmentsProcessed(), 4L); + assertEquals(brokerResponseNative.getNumSegmentsMatched(), 4L); + assertEquals(brokerResponseNative.getNumEntriesScannedPostFilter(), 360000L); + assertEquals(brokerResponseNative.getNumEntriesScannedInFilter(), 0L); + assertNotNull(brokerResponseNative.getProcessingExceptions()); + assertEquals(brokerResponseNative.getProcessingExceptions().size(), 0); + assertEquals(resultTable.getDataSchema(), new DataSchema(new 
String[]{"concat(add(column6,column9),column5,'-')"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.STRING})); + resultRows = resultTable.getRows(); + for (Object[] resultRow : resultRows) { + assertEquals(resultRow.length, 1); + assertTrue(resultRow[0].toString().endsWith("-gFuH")); + } + + // Select non-forwardIndexDisabled columns with group by order by + query = "SELECT column9, column5, column6 FROM testTable GROUP BY column9, column5, column6 ORDER BY column9, " + + "column5, column6 LIMIT 10"; + brokerResponseNative = getBrokerResponse(query); + assertTrue(brokerResponseNative.getProcessingExceptions() == null + || brokerResponseNative.getProcessingExceptions().size() == 0); + resultTable = brokerResponseNative.getResultTable(); + assertEquals(brokerResponseNative.getNumRowsResultSet(), 10); + assertEquals(brokerResponseNative.getTotalDocs(), 120_000L); + assertEquals(brokerResponseNative.getNumDocsScanned(), 120000L); + assertEquals(brokerResponseNative.getNumSegmentsProcessed(), 4L); + assertEquals(brokerResponseNative.getNumSegmentsMatched(), 4L); + assertEquals(brokerResponseNative.getNumEntriesScannedPostFilter(), 360000L); + assertEquals(brokerResponseNative.getNumEntriesScannedInFilter(), 0L); + assertNotNull(brokerResponseNative.getProcessingExceptions()); + assertEquals(brokerResponseNative.getProcessingExceptions().size(), 0); + assertEquals(resultTable.getDataSchema(), new DataSchema(new String[]{"column9", "column5", "column6"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.INT, DataSchema.ColumnDataType.STRING, + DataSchema.ColumnDataType.INT})); + resultRows = resultTable.getRows(); + int previousVal = -1; + for (Object[] resultRow : resultRows) { + assertEquals(resultRow.length, 3); + assertEquals(resultRow[1], "gFuH"); + assertTrue((int) resultRow[0] >= previousVal); + previousVal = (int) resultRow[0]; + } } @Test @@ -1540,6 +1765,35 @@ public void testSelectWithAggregationQueriesWithReload() 
assertEquals(resultRow.length, 1); assertEquals(resultRow[0], 2.146952047E9); } + + // Re-enable forward index for column9, column11, and column6 + reenableForwardIndexForSomeColumns(); + + // Transform inside aggregation not involving any forwardIndexDisabled column with group by order by + query = "SELECT column6, MAX(ADD(column1, column9)) from testTable GROUP BY column6 ORDER BY column6 " + + "DESC LIMIT 10"; + brokerResponseNative = getBrokerResponse(query); + assertTrue(brokerResponseNative.getProcessingExceptions() == null + || brokerResponseNative.getProcessingExceptions().size() == 0); + resultTable = brokerResponseNative.getResultTable(); + assertEquals(brokerResponseNative.getNumRowsResultSet(), 10); + assertEquals(brokerResponseNative.getTotalDocs(), 120_000L); + assertEquals(brokerResponseNative.getNumDocsScanned(), 120_000L); + assertEquals(brokerResponseNative.getNumSegmentsProcessed(), 4L); + assertEquals(brokerResponseNative.getNumSegmentsMatched(), 4L); + assertEquals(brokerResponseNative.getNumEntriesScannedPostFilter(), 360000L); + assertEquals(brokerResponseNative.getNumEntriesScannedInFilter(), 0L); + assertNotNull(brokerResponseNative.getProcessingExceptions()); + assertEquals(brokerResponseNative.getProcessingExceptions().size(), 0); + assertEquals(resultTable.getDataSchema(), new DataSchema(new String[]{"column6", "max(add(column1,column9))"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.INT, DataSchema.ColumnDataType.DOUBLE})); + resultRows = resultTable.getRows(); + previousVal = Integer.MAX_VALUE; + for (Object[] resultRow : resultRows) { + assertEquals(resultRow.length, 2); + assertTrue((int) resultRow[0] <= previousVal); + previousVal = (int) resultRow[0]; + } } @Test @@ -1637,6 +1891,36 @@ public void testSelectWithAggregationGroupByHavingWithReload() } catch (IllegalStateException e) { assertEquals(e.getMessage(), "Forward index disabled for column: column9, cannot create DataFetcher!"); } + + // Re-enable forward 
index for column9, column11, and column6 + reenableForwardIndexForSomeColumns(); + + // forwardIndexDisabled column not used in HAVING clause or aggregation select + query = "SELECT min(column9), sum(column6), column1 from testTable GROUP BY column1, column9 HAVING min(column9) " + + "> 11270 ORDER BY column9 DESC LIMIT 10"; + brokerResponseNative = getBrokerResponse(query); + assertTrue(brokerResponseNative.getProcessingExceptions() == null + || brokerResponseNative.getProcessingExceptions().size() == 0); + resultTable = brokerResponseNative.getResultTable(); + assertEquals(brokerResponseNative.getNumRowsResultSet(), 10); + assertEquals(brokerResponseNative.getTotalDocs(), 120_000L); + assertEquals(brokerResponseNative.getNumDocsScanned(), 120_000L); + assertEquals(brokerResponseNative.getNumSegmentsProcessed(), 4L); + assertEquals(brokerResponseNative.getNumSegmentsMatched(), 4L); + assertEquals(brokerResponseNative.getNumEntriesScannedPostFilter(), 360000L); + assertEquals(brokerResponseNative.getNumEntriesScannedInFilter(), 0L); + assertNotNull(brokerResponseNative.getProcessingExceptions()); + assertEquals(brokerResponseNative.getProcessingExceptions().size(), 0); + assertEquals(resultTable.getDataSchema(), new DataSchema(new String[]{"min(column9)", "sum(column6)", "column1"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.DOUBLE, DataSchema.ColumnDataType.DOUBLE, + DataSchema.ColumnDataType.INT})); + resultRows = resultTable.getRows(); + previousVal = Double.MAX_VALUE; + for (Object[] resultRow : resultRows) { + assertEquals(resultRow.length, 3); + assertTrue((double) resultRow[0] <= previousVal); + previousVal = (double) resultRow[0]; + } } private void disableForwardIndexForSomeColumns() @@ -1665,17 +1949,63 @@ private void disableForwardIndexForSomeColumns() _indexSegments = Arrays.asList(immutableSegment1, immutableSegment2); assertNull(immutableSegment1.getForwardIndex("column9")); - 
assertNull(immutableSegment1.getForwardIndex("column11")); assertNotNull(immutableSegment1.getInvertedIndex("column9")); - assertNotNull(immutableSegment1.getInvertedIndex("column11")); assertNotNull(immutableSegment1.getDictionary("column9")); + assertNull(immutableSegment1.getForwardIndex("column11")); + assertNotNull(immutableSegment1.getInvertedIndex("column11")); assertNotNull(immutableSegment1.getDictionary("column11")); assertNull(immutableSegment2.getForwardIndex("column9")); - assertNull(immutableSegment2.getForwardIndex("column11")); assertNotNull(immutableSegment2.getInvertedIndex("column9")); - assertNotNull(immutableSegment2.getInvertedIndex("column11")); assertNotNull(immutableSegment2.getDictionary("column9")); + assertNull(immutableSegment2.getForwardIndex("column11")); + assertNotNull(immutableSegment2.getInvertedIndex("column11")); + assertNotNull(immutableSegment2.getDictionary("column11")); + } + + private void reenableForwardIndexForSomeColumns() + throws Exception { + // Now re-enable forward index for column9 and column11 in the index loading config, while disabling inverted index + // and range index for column9. column11 already had inverted index enabled so leave it as is. 
+ // Also re-enable forward index for column6 + IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(); + indexLoadingConfig.setTableConfig(_tableConfig); + Set invertedIndexEnabledColumns = new HashSet<>(_invertedIndexColumns); + invertedIndexEnabledColumns.remove("column9"); + indexLoadingConfig.setInvertedIndexColumns(invertedIndexEnabledColumns); + Set forwardIndexDisabledColumns = new HashSet<>(_forwardIndexDisabledColumns); + forwardIndexDisabledColumns.remove("column6"); + indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + indexLoadingConfig.setRangeIndexColumns(new HashSet<>(Collections.singletonList("column6"))); + indexLoadingConfig.getNoDictionaryColumns().add("column9"); + indexLoadingConfig.setReadMode(ReadMode.heap); + + // Reload the segments to pick up the new configs + File indexDir = new File(INDEX_DIR, SEGMENT_NAME_1); + ImmutableSegment immutableSegment1 = reloadSegment(indexDir, indexLoadingConfig, SCHEMA); + indexDir = new File(INDEX_DIR, SEGMENT_NAME_2); + ImmutableSegment immutableSegment2 = reloadSegment(indexDir, indexLoadingConfig, SCHEMA); + _indexSegment = immutableSegment1; + _indexSegments = Arrays.asList(immutableSegment1, immutableSegment2); + + assertNotNull(immutableSegment1.getForwardIndex("column9")); + assertNull(immutableSegment1.getInvertedIndex("column9")); + assertNull(immutableSegment1.getDictionary("column9")); + assertNotNull(immutableSegment1.getForwardIndex("column11")); + assertNotNull(immutableSegment1.getInvertedIndex("column11")); + assertNotNull(immutableSegment1.getDictionary("column11")); + assertNotNull(immutableSegment1.getForwardIndex("column6")); + assertNotNull(immutableSegment1.getInvertedIndex("column6")); + assertNotNull(immutableSegment1.getDictionary("column6")); + + assertNotNull(immutableSegment2.getForwardIndex("column9")); + assertNull(immutableSegment2.getInvertedIndex("column9")); + assertNull(immutableSegment2.getDictionary("column9")); + 
assertNotNull(immutableSegment2.getForwardIndex("column11")); + assertNotNull(immutableSegment2.getInvertedIndex("column11")); assertNotNull(immutableSegment2.getDictionary("column11")); + assertNotNull(immutableSegment2.getForwardIndex("column6")); + assertNotNull(immutableSegment2.getInvertedIndex("column6")); + assertNotNull(immutableSegment2.getDictionary("column6")); } } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/BaseIndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/BaseIndexHandler.java new file mode 100644 index 000000000000..c2314e771c72 --- /dev/null +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/BaseIndexHandler.java @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.pinot.segment.local.segment.index.loader; + +import com.google.common.base.Preconditions; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; +import org.apache.pinot.segment.spi.ColumnMetadata; +import org.apache.pinot.segment.spi.SegmentMetadata; +import org.apache.pinot.segment.spi.creator.IndexCreatorProvider; +import org.apache.pinot.segment.spi.store.ColumnIndexType; +import org.apache.pinot.segment.spi.store.SegmentDirectory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Base class for all of the {@link IndexHandler} classes. This class provides a mechanism to rebuild the forward + * index if the forward index does not exist and is required to rebuild the index of interest. It also handles cleaning + * up the forward index if temporarily built once all handlers have completed via overriding the + * postUpdateIndicesCleanup() method. For {@link IndexHandler} classes which do not utilize the forward index or do not + * need this behavior, the postUpdateIndicesCleanup() method can be overridden to be a no-op. + */ +public abstract class BaseIndexHandler implements IndexHandler { + private static final Logger LOGGER = LoggerFactory.getLogger(BaseIndexHandler.class); + + protected final SegmentMetadata _segmentMetadata; + protected final IndexLoadingConfig _indexLoadingConfig; + protected final Set _tmpForwardIndexColumns; + + public BaseIndexHandler(SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig) { + _segmentMetadata = segmentMetadata; + _indexLoadingConfig = indexLoadingConfig; + _tmpForwardIndexColumns = new HashSet<>(); + } + + @Override + public void postUpdateIndicesCleanup(SegmentDirectory.Writer segmentWriter) + throws Exception { + // Delete the forward index for columns which have it disabled. 
Perform this as a post-processing step after all + // IndexHandlers have updated their indexes as some of them need to temporarily create a forward index to + // generate other indexes off of. + for (String column : _tmpForwardIndexColumns) { + segmentWriter.removeIndex(column, ColumnIndexType.FORWARD_INDEX); + } + } + + protected void createForwardIndexIfNeeded(SegmentDirectory.Writer segmentWriter, ColumnMetadata columnMetadata, + IndexCreatorProvider indexCreatorProvider, boolean isTemporaryForwardIndex) + throws IOException { + String columnName = columnMetadata.getColumnName(); + if (segmentWriter.hasIndexFor(columnName, ColumnIndexType.FORWARD_INDEX)) { + LOGGER.info("Forward index already exists for column: {}, skip trying to create it", columnName); + return; + } + + // If forward index is disabled it means that it has to be dictionary based and the inverted index must exist. + Preconditions.checkState(segmentWriter.hasIndexFor(columnName, ColumnIndexType.DICTIONARY), + String.format("Forward index disabled column %s must have a dictionary", columnName)); + Preconditions.checkState(segmentWriter.hasIndexFor(columnName, ColumnIndexType.INVERTED_INDEX), + String.format("Forward index disabled column %s must have an inverted index", columnName)); + + LOGGER.info("Rebuilding the forward index for column: {}, is temporary: {}", columnName, isTemporaryForwardIndex); + InvertedIndexAndDictionaryBasedForwardIndexCreator invertedIndexAndDictionaryBasedForwardIndexCreator = + new InvertedIndexAndDictionaryBasedForwardIndexCreator(columnName, _segmentMetadata, _indexLoadingConfig, + segmentWriter, indexCreatorProvider, isTemporaryForwardIndex); + invertedIndexAndDictionaryBasedForwardIndexCreator.regenerateForwardIndex(); + + // Validate that the forward index is created. 
+ if (!segmentWriter.hasIndexFor(columnName, ColumnIndexType.FORWARD_INDEX)) { + throw new IOException(String.format("Forward index was not created for column: %s, is temporary: %s", columnName, + isTemporaryForwardIndex ? "true" : "false")); + } + + if (isTemporaryForwardIndex) { + _tmpForwardIndexColumns.add(columnName); + } + + LOGGER.info("Rebuilt the forward index for column: {}, is temporary: {}", columnName, isTemporaryForwardIndex); + } +} diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java index f9331b617421..53e8a8760d1b 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java @@ -22,9 +22,12 @@ import com.google.common.base.Preconditions; import java.io.File; import java.io.FileOutputStream; +import java.io.IOException; import java.math.BigDecimal; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import javax.annotation.Nullable; @@ -79,33 +82,33 @@ * 2. Enable dictionary * 3. Disable dictionary * 4. Disable forward index + * 5. Rebuild the forward index for a forwardIndexDisabled column * * TODO: Add support for the following: * 1. Segment versions < V3 - * 2. 
Enable forward index on a forward index disabled column */ -public class ForwardIndexHandler implements IndexHandler { +public class ForwardIndexHandler extends BaseIndexHandler { private static final Logger LOGGER = LoggerFactory.getLogger(ForwardIndexHandler.class); - private final SegmentMetadata _segmentMetadata; - private final IndexLoadingConfig _indexLoadingConfig; + // This should contain a list of all indexes that need to be rewritten if the dictionary is enabled or disabled + private static final List DICTIONARY_BASED_INDEXES_TO_REWRITE = + Arrays.asList(ColumnIndexType.RANGE_INDEX, ColumnIndexType.FST_INDEX, ColumnIndexType.INVERTED_INDEX); + private final Schema _schema; - private final Set _forwardIndexDisabledColumnsToCleanup; protected enum Operation { - // TODO: Add other operations like ADD_FORWARD_INDEX_FOR_DICT_COLUMN, ADD_FORWARD_INDEX_FOR_RAW_COLUMN DISABLE_FORWARD_INDEX_FOR_DICT_COLUMN, DISABLE_FORWARD_INDEX_FOR_RAW_COLUMN, + ENABLE_FORWARD_INDEX_FOR_DICT_COLUMN, + ENABLE_FORWARD_INDEX_FOR_RAW_COLUMN, ENABLE_DICTIONARY, DISABLE_DICTIONARY, CHANGE_RAW_INDEX_COMPRESSION_TYPE, } public ForwardIndexHandler(SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig, Schema schema) { - _segmentMetadata = segmentMetadata; - _indexLoadingConfig = indexLoadingConfig; + super(segmentMetadata, indexLoadingConfig); _schema = schema; - _forwardIndexDisabledColumnsToCleanup = new HashSet<>(); } @Override @@ -132,7 +135,7 @@ public void updateIndices(SegmentDirectory.Writer segmentWriter, IndexCreatorPro // Deletion of the forward index will be handled outside the index handler to ensure that other index // handlers that need the forward index to construct their own indexes will have it available. // The existing forward index must be in dictionary format for this to be a no-op. 
- _forwardIndexDisabledColumnsToCleanup.add(column); + _tmpForwardIndexColumns.add(column); break; } case DISABLE_FORWARD_INDEX_FOR_RAW_COLUMN: { @@ -142,9 +145,29 @@ public void updateIndices(SegmentDirectory.Writer segmentWriter, IndexCreatorPro // forward index here which is dictionary based and allow the post deletion step handle the actual deletion // of the forward index. createDictBasedForwardIndex(column, segmentWriter, indexCreatorProvider); - Preconditions.checkState(segmentWriter.hasIndexFor(column, ColumnIndexType.FORWARD_INDEX), - String.format("Temporary forward index was not created for column: %s", column)); - _forwardIndexDisabledColumnsToCleanup.add(column); + if (!segmentWriter.hasIndexFor(column, ColumnIndexType.FORWARD_INDEX)) { + throw new IOException(String.format("Temporary forward index was not created for column: %s", column)); + } + _tmpForwardIndexColumns.add(column); + break; + } + case ENABLE_FORWARD_INDEX_FOR_DICT_COLUMN: { + createForwardIndexIfNeeded(segmentWriter, _segmentMetadata.getColumnMetadataFor(column), indexCreatorProvider, + false); + if (!segmentWriter.hasIndexFor(column, ColumnIndexType.DICTIONARY)) { + throw new IOException( + String.format("Dictionary should still exist after rebuilding forward index for dictionary column: %s", + column)); + } + break; + } + case ENABLE_FORWARD_INDEX_FOR_RAW_COLUMN: { + createForwardIndexIfNeeded(segmentWriter, _segmentMetadata.getColumnMetadataFor(column), indexCreatorProvider, + false); + if (segmentWriter.hasIndexFor(column, ColumnIndexType.DICTIONARY)) { + throw new IOException( + String.format("Dictionary should not exist after rebuilding forward index for raw column: %s", column)); + } break; } case ENABLE_DICTIONARY: { @@ -165,17 +188,6 @@ public void updateIndices(SegmentDirectory.Writer segmentWriter, IndexCreatorPro } } - @Override - public void postUpdateIndicesCleanup(SegmentDirectory.Writer segmentWriter) - throws Exception { - // Delete the forward index for columns which 
have it disabled. Perform this as a post-processing step after all - // IndexHandlers have updated their indexes as some of them need to temporarily create a forward index to - // generate other indexes off of. - for (String column : _forwardIndexDisabledColumnsToCleanup) { - segmentWriter.removeIndex(column, ColumnIndexType.FORWARD_INDEX); - } - } - @VisibleForTesting Map computeOperation(SegmentDirectory.Reader segmentReader) throws Exception { @@ -232,14 +244,27 @@ Map computeOperation(SegmentDirectory.Reader segmentReader) } else { columnOperationMap.put(column, Operation.DISABLE_FORWARD_INDEX_FOR_RAW_COLUMN); } - } else if (existingForwardIndexDisabledColumns.contains(column) && !newForwardIndexDisabledColumns.contains( - column)) { - // TODO: Add support: existing column has its forward index disabled. New column config enables the forward - // index - throw new UnsupportedOperationException(String.format("Recreating forward index for column: %s is not yet " - + "supported. Please backfill or refresh the data for now.", column)); - } else if (existingForwardIndexDisabledColumns.contains(column) && newForwardIndexDisabledColumns.contains( - column)) { + } else if (existingForwardIndexDisabledColumns.contains(column) + && !newForwardIndexDisabledColumns.contains(column)) { + // Existing column does not have a forward index. New column config enables the forward index + ColumnMetadata columnMetadata = _segmentMetadata.getColumnMetadataFor(column); + if (columnMetadata != null && columnMetadata.isSorted()) { + // Check if the column is sorted. If sorted, disabling forward index should be a no-op and forward index + // should already exist. Do not return an operation for this column related to enabling forward index. 
+ LOGGER.warn("Trying to enable the forward index for a sorted column {}, ignoring", column); + continue; + } + + if (newNoDictColumns.contains(column)) { + Preconditions.checkState(!_indexLoadingConfig.getInvertedIndexColumns().contains(column), + String.format("Must disable inverted index to enable the forward index as noDictionary for column: %s", + column)); + columnOperationMap.put(column, Operation.ENABLE_FORWARD_INDEX_FOR_RAW_COLUMN); + } else { + columnOperationMap.put(column, Operation.ENABLE_FORWARD_INDEX_FOR_DICT_COLUMN); + } + } else if (existingForwardIndexDisabledColumns.contains(column) + && newForwardIndexDisabledColumns.contains(column)) { // Forward index is disabled for the existing column and should remain disabled based on the latest config Preconditions.checkState(existingDictColumns.contains(column) && !newNoDictColumns.contains(column), String.format("Not allowed to disable the dictionary for a column: %s without forward index", column)); @@ -317,11 +342,7 @@ private boolean shouldChangeCompressionType(String column, SegmentDirectory.Read // Note that default compression type (PASS_THROUGH for metric and LZ4 for dimension) is not considered if the // compressionType is not explicitly provided in tableConfig. This is to avoid incorrectly rewriting all the // forward indexes during segmentReload when the default compressionType changes. - if (newCompressionType == null || existingCompressionType == newCompressionType) { - return false; - } - - return true; + return newCompressionType != null && existingCompressionType != newCompressionType; } } @@ -803,17 +824,15 @@ private void writeDictEnabledForwardIndex(String column, ColumnMetadata existing } } - private void removeDictRelatedIndexes(String column, SegmentDirectory.Writer segmentWriter) { + static void removeDictRelatedIndexes(String column, SegmentDirectory.Writer segmentWriter) { // TODO: Move this logic as a static function in each index creator. 
// Remove all dictionary related indexes. They will be recreated if necessary by the respective handlers. Note that // the remove index call will be a no-op if the index doesn't exist. - segmentWriter.removeIndex(column, ColumnIndexType.RANGE_INDEX); - segmentWriter.removeIndex(column, ColumnIndexType.FST_INDEX); - segmentWriter.removeIndex(column, ColumnIndexType.INVERTED_INDEX); + DICTIONARY_BASED_INDEXES_TO_REWRITE.forEach((index) -> segmentWriter.removeIndex(column, index)); } - private void updateMetadataProperties(File indexDir, Map metadataProperties) + static void updateMetadataProperties(File indexDir, Map metadataProperties) throws Exception { File v3Dir = SegmentDirectoryPaths.segmentDirectoryFor(indexDir, SegmentVersion.v3); File metadataFile = new File(v3Dir, V1Constants.MetadataKeys.METADATA_FILE_NAME); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexHandler.java index 720e692cd9d1..2d65fc5b8499 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexHandler.java @@ -44,7 +44,6 @@ boolean needUpdateIndices(SegmentDirectory.Reader segmentReader) * Performs any cleanup actions required after the indexes have been updated. * Should be called only after all IndexHandlers have run. 
*/ - default void postUpdateIndicesCleanup(SegmentDirectory.Writer segmentWriter) - throws Exception { - } + void postUpdateIndicesCleanup(SegmentDirectory.Writer segmentWriter) + throws Exception; } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexHandlerFactory.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexHandlerFactory.java index b8d972e46970..11f860a26a96 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexHandlerFactory.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexHandlerFactory.java @@ -45,6 +45,10 @@ public void updateIndices(SegmentDirectory.Writer segmentWriter, IndexCreatorPro public boolean needUpdateIndices(SegmentDirectory.Reader segmentReader) { return false; } + + @Override + public void postUpdateIndicesCleanup(SegmentDirectory.Writer segmentWriter) { + } }; public static IndexHandler getIndexHandler(ColumnIndexType type, SegmentMetadataImpl segmentMetadata, diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/InvertedIndexAndDictionaryBasedForwardIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/InvertedIndexAndDictionaryBasedForwardIndexCreator.java new file mode 100644 index 000000000000..534f3bcb33ec --- /dev/null +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/InvertedIndexAndDictionaryBasedForwardIndexCreator.java @@ -0,0 +1,615 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.local.segment.index.loader; + +import com.google.common.base.Preconditions; +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import org.apache.commons.io.FileUtils; +import org.apache.pinot.segment.local.segment.creator.impl.SegmentColumnarIndexCreator; +import org.apache.pinot.segment.local.segment.index.readers.BitmapInvertedIndexReader; +import org.apache.pinot.segment.spi.ColumnMetadata; +import org.apache.pinot.segment.spi.SegmentMetadata; +import org.apache.pinot.segment.spi.V1Constants; +import org.apache.pinot.segment.spi.compression.ChunkCompressionType; +import org.apache.pinot.segment.spi.creator.IndexCreationContext; +import org.apache.pinot.segment.spi.creator.IndexCreatorProvider; +import org.apache.pinot.segment.spi.index.creator.ForwardIndexCreator; +import org.apache.pinot.segment.spi.index.reader.Dictionary; +import org.apache.pinot.segment.spi.memory.PinotDataBuffer; +import org.apache.pinot.segment.spi.store.ColumnIndexType; +import org.apache.pinot.segment.spi.store.SegmentDirectory; +import org.apache.pinot.spi.data.FieldSpec; +import org.apache.pinot.spi.utils.BigDecimalUtils; +import org.apache.pinot.spi.utils.ByteArray; +import org.roaringbitmap.buffer.ImmutableRoaringBitmap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static 
java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Column.DICTIONARY_ELEMENT_SIZE; +import static org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Column.HAS_DICTIONARY; +import static org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Column.MAX_MULTI_VALUE_ELEMENTS; +import static org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Column.TOTAL_NUMBER_OF_ENTRIES; +import static org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Column.getKeyFor; + + +/** + * Helper class used by the {@link SegmentPreProcessor} to generate the forward index from inverted index and + * dictionary when the forward index is enabled for columns where it was previously disabled. This is also invoked by + * the {@link IndexHandler} code in scenarios where the forward index needs to be temporarily created to generate other + * indexes for the given column. In such cases the forward index will be cleaned up after the {@link IndexHandler} code + * completes. + * + * For multi-value columns the following invariants cannot be maintained: + * - Ordering of elements within a given multi-value row. This will always be a limitation. + * + * TODO: Currently for multi-value columns generating the forward index can lead to data loss as frequency information + * is not available for repeats within a given row. This needs to be addressed by tracking the frequency data + * as part of an on-disk structure when forward index is disabled for a column.
+ */ +public class InvertedIndexAndDictionaryBasedForwardIndexCreator implements AutoCloseable { + private static final Logger LOGGER = + LoggerFactory.getLogger(InvertedIndexAndDictionaryBasedForwardIndexCreator.class); + + // Use MMapBuffer if the value buffer size is larger than 2G + private static final int NUM_VALUES_THRESHOLD_FOR_MMAP_BUFFER = 500_000_000; + + private static final String FORWARD_INDEX_VALUE_BUFFER_SUFFIX = ".fwd.idx.val.buf"; + private static final String FORWARD_INDEX_LENGTH_BUFFER_SUFFIX = ".fwd.idx.len.buf"; + private static final String FORWARD_INDEX_MAX_SIZE_BUFFER_SUFFIX = ".fwd.idx.maxsize.buf"; + + private final String _columnName; + private final SegmentMetadata _segmentMetadata; + private final IndexLoadingConfig _indexLoadingConfig; + private final SegmentDirectory.Writer _segmentWriter; + private final IndexCreatorProvider _indexCreatorProvider; + private final boolean _isTemporaryForwardIndex; + + // Metadata + private final ColumnMetadata _columnMetadata; + private final boolean _singleValue; + private final int _cardinality; + private final int _numDocs; + private final int _maxNumberOfMultiValues; + private final FieldSpec.DataType _storedType; + private final int _totalNumberOfEntries; + private final boolean _dictionaryEnabled; + private final ChunkCompressionType _chunkCompressionType; + private final boolean _useMMapBuffer; + + // Files and temporary buffers + private final File _forwardIndexFile; + private final File _forwardIndexValueBufferFile; + private final File _forwardIndexLengthBufferFile; + private final File _forwardIndexMaxSizeBufferFile; + + // Forward index buffers (to store the dictId at the correct docId) + private PinotDataBuffer _forwardIndexValueBuffer; + // For multi-valued column only because each docId can have multiple dictIds + private PinotDataBuffer _forwardIndexLengthBuffer; + private int _nextValueId; + // For multi-valued column only to track max row size + private PinotDataBuffer 
_forwardIndexMaxSizeBuffer; + + public InvertedIndexAndDictionaryBasedForwardIndexCreator(String columnName, SegmentMetadata segmentMetadata, + IndexLoadingConfig indexLoadingConfig, SegmentDirectory.Writer segmentWriter, + IndexCreatorProvider indexCreatorProvider, boolean isTemporaryForwardIndex) + throws IOException { + _columnName = columnName; + _segmentMetadata = segmentMetadata; + _indexLoadingConfig = indexLoadingConfig; + _segmentWriter = segmentWriter; + _indexCreatorProvider = indexCreatorProvider; + _isTemporaryForwardIndex = isTemporaryForwardIndex; + + _columnMetadata = segmentMetadata.getColumnMetadataFor(columnName); + _singleValue = _columnMetadata.isSingleValue(); + _cardinality = _columnMetadata.getCardinality(); + _numDocs = _columnMetadata.getTotalDocs(); + _totalNumberOfEntries = _columnMetadata.getTotalNumberOfEntries(); + _maxNumberOfMultiValues = _columnMetadata.getMaxNumberOfMultiValues(); + _storedType = _columnMetadata.getFieldSpec().getDataType().getStoredType(); + _dictionaryEnabled = !_indexLoadingConfig.getNoDictionaryColumns().contains(columnName); + _chunkCompressionType = getColumnCompressionType(); + int numValues = _singleValue ? _numDocs : _totalNumberOfEntries; + _useMMapBuffer = numValues > NUM_VALUES_THRESHOLD_FOR_MMAP_BUFFER; + + // Sorted columns should never need recreation of the forward index as the forwardIndexDisabled flag is treated as + // a no-op for sorted columns + File indexDir = segmentMetadata.getIndexDir(); + String fileExtension; + if (_dictionaryEnabled) { + fileExtension = _singleValue ? V1Constants.Indexes.UNSORTED_SV_FORWARD_INDEX_FILE_EXTENSION + : V1Constants.Indexes.UNSORTED_MV_FORWARD_INDEX_FILE_EXTENSION; + } else { + fileExtension = _singleValue ? 
V1Constants.Indexes.RAW_SV_FORWARD_INDEX_FILE_EXTENSION + : V1Constants.Indexes.RAW_MV_FORWARD_INDEX_FILE_EXTENSION; + } + _forwardIndexFile = new File(indexDir, columnName + fileExtension); + _forwardIndexValueBufferFile = new File(indexDir, columnName + FORWARD_INDEX_VALUE_BUFFER_SUFFIX); + _forwardIndexLengthBufferFile = new File(indexDir, columnName + FORWARD_INDEX_LENGTH_BUFFER_SUFFIX); + _forwardIndexMaxSizeBufferFile = new File(indexDir, columnName + FORWARD_INDEX_MAX_SIZE_BUFFER_SUFFIX); + + // Create the temporary buffers needed + try { + _forwardIndexValueBuffer = createTempBuffer((long) numValues * Integer.BYTES, _forwardIndexValueBufferFile); + if (!_singleValue) { + _forwardIndexLengthBuffer = createTempBuffer((long) _numDocs * Integer.BYTES, _forwardIndexLengthBufferFile); + for (int i = 0; i < _numDocs; i++) { + // We need to clear the forward index length buffer because we rely on the initial value of 0, and keep + // updating the value instead of directly setting the value + _forwardIndexLengthBuffer.putInt((long) i * Integer.BYTES, 0); + } + _forwardIndexMaxSizeBuffer = createTempBuffer((long) _numDocs * Integer.BYTES, _forwardIndexMaxSizeBufferFile); + for (int i = 0; i < _numDocs; i++) { + // We need to clear the forward index max size buffer because we rely on the initial value of 0, and keep + // updating the value instead of directly setting the value + _forwardIndexMaxSizeBuffer.putInt((long) i * Integer.BYTES, 0); + } + } + } catch (Exception e) { + destroyBuffer(_forwardIndexValueBuffer, _forwardIndexValueBufferFile); + destroyBuffer(_forwardIndexLengthBuffer, _forwardIndexLengthBufferFile); + destroyBuffer(_forwardIndexMaxSizeBuffer, _forwardIndexMaxSizeBufferFile); + throw new IOException("Couldn't create temp buffers to construct forward index", e); + } + } + + /** + * Returns the chunk compression type to use for the raw (no-dictionary) forward index, or null when the column is + * dictionary-encoded. Lookup order: per-column compression config, then the noDictionary config, then the default + * for the column's field type. + */ + private ChunkCompressionType getColumnCompressionType() { + if (_dictionaryEnabled) { + return null; + } + + Map<String, ChunkCompressionType> compressionConfigs = _indexLoadingConfig.getCompressionConfigs();
+ Map<String, String> noDictionaryConfig = _indexLoadingConfig.getNoDictionaryConfig(); + ChunkCompressionType compressionType; + if (compressionConfigs.containsKey(_columnName)) { + compressionType = compressionConfigs.get(_columnName); + } else if (noDictionaryConfig.containsKey(_columnName)) { + compressionType = ChunkCompressionType.valueOf(noDictionaryConfig.get(_columnName)); + } else { + compressionType = SegmentColumnarIndexCreator.getDefaultCompressionType(_columnMetadata.getFieldType()); + } + return compressionType; + } + + public void regenerateForwardIndex() + throws IOException { + File indexDir = _segmentMetadata.getIndexDir(); + String segmentName = _segmentMetadata.getName(); + File inProgress = new File(indexDir, _columnName + ".fwd.inprogress"); + + if (!inProgress.exists()) { + // Marker file does not exist, which means last run ended normally. + // Create a marker file. + FileUtils.touch(inProgress); + } else { + // Marker file exists, which means last run was interrupted. + // Remove forward index if exists. + FileUtils.deleteQuietly(_forwardIndexFile); + } + + // Create new forward index for the column. + LOGGER.info("Creating a new forward index for segment: {}, column: {}, isTemporary: {}", segmentName, _columnName, + _isTemporaryForwardIndex); + + Map metadataProperties; + if (_singleValue) { + metadataProperties = createForwardIndexForSVColumn(); + } else { + metadataProperties = createForwardIndexForMVColumn(); + } + + LoaderUtils.writeIndexToV3Format(_segmentWriter, _columnName, _forwardIndexFile, ColumnIndexType.FORWARD_INDEX); + + if (!_isTemporaryForwardIndex) { + // Only update the metadata and cleanup other indexes if the forward index to be created is permanent. If the + // forward index is temporary, it is meant to be used only for construction of other indexes and will be deleted + // once all the IndexHandlers have completed. + try { + LOGGER.info("Created forward index from inverted index and dictionary. 
Updating metadata properties for " + + "segment: {}, column: {}, property list: {}", segmentName, _columnName, metadataProperties); + ForwardIndexHandler.updateMetadataProperties(_segmentMetadata.getIndexDir(), metadataProperties); + } catch (Exception e) { + throw new IOException( + String.format("Failed to update metadata properties for segment: %s, column: %s", segmentName, _columnName), + e); + } + + if (!_dictionaryEnabled) { + LOGGER.info("Clean up indexes no longer needed or which need to be rewritten for segment: {}, column: {}", + segmentName, _columnName); + // Delete the dictionary + _segmentWriter.removeIndex(_columnName, ColumnIndexType.DICTIONARY); + + // We remove indexes that have to be rewritten when a dictEnabled is toggled. Note that the respective index + // handler will take care of recreating the index. + ForwardIndexHandler.removeDictRelatedIndexes(_columnName, _segmentWriter); + } + } + + // Delete the marker file. + FileUtils.deleteQuietly(inProgress); + + LOGGER.info("Created a new forward index for segment: {}, column: {}, isTemporary: {}", segmentName, _columnName, + _isTemporaryForwardIndex); + } + + private Map createForwardIndexForSVColumn() + throws IOException { + try (BitmapInvertedIndexReader invertedIndexReader = + (BitmapInvertedIndexReader) LoaderUtils.getInvertedIndexReader(_segmentWriter, _columnMetadata); + Dictionary dictionary = LoaderUtils.getDictionary(_segmentWriter, _columnMetadata)) { + boolean isFixedWidth = _columnMetadata.getFieldSpec().getDataType().isFixedWidth(); + int lengthOfLongestEntry = isFixedWidth ? 
-1 : 0; + // Construct the forward index in the values buffer + for (int dictId = 0; dictId < _cardinality; dictId++) { + ImmutableRoaringBitmap docIdsBitmap = invertedIndexReader.getDocIds(dictId); + int finalDictId = dictId; + docIdsBitmap.stream().forEach(docId -> putInt(_forwardIndexValueBuffer, docId, finalDictId)); + if (!isFixedWidth) { + lengthOfLongestEntry = trackLengthOfLongestEntry(dictionary, lengthOfLongestEntry, dictId); + } + } + + IndexCreationContext.Forward context = + IndexCreationContext.builder().withIndexDir(_segmentMetadata.getIndexDir()) + .withColumnMetadata(_columnMetadata).withforwardIndexDisabled(false).withDictionary(_dictionaryEnabled) + .withLengthOfLongestEntry(lengthOfLongestEntry).build() + .forForwardIndex(_chunkCompressionType, _indexLoadingConfig.getColumnProperties()); + + writeToForwardIndex(dictionary, context); + + // Setup and return the metadata properties to update + Map metadataProperties = new HashMap<>(); + metadataProperties.put(getKeyFor(_columnName, HAS_DICTIONARY), String.valueOf(_dictionaryEnabled)); + metadataProperties.put(getKeyFor(_columnName, DICTIONARY_ELEMENT_SIZE), + String.valueOf(_dictionaryEnabled ? _columnMetadata.getColumnMaxLength() : 0)); + return metadataProperties; + } + } + + private Map createForwardIndexForMVColumn() + throws IOException { + try (BitmapInvertedIndexReader invertedIndexReader = + (BitmapInvertedIndexReader) LoaderUtils.getInvertedIndexReader(_segmentWriter, _columnMetadata); + Dictionary dictionary = LoaderUtils.getDictionary(_segmentWriter, _columnMetadata)) { + // Construct the forward index length buffer (number of values per docId) and count the total number of entries + int[] maxNumberOfMultiValues = new int[]{0}; + final boolean isFixedWidth = _columnMetadata.getFieldSpec().getDataType().isFixedWidth(); + int lengthOfLongestEntry = isFixedWidth ? -1 : 0; + int[] maxRowLengthInBytes = isFixedWidth ? 
new int[]{-1} : new int[]{0}; + for (int dictId = 0; dictId < _cardinality; dictId++) { + ImmutableRoaringBitmap docIdsBitmap = invertedIndexReader.getDocIds(dictId); + docIdsBitmap.stream().forEach(docId -> { + int newRowLength = getInt(_forwardIndexLengthBuffer, docId) + 1; + maxNumberOfMultiValues[0] = Math.max(maxNumberOfMultiValues[0], newRowLength); + putInt(_forwardIndexLengthBuffer, docId, newRowLength); + _nextValueId++; + }); + + if (!isFixedWidth) { + lengthOfLongestEntry = trackLengthOfLongestEntry(dictionary, lengthOfLongestEntry, dictId); + } + } + + if (_nextValueId < _totalNumberOfEntries) { + LOGGER.warn("Total number of entries: {} less than expected total number of entries: {}, multi-value column: " + + "{} duplicates detected, duplicate entries within each row lost! Expected maxNumberOfMultiValues: " + + "{}, actual maxNumberOfMultiValues: {}", _nextValueId, _totalNumberOfEntries, _columnName, + _maxNumberOfMultiValues, maxNumberOfMultiValues[0]); + } else { + Preconditions.checkState(_nextValueId == _totalNumberOfEntries, + String.format("Number of entries found %d cannot be higher than expected total number of entries: %d for " + + "column: %s", _nextValueId, _totalNumberOfEntries, _columnName)); + Preconditions.checkState(maxNumberOfMultiValues[0] == _maxNumberOfMultiValues, + String.format("Actual maxNumberOfMultiValues: %d doesn't match expected maxNumberOfMultiValues: %d for " + + "column %s", maxNumberOfMultiValues[0], _maxNumberOfMultiValues, _columnName)); + } + + // Calculate value index for each docId in the forward index value buffer + // Re-use forward index length buffer to store the value index for each docId, where value index is the index in + // the forward index value buffer where we should put next dictId for the docId + int forwardValueIndex = 0; + for (int docId = 0; docId < _numDocs; docId++) { + int length = getInt(_forwardIndexLengthBuffer, docId); + putInt(_forwardIndexLengthBuffer, docId, forwardValueIndex); + 
forwardValueIndex += length; + } + + // Construct the forward index values buffer from the inverted index using the length buffer for index tracking + for (int dictId = 0; dictId < _cardinality; dictId++) { + ImmutableRoaringBitmap docIdsBitmap = invertedIndexReader.getDocIds(dictId); + int finalDictId = dictId; + docIdsBitmap.stream().forEach(docId -> { + int index = getInt(_forwardIndexLengthBuffer, docId); + putInt(_forwardIndexValueBuffer, index, finalDictId); + putInt(_forwardIndexLengthBuffer, docId, index + 1); + if (!isFixedWidth) { + trackMaxRowLengthInBytes(dictionary, maxRowLengthInBytes, docId, finalDictId); + } + }); + } + + IndexCreationContext.Forward context = + IndexCreationContext.builder().withIndexDir(_segmentMetadata.getIndexDir()) + .withColumnMetadata(_columnMetadata).withforwardIndexDisabled(false).withDictionary(_dictionaryEnabled) + .withTotalNumberOfEntries(_nextValueId).withMaxNumberOfMultiValueElements(maxNumberOfMultiValues[0]) + .withMaxRowLengthInBytes(maxRowLengthInBytes[0]).withLengthOfLongestEntry(lengthOfLongestEntry) + .build().forForwardIndex(_chunkCompressionType, _indexLoadingConfig.getColumnProperties()); + + writeToForwardIndex(dictionary, context); + + // Setup and return the metadata properties to update + Map metadataProperties = new HashMap<>(); + metadataProperties.put(getKeyFor(_columnName, HAS_DICTIONARY), String.valueOf(_dictionaryEnabled)); + metadataProperties.put(getKeyFor(_columnName, DICTIONARY_ELEMENT_SIZE), + String.valueOf(_dictionaryEnabled ? 
_columnMetadata.getColumnMaxLength() : 0)); + metadataProperties.put(getKeyFor(_columnName, MAX_MULTI_VALUE_ELEMENTS), + String.valueOf(maxNumberOfMultiValues[0])); + metadataProperties.put(getKeyFor(_columnName, TOTAL_NUMBER_OF_ENTRIES), + String.valueOf(_nextValueId)); + return metadataProperties; + } + } + + private int trackLengthOfLongestEntry(Dictionary dictionary, int lengthOfLongestEntry, int dictId) { + int updatedLengthOfLongestEntry; + switch (_storedType) { + case STRING: + updatedLengthOfLongestEntry = Math.max(dictionary.getStringValue(dictId).getBytes(UTF_8).length, + lengthOfLongestEntry); + break; + case BYTES: + ByteArray value = new ByteArray(dictionary.getBytesValue(dictId)); + updatedLengthOfLongestEntry = Math.max(value.length(), lengthOfLongestEntry); + break; + case BIG_DECIMAL: + updatedLengthOfLongestEntry = Math.max( + BigDecimalUtils.byteSize(dictionary.getBigDecimalValue(dictId)), lengthOfLongestEntry); + break; + default: + throw new IllegalStateException("Trying to calculate lengthOfLongestEntry for invalid stored type: " + + _storedType); + } + return updatedLengthOfLongestEntry; + } + + /** + * Adds the size in bytes of the given dictionary entry to the running row size stored for the docId in the max size + * buffer, and updates maxRowLengthInBytes[0] with the largest row size seen so far. + */ + private void trackMaxRowLengthInBytes(Dictionary dictionary, int[] maxRowLengthInBytes, int docId, int dictId) { + int curSizeOfRow = getInt(_forwardIndexMaxSizeBuffer, docId); + switch (_storedType) { + case STRING: + // Use the UTF-8 encoded size (as trackLengthOfLongestEntry does), not the char count + int newSizeOfEntry = dictionary.getStringValue(dictId).getBytes(UTF_8).length + curSizeOfRow; + putInt(_forwardIndexMaxSizeBuffer, docId, newSizeOfEntry); + maxRowLengthInBytes[0] = Math.max(newSizeOfEntry, maxRowLengthInBytes[0]); + break; + case BYTES: + ByteArray value = new ByteArray(dictionary.getBytesValue(dictId)); + newSizeOfEntry = value.length() + curSizeOfRow; + putInt(_forwardIndexMaxSizeBuffer, docId, newSizeOfEntry); + maxRowLengthInBytes[0] = Math.max(newSizeOfEntry, maxRowLengthInBytes[0]); + break; + case BIG_DECIMAL: + newSizeOfEntry = BigDecimalUtils.byteSize(dictionary.getBigDecimalValue(dictId)) + curSizeOfRow; + 
putInt(_forwardIndexMaxSizeBuffer, docId, newSizeOfEntry); + maxRowLengthInBytes[0] = Math.max(newSizeOfEntry, maxRowLengthInBytes[0]); + break; + default: + throw new IllegalStateException("Trying to calculate maxRowLengthInBytes for invalid stored type: " + + _storedType); + } + } + + private void writeToForwardIndex(Dictionary dictionary, IndexCreationContext.Forward context) + throws IOException { + try (ForwardIndexCreator creator = _indexCreatorProvider.newForwardIndexCreator(context)) { + if (_dictionaryEnabled) { + if (_singleValue) { + for (int docId = 0; docId < _numDocs; docId++) { + creator.putDictId(getInt(_forwardIndexValueBuffer, docId)); + } + } else { + int startIdx = 0; + for (int docId = 0; docId < _numDocs; docId++) { + int endIdx = getInt(_forwardIndexLengthBuffer, docId); + int[] values = new int[endIdx - startIdx]; + int valuesIdx = 0; + for (int i = startIdx; i < endIdx; i++) { + values[valuesIdx++] = getInt(_forwardIndexValueBuffer, i); + } + creator.putDictIdMV(values); + startIdx = endIdx; + } + } + } else { + switch (creator.getValueType()) { + case INT: + if (_singleValue) { + for (int docId = 0; docId < _numDocs; docId++) { + creator.putInt(dictionary.getIntValue(getInt(_forwardIndexValueBuffer, docId))); + } + } else { + int startIdx = 0; + for (int docId = 0; docId < _numDocs; docId++) { + int endIdx = getInt(_forwardIndexLengthBuffer, docId); + int[] values = new int[endIdx - startIdx]; + int valuesIdx = 0; + for (int i = startIdx; i < endIdx; i++) { + values[valuesIdx++] = dictionary.getIntValue(getInt(_forwardIndexValueBuffer, i)); + } + creator.putIntMV(values); + startIdx = endIdx; + } + } + break; + case LONG: + if (_singleValue) { + for (int docId = 0; docId < _numDocs; docId++) { + creator.putLong(dictionary.getLongValue(getInt(_forwardIndexValueBuffer, docId))); + } + } else { + int startIdx = 0; + for (int docId = 0; docId < _numDocs; docId++) { + int endIdx = getInt(_forwardIndexLengthBuffer, docId); + long[] values = new 
long[endIdx - startIdx]; + int valuesIdx = 0; + for (int i = startIdx; i < endIdx; i++) { + values[valuesIdx++] = dictionary.getLongValue(getInt(_forwardIndexValueBuffer, i)); + } + creator.putLongMV(values); + startIdx = endIdx; + } + } + break; + case FLOAT: + if (_singleValue) { + for (int docId = 0; docId < _numDocs; docId++) { + creator.putFloat(dictionary.getFloatValue(getInt(_forwardIndexValueBuffer, docId))); + } + } else { + int startIdx = 0; + for (int docId = 0; docId < _numDocs; docId++) { + int endIdx = getInt(_forwardIndexLengthBuffer, docId); + float[] values = new float[endIdx - startIdx]; + int valuesIdx = 0; + for (int i = startIdx; i < endIdx; i++) { + values[valuesIdx++] = dictionary.getFloatValue(getInt(_forwardIndexValueBuffer, i)); + } + creator.putFloatMV(values); + startIdx = endIdx; + } + } + break; + case DOUBLE: + if (_singleValue) { + for (int docId = 0; docId < _numDocs; docId++) { + creator.putDouble(dictionary.getDoubleValue(getInt(_forwardIndexValueBuffer, docId))); + } + } else { + int startIdx = 0; + for (int docId = 0; docId < _numDocs; docId++) { + int endIdx = getInt(_forwardIndexLengthBuffer, docId); + double[] values = new double[endIdx - startIdx]; + int valuesIdx = 0; + for (int i = startIdx; i < endIdx; i++) { + values[valuesIdx++] = dictionary.getDoubleValue(getInt(_forwardIndexValueBuffer, i)); + } + creator.putDoubleMV(values); + startIdx = endIdx; + } + } + break; + case STRING: + if (_singleValue) { + for (int docId = 0; docId < _numDocs; docId++) { + creator.putString(dictionary.getStringValue(getInt(_forwardIndexValueBuffer, docId))); + } + } else { + int startIdx = 0; + for (int docId = 0; docId < _numDocs; docId++) { + int endIdx = getInt(_forwardIndexLengthBuffer, docId); + String[] values = new String[endIdx - startIdx]; + int valuesIdx = 0; + for (int i = startIdx; i < endIdx; i++) { + values[valuesIdx++] = dictionary.getStringValue(getInt(_forwardIndexValueBuffer, i)); + } + creator.putStringMV(values); + 
startIdx = endIdx; + } + } + break; + case BYTES: + if (_singleValue) { + for (int docId = 0; docId < _numDocs; docId++) { + creator.putBytes(dictionary.getBytesValue(getInt(_forwardIndexValueBuffer, docId))); + } + } else { + int startIdx = 0; + for (int docId = 0; docId < _numDocs; docId++) { + int endIdx = getInt(_forwardIndexLengthBuffer, docId); + byte[][] values = new byte[endIdx - startIdx][]; + int valuesIdx = 0; + for (int i = startIdx; i < endIdx; i++) { + values[valuesIdx++] = dictionary.getBytesValue(getInt(_forwardIndexValueBuffer, i)); + } + creator.putBytesMV(values); + startIdx = endIdx; + } + } + break; + case BIG_DECIMAL: + Preconditions.checkState(_singleValue, "BIG_DECIMAL type not supported for multi-value columns"); + for (int docId = 0; docId < _numDocs; docId++) { + creator.putBigDecimal(dictionary.getBigDecimalValue(getInt(_forwardIndexValueBuffer, docId))); + } + break; + default: + throw new IllegalStateException("Invalid type: " + creator.getValueType() + " cannot create forward index"); + } + } + } catch (Exception e) { + throw new IOException(String.format( + "Cannot create the forward index from inverted index for column %s", _columnName), e); + } finally { + destroyBuffer(_forwardIndexValueBuffer, _forwardIndexValueBufferFile); + destroyBuffer(_forwardIndexLengthBuffer, _forwardIndexLengthBufferFile); + destroyBuffer(_forwardIndexMaxSizeBuffer, _forwardIndexMaxSizeBufferFile); + } + } + + // Writes an int at the given int position; index << 2 converts the position to a byte offset (4 bytes per int) + private static void putInt(PinotDataBuffer buffer, long index, int value) { + buffer.putInt(index << 2, value); + } + + // Reads the int at the given int position; index << 2 converts the position to a byte offset (4 bytes per int) + private static int getInt(PinotDataBuffer buffer, long index) { + return buffer.getInt(index << 2); + } + + private PinotDataBuffer createTempBuffer(long size, File mmapFile) + throws IOException { + if (_useMMapBuffer) { + return PinotDataBuffer.mapFile(mmapFile, false, 0, size, PinotDataBuffer.NATIVE_ORDER, + "InvertedIndexAndDictionaryBasedForwardIndexCreator: temp mmapped buffer for " + mmapFile.getName()); + } else { + return 
PinotDataBuffer.allocateDirect(size, PinotDataBuffer.NATIVE_ORDER, + "InvertedIndexAndDictionaryBasedForwardIndexCreator: temp direct buffer for " + mmapFile.getName()); + } + } + + private void destroyBuffer(PinotDataBuffer buffer, File mmapFile) + throws IOException { + if (buffer != null) { + buffer.close(); + } + if (mmapFile.exists()) { + FileUtils.forceDelete(mmapFile); + } + } + + @Override + public void close() + throws Exception { + destroyBuffer(_forwardIndexValueBuffer, _forwardIndexValueBufferFile); + destroyBuffer(_forwardIndexLengthBuffer, _forwardIndexLengthBufferFile); + destroyBuffer(_forwardIndexMaxSizeBuffer, _forwardIndexMaxSizeBufferFile); + } +} diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/LoaderUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/LoaderUtils.java index 136e4660b65e..22c0a243a243 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/LoaderUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/LoaderUtils.java @@ -30,6 +30,7 @@ import org.apache.pinot.segment.spi.ColumnMetadata; import org.apache.pinot.segment.spi.index.IndexingOverrides; import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader; +import org.apache.pinot.segment.spi.index.reader.InvertedIndexReader; import org.apache.pinot.segment.spi.memory.PinotDataBuffer; import org.apache.pinot.segment.spi.store.ColumnIndexType; import org.apache.pinot.segment.spi.store.SegmentDirectory; @@ -65,6 +66,17 @@ public static BaseImmutableDictionary getDictionary(SegmentDirectory.Reader segm return PhysicalColumnIndexContainer.loadDictionary(dataBuffer, columnMetadata, false); } + /** + * Returns the inverted index reader for the given column. 
+ */ + public static InvertedIndexReader getInvertedIndexReader(SegmentDirectory.Reader segmentReader, + ColumnMetadata columnMetadata) + throws IOException { + PinotDataBuffer dataBuffer = + segmentReader.getIndexFor(columnMetadata.getColumnName(), ColumnIndexType.INVERTED_INDEX); + return IndexingOverrides.getIndexReaderProvider().newInvertedIndexReader(dataBuffer, columnMetadata); + } + /** * Write an index file to v3 format single index file and remove the old one. * diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessor.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessor.java index 80f5cb88f49b..074e89e70b9a 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessor.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessor.java @@ -120,14 +120,14 @@ public void process() } } - // Perform post-cleanup operations on the index handlers + // Create/modify/remove star-trees if required. + processStarTrees(indexDir); + + // Perform post-cleanup operations on the index handlers. This should be called after processing the star-trees for (IndexHandler handler : indexHandlers) { handler.postUpdateIndicesCleanup(segmentWriter); } - // Create/modify/remove star-trees if required. - processStarTrees(indexDir); - // Add min/max value to column metadata according to the prune mode. // For star-tree index, because it can only increase the range, so min/max value can still be used in pruner. 
ColumnMinMaxValueGeneratorMode columnMinMaxValueGeneratorMode = diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/bloomfilter/BloomFilterHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/bloomfilter/BloomFilterHandler.java index ddb53e04b3db..836e593c8ebe 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/bloomfilter/BloomFilterHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/bloomfilter/BloomFilterHandler.java @@ -24,7 +24,7 @@ import java.util.Map; import java.util.Set; import org.apache.commons.io.FileUtils; -import org.apache.pinot.segment.local.segment.index.loader.IndexHandler; +import org.apache.pinot.segment.local.segment.index.loader.BaseIndexHandler; import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig; import org.apache.pinot.segment.local.segment.index.loader.LoaderUtils; import org.apache.pinot.segment.local.segment.index.readers.BaseImmutableDictionary; @@ -55,14 +55,13 @@ import org.slf4j.LoggerFactory; -public class BloomFilterHandler implements IndexHandler { +public class BloomFilterHandler extends BaseIndexHandler { private static final Logger LOGGER = LoggerFactory.getLogger(BloomFilterHandler.class); - private final SegmentMetadata _segmentMetadata; private final Map _bloomFilterConfigs; public BloomFilterHandler(SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig) { - _segmentMetadata = segmentMetadata; + super(segmentMetadata, indexLoadingConfig); _bloomFilterConfigs = indexLoadingConfig.getBloomFilterConfigs(); } @@ -106,7 +105,7 @@ public void updateIndices(SegmentDirectory.Writer segmentWriter, IndexCreatorPro for (String column : columnsToAddBF) { ColumnMetadata columnMetadata = _segmentMetadata.getColumnMetadataFor(column); if (shouldCreateBloomFilter(columnMetadata)) { - 
createBloomFilterForColumn(segmentWriter, columnMetadata, indexCreatorProvider); + createBloomFilterForColumn(segmentWriter, columnMetadata, indexCreatorProvider, indexCreatorProvider); } } } @@ -246,7 +245,7 @@ private void createAndSealBloomFilterForNonDictionaryColumn(BloomFilterCreatorPr } private void createBloomFilterForColumn(SegmentDirectory.Writer segmentWriter, ColumnMetadata columnMetadata, - BloomFilterCreatorProvider indexCreatorProvider) + BloomFilterCreatorProvider bloomFilterCreatorProvider, IndexCreatorProvider indexCreatorProvider) throws Exception { File indexDir = _segmentMetadata.getIndexDir(); String segmentName = _segmentMetadata.getName(); @@ -264,16 +263,21 @@ private void createBloomFilterForColumn(SegmentDirectory.Writer segmentWriter, C FileUtils.deleteQuietly(bloomFilterFile); } + if (!columnMetadata.hasDictionary()) { + // Create a temporary forward index if it is disabled and does not exist + createForwardIndexIfNeeded(segmentWriter, columnMetadata, indexCreatorProvider, true); + } + // Create new bloom filter for the column. BloomFilterConfig bloomFilterConfig = _bloomFilterConfigs.get(columnName); LOGGER.info("Creating new bloom filter for segment: {}, column: {} with config: {}", segmentName, columnName, bloomFilterConfig); if (columnMetadata.hasDictionary()) { - createAndSealBloomFilterForDictionaryColumn(indexCreatorProvider, indexDir, columnMetadata, bloomFilterConfig, - segmentWriter); + createAndSealBloomFilterForDictionaryColumn(bloomFilterCreatorProvider, indexDir, columnMetadata, + bloomFilterConfig, segmentWriter); } else { - createAndSealBloomFilterForNonDictionaryColumn(indexCreatorProvider, indexDir, columnMetadata, bloomFilterConfig, - segmentWriter); + createAndSealBloomFilterForNonDictionaryColumn(bloomFilterCreatorProvider, indexDir, columnMetadata, + bloomFilterConfig, segmentWriter); } // For v3, write the generated bloom filter file into the single file and remove it. 
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java index 1c9646b97b94..2f507ef06fd7 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java @@ -43,6 +43,7 @@ import org.apache.pinot.segment.local.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator; import org.apache.pinot.segment.local.segment.creator.impl.fwd.SingleValueUnsortedForwardIndexCreator; import org.apache.pinot.segment.local.segment.creator.impl.inv.BitSlicedRangeIndexCreator; +import org.apache.pinot.segment.local.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator; import org.apache.pinot.segment.local.segment.creator.impl.nullvalue.NullValueVectorCreator; import org.apache.pinot.segment.local.segment.creator.impl.stats.BytesColumnPredIndexStatsCollector; import org.apache.pinot.segment.local.segment.creator.impl.stats.DoubleColumnPreIndexStatsCollector; @@ -57,6 +58,7 @@ import org.apache.pinot.segment.spi.V1Constants; import org.apache.pinot.segment.spi.creator.ColumnIndexCreationInfo; import org.apache.pinot.segment.spi.creator.StatsCollectorConfig; +import org.apache.pinot.segment.spi.index.creator.DictionaryBasedInvertedIndexCreator; import org.apache.pinot.segment.spi.index.creator.ForwardIndexCreator; import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl; import org.apache.pinot.segment.spi.index.reader.Dictionary; @@ -226,21 +228,6 @@ Map computeDefaultColumnActionMap() { if (columnMetadata != null) { // Column exists in the segment, check if we need to update the value. 
- if (_segmentWriter != null && !columnMetadata.isAutoGenerated()) { - // TODO: Add support for recreating forward index for a column with forward index disabled. - boolean forwardIndexDisabled = !_segmentWriter.hasIndexFor(column, ColumnIndexType.FORWARD_INDEX); - if (forwardIndexDisabled && !_indexLoadingConfig.getForwardIndexDisabledColumns() - .contains(column)) { - String failureMessage = - "Forward index disabled in segment: " + forwardIndexDisabled + " for column: " + column - + " does not match forward index disabled flag: " - + _indexLoadingConfig.getForwardIndexDisabledColumns().contains(column) + " in the TableConfig, " - + "setting this flag on new columns or updating this flag to disable it is not supported at the " - + "moment. Please backfill or refresh segments to use this feature."; - throw new RuntimeException(failureMessage); - } - } - // Only check for auto-generated column. if (!columnMetadata.isAutoGenerated()) { continue; @@ -430,8 +417,6 @@ protected void validateForwardIndexDisabledConfigsIfPresent(String column, boole if (!forwardIndexDisabled) { return; } - LOGGER.warn("Disabling forward index on a new column {} is currently not supported. Treating this as a no-op!", - column); FieldSpec fieldSpec = _schema.getFieldSpecFor(column); Preconditions.checkState(_indexLoadingConfig.getInvertedIndexColumns().contains(column), String.format("Inverted index must be enabled for forward index disabled column: %s", column)); @@ -457,7 +442,6 @@ protected boolean isForwardIndexDisabled(String column) { /** * Helper method to create the V1 indices (dictionary and forward index) for a column with default values. - * TODO: Add support for handling the forwardIndexDisabled flag. 
Today this flag is ignored for default columns */ private void createDefaultValueColumnV1Indices(String column) throws Exception { @@ -469,11 +453,8 @@ private void createDefaultValueColumnV1Indices(String column) Object defaultValue = fieldSpec.getDefaultNullValue(); boolean isSingleValue = fieldSpec.isSingleValueField(); int maxNumberOfMultiValueElements = isSingleValue ? 0 : 1; - - // Validate that the forwardIndexDisabled flag, if enabled, is compatible with other indexes and configs - // For now the forwardIndexDisabled flag is ignored for default columns but will be handled as part of reload - // changes boolean forwardIndexDisabled = isForwardIndexDisabled(column); + validateForwardIndexDisabledConfigsIfPresent(column, forwardIndexDisabled); Object sortedArray; @@ -539,16 +520,25 @@ private void createDefaultValueColumnV1Indices(String column) } } } else { - // TODO: Add support to disable the forward index if the forwardIndexDisabled flag is true and the column is a - // multi-value column. // Multi-value column. 
- try ( - MultiValueUnsortedForwardIndexCreator mvFwdIndexCreator = new MultiValueUnsortedForwardIndexCreator(_indexDir, - fieldSpec.getName(), 1/*cardinality*/, totalDocs/*numDocs*/, totalDocs/*totalNumberOfValues*/)) { - int[] dictIds = {0}; - for (int docId = 0; docId < totalDocs; docId++) { - mvFwdIndexCreator.putDictIdMV(dictIds); + if (forwardIndexDisabled) { + // Generate an inverted index instead of forward index for multi-value columns when forward index is disabled + try (DictionaryBasedInvertedIndexCreator creator = new OffHeapBitmapInvertedIndexCreator(_indexDir, fieldSpec, + 1, totalDocs, totalDocs)) { + for (int docId = 0; docId < totalDocs; docId++) { + creator.add(0); + } + creator.seal(); + } + } else { + try (MultiValueUnsortedForwardIndexCreator mvFwdIndexCreator = new MultiValueUnsortedForwardIndexCreator( + _indexDir, fieldSpec.getName(), 1/*cardinality*/, totalDocs/*numDocs*/, + totalDocs/*totalNumberOfValues*/)) { + int[] dictIds = {0}; + for (int docId = 0; docId < totalDocs; docId++) { + mvFwdIndexCreator.putDictIdMV(dictIds); + } } } } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java index 89a92b337d30..30af7457f67f 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java @@ -62,16 +62,28 @@ protected boolean updateDefaultColumn(String column, DefaultColumnAction action) FieldSpec fieldSpec = _schema.getFieldSpecFor(column); Preconditions.checkNotNull(fieldSpec); boolean isSingleValue = fieldSpec.isSingleValueField(); - File forwardIndexFile; + boolean forwardIndexDisabled = !isSingleValue && 
isForwardIndexDisabled(column); + File forwardIndexFile = null; + File invertedIndexFile = null; if (isSingleValue) { forwardIndexFile = new File(_indexDir, column + V1Constants.Indexes.SORTED_SV_FORWARD_INDEX_FILE_EXTENSION); if (!forwardIndexFile.exists()) { forwardIndexFile = new File(_indexDir, column + V1Constants.Indexes.UNSORTED_SV_FORWARD_INDEX_FILE_EXTENSION); } } else { - forwardIndexFile = new File(_indexDir, column + V1Constants.Indexes.UNSORTED_MV_FORWARD_INDEX_FILE_EXTENSION); + if (forwardIndexDisabled) { + // An inverted index is created instead of forward index for multi-value columns with forward index disabled + invertedIndexFile = new File(_indexDir, column + V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION); + } else { + forwardIndexFile = new File(_indexDir, column + V1Constants.Indexes.UNSORTED_MV_FORWARD_INDEX_FILE_EXTENSION); + } + } + if (forwardIndexFile != null) { + LoaderUtils.writeIndexToV3Format(_segmentWriter, column, forwardIndexFile, ColumnIndexType.FORWARD_INDEX); + } + if (invertedIndexFile != null) { + LoaderUtils.writeIndexToV3Format(_segmentWriter, column, invertedIndexFile, ColumnIndexType.INVERTED_INDEX); } - LoaderUtils.writeIndexToV3Format(_segmentWriter, column, forwardIndexFile, ColumnIndexType.FORWARD_INDEX); File dictionaryFile = new File(_indexDir, column + V1Constants.Dict.FILE_EXTENSION); LoaderUtils.writeIndexToV3Format(_segmentWriter, column, dictionaryFile, ColumnIndexType.DICTIONARY); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java index 3b74a5cbbb71..7d4e7151fb8d 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java +++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java @@ -24,7 +24,7 @@ import java.util.HashSet; import java.util.Set; import org.apache.commons.io.FileUtils; -import org.apache.pinot.segment.local.segment.index.loader.IndexHandler; +import org.apache.pinot.segment.local.segment.index.loader.BaseIndexHandler; import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig; import org.apache.pinot.segment.local.segment.index.loader.LoaderUtils; import org.apache.pinot.segment.local.segment.index.loader.SegmentPreProcessor; @@ -64,15 +64,14 @@ * added column. In this case, the default column handler would have taken care of adding * dictionary for the new column. Read the dictionary to create FST index. */ -public class FSTIndexHandler implements IndexHandler { +public class FSTIndexHandler extends BaseIndexHandler { private static final Logger LOGGER = LoggerFactory.getLogger(FSTIndexHandler.class); - private final SegmentMetadata _segmentMetadata; private final Set _columnsToAddIdx; private final FSTType _fstType; public FSTIndexHandler(SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig) { - _segmentMetadata = segmentMetadata; + super(segmentMetadata, indexLoadingConfig); _fstType = indexLoadingConfig.getFSTIndexType(); _columnsToAddIdx = indexLoadingConfig.getFSTIndexColumns(); } @@ -122,6 +121,11 @@ public void updateIndices(SegmentDirectory.Writer segmentWriter, IndexCreatorPro } } + @Override + public void postUpdateIndicesCleanup(SegmentDirectory.Writer segmentWriter) + throws Exception { + } + private boolean shouldCreateFSTIndex(ColumnMetadata columnMetadata) { if (columnMetadata != null) { // Fail fast upon unsupported operations. 
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/H3IndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/H3IndexHandler.java index cdfbca28d5aa..20b6a85030c0 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/H3IndexHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/H3IndexHandler.java @@ -25,7 +25,7 @@ import java.util.Map; import java.util.Set; import org.apache.commons.io.FileUtils; -import org.apache.pinot.segment.local.segment.index.loader.IndexHandler; +import org.apache.pinot.segment.local.segment.index.loader.BaseIndexHandler; import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig; import org.apache.pinot.segment.local.segment.index.loader.LoaderUtils; import org.apache.pinot.segment.local.utils.GeometrySerializer; @@ -49,14 +49,13 @@ @SuppressWarnings({"rawtypes", "unchecked"}) -public class H3IndexHandler implements IndexHandler { +public class H3IndexHandler extends BaseIndexHandler { private static final Logger LOGGER = LoggerFactory.getLogger(H3IndexHandler.class); - private final SegmentMetadata _segmentMetadata; private final Map _h3Configs; public H3IndexHandler(SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig) { - _segmentMetadata = segmentMetadata; + super(segmentMetadata, indexLoadingConfig); _h3Configs = indexLoadingConfig.getH3IndexConfigs(); } @@ -100,7 +99,7 @@ public void updateIndices(SegmentDirectory.Writer segmentWriter, IndexCreatorPro for (String column : columnsToAddIdx) { ColumnMetadata columnMetadata = _segmentMetadata.getColumnMetadataFor(column); if (shouldCreateH3Index(columnMetadata)) { - createH3IndexForColumn(segmentWriter, columnMetadata, indexCreatorProvider); + createH3IndexForColumn(segmentWriter, columnMetadata, 
indexCreatorProvider, indexCreatorProvider); } } } @@ -110,7 +109,7 @@ private boolean shouldCreateH3Index(ColumnMetadata columnMetadata) { } private void createH3IndexForColumn(SegmentDirectory.Writer segmentWriter, ColumnMetadata columnMetadata, - GeoSpatialIndexCreatorProvider indexCreatorProvider) + GeoSpatialIndexCreatorProvider geoSpatialIndexCreatorProvider, IndexCreatorProvider indexCreatorProvider) throws Exception { File indexDir = _segmentMetadata.getIndexDir(); String segmentName = _segmentMetadata.getName(); @@ -129,14 +128,17 @@ private void createH3IndexForColumn(SegmentDirectory.Writer segmentWriter, Colum FileUtils.deleteQuietly(h3IndexFile); } + // Create a temporary forward index if it is disabled and does not exist + createForwardIndexIfNeeded(segmentWriter, columnMetadata, indexCreatorProvider, true); + // Create new H3 index for the column. LOGGER.info("Creating new H3 index for segment: {}, column: {}", segmentName, columnName); Preconditions .checkState(columnMetadata.getDataType() == DataType.BYTES, "H3 index can only be applied to BYTES columns"); if (columnMetadata.hasDictionary()) { - handleDictionaryBasedColumn(segmentWriter, columnMetadata, indexCreatorProvider); + handleDictionaryBasedColumn(segmentWriter, columnMetadata, geoSpatialIndexCreatorProvider); } else { - handleNonDictionaryBasedColumn(segmentWriter, columnMetadata, indexCreatorProvider); + handleNonDictionaryBasedColumn(segmentWriter, columnMetadata, geoSpatialIndexCreatorProvider); } // For v3, write the generated H3 index file into the single file and remove it. 
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/InvertedIndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/InvertedIndexHandler.java index d9508cca0615..df864256776d 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/InvertedIndexHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/InvertedIndexHandler.java @@ -23,7 +23,7 @@ import java.util.HashSet; import java.util.Set; import org.apache.commons.io.FileUtils; -import org.apache.pinot.segment.local.segment.index.loader.IndexHandler; +import org.apache.pinot.segment.local.segment.index.loader.BaseIndexHandler; import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig; import org.apache.pinot.segment.local.segment.index.loader.LoaderUtils; import org.apache.pinot.segment.spi.ColumnMetadata; @@ -43,14 +43,13 @@ @SuppressWarnings({"rawtypes", "unchecked"}) -public class InvertedIndexHandler implements IndexHandler { +public class InvertedIndexHandler extends BaseIndexHandler { private static final Logger LOGGER = LoggerFactory.getLogger(InvertedIndexHandler.class); - private final SegmentMetadata _segmentMetadata; private final Set _columnsToAddIdx; public InvertedIndexHandler(SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig) { - _segmentMetadata = segmentMetadata; + super(segmentMetadata, indexLoadingConfig); _columnsToAddIdx = indexLoadingConfig.getInvertedIndexColumns(); } @@ -101,6 +100,11 @@ public void updateIndices(SegmentDirectory.Writer segmentWriter, IndexCreatorPro } } + @Override + public void postUpdateIndicesCleanup(SegmentDirectory.Writer segmentWriter) + throws Exception { + } + private boolean shouldCreateInvertedIndex(ColumnMetadata columnMetadata) { // Only create inverted index on dictionary-encoded 
unsorted columns. return columnMetadata != null && !columnMetadata.isSorted() && columnMetadata.hasDictionary(); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/JsonIndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/JsonIndexHandler.java index dcdaf7f81d11..9f8ae532b3b3 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/JsonIndexHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/JsonIndexHandler.java @@ -25,7 +25,7 @@ import java.util.Map; import java.util.Set; import org.apache.commons.io.FileUtils; -import org.apache.pinot.segment.local.segment.index.loader.IndexHandler; +import org.apache.pinot.segment.local.segment.index.loader.BaseIndexHandler; import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig; import org.apache.pinot.segment.local.segment.index.loader.LoaderUtils; import org.apache.pinot.segment.spi.ColumnMetadata; @@ -48,14 +48,13 @@ @SuppressWarnings({"rawtypes", "unchecked"}) -public class JsonIndexHandler implements IndexHandler { +public class JsonIndexHandler extends BaseIndexHandler { private static final Logger LOGGER = LoggerFactory.getLogger(JsonIndexHandler.class); - private final SegmentMetadata _segmentMetadata; private final Map _jsonIndexConfigs; public JsonIndexHandler(SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig) { - _segmentMetadata = segmentMetadata; + super(segmentMetadata, indexLoadingConfig); _jsonIndexConfigs = indexLoadingConfig.getJsonIndexConfigs(); } @@ -99,7 +98,7 @@ public void updateIndices(SegmentDirectory.Writer segmentWriter, IndexCreatorPro for (String column : columnsToAddIdx) { ColumnMetadata columnMetadata = _segmentMetadata.getColumnMetadataFor(column); if (shouldCreateJsonIndex(columnMetadata)) { - 
createJsonIndexForColumn(segmentWriter, columnMetadata, indexCreatorProvider); + createJsonIndexForColumn(segmentWriter, columnMetadata, indexCreatorProvider, indexCreatorProvider); } } } @@ -109,7 +108,7 @@ private boolean shouldCreateJsonIndex(ColumnMetadata columnMetadata) { } private void createJsonIndexForColumn(SegmentDirectory.Writer segmentWriter, ColumnMetadata columnMetadata, - JsonIndexCreatorProvider indexCreatorProvider) + JsonIndexCreatorProvider jsonIndexCreatorProvider, IndexCreatorProvider indexCreatorProvider) throws Exception { File indexDir = _segmentMetadata.getIndexDir(); String segmentName = _segmentMetadata.getName(); @@ -128,15 +127,18 @@ private void createJsonIndexForColumn(SegmentDirectory.Writer segmentWriter, Col FileUtils.deleteQuietly(jsonIndexFile); } + // Create a temporary forward index if it is disabled and does not exist + createForwardIndexIfNeeded(segmentWriter, columnMetadata, indexCreatorProvider, true); + // Create new json index for the column. LOGGER.info("Creating new json index for segment: {}, column: {}", segmentName, columnName); Preconditions.checkState(columnMetadata.isSingleValue() && (columnMetadata.getDataType() == DataType.STRING || columnMetadata.getDataType() == DataType.JSON), "Json index can only be applied to single-value STRING or JSON columns"); if (columnMetadata.hasDictionary()) { - handleDictionaryBasedColumn(segmentWriter, columnMetadata, indexCreatorProvider); + handleDictionaryBasedColumn(segmentWriter, columnMetadata, jsonIndexCreatorProvider); } else { - handleNonDictionaryBasedColumn(segmentWriter, columnMetadata, indexCreatorProvider); + handleNonDictionaryBasedColumn(segmentWriter, columnMetadata, jsonIndexCreatorProvider); } // For v3, write the generated json index file into the single file and remove it. 
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/RangeIndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/RangeIndexHandler.java index 4f19a2b6d487..1c876f5ac3a5 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/RangeIndexHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/RangeIndexHandler.java @@ -23,7 +23,7 @@ import java.util.HashSet; import java.util.Set; import org.apache.commons.io.FileUtils; -import org.apache.pinot.segment.local.segment.index.loader.IndexHandler; +import org.apache.pinot.segment.local.segment.index.loader.BaseIndexHandler; import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig; import org.apache.pinot.segment.local.segment.index.loader.LoaderUtils; import org.apache.pinot.segment.spi.ColumnMetadata; @@ -43,15 +43,14 @@ @SuppressWarnings({"rawtypes", "unchecked"}) -public class RangeIndexHandler implements IndexHandler { +public class RangeIndexHandler extends BaseIndexHandler { private static final Logger LOGGER = LoggerFactory.getLogger(RangeIndexHandler.class); - private final SegmentMetadata _segmentMetadata; private final Set _columnsToAddIdx; private final int _rangeIndexVersion; public RangeIndexHandler(SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig) { - _segmentMetadata = segmentMetadata; + super(segmentMetadata, indexLoadingConfig); _columnsToAddIdx = indexLoadingConfig.getRangeIndexColumns(); _rangeIndexVersion = indexLoadingConfig.getRangeIndexVersion(); } @@ -96,7 +95,7 @@ public void updateIndices(SegmentDirectory.Writer segmentWriter, IndexCreatorPro for (String column : columnsToAddIdx) { ColumnMetadata columnMetadata = _segmentMetadata.getColumnMetadataFor(column); if (shouldCreateRangeIndex(columnMetadata)) { - 
createRangeIndexForColumn(segmentWriter, columnMetadata, indexCreatorProvider); + createRangeIndexForColumn(segmentWriter, columnMetadata, indexCreatorProvider, indexCreatorProvider); } } } @@ -107,7 +106,7 @@ private boolean shouldCreateRangeIndex(ColumnMetadata columnMetadata) { } private void createRangeIndexForColumn(SegmentDirectory.Writer segmentWriter, ColumnMetadata columnMetadata, - RangeIndexCreatorProvider indexCreatorProvider) + RangeIndexCreatorProvider rangeIndexCreatorProvider, IndexCreatorProvider indexCreatorProvider) throws IOException { File indexDir = _segmentMetadata.getIndexDir(); String segmentName = _segmentMetadata.getName(); @@ -126,12 +125,15 @@ private void createRangeIndexForColumn(SegmentDirectory.Writer segmentWriter, Co FileUtils.deleteQuietly(rangeIndexFile); } + // Create a temporary forward index if it is disabled and does not exist + createForwardIndexIfNeeded(segmentWriter, columnMetadata, indexCreatorProvider, true); + // Create new range index for the column. LOGGER.info("Creating new range index for segment: {}, column: {}", segmentName, columnName); if (columnMetadata.hasDictionary()) { - handleDictionaryBasedColumn(segmentWriter, columnMetadata, indexCreatorProvider); + handleDictionaryBasedColumn(segmentWriter, columnMetadata, rangeIndexCreatorProvider); } else { - handleNonDictionaryBasedColumn(segmentWriter, columnMetadata, indexCreatorProvider); + handleNonDictionaryBasedColumn(segmentWriter, columnMetadata, rangeIndexCreatorProvider); } // For v3, write the generated range index file into the single file and remove it. 
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/TextIndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/TextIndexHandler.java index 9af8fc406595..c74315cfa6c3 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/TextIndexHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/TextIndexHandler.java @@ -41,7 +41,7 @@ import java.util.HashSet; import java.util.Map; import java.util.Set; -import org.apache.pinot.segment.local.segment.index.loader.IndexHandler; +import org.apache.pinot.segment.local.segment.index.loader.BaseIndexHandler; import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig; import org.apache.pinot.segment.local.segment.index.loader.LoaderUtils; import org.apache.pinot.segment.local.segment.index.loader.SegmentPreProcessor; @@ -83,16 +83,15 @@ * forward index for the new column. Read the forward index to create text index. 
*/ @SuppressWarnings({"rawtypes", "unchecked"}) -public class TextIndexHandler implements IndexHandler { +public class TextIndexHandler extends BaseIndexHandler { private static final Logger LOGGER = LoggerFactory.getLogger(TextIndexHandler.class); - private final SegmentMetadata _segmentMetadata; private final Set _columnsToAddIdx; private final FSTType _fstType; private final Map> _columnProperties; public TextIndexHandler(SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig) { - _segmentMetadata = segmentMetadata; + super(segmentMetadata, indexLoadingConfig); _fstType = indexLoadingConfig.getFSTIndexType(); _columnsToAddIdx = indexLoadingConfig.getTextIndexColumns(); _columnProperties = indexLoadingConfig.getColumnProperties(); @@ -138,7 +137,7 @@ public void updateIndices(SegmentDirectory.Writer segmentWriter, IndexCreatorPro for (String column : columnsToAddIdx) { ColumnMetadata columnMetadata = _segmentMetadata.getColumnMetadataFor(column); if (shouldCreateTextIndex(columnMetadata)) { - createTextIndexForColumn(segmentWriter, columnMetadata, indexCreatorProvider); + createTextIndexForColumn(segmentWriter, columnMetadata, indexCreatorProvider, indexCreatorProvider); } } } @@ -165,13 +164,17 @@ private void checkUnsupportedOperationsForTextIndex(ColumnMetadata columnMetadat } private void createTextIndexForColumn(SegmentDirectory.Writer segmentWriter, ColumnMetadata columnMetadata, - TextIndexCreatorProvider indexCreatorProvider) + TextIndexCreatorProvider textIndexCreatorProvider, IndexCreatorProvider indexCreatorProvider) throws Exception { File indexDir = _segmentMetadata.getIndexDir(); String segmentName = _segmentMetadata.getName(); String columnName = columnMetadata.getColumnName(); int numDocs = columnMetadata.getTotalDocs(); boolean hasDictionary = columnMetadata.hasDictionary(); + + // Create a temporary forward index if it is disabled and does not exist + createForwardIndexIfNeeded(segmentWriter, columnMetadata, indexCreatorProvider, 
true); + LOGGER.info("Creating new text index for column: {} in segment: {}, hasDictionary: {}", columnName, segmentName, hasDictionary); File segmentDirectory = SegmentDirectoryPaths.segmentDirectoryFor(indexDir, _segmentMetadata.getVersion()); @@ -182,7 +185,7 @@ private void createTextIndexForColumn(SegmentDirectory.Writer segmentWriter, Col // based on segmentVersion. try (ForwardIndexReader forwardIndexReader = LoaderUtils.getForwardIndexReader(segmentWriter, columnMetadata); ForwardIndexReaderContext readerContext = forwardIndexReader.createContext(); - TextIndexCreator textIndexCreator = indexCreatorProvider.newTextIndexCreator(IndexCreationContext.builder() + TextIndexCreator textIndexCreator = textIndexCreatorProvider.newTextIndexCreator(IndexCreationContext.builder() .withColumnMetadata(columnMetadata).withIndexDir(segmentDirectory).build().forTextIndex(_fstType, true, TextIndexUtils.extractStopWordsInclude(columnName, _columnProperties), TextIndexUtils.extractStopWordsExclude(columnName, _columnProperties)))) { diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandlerTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandlerTest.java index f498e335533e..e24de8930d9f 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandlerTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandlerTest.java @@ -21,6 +21,7 @@ import java.io.File; import java.io.IOException; import java.math.BigDecimal; +import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; @@ -58,7 +59,6 @@ import org.apache.pinot.spi.data.readers.RecordReader; import org.apache.pinot.spi.utils.ReadMode; import org.apache.pinot.spi.utils.builder.TableConfigBuilder; -import org.testng.Assert; import 
org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -134,6 +134,16 @@ public class ForwardIndexHandlerTest { private static final String DIM_MV_FORWARD_INDEX_DISABLED_STRING = "DIM_MV_FORWARD_INDEX_DISABLED_STRING"; private static final String DIM_MV_FORWARD_INDEX_DISABLED_BYTES = "DIM_MV_FORWARD_INDEX_DISABLED_BYTES"; + // Forward index disabled multi-value columns with duplicates + private static final String DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_INTEGER = + "DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_INTEGER"; + private static final String DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_LONG = + "DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_LONG"; + private static final String DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_STRING = + "DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_STRING"; + private static final String DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_BYTES = + "DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_BYTES"; + private static final List RAW_SNAPPY_INDEX_COLUMNS = Arrays.asList(DIM_SNAPPY_STRING, DIM_SNAPPY_LONG, DIM_SNAPPY_INTEGER, DIM_SNAPPY_BYTES, METRIC_SNAPPY_BIG_DECIMAL, METRIC_SNAPPY_INTEGER); @@ -163,6 +173,10 @@ public class ForwardIndexHandlerTest { Arrays.asList(DIM_MV_FORWARD_INDEX_DISABLED_INTEGER, DIM_MV_FORWARD_INDEX_DISABLED_LONG, DIM_MV_FORWARD_INDEX_DISABLED_STRING, DIM_MV_FORWARD_INDEX_DISABLED_BYTES); + private static final List MV_FORWARD_INDEX_DISABLED_DUPLICATES_COLUMNS = + Arrays.asList(DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_INTEGER, DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_LONG, + DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_STRING, DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_BYTES); + private final List _noDictionaryColumns = new ArrayList<>(); private final List _forwardIndexDisabledColumns = new ArrayList<>(); TableConfig _tableConfig; @@ -193,7 +207,7 @@ private void buildSegment() List fieldConfigs = new ArrayList<>( RAW_SNAPPY_INDEX_COLUMNS.size() + RAW_ZSTANDARD_INDEX_COLUMNS.size() 
+ RAW_PASS_THROUGH_INDEX_COLUMNS.size() + RAW_LZ4_INDEX_COLUMNS.size() + SV_FORWARD_INDEX_DISABLED_COLUMNS.size() - + MV_FORWARD_INDEX_DISABLED_COLUMNS.size()); + + MV_FORWARD_INDEX_DISABLED_COLUMNS.size() + MV_FORWARD_INDEX_DISABLED_DUPLICATES_COLUMNS.size()); for (String indexColumn : RAW_SNAPPY_INDEX_COLUMNS) { fieldConfigs.add(new FieldConfig(indexColumn, FieldConfig.EncodingType.RAW, Collections.emptyList(), @@ -227,6 +241,12 @@ private void buildSegment() Collections.singletonMap(FieldConfig.FORWARD_INDEX_DISABLED, Boolean.TRUE.toString()))); } + for (String indexColumn : MV_FORWARD_INDEX_DISABLED_DUPLICATES_COLUMNS) { + fieldConfigs.add(new FieldConfig(indexColumn, FieldConfig.EncodingType.DICTIONARY, Collections.singletonList( + FieldConfig.IndexType.INVERTED), null, + Collections.singletonMap(FieldConfig.FORWARD_INDEX_DISABLED, Boolean.TRUE.toString()))); + } + _noDictionaryColumns.addAll(RAW_SNAPPY_INDEX_COLUMNS); _noDictionaryColumns.addAll(RAW_ZSTANDARD_INDEX_COLUMNS); _noDictionaryColumns.addAll(RAW_PASS_THROUGH_INDEX_COLUMNS); @@ -234,6 +254,7 @@ private void buildSegment() _forwardIndexDisabledColumns.addAll(SV_FORWARD_INDEX_DISABLED_COLUMNS); _forwardIndexDisabledColumns.addAll(MV_FORWARD_INDEX_DISABLED_COLUMNS); + _forwardIndexDisabledColumns.addAll(MV_FORWARD_INDEX_DISABLED_DUPLICATES_COLUMNS); _tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).setNoDictionaryColumns(_noDictionaryColumns) @@ -281,7 +302,11 @@ private void buildSegment() .addMultiValueDimension(DIM_MV_FORWARD_INDEX_DISABLED_INTEGER, FieldSpec.DataType.INT) .addMultiValueDimension(DIM_MV_FORWARD_INDEX_DISABLED_LONG, FieldSpec.DataType.LONG) .addMultiValueDimension(DIM_MV_FORWARD_INDEX_DISABLED_STRING, FieldSpec.DataType.STRING) - .addMultiValueDimension(DIM_MV_FORWARD_INDEX_DISABLED_BYTES, FieldSpec.DataType.BYTES).build(); + .addMultiValueDimension(DIM_MV_FORWARD_INDEX_DISABLED_BYTES, FieldSpec.DataType.BYTES) + 
.addMultiValueDimension(DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_INTEGER, FieldSpec.DataType.INT) + .addMultiValueDimension(DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_LONG, FieldSpec.DataType.LONG) + .addMultiValueDimension(DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_STRING, FieldSpec.DataType.STRING) + .addMultiValueDimension(DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_BYTES, FieldSpec.DataType.BYTES).build(); SegmentGeneratorConfig config = new SegmentGeneratorConfig(_tableConfig, _schema); config.setOutDir(INDEX_DIR.getPath()); @@ -315,6 +340,14 @@ private List createTestData() { Long[][] tempMVLongRows = new Long[rowLength][maxNumberOfMVEntries]; byte[][][] tempMVByteRows = new byte[rowLength][maxNumberOfMVEntries][]; + // For MV columns today adding duplicate entries within the same row will result in the total number of MV entries + // reducing for that row since we cannot support rebuilding the forward index without losing duplicates within a + // row today. + String[][] tempMVStringRowsForwardIndexDisabled = new String[rowLength][maxNumberOfMVEntries]; + Integer[][] tempMVIntRowsForwardIndexDisabled = new Integer[rowLength][maxNumberOfMVEntries]; + Long[][] tempMVLongRowsForwardIndexDisabled = new Long[rowLength][maxNumberOfMVEntries]; + byte[][][] tempMVByteRowsForwardIndexDisabled = new byte[rowLength][maxNumberOfMVEntries][]; + for (int i = 0; i < rowLength; i++) { // Adding a fixed value to check for filter queries if (i % 10 == 0) { @@ -350,6 +383,17 @@ private List createTestData() { tempMVByteRows[i][j] = str.getBytes(); } } + + // Populate data for the MV columns with forward index disabled to have unique entries per row. + // Avoid creating empty arrays. 
+ int numMVElements = random.nextInt(maxNumberOfMVEntries - 1) + 1; + for (int j = 0; j < numMVElements; j++) { + String str = "n" + i + j; + tempMVIntRowsForwardIndexDisabled[i][j] = j; + tempMVLongRowsForwardIndexDisabled[i][j] = (long) j; + tempMVStringRowsForwardIndexDisabled[i][j] = str; + tempMVByteRowsForwardIndexDisabled[i][j] = str.getBytes(); + } } for (int i = 0; i < rowLength; i++) { @@ -414,10 +458,16 @@ private List createTestData() { row.putValue(DIM_SV_FORWARD_INDEX_DISABLED_BYTES, tempBytesRows[i]); // Forward index disabled MV columns - row.putValue(DIM_MV_FORWARD_INDEX_DISABLED_INTEGER, tempMVIntRows[i]); - row.putValue(DIM_MV_FORWARD_INDEX_DISABLED_LONG, tempMVLongRows[i]); - row.putValue(DIM_MV_FORWARD_INDEX_DISABLED_STRING, tempMVStringRows[i]); - row.putValue(DIM_MV_FORWARD_INDEX_DISABLED_BYTES, tempMVByteRows[i]); + row.putValue(DIM_MV_FORWARD_INDEX_DISABLED_INTEGER, tempMVIntRowsForwardIndexDisabled[i]); + row.putValue(DIM_MV_FORWARD_INDEX_DISABLED_LONG, tempMVLongRowsForwardIndexDisabled[i]); + row.putValue(DIM_MV_FORWARD_INDEX_DISABLED_STRING, tempMVStringRowsForwardIndexDisabled[i]); + row.putValue(DIM_MV_FORWARD_INDEX_DISABLED_BYTES, tempMVByteRowsForwardIndexDisabled[i]); + + // Forward index disabled MV columns with duplicates + row.putValue(DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_INTEGER, tempMVIntRows[i]); + row.putValue(DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_LONG, tempMVLongRows[i]); + row.putValue(DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_STRING, tempMVStringRows[i]); + row.putValue(DIM_MV_FORWARD_INDEX_DISABLED_DUPLICATES_BYTES, tempMVByteRows[i]); rows.add(row); } @@ -551,7 +601,8 @@ public void testComputeOperationChangeCompression() throws Exception { // Only try to change compression type for forward index enabled columns randIdx = rand.nextInt(fieldConfigs.size()); name = fieldConfigs.get(randIdx).getName(); - } while (SV_FORWARD_INDEX_DISABLED_COLUMNS.contains(name) || MV_FORWARD_INDEX_DISABLED_COLUMNS.contains(name)); + 
} while (SV_FORWARD_INDEX_DISABLED_COLUMNS.contains(name) || MV_FORWARD_INDEX_DISABLED_COLUMNS.contains(name) + || MV_FORWARD_INDEX_DISABLED_DUPLICATES_COLUMNS.contains(name)); FieldConfig config = fieldConfigs.remove(randIdx); FieldConfig.CompressionCodec newCompressionType = null; for (FieldConfig.CompressionCodec type : _allCompressionTypes) { @@ -687,7 +738,7 @@ public void testComputeOperationDisableForwardIndex() assertEquals(operationMap.get(DIM_DICT_STRING), ForwardIndexHandler.Operation.DISABLE_FORWARD_INDEX_FOR_DICT_COLUMN); - // TEST7: Try to change compression type for a forward index disabled column. Should be a no-op + // TEST7: Try to change compression type for a forward index disabled column and enable forward index for it List fieldConfigs = new ArrayList<>(_tableConfig.getFieldConfigList()); int randIdx; Random rand = new Random(); @@ -710,20 +761,87 @@ public void testComputeOperationDisableForwardIndex() new FieldConfig(config.getName(), FieldConfig.EncodingType.RAW, Collections.emptyList(), newCompressionType, null); fieldConfigs.add(newConfig); + List noDictionaryColumns = new ArrayList<>(_noDictionaryColumns); + noDictionaryColumns.add(config.getName()); TableConfig tableConfig = - new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).setNoDictionaryColumns(_noDictionaryColumns) - .setFieldConfigList(fieldConfigs).build(); + new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).setNoDictionaryColumns(noDictionaryColumns) + .setInvertedIndexColumns(_forwardIndexDisabledColumns).setFieldConfigList(fieldConfigs).build(); tableConfig.setFieldConfigList(fieldConfigs); indexLoadingConfig = new IndexLoadingConfig(null, tableConfig); + indexLoadingConfig.getNoDictionaryColumns().add(config.getName()); + indexLoadingConfig.getInvertedIndexColumns().remove(config.getName()); fwdIndexHandler = new ForwardIndexHandler(existingSegmentMetadata, indexLoadingConfig, null); - try { - fwdIndexHandler.computeOperation(writer); - 
Assert.fail("Trying to recreate forward index should fail for now"); - } catch (UnsupportedOperationException e) { - assertEquals(e.getMessage(), String.format("Recreating forward index for column: %s " - + "is not yet supported. Please backfill or refresh the data for now.", config.getName())); - } + operationMap = fwdIndexHandler.computeOperation(writer); + assertEquals(operationMap.size(), 1); + assertEquals(operationMap.get(config.getName()), + ForwardIndexHandler.Operation.ENABLE_FORWARD_INDEX_FOR_RAW_COLUMN); + + // TEST8: Enable forward index in dictionary format for a column with forward index disabled + indexLoadingConfig = new IndexLoadingConfig(null, _tableConfig); + indexLoadingConfig.getForwardIndexDisabledColumns().remove(DIM_SV_FORWARD_INDEX_DISABLED_BYTES); + fwdIndexHandler = new ForwardIndexHandler(existingSegmentMetadata, indexLoadingConfig, _schema); + operationMap = fwdIndexHandler.computeOperation(writer); + assertEquals(operationMap.size(), 1); + assertEquals(operationMap.get(DIM_SV_FORWARD_INDEX_DISABLED_BYTES), + ForwardIndexHandler.Operation.ENABLE_FORWARD_INDEX_FOR_DICT_COLUMN); + + // TEST9: Enable forward index in raw format for a column with forward index disabled. 
Remove column from inverted + // index as well (inverted index needs dictionary) + indexLoadingConfig = new IndexLoadingConfig(null, _tableConfig); + indexLoadingConfig.getForwardIndexDisabledColumns().remove(DIM_MV_FORWARD_INDEX_DISABLED_INTEGER); + indexLoadingConfig.getNoDictionaryColumns().add(DIM_MV_FORWARD_INDEX_DISABLED_INTEGER); + indexLoadingConfig.getInvertedIndexColumns().remove(DIM_MV_FORWARD_INDEX_DISABLED_INTEGER); + fwdIndexHandler = new ForwardIndexHandler(existingSegmentMetadata, indexLoadingConfig, _schema); + operationMap = fwdIndexHandler.computeOperation(writer); + assertEquals(operationMap.size(), 1); + assertEquals(operationMap.get(DIM_MV_FORWARD_INDEX_DISABLED_INTEGER), + ForwardIndexHandler.Operation.ENABLE_FORWARD_INDEX_FOR_RAW_COLUMN); + + // TEST10: Enable forward index in dictionary format for two columns with forward index disabled. Disable inverted + // index for one of them + indexLoadingConfig = new IndexLoadingConfig(null, _tableConfig); + indexLoadingConfig.getForwardIndexDisabledColumns().remove(DIM_SV_FORWARD_INDEX_DISABLED_LONG); + indexLoadingConfig.getForwardIndexDisabledColumns().remove(DIM_MV_FORWARD_INDEX_DISABLED_STRING); + indexLoadingConfig.getInvertedIndexColumns().remove(DIM_SV_FORWARD_INDEX_DISABLED_LONG); + fwdIndexHandler = new ForwardIndexHandler(existingSegmentMetadata, indexLoadingConfig, _schema); + operationMap = fwdIndexHandler.computeOperation(writer); + assertEquals(operationMap.size(), 2); + assertEquals(operationMap.get(DIM_SV_FORWARD_INDEX_DISABLED_LONG), + ForwardIndexHandler.Operation.ENABLE_FORWARD_INDEX_FOR_DICT_COLUMN); + assertEquals(operationMap.get(DIM_MV_FORWARD_INDEX_DISABLED_STRING), + ForwardIndexHandler.Operation.ENABLE_FORWARD_INDEX_FOR_DICT_COLUMN); + + // TEST11: Enable forward index in raw format for two columns with forward index disabled. 
Remove column from + // inverted index as well (inverted index needs dictionary) + indexLoadingConfig = new IndexLoadingConfig(null, _tableConfig); + indexLoadingConfig.getForwardIndexDisabledColumns().remove(DIM_SV_FORWARD_INDEX_DISABLED_STRING); + indexLoadingConfig.getNoDictionaryColumns().add(DIM_SV_FORWARD_INDEX_DISABLED_STRING); + indexLoadingConfig.getInvertedIndexColumns().remove(DIM_SV_FORWARD_INDEX_DISABLED_STRING); + indexLoadingConfig.getForwardIndexDisabledColumns().remove(DIM_MV_FORWARD_INDEX_DISABLED_LONG); + indexLoadingConfig.getNoDictionaryColumns().add(DIM_MV_FORWARD_INDEX_DISABLED_LONG); + indexLoadingConfig.getInvertedIndexColumns().remove(DIM_MV_FORWARD_INDEX_DISABLED_LONG); + fwdIndexHandler = new ForwardIndexHandler(existingSegmentMetadata, indexLoadingConfig, _schema); + operationMap = fwdIndexHandler.computeOperation(writer); + assertEquals(operationMap.size(), 2); + assertEquals(operationMap.get(DIM_SV_FORWARD_INDEX_DISABLED_STRING), + ForwardIndexHandler.Operation.ENABLE_FORWARD_INDEX_FOR_RAW_COLUMN); + assertEquals(operationMap.get(DIM_MV_FORWARD_INDEX_DISABLED_LONG), + ForwardIndexHandler.Operation.ENABLE_FORWARD_INDEX_FOR_RAW_COLUMN); + + // TEST12: Enable forward index in dictionary format and one in raw format for columns with forward index disabled + indexLoadingConfig = new IndexLoadingConfig(null, _tableConfig); + indexLoadingConfig.getForwardIndexDisabledColumns().remove(DIM_MV_FORWARD_INDEX_DISABLED_LONG); + indexLoadingConfig.getNoDictionaryColumns().add(DIM_MV_FORWARD_INDEX_DISABLED_LONG); + indexLoadingConfig.getInvertedIndexColumns().remove(DIM_MV_FORWARD_INDEX_DISABLED_LONG); + indexLoadingConfig.getForwardIndexDisabledColumns().remove(DIM_SV_FORWARD_INDEX_DISABLED_BYTES); + fwdIndexHandler = new ForwardIndexHandler(existingSegmentMetadata, indexLoadingConfig, _schema); + operationMap = fwdIndexHandler.computeOperation(writer); + assertEquals(operationMap.size(), 2); + 
assertEquals(operationMap.get(DIM_MV_FORWARD_INDEX_DISABLED_LONG), + ForwardIndexHandler.Operation.ENABLE_FORWARD_INDEX_FOR_RAW_COLUMN); + assertEquals(operationMap.get(DIM_SV_FORWARD_INDEX_DISABLED_BYTES), + ForwardIndexHandler.Operation.ENABLE_FORWARD_INDEX_FOR_DICT_COLUMN); // Tear down segmentLocalFSDirectory.close(); @@ -785,7 +903,7 @@ public void testChangeCompressionForSingleColumn() metadata.getCardinality(), metadata.getTotalDocs(), metadata.getDataType(), metadata.getFieldType(), metadata.isSorted(), metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), metadata.getTotalNumberOfEntries(), metadata.isAutoGenerated(), metadata.getMinValue(), - metadata.getMaxValue()); + metadata.getMaxValue(), false); } } } @@ -810,7 +928,8 @@ public void testChangeCompressionForMultipleColumns() // Only try to change compression type for forward index enabled columns randomIdx = rand.nextInt(fieldConfigs.size()); name = fieldConfigs.get(randomIdx).getName(); - } while (SV_FORWARD_INDEX_DISABLED_COLUMNS.contains(name) || MV_FORWARD_INDEX_DISABLED_COLUMNS.contains(name)); + } while (SV_FORWARD_INDEX_DISABLED_COLUMNS.contains(name) || MV_FORWARD_INDEX_DISABLED_COLUMNS.contains(name) + || MV_FORWARD_INDEX_DISABLED_DUPLICATES_COLUMNS.contains(name)); FieldConfig config1 = fieldConfigs.remove(randomIdx); String column1 = config1.getName(); FieldConfig newConfig1 = @@ -822,7 +941,8 @@ public void testChangeCompressionForMultipleColumns() // Only try to change compression type for forward index enabled columns randomIdx = rand.nextInt(fieldConfigs.size()); name = fieldConfigs.get(randomIdx).getName(); - } while (SV_FORWARD_INDEX_DISABLED_COLUMNS.contains(name) || MV_FORWARD_INDEX_DISABLED_COLUMNS.contains(name)); + } while (SV_FORWARD_INDEX_DISABLED_COLUMNS.contains(name) || MV_FORWARD_INDEX_DISABLED_COLUMNS.contains(name) + || MV_FORWARD_INDEX_DISABLED_DUPLICATES_COLUMNS.contains(name)); FieldConfig config2 = fieldConfigs.remove(randomIdx); String column2 = 
config2.getName(); FieldConfig newConfig2 = @@ -852,7 +972,8 @@ public void testChangeCompressionForMultipleColumns() validateMetadataProperties(column1, metadata.hasDictionary(), metadata.getColumnMaxLength(), metadata.getCardinality(), metadata.getTotalDocs(), metadata.getDataType(), metadata.getFieldType(), metadata.isSorted(), metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), - metadata.getTotalNumberOfEntries(), metadata.isAutoGenerated(), metadata.getMinValue(), metadata.getMaxValue()); + metadata.getTotalNumberOfEntries(), metadata.isAutoGenerated(), metadata.getMinValue(), metadata.getMaxValue(), + false); testIndexExists(column2, ColumnIndexType.FORWARD_INDEX); validateIndexMap(column2, false, false); @@ -861,7 +982,8 @@ public void testChangeCompressionForMultipleColumns() validateMetadataProperties(column2, metadata.hasDictionary(), metadata.getColumnMaxLength(), metadata.getCardinality(), metadata.getTotalDocs(), metadata.getDataType(), metadata.getFieldType(), metadata.isSorted(), metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), - metadata.getTotalNumberOfEntries(), metadata.isAutoGenerated(), metadata.getMinValue(), metadata.getMaxValue()); + metadata.getTotalNumberOfEntries(), metadata.isAutoGenerated(), metadata.getMinValue(), metadata.getMaxValue(), + false); } @Test @@ -905,7 +1027,7 @@ public void testEnableDictionaryForMultipleColumns() validateMetadataProperties(col1, true, dictionaryElementSize, metadata.getCardinality(), metadata.getTotalDocs(), dataType, metadata.getFieldType(), metadata.isSorted(), metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), metadata.getTotalNumberOfEntries(), metadata.isAutoGenerated(), - metadata.getMinValue(), metadata.getMaxValue()); + metadata.getMinValue(), metadata.getMaxValue(), false); // Col2 validation. 
testIndexExists(col2, ColumnIndexType.FORWARD_INDEX); @@ -925,7 +1047,7 @@ public void testEnableDictionaryForMultipleColumns() validateMetadataProperties(col2, true, dictionaryElementSize, metadata.getCardinality(), metadata.getTotalDocs(), dataType, metadata.getFieldType(), metadata.isSorted(), metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), metadata.getTotalNumberOfEntries(), metadata.isAutoGenerated(), - metadata.getMinValue(), metadata.getMaxValue()); + metadata.getMinValue(), metadata.getMaxValue(), false); } @Test @@ -968,7 +1090,7 @@ public void testEnableDictionaryForSingleColumn() validateMetadataProperties(column, true, dictionaryElementSize, metadata.getCardinality(), metadata.getTotalDocs(), dataType, metadata.getFieldType(), metadata.isSorted(), metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), metadata.getTotalNumberOfEntries(), metadata.isAutoGenerated(), - metadata.getMinValue(), metadata.getMaxValue()); + metadata.getMinValue(), metadata.getMaxValue(), false); } } @@ -1009,7 +1131,7 @@ public void testDisableForwardIndexForMultipleDictColumns() validateMetadataProperties(col1, metadata.hasDictionary(), metadata.getColumnMaxLength(), metadata.getCardinality(), metadata.getTotalDocs(), metadata.getDataType(), metadata.getFieldType(), metadata.isSorted(), metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), metadata.getTotalNumberOfEntries(), - metadata.isAutoGenerated(), metadata.getMinValue(), metadata.getMaxValue()); + metadata.isAutoGenerated(), metadata.getMinValue(), metadata.getMaxValue(), false); // Col2 validation. 
validateIndexMap(col2, true, true); @@ -1019,15 +1141,15 @@ public void testDisableForwardIndexForMultipleDictColumns() validateMetadataProperties(col2, metadata.hasDictionary(), metadata.getColumnMaxLength(), metadata.getCardinality(), metadata.getTotalDocs(), metadata.getDataType(), metadata.getFieldType(), metadata.isSorted(), metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), metadata.getTotalNumberOfEntries(), - metadata.isAutoGenerated(), metadata.getMinValue(), metadata.getMaxValue()); + metadata.isAutoGenerated(), metadata.getMinValue(), metadata.getMaxValue(), false); } @Test public void testDisableForwardIndexForSingleDictColumn() throws Exception { - // Must include columns which already have forward index disabled as enabling forward index is not yet supported Set forwardIndexDisabledColumns = new HashSet<>(SV_FORWARD_INDEX_DISABLED_COLUMNS); forwardIndexDisabledColumns.addAll(MV_FORWARD_INDEX_DISABLED_COLUMNS); + forwardIndexDisabledColumns.addAll(MV_FORWARD_INDEX_DISABLED_DUPLICATES_COLUMNS); for (String column : DICT_ENABLED_COLUMNS_WITH_FORWARD_INDEX) { SegmentMetadataImpl existingSegmentMetadata = new SegmentMetadataImpl(_segmentDirectory); SegmentDirectory segmentLocalFSDirectory = @@ -1056,7 +1178,7 @@ public void testDisableForwardIndexForSingleDictColumn() metadata.getCardinality(), metadata.getTotalDocs(), metadata.getDataType(), metadata.getFieldType(), metadata.isSorted(), metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), metadata.getTotalNumberOfEntries(), metadata.isAutoGenerated(), metadata.getMinValue(), - metadata.getMaxValue()); + metadata.getMaxValue(), false); } } @@ -1092,7 +1214,7 @@ public void testDisableDictionaryForSingleColumn() validateMetadataProperties(column, false, 0, metadata.getCardinality(), metadata.getTotalDocs(), metadata.getDataType(), metadata.getFieldType(), metadata.isSorted(), metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), metadata.getTotalNumberOfEntries(), 
metadata.isAutoGenerated(), - metadata.getMinValue(), metadata.getMaxValue()); + metadata.getMinValue(), metadata.getMaxValue(), false); } } @@ -1134,7 +1256,7 @@ public void testDisableDictionaryForMultipleColumns() validateMetadataProperties(column1, false, 0, metadata.getCardinality(), metadata.getTotalDocs(), metadata.getDataType(), metadata.getFieldType(), metadata.isSorted(), metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), metadata.getTotalNumberOfEntries(), metadata.isAutoGenerated(), - metadata.getMinValue(), metadata.getMaxValue()); + metadata.getMinValue(), metadata.getMaxValue(), false); // Column2 validation. testIndexExists(column2, ColumnIndexType.FORWARD_INDEX); @@ -1147,7 +1269,7 @@ public void testDisableDictionaryForMultipleColumns() validateMetadataProperties(column2, false, 0, metadata.getCardinality(), metadata.getTotalDocs(), metadata.getDataType(), metadata.getFieldType(), metadata.isSorted(), metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), metadata.getTotalNumberOfEntries(), metadata.isAutoGenerated(), - metadata.getMinValue(), metadata.getMaxValue()); + metadata.getMinValue(), metadata.getMaxValue(), false); } @Test @@ -1194,7 +1316,7 @@ public void testDisableForwardIndexForMultipleRawColumns() validateMetadataProperties(col1, true, dictionaryElementSize, metadata.getCardinality(), metadata.getTotalDocs(), dataType, metadata.getFieldType(), metadata.isSorted(), metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), metadata.getTotalNumberOfEntries(), - metadata.isAutoGenerated(), metadata.getMinValue(), metadata.getMaxValue()); + metadata.isAutoGenerated(), metadata.getMinValue(), metadata.getMaxValue(), false); // Col2 validation. 
validateIndexMap(col2, true, true); @@ -1212,15 +1334,15 @@ public void testDisableForwardIndexForMultipleRawColumns() validateMetadataProperties(col2, true, dictionaryElementSize, metadata.getCardinality(), metadata.getTotalDocs(), dataType, metadata.getFieldType(), metadata.isSorted(), metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), metadata.getTotalNumberOfEntries(), - metadata.isAutoGenerated(), metadata.getMinValue(), metadata.getMaxValue()); + metadata.isAutoGenerated(), metadata.getMinValue(), metadata.getMaxValue(), false); } @Test public void testDisableForwardIndexForSingleRawColumn() throws Exception { - // Must include columns which already have forward index disabled as enabling forward index is not yet supported Set forwardIndexDisabledColumns = new HashSet<>(SV_FORWARD_INDEX_DISABLED_COLUMNS); forwardIndexDisabledColumns.addAll(MV_FORWARD_INDEX_DISABLED_COLUMNS); + forwardIndexDisabledColumns.addAll(MV_FORWARD_INDEX_DISABLED_DUPLICATES_COLUMNS); for (String column : _noDictionaryColumns) { SegmentMetadataImpl existingSegmentMetadata = new SegmentMetadataImpl(_segmentDirectory); SegmentDirectory segmentLocalFSDirectory = @@ -1257,7 +1379,256 @@ public void testDisableForwardIndexForSingleRawColumn() validateMetadataProperties(column, true, dictionaryElementSize, metadata.getCardinality(), metadata.getTotalDocs(), dataType, metadata.getFieldType(), metadata.isSorted(), metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), metadata.getTotalNumberOfEntries(), metadata.isAutoGenerated(), - metadata.getMinValue(), metadata.getMaxValue()); + metadata.getMinValue(), metadata.getMaxValue(), false); + } + } + + @Test + public void testEnableForwardIndexInDictModeForMultipleForwardIndexDisabledColumns() + throws Exception { + SegmentMetadataImpl existingSegmentMetadata = new SegmentMetadataImpl(_segmentDirectory); + SegmentDirectory segmentLocalFSDirectory = + new SegmentLocalFSDirectory(_segmentDirectory, existingSegmentMetadata, 
ReadMode.mmap); + SegmentDirectory.Writer writer = segmentLocalFSDirectory.createWriter(); + IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(null, _tableConfig); + + Random rand = new Random(); + // Remove from forward index list but keep the inverted index enabled + String col1 = SV_FORWARD_INDEX_DISABLED_COLUMNS.get(rand.nextInt(SV_FORWARD_INDEX_DISABLED_COLUMNS.size())); + indexLoadingConfig.getForwardIndexDisabledColumns().remove(col1); + String col2 = MV_FORWARD_INDEX_DISABLED_COLUMNS.get(rand.nextInt(MV_FORWARD_INDEX_DISABLED_COLUMNS.size())); + indexLoadingConfig.getForwardIndexDisabledColumns().remove(col2); + + ForwardIndexHandler fwdIndexHandler = new ForwardIndexHandler(existingSegmentMetadata, indexLoadingConfig, _schema); + IndexCreatorProvider indexCreatorProvider = IndexingOverrides.getIndexCreatorProvider(); + fwdIndexHandler.updateIndices(writer, indexCreatorProvider); + fwdIndexHandler.postUpdateIndicesCleanup(writer); + + // Tear down before validation. Because columns.psf and index map cleanup happens at segmentDirectory.close() + segmentLocalFSDirectory.close(); + + // Col1 validation. + validateIndexMap(col1, true, false); + validateForwardIndex(col1, null); + // In column metadata, nothing should change. + ColumnMetadata metadata = existingSegmentMetadata.getColumnMetadataFor(col1); + validateMetadataProperties(col1, metadata.hasDictionary(), metadata.getColumnMaxLength(), metadata.getCardinality(), + metadata.getTotalDocs(), metadata.getDataType(), metadata.getFieldType(), metadata.isSorted(), + metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), metadata.getTotalNumberOfEntries(), + metadata.isAutoGenerated(), metadata.getMinValue(), metadata.getMaxValue(), false); + + // Col2 validation. + validateIndexMap(col2, true, false); + validateForwardIndex(col2, null); + // In column metadata, nothing should change. 
+ metadata = existingSegmentMetadata.getColumnMetadataFor(col2); + validateMetadataProperties(col2, metadata.hasDictionary(), metadata.getColumnMaxLength(), metadata.getCardinality(), + metadata.getTotalDocs(), metadata.getDataType(), metadata.getFieldType(), metadata.isSorted(), + metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), metadata.getTotalNumberOfEntries(), + metadata.isAutoGenerated(), metadata.getMinValue(), metadata.getMaxValue(), false); + } + + @Test + public void testEnableForwardIndexInDictModeForMVForwardIndexDisabledColumnWithDuplicates() + throws Exception { + SegmentMetadataImpl existingSegmentMetadata = new SegmentMetadataImpl(_segmentDirectory); + SegmentDirectory segmentLocalFSDirectory = + new SegmentLocalFSDirectory(_segmentDirectory, existingSegmentMetadata, ReadMode.mmap); + SegmentDirectory.Writer writer = segmentLocalFSDirectory.createWriter(); + IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(null, _tableConfig); + + Random rand = new Random(); + // Remove from forward index list but keep the inverted index enabled + String column = MV_FORWARD_INDEX_DISABLED_DUPLICATES_COLUMNS + .get(rand.nextInt(MV_FORWARD_INDEX_DISABLED_DUPLICATES_COLUMNS.size())); + indexLoadingConfig.getForwardIndexDisabledColumns().remove(column); + + ForwardIndexHandler fwdIndexHandler = new ForwardIndexHandler(existingSegmentMetadata, indexLoadingConfig, _schema); + IndexCreatorProvider indexCreatorProvider = IndexingOverrides.getIndexCreatorProvider(); + fwdIndexHandler.updateIndices(writer, indexCreatorProvider); + fwdIndexHandler.postUpdateIndicesCleanup(writer); + + // Tear down before validation. Because columns.psf and index map cleanup happens at segmentDirectory.close() + segmentLocalFSDirectory.close(); + + // Column validation. 
+ validateIndexMap(column, true, false); + validateForwardIndex(column, null); + // In column metadata, some values can change since MV columns with duplicates lose the duplicates on forward index + // regeneration. + ColumnMetadata metadata = existingSegmentMetadata.getColumnMetadataFor(column); + validateMetadataProperties(column, metadata.hasDictionary(), metadata.getColumnMaxLength(), + metadata.getCardinality(), metadata.getTotalDocs(), metadata.getDataType(), metadata.getFieldType(), + metadata.isSorted(), metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), + metadata.getTotalNumberOfEntries(), metadata.isAutoGenerated(), metadata.getMinValue(), metadata.getMaxValue(), + true); + } + + @Test + public void testEnableForwardIndexInDictModeForSingleForwardIndexDisabledColumn() + throws Exception { + Set forwardIndexDisabledColumns = new HashSet<>(SV_FORWARD_INDEX_DISABLED_COLUMNS); + forwardIndexDisabledColumns.addAll(MV_FORWARD_INDEX_DISABLED_COLUMNS); + forwardIndexDisabledColumns.addAll(MV_FORWARD_INDEX_DISABLED_DUPLICATES_COLUMNS); + List allForwardIndexDisabledColumns = new ArrayList<>(SV_FORWARD_INDEX_DISABLED_COLUMNS); + allForwardIndexDisabledColumns.addAll(MV_FORWARD_INDEX_DISABLED_COLUMNS); + for (String column : allForwardIndexDisabledColumns) { + SegmentMetadataImpl existingSegmentMetadata = new SegmentMetadataImpl(_segmentDirectory); + SegmentDirectory segmentLocalFSDirectory = + new SegmentLocalFSDirectory(_segmentDirectory, existingSegmentMetadata, ReadMode.mmap); + SegmentDirectory.Writer writer = segmentLocalFSDirectory.createWriter(); + + // Leave the inverted index as is, should ideally work even if inverted index is disabled + IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(null, _tableConfig); + forwardIndexDisabledColumns.remove(column); + indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + ForwardIndexHandler fwdIndexHandler = + new ForwardIndexHandler(existingSegmentMetadata, 
indexLoadingConfig, _schema); + IndexCreatorProvider indexCreatorProvider = IndexingOverrides.getIndexCreatorProvider(); + fwdIndexHandler.updateIndices(writer, indexCreatorProvider); + fwdIndexHandler.postUpdateIndicesCleanup(writer); + + // Tear down before validation. Because columns.psf and index map cleanup happens at segmentDirectory.close() + segmentLocalFSDirectory.close(); + + validateIndexMap(column, true, false); + validateForwardIndex(column, null); + + // In column metadata, nothing should change. + ColumnMetadata metadata = existingSegmentMetadata.getColumnMetadataFor(column); + validateMetadataProperties(column, metadata.hasDictionary(), metadata.getColumnMaxLength(), + metadata.getCardinality(), metadata.getTotalDocs(), metadata.getDataType(), metadata.getFieldType(), + metadata.isSorted(), metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), + metadata.getTotalNumberOfEntries(), metadata.isAutoGenerated(), metadata.getMinValue(), + metadata.getMaxValue(), false); + } + } + + @Test + public void testEnableForwardIndexInRawModeForMultipleForwardIndexDisabledColumns() + throws Exception { + SegmentMetadataImpl existingSegmentMetadata = new SegmentMetadataImpl(_segmentDirectory); + SegmentDirectory segmentLocalFSDirectory = + new SegmentLocalFSDirectory(_segmentDirectory, existingSegmentMetadata, ReadMode.mmap); + SegmentDirectory.Writer writer = segmentLocalFSDirectory.createWriter(); + IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(null, _tableConfig); + + Random rand = new Random(); + String col1 = SV_FORWARD_INDEX_DISABLED_COLUMNS.get(rand.nextInt(SV_FORWARD_INDEX_DISABLED_COLUMNS.size())); + indexLoadingConfig.getForwardIndexDisabledColumns().remove(col1); + indexLoadingConfig.getInvertedIndexColumns().remove(col1); + indexLoadingConfig.getNoDictionaryColumns().add(col1); + String col2 = MV_FORWARD_INDEX_DISABLED_COLUMNS.get(rand.nextInt(MV_FORWARD_INDEX_DISABLED_COLUMNS.size())); + 
indexLoadingConfig.getForwardIndexDisabledColumns().remove(col2); + indexLoadingConfig.getInvertedIndexColumns().remove(col2); + indexLoadingConfig.getNoDictionaryColumns().add(col2); + + ForwardIndexHandler fwdIndexHandler = new ForwardIndexHandler(existingSegmentMetadata, indexLoadingConfig, _schema); + IndexCreatorProvider indexCreatorProvider = IndexingOverrides.getIndexCreatorProvider(); + fwdIndexHandler.updateIndices(writer, indexCreatorProvider); + fwdIndexHandler.postUpdateIndicesCleanup(writer); + + // Tear down before validation. Because columns.psf and index map cleanup happens at segmentDirectory.close() + segmentLocalFSDirectory.close(); + + // Col1 validation. + validateIndexMap(col1, false, false); + validateForwardIndex(col1, FieldConfig.CompressionCodec.LZ4); + // In column metadata, nothing should change. + ColumnMetadata metadata = existingSegmentMetadata.getColumnMetadataFor(col1); + validateMetadataProperties(col1, false, 0, metadata.getCardinality(), + metadata.getTotalDocs(), metadata.getDataType(), metadata.getFieldType(), metadata.isSorted(), + metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), metadata.getTotalNumberOfEntries(), + metadata.isAutoGenerated(), metadata.getMinValue(), metadata.getMaxValue(), false); + + // Col2 validation. + validateIndexMap(col2, false, false); + validateForwardIndex(col2, FieldConfig.CompressionCodec.LZ4); + // In column metadata, nothing should change. 
+ metadata = existingSegmentMetadata.getColumnMetadataFor(col2); + validateMetadataProperties(col2, false, 0, metadata.getCardinality(), + metadata.getTotalDocs(), metadata.getDataType(), metadata.getFieldType(), metadata.isSorted(), + metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), metadata.getTotalNumberOfEntries(), + metadata.isAutoGenerated(), metadata.getMinValue(), metadata.getMaxValue(), false); + } + + @Test + public void testEnableForwardIndexInRawModeForMVForwardIndexDisabledColumnWithDuplicates() + throws Exception { + SegmentMetadataImpl existingSegmentMetadata = new SegmentMetadataImpl(_segmentDirectory); + SegmentDirectory segmentLocalFSDirectory = + new SegmentLocalFSDirectory(_segmentDirectory, existingSegmentMetadata, ReadMode.mmap); + SegmentDirectory.Writer writer = segmentLocalFSDirectory.createWriter(); + IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(null, _tableConfig); + + Random rand = new Random(); + // Remove from forward index list but keep the inverted index enabled + String column = MV_FORWARD_INDEX_DISABLED_DUPLICATES_COLUMNS + .get(rand.nextInt(MV_FORWARD_INDEX_DISABLED_DUPLICATES_COLUMNS.size())); + indexLoadingConfig.getForwardIndexDisabledColumns().remove(column); + indexLoadingConfig.getInvertedIndexColumns().remove(column); + indexLoadingConfig.getNoDictionaryColumns().add(column); + + ForwardIndexHandler fwdIndexHandler = new ForwardIndexHandler(existingSegmentMetadata, indexLoadingConfig, _schema); + IndexCreatorProvider indexCreatorProvider = IndexingOverrides.getIndexCreatorProvider(); + fwdIndexHandler.updateIndices(writer, indexCreatorProvider); + fwdIndexHandler.postUpdateIndicesCleanup(writer); + + // Tear down before validation. Because columns.psf and index map cleanup happens at segmentDirectory.close() + segmentLocalFSDirectory.close(); + + // Column validation. 
+ validateIndexMap(column, false, false); + validateForwardIndex(column, FieldConfig.CompressionCodec.LZ4); + // In column metadata, some values can change since MV columns with duplicates lose the duplicates on forward index + // regeneration. + ColumnMetadata metadata = existingSegmentMetadata.getColumnMetadataFor(column); + validateMetadataProperties(column, false, 0, metadata.getCardinality(), + metadata.getTotalDocs(), metadata.getDataType(), metadata.getFieldType(), metadata.isSorted(), + metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), metadata.getTotalNumberOfEntries(), + metadata.isAutoGenerated(), metadata.getMinValue(), metadata.getMaxValue(), true); + } + + @Test + public void testEnableForwardIndexInRawModeForSingleForwardIndexDisabledColumn() + throws Exception { + Set forwardIndexDisabledColumns = new HashSet<>(SV_FORWARD_INDEX_DISABLED_COLUMNS); + forwardIndexDisabledColumns.addAll(MV_FORWARD_INDEX_DISABLED_COLUMNS); + forwardIndexDisabledColumns.addAll(MV_FORWARD_INDEX_DISABLED_DUPLICATES_COLUMNS); + List allForwardIndexDisabledColumns = new ArrayList<>(SV_FORWARD_INDEX_DISABLED_COLUMNS); + allForwardIndexDisabledColumns.addAll(MV_FORWARD_INDEX_DISABLED_COLUMNS); + List columnList = new ArrayList<>(); + for (String column : allForwardIndexDisabledColumns) { + SegmentMetadataImpl existingSegmentMetadata = new SegmentMetadataImpl(_segmentDirectory); + SegmentDirectory segmentLocalFSDirectory = + new SegmentLocalFSDirectory(_segmentDirectory, existingSegmentMetadata, ReadMode.mmap); + SegmentDirectory.Writer writer = segmentLocalFSDirectory.createWriter(); + + IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(null, _tableConfig); + forwardIndexDisabledColumns.remove(column); + indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + indexLoadingConfig.setInvertedIndexColumns(forwardIndexDisabledColumns); + columnList.add(column); + indexLoadingConfig.getNoDictionaryColumns().addAll(columnList); + 
ForwardIndexHandler fwdIndexHandler = + new ForwardIndexHandler(existingSegmentMetadata, indexLoadingConfig, _schema); + IndexCreatorProvider indexCreatorProvider = IndexingOverrides.getIndexCreatorProvider(); + fwdIndexHandler.updateIndices(writer, indexCreatorProvider); + fwdIndexHandler.postUpdateIndicesCleanup(writer); + + // Tear down before validation. Because columns.psf and index map cleanup happens at segmentDirectory.close() + segmentLocalFSDirectory.close(); + + validateIndexMap(column, false, false); + validateForwardIndex(column, FieldConfig.CompressionCodec.LZ4); + + // In column metadata, nothing should change. + ColumnMetadata metadata = existingSegmentMetadata.getColumnMetadataFor(column); + validateMetadataProperties(column, false, 0, + metadata.getCardinality(), metadata.getTotalDocs(), metadata.getDataType(), metadata.getFieldType(), + metadata.isSorted(), metadata.isSingleValue(), metadata.getMaxNumberOfMultiValues(), + metadata.getTotalNumberOfEntries(), metadata.isAutoGenerated(), metadata.getMinValue(), + metadata.getMaxValue(), false); } } @@ -1291,6 +1662,13 @@ private void validateForwardIndex(String columnName, @Nullable FieldConfig.Compr ColumnMetadata columnMetadata = existingSegmentMetadata.getColumnMetadataFor(columnName); boolean isSingleValue = columnMetadata.isSingleValue(); + if (expectedCompressionType == null) { + assertTrue(writer.hasIndexFor(columnName, ColumnIndexType.DICTIONARY)); + } else { + assertFalse(writer.hasIndexFor(columnName, ColumnIndexType.DICTIONARY)); + } + assertTrue(writer.hasIndexFor(columnName, ColumnIndexType.FORWARD_INDEX)); + // Check Compression type in header ForwardIndexReader fwdIndexReader = LoaderUtils.getForwardIndexReader(writer, columnMetadata); ChunkCompressionType fwdIndexCompressionType = fwdIndexReader.getCompressionType(); @@ -1307,7 +1685,8 @@ private void validateForwardIndex(String columnName, @Nullable FieldConfig.Compr columnMetadata.getMaxNumberOfMultiValues()); for (int rowIdx = 0; 
rowIdx < columnMetadata.getTotalDocs(); rowIdx++) { - if (rowIdx % 10 == 0) { + // For MV forward index disabled columns cannot do this validation as we had to create a unique set of elements + if (!MV_FORWARD_INDEX_DISABLED_COLUMNS.contains(columnName) && (rowIdx % 10 == 0)) { Object val = columnReader.getValue(rowIdx); FieldSpec.DataType dataType = columnMetadata.getDataType(); @@ -1371,6 +1750,72 @@ private void validateForwardIndex(String columnName, @Nullable FieldConfig.Compr // Unreachable code. throw new IllegalStateException("Invalid datatype for column=" + columnName); } + } else if (MV_FORWARD_INDEX_DISABLED_COLUMNS.contains(columnName)) { + Object val = columnReader.getValue(rowIdx); + FieldSpec.DataType dataType = columnMetadata.getDataType(); + + // Regenerating forward index from inverted index and dictionary does not guarantee ordering within a given MV + // row. To validate the data in the row is correct, first generate a set of all possible entries stored at + // each row and then ensure that every entry is found in the pre-constructed set. 
+ switch (dataType) { + case STRING: { + Object[] values = (Object[]) val; + int length = values.length; + Set stringSet = new HashSet<>(); + for (int i = 0; i < length; i++) { + stringSet.add("n" + rowIdx + i); + } + for (Object value : values) { + assertTrue(stringSet.contains((String) value)); + stringSet.remove((String) value); + } + break; + } + case INT: { + Object[] values = (Object[]) val; + int length = values.length; + Set integerSet = new HashSet<>(); + for (int i = 0; i < length; i++) { + integerSet.add(i); + } + for (Object value : values) { + assertTrue(integerSet.contains((Integer) value)); + integerSet.remove((Integer) value); + } + break; + } + case LONG: { + Object[] values = (Object[]) val; + int length = values.length; + Set longSet = new HashSet<>(); + for (int i = 0; i < length; i++) { + longSet.add((long) i); + } + for (Object value : values) { + assertTrue(longSet.contains((Long) value)); + longSet.remove((Long) value); + } + break; + } + case BYTES: { + Object[] values = (Object[]) val; + int length = values.length; + Set bytesSet = new HashSet<>(); + for (int i = 0; i < length; i++) { + String expectedString = "n" + rowIdx + i; + ByteBuffer expectedVal = ByteBuffer.wrap(expectedString.getBytes()); + bytesSet.add(expectedVal); + } + for (Object value : values) { + assertTrue(bytesSet.contains(ByteBuffer.wrap((byte[]) value))); + bytesSet.remove(ByteBuffer.wrap((byte[]) value)); + } + break; + } + default: + // Unreachable code. 
+ throw new IllegalStateException("Invalid datatype for column=" + columnName); + } } } } @@ -1415,7 +1860,7 @@ private void validateIndexMap(String columnName, boolean dictionaryEnabled, bool private void validateMetadataProperties(String column, boolean hasDictionary, int dictionaryElementSize, int cardinality, int totalDocs, FieldSpec.DataType dataType, FieldSpec.FieldType fieldType, boolean isSorted, boolean isSingleValue, int maxNumberOfMVEntries, int totalNumberOfEntries, boolean isAutoGenerated, - Comparable minValue, Comparable maxValue) + Comparable minValue, Comparable maxValue, boolean isRegeneratedMVColumnWithDuplicates) throws IOException { SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(_segmentDirectory); ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column); @@ -1428,8 +1873,24 @@ private void validateMetadataProperties(String column, boolean hasDictionary, in assertEquals(columnMetadata.getFieldType(), fieldType); assertEquals(columnMetadata.isSorted(), isSorted); assertEquals(columnMetadata.isSingleValue(), isSingleValue); - assertEquals(columnMetadata.getMaxNumberOfMultiValues(), maxNumberOfMVEntries); - assertEquals(columnMetadata.getTotalNumberOfEntries(), totalNumberOfEntries); + if (isRegeneratedMVColumnWithDuplicates) { + // For MV columns with duplicates within a row, the duplicates are removed when regenerating the forward index. + // Thus the metadata might not match depending on how many duplicates were found. 
Relax these metadata checks + assertTrue(MV_FORWARD_INDEX_DISABLED_DUPLICATES_COLUMNS.contains(column)); + if (dataType == FieldSpec.DataType.STRING || dataType == FieldSpec.DataType.BYTES) { + // Every entry is duplicated within the row so total number of entries matches number of docs and max number of + // MVs per row is 1 + assertEquals(columnMetadata.getMaxNumberOfMultiValues(), 1); + assertEquals(columnMetadata.getTotalNumberOfEntries(), totalDocs); + } else { + // Cannot check for exact numbers as it will vary depending on number of entries generated per row + assertTrue(columnMetadata.getMaxNumberOfMultiValues() <= maxNumberOfMVEntries); + assertTrue(columnMetadata.getTotalNumberOfEntries() <= totalNumberOfEntries); + } + } else { + assertEquals(columnMetadata.getMaxNumberOfMultiValues(), maxNumberOfMVEntries); + assertEquals(columnMetadata.getTotalNumberOfEntries(), totalNumberOfEntries); + } assertEquals(columnMetadata.isAutoGenerated(), isAutoGenerated); assertEquals(columnMetadata.getMinValue(), minValue); assertEquals(columnMetadata.getMaxValue(), maxValue); diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java index cd1ce10eb894..9a5fd74bb538 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java @@ -111,6 +111,8 @@ public class SegmentPreProcessorTest { // For create no forward index column tests private static final String NEWLY_ADDED_FORWARD_INDEX_DISABLED_COL_SV = "newForwardIndexDisabledColumnSV"; private static final String NEWLY_ADDED_FORWARD_INDEX_DISABLED_COL_MV = "newForwardIndexDisabledColumnMV"; + private static final String 
EXISTING_FORWARD_INDEX_DISABLED_COL_SV = "column10"; + private static final String EXISTING_FORWARD_INDEX_DISABLED_COL_MV = "column7"; // For update default value tests. private static final String NEW_COLUMNS_SCHEMA1 = "data/newColumnsSchema1.json"; @@ -226,7 +228,7 @@ private IndexLoadingConfig getDefaultIndexLoadingConfig() { } private void constructV1Segment(List invertedIndexCols, List textIndexCols, - List rangeIndexCols) + List rangeIndexCols, List forwardIndexDisabledCols) throws Exception { FileUtils.deleteQuietly(INDEX_DIR); @@ -251,6 +253,10 @@ private void constructV1Segment(List invertedIndexCols, List tex if (rangeIndexCols.size() > 0) { segmentGeneratorConfig.setRangeIndexCreationColumns(rangeIndexCols); } + if (forwardIndexDisabledCols.size() > 0) { + segmentGeneratorConfig.setForwardIndexDisabledColumns(forwardIndexDisabledCols); + segmentGeneratorConfig.getInvertedIndexCreationColumns().addAll(forwardIndexDisabledCols); + } SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null); driver.init(segmentGeneratorConfig); @@ -261,7 +267,8 @@ private void constructV1Segment(List invertedIndexCols, List tex private void constructV3Segment() throws Exception { - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); new SegmentV1V2ToV3FormatConverter().convert(_indexDir); } @@ -292,7 +299,8 @@ public void testEnableTextIndexOnNewColumnRaw() 1); // Create a segment in V1, add a new raw column with text index enabled - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); segmentMetadata = new SegmentMetadataImpl(_indexDir); columnMetadata = segmentMetadata.getColumnMetadataFor(NEWLY_ADDED_STRING_COL_RAW); // should 
be null since column does not exist in the schema @@ -314,7 +322,8 @@ public void testEnableFSTIndexOnExistingColumnRaw() new SegmentPreProcessor(segmentDirectory, _indexLoadingConfig, _newColumnsSchemaWithFST); expectThrows(UnsupportedOperationException.class, () -> v3Processor.process()); - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); segmentDirectory = SegmentDirectoryLoaderRegistry.getDefaultSegmentDirectoryLoader().load(_indexDir.toURI(), new SegmentDirectoryLoaderContext.Builder().setSegmentDirectoryConfigs(_configuration).build()); SegmentPreProcessor v1Processor = @@ -332,7 +341,8 @@ public void testEnableFSTIndexOnNewColumnDictEncoded() constructV3Segment(); checkFSTIndexCreation(NEWLY_ADDED_FST_COL_DICT, 1, 1, _newColumnsSchemaWithFST, true, true, 4); - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); checkFSTIndexCreation(NEWLY_ADDED_FST_COL_DICT, 1, 1, _newColumnsSchemaWithFST, true, true, 4); } @@ -349,7 +359,8 @@ public void testEnableFSTIndexOnExistingColumnDictEncoded() assertNotNull(columnMetadata); checkFSTIndexCreation(EXISTING_STRING_COL_DICT, 9, 4, _newColumnsSchemaWithFST, false, false, 26); - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); segmentMetadata = new SegmentMetadataImpl(_indexDir); columnMetadata = segmentMetadata.getColumnMetadataFor(EXISTING_STRING_COL_DICT); assertNotNull(columnMetadata); @@ -360,7 +371,8 @@ public void testEnableFSTIndexOnExistingColumnDictEncoded() public void testSimpleEnableDictionarySV() throws 
Exception { // TEST 1. Check running forwardIndexHandler on a V1 segment. No-op for all existing raw columns. - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); checkForwardIndexCreation(EXISTING_STRING_COL_RAW, 5, 3, _schema, false, false, false, 0, ChunkCompressionType.LZ4, true, 0, DataType.STRING, 100000); validateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_INT_COL_RAW, 42242, 16, _schema, false, false, false, 0, true, @@ -388,7 +400,8 @@ public void testSimpleEnableDictionaryMV() _indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_INT_COL_RAW_MV); // TEST 1. Check running forwardIndexHandler on a V1 segment. No-op for all existing raw columns. - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); checkForwardIndexCreation(EXISTING_INT_COL_RAW_MV, 18499, 15, _schema, false, false, false, 0, ChunkCompressionType.LZ4, false, 13, DataType.INT, 106688); @@ -408,7 +421,8 @@ public void testEnableDictAndOtherIndexesSV() // enabled columns _indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_INT_COL_RAW); _indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_STRING_COL_RAW); - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); new SegmentV1V2ToV3FormatConverter().convert(_indexDir); // TEST 1: EXISTING_STRING_COL_RAW. Enable dictionary. Also add inverted index and text index. Reload code path @@ -426,7 +440,8 @@ public void testEnableDictAndOtherIndexesSV() // TEST 2: EXISTING_STRING_COL_RAW. Enable dictionary on a raw column that already has text index. 
List textIndexCols = new ArrayList<>(); textIndexCols.add(EXISTING_STRING_COL_RAW); - constructV1Segment(Collections.emptyList(), textIndexCols, Collections.emptyList()); + constructV1Segment(Collections.emptyList(), textIndexCols, Collections.emptyList(), + Collections.emptyList()); new SegmentV1V2ToV3FormatConverter().convert(_indexDir); validateIndex(ColumnIndexType.TEXT_INDEX, EXISTING_STRING_COL_RAW, 5, 3, _schema, false, false, false, 0, true, 0, null, false, DataType.STRING, 100000); @@ -442,7 +457,7 @@ public void testEnableDictAndOtherIndexesSV() // TEST 3: EXISTING_INT_COL_RAW. Enable dictionary on a column that already has range index. List rangeIndexCols = new ArrayList<>(); rangeIndexCols.add(EXISTING_INT_COL_RAW); - constructV1Segment(Collections.emptyList(), Collections.emptyList(), rangeIndexCols); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), rangeIndexCols, Collections.emptyList()); new SegmentV1V2ToV3FormatConverter().convert(_indexDir); validateIndex(ColumnIndexType.RANGE_INDEX, EXISTING_INT_COL_RAW, 42242, 16, _schema, false, false, false, 0, true, 0, ChunkCompressionType.LZ4, false, DataType.INT, 100000); @@ -471,7 +486,8 @@ public void testEnableDictAndOtherIndexesMV() // Add raw columns in indexingConfig so that the ForwardIndexHandler doesn't end up converting them to dictionary // enabled columns _indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_INT_COL_RAW_MV); - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); new SegmentV1V2ToV3FormatConverter().convert(_indexDir); // TEST 1: EXISTING_INT_COL_RAW_MV. Enable dictionary for an MV column. Also enable inverted index and range index. @@ -488,7 +504,7 @@ public void testEnableDictAndOtherIndexesMV() // TEST 2: EXISTING_INT_COL_RAW_MV. 
Enable dictionary for an MV column that already has range index. List rangeIndexCols = new ArrayList<>(); rangeIndexCols.add(EXISTING_INT_COL_RAW_MV); - constructV1Segment(Collections.emptyList(), Collections.emptyList(), rangeIndexCols); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), rangeIndexCols, Collections.emptyList()); new SegmentV1V2ToV3FormatConverter().convert(_indexDir); validateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_INT_COL_RAW_MV, 18499, 15, _schema, false, false, false, 0, false, 13, ChunkCompressionType.LZ4, false, DataType.INT, 106688); @@ -507,7 +523,8 @@ public void testEnableDictAndOtherIndexesMV() public void testSimpleDisableDictionary() throws Exception { // TEST 1. Check running forwardIndexHandler on a V1 segment. No-op for all existing dict columns. - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); checkForwardIndexCreation(EXISTING_STRING_COL_DICT, 9, 4, _schema, false, true, false, 26, null, true, 0, DataType.STRING, 100000); validateIndex(ColumnIndexType.FORWARD_INDEX, COLUMN10_NAME, 3960, 12, _schema, false, true, false, 0, true, 0, null, @@ -531,7 +548,8 @@ public void testSimpleDisableDictionary() public void testDisableDictAndOtherIndexesSV() throws Exception { // Validate No-op. - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); new SegmentV1V2ToV3FormatConverter().convert(_indexDir); // TEST 1: Disable dictionary on a column that has inverted index. Should be a no-op and column should still have @@ -549,7 +567,7 @@ public void testDisableDictAndOtherIndexesSV() // TEST 3: Disable dictionary for a column (Column10) that has range index. 
List rangeIndexCols = new ArrayList<>(); rangeIndexCols.add(COLUMN10_NAME); - constructV1Segment(Collections.emptyList(), Collections.emptyList(), rangeIndexCols); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), rangeIndexCols, Collections.emptyList()); new SegmentV1V2ToV3FormatConverter().convert(_indexDir); validateIndex(ColumnIndexType.FORWARD_INDEX, COLUMN10_NAME, 3960, 12, _schema, false, true, false, 0, true, 0, null, false, DataType.INT, 100000); @@ -585,7 +603,8 @@ public void testDisableDictAndOtherIndexesSV() public void testDisableDictAndOtherIndexesMV() throws Exception { // Set up: Enable dictionary on MV column6 and validate. - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); _indexLoadingConfig.getNoDictionaryColumns().remove(EXISTING_INT_COL_RAW_MV); _indexLoadingConfig.getRangeIndexColumns().add(EXISTING_INT_COL_RAW_MV); @@ -603,7 +622,8 @@ public void testDisableDictAndOtherIndexesMV() false, 13, null, false, DataType.INT, 106688); // TEST 2. Disable dictionary on a column where inverted index is enabled. Should be a no-op. - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); new SegmentV1V2ToV3FormatConverter().convert(_indexDir); validateIndex(ColumnIndexType.FORWARD_INDEX, COLUMN7_NAME, 359, 9, _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, false, 24, null, false, DataType.INT, 134090); @@ -617,7 +637,8 @@ public void testDisableDictAndOtherIndexesMV() false, true, false, 0, false, 24, null, false, DataType.INT, 134090); // TEST 3: Disable dictionary and disable inverted index on column7. 
- constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); new SegmentV1V2ToV3FormatConverter().convert(_indexDir); _indexLoadingConfig.getNoDictionaryColumns().add(COLUMN7_NAME); @@ -636,7 +657,8 @@ public void testForwardIndexHandlerChangeCompression() _indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_STRING_COL_RAW); // Test1: Rewriting forward index will be a no-op for v1 segments. Default LZ4 compressionType will be retained. - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); checkForwardIndexCreation(EXISTING_STRING_COL_RAW, 5, 3, _schema, false, false, false, 0, ChunkCompressionType.LZ4, true, 0, DataType.STRING, 100000); @@ -685,7 +707,8 @@ public void testForwardIndexHandlerChangeCompression() _indexLoadingConfig.setCompressionConfigs(compressionConfigs); _indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_INT_COL_RAW_MV); - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); new SegmentV1V2ToV3FormatConverter().convert(_indexDir); checkForwardIndexCreation(EXISTING_INT_COL_RAW_MV, 18499, 15, _schema, false, false, false, 0, ChunkCompressionType.ZSTANDARD, false, 13, DataType.INT, 106688); @@ -719,7 +742,8 @@ public void testEnableTextIndexOnNewColumnDictEncoded() 1); // Create a segment in V1, add a new dict encoded column with text index enabled - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + 
Collections.emptyList()); segmentMetadata = new SegmentMetadataImpl(_indexDir); columnMetadata = segmentMetadata.getColumnMetadataFor(NEWLY_ADDED_STRING_COL_RAW); // should be null since column does not exist in the schema @@ -748,7 +772,8 @@ public void testEnableTextIndexOnExistingRawColumn() checkTextIndexCreation(EXISTING_STRING_COL_RAW, 5, 3, _schema, false, false, false, 0); // Create a segment in V1, add a new column with text index enabled - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); segmentMetadata = new SegmentMetadataImpl(_indexDir); columnMetadata = segmentMetadata.getColumnMetadataFor(EXISTING_STRING_COL_RAW); assertNotNull(columnMetadata); @@ -778,7 +803,8 @@ public void testEnableTextIndexOnExistingDictEncodedColumn() checkTextIndexCreation(EXISTING_STRING_COL_DICT, 9, 4, _schema, false, true, false, 26); // Create a segment in V1, add a new column with text index enabled - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); segmentMetadata = new SegmentMetadataImpl(_indexDir); columnMetadata = segmentMetadata.getColumnMetadataFor(EXISTING_STRING_COL_RAW); assertNotNull(columnMetadata); @@ -866,7 +892,7 @@ private void validateIndex(ColumnIndexType indexType, String column, int cardina assertTrue(reader.hasIndexFor(column, ColumnIndexType.FORWARD_INDEX)); assertFalse(reader.hasIndexFor(column, ColumnIndexType.INVERTED_INDEX)); } else if (forwardIndexDisabled) { - if (segmentMetadata.getVersion() == SegmentVersion.v3) { + if (segmentMetadata.getVersion() == SegmentVersion.v3 || isAutoGenerated) { assertFalse(reader.hasIndexFor(column, ColumnIndexType.FORWARD_INDEX)); assertTrue(reader.hasIndexFor(column, 
ColumnIndexType.INVERTED_INDEX)); assertTrue(reader.hasIndexFor(column, ColumnIndexType.DICTIONARY)); @@ -912,10 +938,21 @@ private void validateIndex(ColumnIndexType indexType, String column, int cardina } } + private void validateIndexDoesNotExist(String column, ColumnIndexType indexType) + throws Exception { + try (SegmentDirectory segmentDirectory1 = SegmentDirectoryLoaderRegistry.getDefaultSegmentDirectoryLoader() + .load(_indexDir.toURI(), + new SegmentDirectoryLoaderContext.Builder().setSegmentDirectoryConfigs(_configuration).build()); + SegmentDirectory.Reader reader = segmentDirectory1.createReader()) { + assertFalse(reader.hasIndexFor(column, indexType)); + } + } + @Test public void testV1CreateInvertedIndices() throws Exception { - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(_indexDir); assertEquals(segmentMetadata.getVersion(), SegmentVersion.v1); @@ -1045,7 +1082,8 @@ private void checkInvertedIndexCreation(boolean reCreate) @Test public void testV1UpdateDefaultColumns() throws Exception { - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); IngestionConfig ingestionConfig = new IngestionConfig(); ingestionConfig.setTransformConfigs( Collections.singletonList(new TransformConfig(NEW_INT_SV_DIMENSION_COLUMN_NAME, "plus(column1, 1)"))); @@ -1259,7 +1297,8 @@ private void checkUpdateDefaultColumns() @Test public void testColumnMinMaxValue() throws Exception { - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + 
Collections.emptyList()); // Remove min/max value from the metadata removeMinMaxValuesFromMetadataFile(_indexDir); @@ -1341,7 +1380,8 @@ public void testColumnMinMaxValue() @Test public void testV1CleanupIndices() throws Exception { - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(_indexDir); assertEquals(segmentMetadata.getVersion(), SegmentVersion.v1); @@ -1466,7 +1506,8 @@ public void testV3CleanupIndices() @Test public void testV1CleanupH3AndTextIndices() throws Exception { - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); // Remove all indices and add the two derived columns for H3 and Json index. 
try (SegmentDirectory segmentDirectory = SegmentDirectoryLoaderRegistry.getDefaultSegmentDirectoryLoader() @@ -1577,7 +1618,8 @@ public void testV3CleanupH3AndTextIndices() @Test public void testV1IfNeedProcess() throws Exception { - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(_indexDir); assertEquals(segmentMetadata.getVersion(), SegmentVersion.v1); @@ -1594,6 +1636,18 @@ public void testV3IfNeedProcess() testIfNeedProcess(); } + @Test + public void testV3IfNeedProcessWithForwardIndexDisabledColumn() + throws Exception { + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.singletonList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV)); + new SegmentV1V2ToV3FormatConverter().convert(_indexDir); + SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(_indexDir); + assertEquals(segmentMetadata.getVersion(), SegmentVersion.v3); + + testIfNeedProcess(); + } + private void testIfNeedProcess() throws Exception { // There are a few indices initially. Require to remove them with an empty IndexLoadingConfig. 
@@ -1856,7 +1910,8 @@ public void testForwardIndexDisabledOnNewColumnsSV() _newColumnsSchemaWithForwardIndexDisabled, true, true, true, 4, true, 0, null, true, DataType.STRING, 100000); // Create a segment in V1, add a column with no forward index enabled - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); segmentMetadata = new SegmentMetadataImpl(_indexDir); columnMetadata = segmentMetadata.getColumnMetadataFor(NEWLY_ADDED_FORWARD_INDEX_DISABLED_COL_SV); // should be null since column does not exist in the schema @@ -1888,7 +1943,8 @@ public void testForwardIndexDisabledOnNewColumnsSV() + "forward index"); } - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); segmentMetadata = new SegmentMetadataImpl(_indexDir); columnMetadata = segmentMetadata.getColumnMetadataFor(NEWLY_ADDED_FORWARD_INDEX_DISABLED_COL_SV); // should be null since column does not exist in the schema @@ -1924,7 +1980,8 @@ public void testForwardIndexDisabledOnNewColumnsSV() NEWLY_ADDED_FORWARD_INDEX_DISABLED_COL_SV, 1, 1, _newColumnsSchemaWithForwardIndexDisabled, true, true, true, 4, true, 0, null, false, DataType.STRING, 100000); - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); segmentMetadata = new SegmentMetadataImpl(_indexDir); columnMetadata = segmentMetadata.getColumnMetadataFor(NEWLY_ADDED_FORWARD_INDEX_DISABLED_COL_SV); // should be null since column does not exist in the schema @@ -1959,7 +2016,8 @@ public void testForwardIndexDisabledOnNewColumnsSV() + 
"newForwardIndexDisabledColumnSV"); } - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); segmentMetadata = new SegmentMetadataImpl(_indexDir); columnMetadata = segmentMetadata.getColumnMetadataFor(NEWLY_ADDED_FORWARD_INDEX_DISABLED_COL_SV); // should be null since column does not exist in the schema @@ -1969,6 +2027,7 @@ public void testForwardIndexDisabledOnNewColumnsSV() createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, NEWLY_ADDED_FORWARD_INDEX_DISABLED_COL_SV, 1, 1, _newColumnsSchemaWithForwardIndexDisabled, true, true, true, 4, true, 0, null, false, DataType.STRING, 100000); + Assert.fail("Should not be able to disable forward index without inverted index for column"); } catch (IllegalStateException e) { assertEquals(e.getMessage(), "Inverted index must be enabled for forward index disabled column: " + "newForwardIndexDisabledColumnSV"); @@ -1979,9 +2038,8 @@ public void testForwardIndexDisabledOnNewColumnsSV() /** * Test to check the behavior of the forward index disabled feature when enabled on a new MV column - * TODO: Add support for handling the forwardIndexDisabled flag on the reload path then enable and fix this test */ - @Test(enabled = false) + @Test public void testForwardIndexDisabledOnNewColumnsMV() throws Exception { Set forwardIndexDisabledColumns = new HashSet<>(); @@ -2005,7 +2063,8 @@ public void testForwardIndexDisabledOnNewColumnsMV() _newColumnsSchemaWithForwardIndexDisabled, true, true, false, 4, false, 1, null, true, DataType.STRING, 100000); // Create a segment in V1, add a column with no forward index enabled - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); segmentMetadata = new 
SegmentMetadataImpl(_indexDir); columnMetadata = segmentMetadata.getColumnMetadataFor(NEWLY_ADDED_FORWARD_INDEX_DISABLED_COL_MV); // should be null since column does not exist in the schema @@ -2028,21 +2087,32 @@ public void testForwardIndexDisabledOnNewColumnsMV() // should be null since column does not exist in the schema assertNull(columnMetadata); - assertThrows(IllegalStateException.class, - () -> createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, NEWLY_ADDED_FORWARD_INDEX_DISABLED_COL_MV, 1, 1, - _newColumnsSchemaWithForwardIndexDisabled, true, true, false, 4, false, 1, null, false, DataType.STRING, - 100000)); + try { + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, NEWLY_ADDED_FORWARD_INDEX_DISABLED_COL_MV, 1, 1, + _newColumnsSchemaWithForwardIndexDisabled, true, true, false, 4, false, 1, null, false, DataType.STRING, + 100000); + Assert.fail("Should not be able to disable forward index for raw column"); + } catch (IllegalStateException e) { + assertEquals(e.getMessage(), "Dictionary disabled column: newForwardIndexDisabledColumnMV cannot disable the " + + "forward index"); + } - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); segmentMetadata = new SegmentMetadataImpl(_indexDir); columnMetadata = segmentMetadata.getColumnMetadataFor(NEWLY_ADDED_FORWARD_INDEX_DISABLED_COL_MV); // should be null since column does not exist in the schema assertNull(columnMetadata); - assertThrows(IllegalStateException.class, - () -> createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, NEWLY_ADDED_FORWARD_INDEX_DISABLED_COL_MV, 1, 1, - _newColumnsSchemaWithForwardIndexDisabled, true, true, false, 4, false, 1, null, false, DataType.STRING, - 100000)); + try { + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, NEWLY_ADDED_FORWARD_INDEX_DISABLED_COL_MV, 1, 1, + 
_newColumnsSchemaWithForwardIndexDisabled, true, true, false, 4, false, 1, null, false, DataType.STRING, + 100000); + Assert.fail("Should not be able to disable forward index for raw column"); + } catch (IllegalStateException e) { + assertEquals(e.getMessage(), "Dictionary disabled column: newForwardIndexDisabledColumnMV cannot disable the " + + "forward index"); + } // Reset the no dictionary columns _indexLoadingConfig.setNoDictionaryColumns(existingNoDictionaryColumns); @@ -2058,21 +2128,32 @@ public void testForwardIndexDisabledOnNewColumnsMV() // should be null since column does not exist in the schema assertNull(columnMetadata); - assertThrows(IllegalStateException.class, - () -> createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, NEWLY_ADDED_FORWARD_INDEX_DISABLED_COL_MV, 1, 1, - _newColumnsSchemaWithForwardIndexDisabled, true, true, false, 4, false, 1, null, false, DataType.STRING, - 100000)); + try { + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, NEWLY_ADDED_FORWARD_INDEX_DISABLED_COL_MV, 1, 1, + _newColumnsSchemaWithForwardIndexDisabled, true, true, false, 4, false, 1, null, false, DataType.STRING, + 100000); + Assert.fail("Should not be able to disable forward index for raw column"); + } catch (IllegalStateException e) { + assertEquals(e.getMessage(), "Inverted index must be enabled for forward index disabled column: " + + "newForwardIndexDisabledColumnMV"); + } - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); segmentMetadata = new SegmentMetadataImpl(_indexDir); columnMetadata = segmentMetadata.getColumnMetadataFor(NEWLY_ADDED_FORWARD_INDEX_DISABLED_COL_MV); // should be null since column does not exist in the schema assertNull(columnMetadata); - assertThrows(IllegalStateException.class, - () -> createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, 
NEWLY_ADDED_FORWARD_INDEX_DISABLED_COL_MV, 1, 1, - _newColumnsSchemaWithForwardIndexDisabled, true, true, false, 4, false, 1, null, false, DataType.STRING, - 100000)); + try { + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, NEWLY_ADDED_FORWARD_INDEX_DISABLED_COL_MV, 1, 1, + _newColumnsSchemaWithForwardIndexDisabled, true, true, false, 4, false, 1, null, false, DataType.STRING, + 100000); + Assert.fail("Should not be able to disable forward index for raw column"); + } catch (IllegalStateException e) { + assertEquals(e.getMessage(), "Inverted index must be enabled for forward index disabled column: " + + "newForwardIndexDisabledColumnMV"); + } _indexLoadingConfig.setForwardIndexDisabledColumns(new HashSet<>()); } @@ -2099,7 +2180,8 @@ public void testForwardIndexDisabledOnExistingColumnDictEncoded() _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 26, true, 0, null, true, DataType.STRING, 100000); - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_STRING_COL_DICT, 9, 4, _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 26, true, 0, null, true, DataType.STRING, @@ -2228,7 +2310,8 @@ public void testForwardIndexDisabledOnExistingColumnRaw() _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 4, true, 0, null, true, DataType.STRING, 100000); - constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList()); // No dictionary is created nor is the forward index disabled since this is a v1 type segment createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_STRING_COL_RAW, 5, 3, @@ -2347,7 +2430,7 @@ public void 
testForwardIndexDisabledOnExistingColumnRaw() // has range index. List rangeIndexCols = new ArrayList<>(); rangeIndexCols.add(EXISTING_INT_COL_RAW); - constructV1Segment(Collections.emptyList(), Collections.emptyList(), rangeIndexCols); + constructV1Segment(Collections.emptyList(), Collections.emptyList(), rangeIndexCols, Collections.emptyList()); new SegmentV1V2ToV3FormatConverter().convert(_indexDir); validateIndex(ColumnIndexType.RANGE_INDEX, EXISTING_INT_COL_RAW, 42242, 16, _schema, false, false, false, 0, true, 0, ChunkCompressionType.LZ4, false, DataType.INT, 100000); @@ -2370,4 +2453,433 @@ public void testForwardIndexDisabledOnExistingColumnRaw() // Add it back so that this column is not rewritten for the other tests below. _indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_INT_COL_RAW); } + + /** + * Test to check the behavior of enabling a dictionary based forward + * index on a column which has forward index disabled + */ + @Test + public void testForwardIndexEnabledWithDictOnExistingForwardIndexDisabledColumn() + throws Exception { + Set forwardIndexDisabledColumns = new HashSet<>(); + forwardIndexDisabledColumns.add(EXISTING_FORWARD_INDEX_DISABLED_COL_SV); + _indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + Set invertedIndexColumns = _indexLoadingConfig.getInvertedIndexColumns(); + invertedIndexColumns.addAll(forwardIndexDisabledColumns); + + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.singletonList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV)); + new SegmentV1V2ToV3FormatConverter().convert(_indexDir); + SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(_indexDir); + ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(EXISTING_FORWARD_INDEX_DISABLED_COL_SV); + assertNotNull(columnMetadata); + + // Forward index should be disabled for column10 + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, 
EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, true, 0, null, true, DataType.INT, + 100000); + + // At this point, the segment has forward index disabled for column10. Enable the forward index and invoke reload + forwardIndexDisabledColumns = new HashSet<>(); + _indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + + // Forward index should be enabled for column10 along with inverted index and dictionary + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, true, 0, null, false, DataType.INT, + 100000); + validateIndex(ColumnIndexType.DICTIONARY, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, true, 0, null, false, DataType.INT, + 100000); + validateIndex(ColumnIndexType.INVERTED_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, true, 0, null, false, DataType.INT, + 100000); + + // Enable the forward index but disable the inverted index + forwardIndexDisabledColumns = new HashSet<>(); + forwardIndexDisabledColumns.add(EXISTING_FORWARD_INDEX_DISABLED_COL_SV); + _indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + invertedIndexColumns = _indexLoadingConfig.getInvertedIndexColumns(); + invertedIndexColumns.addAll(forwardIndexDisabledColumns); + + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.singletonList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV)); + new SegmentV1V2ToV3FormatConverter().convert(_indexDir); + segmentMetadata = new SegmentMetadataImpl(_indexDir); + columnMetadata = segmentMetadata.getColumnMetadataFor(EXISTING_FORWARD_INDEX_DISABLED_COL_SV); + assertNotNull(columnMetadata); + + // Forward index should be 
disabled for column10 + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, true, 0, null, true, DataType.INT, + 100000); + + // At this point, the segment has forward index disabled for column10. Enable the forward index and invoke reload. + // Disable the inverted index and ensure it gets removed. + forwardIndexDisabledColumns = new HashSet<>(); + _indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + _indexLoadingConfig.getInvertedIndexColumns().remove(EXISTING_FORWARD_INDEX_DISABLED_COL_SV); + + // Forward index should be enabled for column10 dictionary. Inverted index should be disabled. + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, true, 0, null, false, DataType.INT, + 100000); + validateIndex(ColumnIndexType.DICTIONARY, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, true, 0, null, false, DataType.INT, + 100000); + validateIndexDoesNotExist(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, ColumnIndexType.INVERTED_INDEX); + + // Enable the forward index for two columns at once, one being a MV column + forwardIndexDisabledColumns = new HashSet<>(Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, + EXISTING_FORWARD_INDEX_DISABLED_COL_MV)); + _indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + invertedIndexColumns = _indexLoadingConfig.getInvertedIndexColumns(); + invertedIndexColumns.addAll(forwardIndexDisabledColumns); + + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, EXISTING_FORWARD_INDEX_DISABLED_COL_MV)); + new SegmentV1V2ToV3FormatConverter().convert(_indexDir); + segmentMetadata = new 
SegmentMetadataImpl(_indexDir); + columnMetadata = segmentMetadata.getColumnMetadataFor(EXISTING_FORWARD_INDEX_DISABLED_COL_SV); + assertNotNull(columnMetadata); + ColumnMetadata columnMetadata2 = segmentMetadata.getColumnMetadataFor(EXISTING_FORWARD_INDEX_DISABLED_COL_MV); + assertNotNull(columnMetadata2); + + // Forward index should be disabled for column10 and column7 + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, true, 0, null, true, DataType.INT, + 100000); + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_MV, 359, 9, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, false, 24, null, true, DataType.INT, + 134090); + + // At this point, the segment has forward index disabled for column10 and column7. Enable the forward index and + // invoke reload. + forwardIndexDisabledColumns = new HashSet<>(); + _indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + + // Forward index should be enabled for column10 and column7 along with inverted index and dictionary + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, true, 0, null, false, DataType.INT, + 100000); + validateIndex(ColumnIndexType.DICTIONARY, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, true, 0, null, false, DataType.INT, + 100000); + validateIndex(ColumnIndexType.INVERTED_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, true, 0, null, false, DataType.INT, + 100000); + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_MV, 359, 9, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 
0, false, 24, null, false, DataType.INT, + 134090); + validateIndex(ColumnIndexType.DICTIONARY, EXISTING_FORWARD_INDEX_DISABLED_COL_MV, 359, 9, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, false, 24, null, false, DataType.INT, + 134090); + validateIndex(ColumnIndexType.INVERTED_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_MV, 359, 9, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, false, 24, null, false, DataType.INT, + 134090); + + // Enable the forward index for multiple columns at once and enable other indexes + forwardIndexDisabledColumns = new HashSet<>(Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, + EXISTING_FORWARD_INDEX_DISABLED_COL_MV, EXISTING_STRING_COL_DICT)); + _indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + invertedIndexColumns = _indexLoadingConfig.getInvertedIndexColumns(); + invertedIndexColumns.addAll(forwardIndexDisabledColumns); + + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, EXISTING_FORWARD_INDEX_DISABLED_COL_MV, + EXISTING_STRING_COL_DICT)); + new SegmentV1V2ToV3FormatConverter().convert(_indexDir); + segmentMetadata = new SegmentMetadataImpl(_indexDir); + columnMetadata = segmentMetadata.getColumnMetadataFor(EXISTING_FORWARD_INDEX_DISABLED_COL_SV); + assertNotNull(columnMetadata); + columnMetadata2 = segmentMetadata.getColumnMetadataFor(EXISTING_FORWARD_INDEX_DISABLED_COL_MV); + assertNotNull(columnMetadata2); + ColumnMetadata columnMetadata3 = segmentMetadata.getColumnMetadataFor(EXISTING_STRING_COL_DICT); + assertNotNull(columnMetadata3); + + // Forward index should be disabled for column10, column7 and column5 + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, true, 0, null, true, DataType.INT, + 100000); +
createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_MV, 359, 9, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, false, 24, null, true, DataType.INT, + 134090); + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_STRING_COL_DICT, 9, 4, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 26, true, 0, null, true, DataType.STRING, + 100000); + + // At this point, the segment has forward index disabled for column10, column5 and column7. Enable the forward + // index and invoke reload. + forwardIndexDisabledColumns = new HashSet<>(); + _indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + _indexLoadingConfig.getRangeIndexColumns().addAll(Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, + EXISTING_FORWARD_INDEX_DISABLED_COL_MV)); + _indexLoadingConfig.getTextIndexColumns().add(EXISTING_STRING_COL_DICT); + _indexLoadingConfig.getFSTIndexColumns().add(EXISTING_STRING_COL_DICT); + _indexLoadingConfig.getInvertedIndexColumns().remove(EXISTING_FORWARD_INDEX_DISABLED_COL_MV); + + // Forward index and dictionary should be enabled for all three columns. Inverted index is removed for column7 + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, true, 0, null, false, DataType.INT, + 100000); + validateIndex(ColumnIndexType.DICTIONARY, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, true, 0, null, false, DataType.INT, + 100000); + validateIndex(ColumnIndexType.INVERTED_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, true, 0, null, false, DataType.INT, + 100000); + validateIndex(ColumnIndexType.RANGE_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true,
false, 0, true, 0, null, false, DataType.INT, + 100000); + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_MV, 359, 9, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, false, 24, null, false, DataType.INT, + 134090); + validateIndex(ColumnIndexType.DICTIONARY, EXISTING_FORWARD_INDEX_DISABLED_COL_MV, 359, 9, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, false, 24, null, false, DataType.INT, + 134090); + validateIndexDoesNotExist(EXISTING_FORWARD_INDEX_DISABLED_COL_MV, ColumnIndexType.INVERTED_INDEX); + validateIndex(ColumnIndexType.RANGE_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_MV, 359, 9, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, false, 24, null, false, DataType.INT, + 134090); + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_STRING_COL_DICT, 9, 4, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 26, true, 0, null, false, DataType.STRING, + 100000); + validateIndex(ColumnIndexType.DICTIONARY, EXISTING_STRING_COL_DICT, 9, 4, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 26, true, 0, null, false, DataType.STRING, + 100000); + validateIndex(ColumnIndexType.INVERTED_INDEX, EXISTING_STRING_COL_DICT, 9, 4, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 26, true, 0, null, false, DataType.STRING, + 100000); + validateIndex(ColumnIndexType.TEXT_INDEX, EXISTING_STRING_COL_DICT, 9, 4, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 26, true, 0, null, false, DataType.STRING, + 100000); + validateIndex(ColumnIndexType.FST_INDEX, EXISTING_STRING_COL_DICT, 9, 4, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 26, true, 0, null, false, DataType.STRING, + 100000); + + // Reset the indexLoadingConfig + _indexLoadingConfig.getRangeIndexColumns().removeAll(Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, + EXISTING_FORWARD_INDEX_DISABLED_COL_MV)); + 
_indexLoadingConfig.getTextIndexColumns().remove(EXISTING_STRING_COL_DICT); + _indexLoadingConfig.getFSTIndexColumns().remove(EXISTING_STRING_COL_DICT); + _indexLoadingConfig.getInvertedIndexColumns().removeAll(Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, + EXISTING_STRING_COL_DICT)); + } + + /** + * Test to check the behavior of enabling a no dictionary based forward + * index on a column which has forward index disabled + */ + @Test + public void testForwardIndexEnabledWithRawOnExistingForwardIndexDisabledColumn() + throws Exception { + Set forwardIndexDisabledColumns = new HashSet<>(); + forwardIndexDisabledColumns.add(EXISTING_FORWARD_INDEX_DISABLED_COL_SV); + _indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + Set invertedIndexColumns = _indexLoadingConfig.getInvertedIndexColumns(); + invertedIndexColumns.addAll(forwardIndexDisabledColumns); + + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.singletonList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV)); + new SegmentV1V2ToV3FormatConverter().convert(_indexDir); + SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(_indexDir); + ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(EXISTING_FORWARD_INDEX_DISABLED_COL_SV); + assertNotNull(columnMetadata); + + // Forward index should be disabled for column10 + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, true, 0, null, true, DataType.INT, + 100000); + + // At this point, the segment has forward index disabled for column10. 
Enable the forward index, disable the + // dictionary and inverted index + forwardIndexDisabledColumns = new HashSet<>(); + _indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + _indexLoadingConfig.getInvertedIndexColumns().remove(EXISTING_FORWARD_INDEX_DISABLED_COL_SV); + _indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_FORWARD_INDEX_DISABLED_COL_SV); + + // Forward index should be enabled for column10. Dictionary and inverted index shouldn't be present. + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, false, false, 0, true, 0, ChunkCompressionType.LZ4, false, + DataType.INT, 100000); + validateIndexDoesNotExist(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, ColumnIndexType.DICTIONARY); + validateIndexDoesNotExist(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, ColumnIndexType.INVERTED_INDEX); + + // Reset indexLoadingConfig + _indexLoadingConfig.getNoDictionaryColumns().remove(EXISTING_FORWARD_INDEX_DISABLED_COL_SV); + + // Enable the forward index for two columns at once, one being a MV column + forwardIndexDisabledColumns = new HashSet<>(Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, + EXISTING_FORWARD_INDEX_DISABLED_COL_MV)); + _indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + invertedIndexColumns = _indexLoadingConfig.getInvertedIndexColumns(); + invertedIndexColumns.addAll(forwardIndexDisabledColumns); + + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, EXISTING_FORWARD_INDEX_DISABLED_COL_MV)); + new SegmentV1V2ToV3FormatConverter().convert(_indexDir); + segmentMetadata = new SegmentMetadataImpl(_indexDir); + columnMetadata = segmentMetadata.getColumnMetadataFor(EXISTING_FORWARD_INDEX_DISABLED_COL_SV); + assertNotNull(columnMetadata); + ColumnMetadata columnMetadata2 = 
segmentMetadata.getColumnMetadataFor(EXISTING_FORWARD_INDEX_DISABLED_COL_MV); + assertNotNull(columnMetadata2); + + // Forward index should be disabled for column10 and column7 + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, true, 0, null, true, DataType.INT, + 100000); + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_MV, 359, 9, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, false, 24, null, true, DataType.INT, + 134090); + + // At this point, the segment has forward index disabled for column10 and column7. Enable the forward index and + // invoke reload. + forwardIndexDisabledColumns = new HashSet<>(); + _indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + _indexLoadingConfig.getInvertedIndexColumns().removeAll(Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, + EXISTING_FORWARD_INDEX_DISABLED_COL_MV)); + _indexLoadingConfig.getNoDictionaryColumns().addAll(Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, + EXISTING_FORWARD_INDEX_DISABLED_COL_MV)); + + // Forward index should be enabled for column10 and column7. No dictionary or inverted index should exist. 
+ createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, false, false, 0, true, 0, ChunkCompressionType.LZ4, false, + DataType.INT, 100000); + validateIndexDoesNotExist(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, ColumnIndexType.DICTIONARY); + validateIndexDoesNotExist(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, ColumnIndexType.INVERTED_INDEX); + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_MV, 359, 9, + _newColumnsSchemaWithForwardIndexDisabled, false, false, false, 0, false, 24, ChunkCompressionType.LZ4, false, + DataType.INT, 134090); + validateIndexDoesNotExist(EXISTING_FORWARD_INDEX_DISABLED_COL_MV, ColumnIndexType.DICTIONARY); + validateIndexDoesNotExist(EXISTING_FORWARD_INDEX_DISABLED_COL_MV, ColumnIndexType.INVERTED_INDEX); + + // Reset indexLoadingConfig + _indexLoadingConfig.getNoDictionaryColumns().removeAll(Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, + EXISTING_FORWARD_INDEX_DISABLED_COL_MV)); + + // Enable the forward index for two columns at once, one as raw another as dictionary enabled + forwardIndexDisabledColumns = new HashSet<>(Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, + EXISTING_FORWARD_INDEX_DISABLED_COL_MV)); + _indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + invertedIndexColumns = _indexLoadingConfig.getInvertedIndexColumns(); + invertedIndexColumns.addAll(forwardIndexDisabledColumns); + + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, EXISTING_FORWARD_INDEX_DISABLED_COL_MV)); + new SegmentV1V2ToV3FormatConverter().convert(_indexDir); + segmentMetadata = new SegmentMetadataImpl(_indexDir); + columnMetadata = segmentMetadata.getColumnMetadataFor(EXISTING_FORWARD_INDEX_DISABLED_COL_SV); + assertNotNull(columnMetadata); + columnMetadata2 = 
segmentMetadata.getColumnMetadataFor(EXISTING_FORWARD_INDEX_DISABLED_COL_MV); + assertNotNull(columnMetadata2); + + // Forward index should be disabled for column10 and column7 + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, true, 0, null, true, DataType.INT, + 100000); + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_MV, 359, 9, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, false, 24, null, true, DataType.INT, + 134090); + + // At this point, the segment has forward index disabled for column10 and column7. Enable the forward index and + // invoke reload. + forwardIndexDisabledColumns = new HashSet<>(); + _indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + _indexLoadingConfig.getInvertedIndexColumns().remove(EXISTING_FORWARD_INDEX_DISABLED_COL_SV); + _indexLoadingConfig.getNoDictionaryColumns().add(EXISTING_FORWARD_INDEX_DISABLED_COL_SV); + + // Forward index should be enabled for both. column10 is raw; column7 keeps its dictionary and inverted index.
+ createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, false, false, 0, true, 0, ChunkCompressionType.LZ4, false, + DataType.INT, 100000); + validateIndexDoesNotExist(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, ColumnIndexType.DICTIONARY); + validateIndexDoesNotExist(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, ColumnIndexType.INVERTED_INDEX); + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_MV, 359, 9, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, false, 24, null, false, DataType.INT, + 134090); + validateIndex(ColumnIndexType.DICTIONARY, EXISTING_FORWARD_INDEX_DISABLED_COL_MV, 359, 9, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, false, 24, null, false, DataType.INT, + 134090); + validateIndex(ColumnIndexType.INVERTED_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_MV, 359, 9, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, false, 24, null, false, DataType.INT, + 134090); + + // Reset indexLoadingConfig + _indexLoadingConfig.getNoDictionaryColumns().remove(EXISTING_FORWARD_INDEX_DISABLED_COL_SV); + _indexLoadingConfig.getInvertedIndexColumns().remove(EXISTING_FORWARD_INDEX_DISABLED_COL_MV); + + // Enable the forward index for multiple columns at once and enable other indexes + forwardIndexDisabledColumns = new HashSet<>(Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, + EXISTING_FORWARD_INDEX_DISABLED_COL_MV, EXISTING_STRING_COL_DICT)); + _indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + invertedIndexColumns = _indexLoadingConfig.getInvertedIndexColumns(); + invertedIndexColumns.addAll(forwardIndexDisabledColumns); + + constructV1Segment(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, EXISTING_FORWARD_INDEX_DISABLED_COL_MV, + 
EXISTING_STRING_COL_DICT)); + new SegmentV1V2ToV3FormatConverter().convert(_indexDir); + segmentMetadata = new SegmentMetadataImpl(_indexDir); + columnMetadata = segmentMetadata.getColumnMetadataFor(EXISTING_FORWARD_INDEX_DISABLED_COL_SV); + assertNotNull(columnMetadata); + columnMetadata2 = segmentMetadata.getColumnMetadataFor(EXISTING_FORWARD_INDEX_DISABLED_COL_MV); + assertNotNull(columnMetadata2); + ColumnMetadata columnMetadata3 = segmentMetadata.getColumnMetadataFor(EXISTING_STRING_COL_DICT); + assertNotNull(columnMetadata3); + + // Forward index should be disabled for column10, column7 and column5 + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, true, 0, null, true, DataType.INT, + 100000); + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_MV, 359, 9, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 0, false, 24, null, true, DataType.INT, + 134090); + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_STRING_COL_DICT, 9, 4, + _newColumnsSchemaWithForwardIndexDisabled, false, true, false, 26, true, 0, null, true, DataType.STRING, + 100000); + + // At this point, the segment has forward index disabled for column10, column5 and column7. Enable the forward + // index and invoke reload.
+ forwardIndexDisabledColumns = new HashSet<>(); + _indexLoadingConfig.setForwardIndexDisabledColumns(forwardIndexDisabledColumns); + _indexLoadingConfig.getRangeIndexColumns().addAll(Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, + EXISTING_FORWARD_INDEX_DISABLED_COL_MV)); + _indexLoadingConfig.getTextIndexColumns().add(EXISTING_STRING_COL_DICT); + _indexLoadingConfig.getInvertedIndexColumns().removeAll(Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, + EXISTING_FORWARD_INDEX_DISABLED_COL_MV, EXISTING_STRING_COL_DICT)); + _indexLoadingConfig.getNoDictionaryColumns().addAll(Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, + EXISTING_FORWARD_INDEX_DISABLED_COL_MV, EXISTING_STRING_COL_DICT)); + + // Forward index should be enabled for all three columns. Dictionary and inverted index should not exist + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, false, false, 0, true, 0, ChunkCompressionType.LZ4, false, + DataType.INT, 100000); + validateIndexDoesNotExist(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, ColumnIndexType.DICTIONARY); + validateIndexDoesNotExist(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, ColumnIndexType.INVERTED_INDEX); + validateIndex(ColumnIndexType.RANGE_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_SV, 3960, 12, + _newColumnsSchemaWithForwardIndexDisabled, false, false, false, 0, true, 0, ChunkCompressionType.LZ4, false, + DataType.INT, 100000); + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_FORWARD_INDEX_DISABLED_COL_MV, 359, 9, + _newColumnsSchemaWithForwardIndexDisabled, false, false, false, 0, false, 24, ChunkCompressionType.LZ4, false, + DataType.INT, 134090); + validateIndexDoesNotExist(EXISTING_FORWARD_INDEX_DISABLED_COL_MV, ColumnIndexType.DICTIONARY); + validateIndexDoesNotExist(EXISTING_FORWARD_INDEX_DISABLED_COL_MV, ColumnIndexType.INVERTED_INDEX); + validateIndex(ColumnIndexType.RANGE_INDEX,
EXISTING_FORWARD_INDEX_DISABLED_COL_MV, 359, 9, + _newColumnsSchemaWithForwardIndexDisabled, false, false, false, 0, false, 24, ChunkCompressionType.LZ4, false, + DataType.INT, 134090); + createAndValidateIndex(ColumnIndexType.FORWARD_INDEX, EXISTING_STRING_COL_DICT, 9, 4, + _newColumnsSchemaWithForwardIndexDisabled, false, false, false, 0, true, 0, ChunkCompressionType.LZ4, false, + DataType.STRING, 100000); + validateIndexDoesNotExist(EXISTING_STRING_COL_DICT, ColumnIndexType.DICTIONARY); + validateIndexDoesNotExist(EXISTING_STRING_COL_DICT, ColumnIndexType.INVERTED_INDEX); + validateIndex(ColumnIndexType.TEXT_INDEX, EXISTING_STRING_COL_DICT, 9, 4, + _newColumnsSchemaWithForwardIndexDisabled, false, false, false, 0, true, 0, ChunkCompressionType.LZ4, false, + DataType.STRING, 100000); + + // Reset the indexLoadingConfig + _indexLoadingConfig.getRangeIndexColumns().removeAll(Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, + EXISTING_FORWARD_INDEX_DISABLED_COL_MV)); + _indexLoadingConfig.getTextIndexColumns().remove(EXISTING_STRING_COL_DICT); + _indexLoadingConfig.getNoDictionaryColumns().removeAll(Arrays.asList(EXISTING_FORWARD_INDEX_DISABLED_COL_SV, + EXISTING_FORWARD_INDEX_DISABLED_COL_MV, EXISTING_STRING_COL_DICT)); + } }