diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java index d3bc36e16f35..6b4d04012ffc 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java @@ -18,6 +18,7 @@ */ package org.apache.pinot.sql.parsers; +import com.google.common.annotations.VisibleForTesting; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -56,6 +57,7 @@ import org.apache.pinot.common.request.PinotQuery; import org.apache.pinot.common.utils.request.RequestUtils; import org.apache.pinot.segment.spi.AggregationFunctionType; +import org.apache.pinot.spi.utils.Pairs; import org.apache.pinot.sql.parsers.rewriter.QueryRewriter; import org.apache.pinot.sql.parsers.rewriter.QueryRewriterFactory; import org.slf4j.Logger; @@ -117,7 +119,10 @@ private static String removeTerminatingSemicolon(String sql) { public static PinotQuery compileToPinotQuery(String sql) throws SqlCompilationException { - // Removes the terminating semicolon if any + // Remove the comments from the query + sql = removeComments(sql); + + // Remove the terminating semicolon from the query sql = removeTerminatingSemicolon(sql); // Extract OPTION statements from sql as Calcite Parser doesn't parse it. @@ -418,6 +423,103 @@ private static String removeOptionsFromSql(String sql) { return matcher.replaceAll(""); } + /** + * Removes comments from the query. + * NOTE: Comment indicator within single quotes (literal) and double quotes (identifier) are ignored. + */ + @VisibleForTesting + static String removeComments(String sql) { + boolean openSingleQuote = false; + boolean openDoubleQuote = false; + boolean commented = false; + boolean singleLineCommented = false; + boolean multiLineCommented = false; + int commentStartIndex = -1; + List commentedParts = new ArrayList<>(); + + int length = sql.length(); + int index = 0; + while (index < length) { + switch (sql.charAt(index)) { + case '\'': + if (!commented && !openDoubleQuote) { + openSingleQuote = !openSingleQuote; + } + break; + case '"': + if (!commented && !openSingleQuote) { + openDoubleQuote = !openDoubleQuote; + } + break; + case '-': + // Single line comment start indicator: -- + if (!commented && !openSingleQuote && !openDoubleQuote && index < length - 1 + && sql.charAt(index + 1) == '-') { + commented = true; + singleLineCommented = true; + commentStartIndex = index; + index++; + } + break; + case '\n': + // Single line comment end indicator: \n + if (singleLineCommented) { + commentedParts.add(new Pairs.IntPair(commentStartIndex, index + 1)); + commented = false; + singleLineCommented = false; + commentStartIndex = -1; + } + break; + case '/': + // Multi-line comment start indicator: /* + if (!commented && !openSingleQuote && !openDoubleQuote && index < length - 1 + && sql.charAt(index + 1) == '*') { + commented = true; + multiLineCommented = true; + commentStartIndex = index; + index++; + } + break; + case '*': + // Multi-line comment end indicator: */ + if (multiLineCommented && index < length - 1 && sql.charAt(index + 1) == '/') { + commentedParts.add(new Pairs.IntPair(commentStartIndex, index + 2)); + commented = false; + multiLineCommented = false; + commentStartIndex = -1; + index++; + } + break; + default: + break; + } + index++; + } + + if (commentedParts.isEmpty()) { + if (singleLineCommented) { + return sql.substring(0, commentStartIndex); + } else { + return sql; + } + } else { + StringBuilder stringBuilder = new StringBuilder(); + int startIndex = 0; + for (Pairs.IntPair commentedPart : commentedParts) { + stringBuilder.append(sql, startIndex, commentedPart.getLeft()).append(' '); + startIndex = commentedPart.getRight(); + } + if (startIndex < length) { + if (singleLineCommented) { + stringBuilder.append(sql, startIndex, commentStartIndex); + } else { + stringBuilder.append(sql, startIndex, length); + } + } + return stringBuilder.toString(); + } + } + private static List convertDistinctSelectList(SqlNodeList selectList) { List selectExpr = new ArrayList<>(); selectExpr.add(convertDistinctAndSelectListToFunctionExpression(selectList)); diff --git a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java index 2500a50097de..defad35041dc 100644 --- a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java @@ -656,6 +656,56 @@ public void testQueryOptions() { Assert.assertEquals(pinotQuery.getQueryOptions().get("bar"), "'potato'"); } + @Test + public void testRemoveComments() { + testRemoveComments("select * from myTable", "select * from myTable"); + testRemoveComments("select * from myTable--hello", "select * from myTable"); + testRemoveComments("select * from myTable--hello\n", "select * from myTable"); + testRemoveComments("select * from--hello\nmyTable", "select * from myTable"); + testRemoveComments("select * from/*hello*/myTable", "select * from myTable"); + // Multi-line comment must have end indicator + testRemoveComments("select * from myTable/*hello", "select * from myTable/*hello"); + testRemoveComments("select * from myTable--", "select * from myTable"); + testRemoveComments("select * from myTable--\n", "select * from myTable"); + testRemoveComments("select * from--\nmyTable", "select * from myTable"); + testRemoveComments("select * from/**/myTable", "select * from myTable"); + // End indicator itself has no effect + testRemoveComments("select * from\nmyTable", "select * from\nmyTable"); + testRemoveComments("select * from*/myTable", "select * from*/myTable"); + + // Mix of single line and multi-line comment indicators + testRemoveComments("select * from myTable--hello--world", "select * from myTable"); + testRemoveComments("select * from myTable--hello/*world", "select * from myTable"); + testRemoveComments("select * from myTable--hello\n--world", "select * from myTable"); + testRemoveComments("select * from myTable--hello\n/*--world*/", "select * from myTable"); + testRemoveComments("select * from myTable/*hello--world*/", "select * from myTable"); + testRemoveComments("select * from myTable/*hello--\nworld*/", "select * from myTable"); + testRemoveComments("select * from myTable/*hello*/--world", "select * from myTable"); + testRemoveComments("select * from myTable/*hello*/--world\n", "select * from myTable"); + + // Comment indicator within quotes + testRemoveComments("select * from \"myTable--hello\"", "select * from \"myTable--hello\""); + testRemoveComments("select * from \"myTable/*hello*/\"", "select * from \"myTable/*hello*/\""); + testRemoveComments("select '--' from myTable", "select '--' from myTable"); + testRemoveComments("select '/*' from myTable", "select '/*' from myTable"); + testRemoveComments("select '/**/' from myTable", "select '/**/' from myTable"); + testRemoveComments("select * from \"my\"\"Table--hello\"", "select * from \"my\"\"Table--hello\""); + testRemoveComments("select * from \"my\"\"Table/*hello*/\"", "select * from \"my\"\"Table/*hello*/\""); + testRemoveComments("select '''--' from myTable", "select '''--' from myTable"); + testRemoveComments("select '''/*' from myTable", "select '''/*' from myTable"); + testRemoveComments("select '''/**/' from myTable", "select '''/**/' from myTable"); + + // Comment indicator outside of quotes + testRemoveComments("select * from \"myTable\"--hello", "select * from \"myTable\""); + testRemoveComments("select * from \"myTable\"/*hello*/", "select * from \"myTable\""); + testRemoveComments("select ''--from myTable", "select ''"); + testRemoveComments("select ''/**/from myTable", "select '' from myTable"); + } + + private void testRemoveComments(String sqlWithComments, String expectedSqlWithoutComments) { + Assert.assertEquals(CalciteSqlParser.removeComments(sqlWithComments).trim(), expectedSqlWithoutComments.trim()); + } + @Test public void testIdentifierQuoteCharacter() { PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery( @@ -2435,23 +2485,20 @@ public void testQueryWithSemicolon() { @Test public void testInvalidQueryWithSemicolon() { - Assert.expectThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery(";")); + Assert.expectThrows(SqlCompilationException.class, () -> CalciteSqlParser.compileToPinotQuery(";")); - Assert.expectThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery(";;;;")); + Assert.expectThrows(SqlCompilationException.class, () -> CalciteSqlParser.compileToPinotQuery(";;;;")); Assert.expectThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery("SELECT col1, count(*) FROM foo GROUP BY ; col1")); + () -> CalciteSqlParser.compileToPinotQuery("SELECT col1, count(*) FROM foo GROUP BY ; col1")); // Query having multiple SQL statements - Assert.expectThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery("SELECT col1, count(*) FROM foo GROUP BY col1; SELECT col2," - + "count(*) FROM foo GROUP BY col2")); + Assert.expectThrows(SqlCompilationException.class, () -> CalciteSqlParser.compileToPinotQuery( + "SELECT col1, count(*) FROM foo GROUP BY col1; SELECT col2," + "count(*) FROM foo GROUP BY col2")); // Query having multiple SQL statements with trailing and leading whitespaces - Assert.expectThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery(" SELECT col1, count(*) FROM foo GROUP BY col1; " - + "SELECT col2, count(*) FROM foo GROUP BY col2 ")); + Assert.expectThrows(SqlCompilationException.class, () -> CalciteSqlParser.compileToPinotQuery( + " SELECT col1, count(*) FROM foo GROUP BY col1; " + + "SELECT col2, count(*) FROM foo GROUP BY col2 ")); } }