From 75079f794d483b686f0c07d170ce4d479945d156 Mon Sep 17 00:00:00 2001 From: bowen xiao Date: Mon, 23 Sep 2024 11:59:50 -0700 Subject: [PATCH] Refactor pinot custom string query in pinot_query_validator (#6298) * refactor pinot custom string query * add comment --- .../pinot/pinot_visibility_store_test.go | 8 ++-- common/pinot/pinotQueryValidator.go | 6 +-- common/pinot/pinotQueryValidator_test.go | 48 +++++++++++-------- 3 files changed, 35 insertions(+), 27 deletions(-) diff --git a/common/persistence/pinot/pinot_visibility_store_test.go b/common/persistence/pinot/pinot_visibility_store_test.go index b585918c8cb..1e82f1a5505 100644 --- a/common/persistence/pinot/pinot_visibility_store_test.go +++ b/common/persistence/pinot/pinot_visibility_store_test.go @@ -1210,7 +1210,7 @@ LIMIT 0, 10 expectedOutput: fmt.Sprintf(`SELECT * FROM %s WHERE DomainID = 'bfd5c907-f899-4baf-a7b2-2ab85e623ebd' -AND (JSON_MATCH(Attr, '"$.CustomKeywordField"=''keywordCustomized''') or JSON_MATCH(Attr, '"$.CustomKeywordField[*]"=''keywordCustomized''')) and JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'String and or order by*') +AND (JSON_MATCH(Attr, '"$.CustomKeywordField"=''keywordCustomized''') or JSON_MATCH(Attr, '"$.CustomKeywordField[*]"=''keywordCustomized''')) and JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*String and or order by.*'')') Order BY StartTime DESC LIMIT 0, 10 `, testTableName), @@ -1228,7 +1228,7 @@ LIMIT 0, 10 expectedOutput: fmt.Sprintf(`SELECT * FROM %s WHERE DomainID = 'bfd5c907-f899-4baf-a7b2-2ab85e623ebd' -AND (JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'Or*') or JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'and*')) +AND (JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*Or.*'')') or JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*and.*'')')) Order by StartTime DESC LIMIT 0, 10 `, testTableName), @@ -1246,7 +1246,7 @@ LIMIT 0, 10 expectedOutput: fmt.Sprintf(`SELECT * FROM %s WHERE DomainID = 'bfd5c907-f899-4baf-a7b2-2ab85e623ebd' -AND WorkflowID = 'wid' and (JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'custom and custom2 or custom3 order by*') or (JSON_MATCH(Attr, '"$.CustomIntField" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10)) +AND WorkflowID = 'wid' and (JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*custom and custom2 or custom3 order by.*'')') or (JSON_MATCH(Attr, '"$.CustomIntField" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10)) Order BY StartTime DESC LIMIT 0, 10 `, testTableName), @@ -1300,7 +1300,7 @@ LIMIT 0, 10 expectedOutput: fmt.Sprintf(`SELECT * FROM %s WHERE DomainID = 'bfd5c907-f899-4baf-a7b2-2ab85e623ebd' -AND CloseStatus < 0 and (JSON_MATCH(Attr, '"$.CustomKeywordField"=''keywordCustomized''') or JSON_MATCH(Attr, '"$.CustomKeywordField[*]"=''keywordCustomized''')) and (JSON_MATCH(Attr, '"$.CustomIntField" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10) and JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'String field is for text*') +AND CloseStatus < 0 and (JSON_MATCH(Attr, '"$.CustomKeywordField"=''keywordCustomized''') or JSON_MATCH(Attr, '"$.CustomKeywordField[*]"=''keywordCustomized''')) and (JSON_MATCH(Attr, '"$.CustomIntField" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10) and JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*String field is for text.*'')') Order by DomainID Desc LIMIT 11, 10 `, testTableName), diff --git a/common/pinot/pinotQueryValidator.go b/common/pinot/pinotQueryValidator.go index 91e698d9ef7..e5316708b1b 100644 --- a/common/pinot/pinotQueryValidator.go +++ b/common/pinot/pinotQueryValidator.go @@ -462,13 +462,13 @@ func processCustomString(operator string, colNameStr string, colValStr string) s } func createCustomStringQuery(colNameStr string, colValStr string, notEqual string) string { + // handle edge case if colValStr == "" { return fmt.Sprintf("JSON_MATCH(Attr, '\"$.%s\" is not null') "+ - "AND %sREGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.%s', 'string'), '^$')", colNameStr, notEqual, colNameStr) + "AND %sJSON_MATCH(Attr, 'REGEXP_LIKE(\"$.%s\", ''^$'')')", colNameStr, notEqual, colNameStr) } - return fmt.Sprintf("JSON_MATCH(Attr, '\"$.%s\" is not null') "+ - "AND %sREGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.%s', 'string'), '%s*')", colNameStr, notEqual, colNameStr, colValStr) + "AND %sJSON_MATCH(Attr, 'REGEXP_LIKE(\"$.%s\", ''.*%s.*'')')", colNameStr, notEqual, colNameStr, colValStr) } func trimTimeFieldValueFromNanoToMilliSeconds(original *sqlparser.SQLVal) (*sqlparser.SQLVal, error) { diff --git a/common/pinot/pinotQueryValidator_test.go b/common/pinot/pinotQueryValidator_test.go index 3553a2904ac..ebf51d4fc0a 100644 --- a/common/pinot/pinotQueryValidator_test.go +++ b/common/pinot/pinotQueryValidator_test.go @@ -55,15 +55,23 @@ func TestValidateQuery(t *testing.T) { err: "right comparison is invalid: &{ wid { }}"}, "Case3-1: query with custom field": { query: "CustomStringField = 'custom'", - validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'custom*')", + validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*custom.*'')')", }, - "Case3-2: query with custom field value is empty": { + "Case3-2: query with custom field not equal": { + query: "CustomStringField != 'custom'", + validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*custom.*'')')", + }, + "Case3-3: query with custom field value is empty": { query: "CustomStringField = ''", - validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), '^$')", + validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''^$'')')", + }, + "Case3-4: query with custom field not equal to empty": { + query: "CustomStringField != ''", + validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''^$'')')", }, "Case4: custom field query with or in string": { query: "CustomStringField='Or'", - validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'Or*')", + validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*Or.*'')')", }, "Case5: custom keyword field query": { query: "CustomKeywordField = 'custom'", @@ -71,7 +79,7 @@ func TestValidateQuery(t *testing.T) { }, "Case6-1: complex query I: with parenthesis": { query: "(CustomStringField = 'custom and custom2 or custom3 order by') or CustomIntField between 1 and 10", - validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'custom and custom2 or custom3 order by*') or (JSON_MATCH(Attr, '\"$.CustomIntField\" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))", + validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*custom and custom2 or custom3 order by.*'')') or (JSON_MATCH(Attr, '\"$.CustomIntField\" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))", }, "Case6-2: complex query II: with only system keys": { query: "DomainID = 'd-id' and (RunID = 'run-id' or WorkflowID = 'wid')", @@ -83,7 +91,7 @@ func TestValidateQuery(t *testing.T) { }, "Case6-4: complex query IV": { query: "WorkflowID = 'wid' and (CustomStringField = 'custom and custom2 or custom3 order by' or CustomIntField between 1 and 10)", - validated: "WorkflowID = 'wid' and (JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'custom and custom2 or custom3 order by*') or (JSON_MATCH(Attr, '\"$.CustomIntField\" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))", + validated: "WorkflowID = 'wid' and (JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*custom and custom2 or custom3 order by.*'')') or (JSON_MATCH(Attr, '\"$.CustomIntField\" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))", }, "Case6-5: complex query with partial match": { query: "RunID like '123' or WorkflowID like '123'", @@ -303,11 +311,11 @@ func TestValidateQuery(t *testing.T) { }, "case22-1: test not equal to a string field": { query: "CustomStringField != 'abc'", - validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*')", + validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')')", }, "case22-2: test not equal to an empty string": { query: "CustomStringField != ''", - validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), '^$')", + validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''^$'')')", }, // ES also doesn't support this kind of query "case22-3: custom string is missing": { @@ -321,35 +329,35 @@ func TestValidateQuery(t *testing.T) { }, "case22-5: 2 custom string not equal with and clause": { query: "CustomStringField != 'abc' AND CustomStringField != 'def'", - validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*')", + validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')')", }, - "case22-6: 2 custom string , equal and not equal with and clause": { + "case22-6: 2 custom string, equal and not equal with and clause": { query: "CustomStringField = 'abc' AND CustomStringField != 'def'", - validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*')", + validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')')", }, - "case22-7: 2 custom string , not equal and equal with and clause": { + "case22-7: 2 custom string, not equal and equal with and clause": { query: "CustomStringField != 'abc' AND CustomStringField = 'def'", - validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*')", + validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')')", }, "case22-8: 2 custom string equal with and clause": { query: "CustomStringField = 'abc' AND CustomStringField = 'def'", - validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*')", + validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')')", }, "case22-9: 2 custom string not equal with or clause": { query: "CustomStringField != 'abc' OR CustomStringField != 'def'", - validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*'))", + validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')'))", }, - "case22-10: 2 custom string , equal and not equal with or clause": { + "case22-10: 2 custom string, equal and not equal with or clause": { query: "CustomStringField = 'abc' OR CustomStringField != 'def'", - validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*'))", + validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')'))", }, - "case22-11: 2 custom string , not equal and equal with or clause": { + "case22-11: 2 custom string, not equal and equal with or clause": { query: "CustomStringField != 'abc' OR CustomStringField = 'def'", - validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*'))", + validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')'))", }, "case22-12: 2 custom string equal with or clause": { query: "CustomStringField = 'abc' OR CustomStringField = 'def'", - validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*'))", + validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')'))", }, }