Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor pinot custom string query in pinot_query_validator #6298

Merged
merged 13 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions common/persistence/pinot/pinot_visibility_store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1210,7 +1210,7 @@ LIMIT 0, 10
expectedOutput: fmt.Sprintf(`SELECT *
FROM %s
WHERE DomainID = 'bfd5c907-f899-4baf-a7b2-2ab85e623ebd'
AND (JSON_MATCH(Attr, '"$.CustomKeywordField"=''keywordCustomized''') or JSON_MATCH(Attr, '"$.CustomKeywordField[*]"=''keywordCustomized''')) and JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'String and or order by*')
AND (JSON_MATCH(Attr, '"$.CustomKeywordField"=''keywordCustomized''') or JSON_MATCH(Attr, '"$.CustomKeywordField[*]"=''keywordCustomized''')) and JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*String and or order by.*'')')
Order BY StartTime DESC
LIMIT 0, 10
`, testTableName),
Expand All @@ -1228,7 +1228,7 @@ LIMIT 0, 10
expectedOutput: fmt.Sprintf(`SELECT *
FROM %s
WHERE DomainID = 'bfd5c907-f899-4baf-a7b2-2ab85e623ebd'
AND (JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'Or*') or JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'and*'))
AND (JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*Or.*'')') or JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*and.*'')'))
Order by StartTime DESC
LIMIT 0, 10
`, testTableName),
Expand All @@ -1246,7 +1246,7 @@ LIMIT 0, 10
expectedOutput: fmt.Sprintf(`SELECT *
FROM %s
WHERE DomainID = 'bfd5c907-f899-4baf-a7b2-2ab85e623ebd'
AND WorkflowID = 'wid' and (JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'custom and custom2 or custom3 order by*') or (JSON_MATCH(Attr, '"$.CustomIntField" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))
AND WorkflowID = 'wid' and (JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*custom and custom2 or custom3 order by.*'')') or (JSON_MATCH(Attr, '"$.CustomIntField" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))
Order BY StartTime DESC
LIMIT 0, 10
`, testTableName),
Expand Down Expand Up @@ -1300,7 +1300,7 @@ LIMIT 0, 10
expectedOutput: fmt.Sprintf(`SELECT *
FROM %s
WHERE DomainID = 'bfd5c907-f899-4baf-a7b2-2ab85e623ebd'
AND CloseStatus < 0 and (JSON_MATCH(Attr, '"$.CustomKeywordField"=''keywordCustomized''') or JSON_MATCH(Attr, '"$.CustomKeywordField[*]"=''keywordCustomized''')) and (JSON_MATCH(Attr, '"$.CustomIntField" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10) and JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'String field is for text*')
AND CloseStatus < 0 and (JSON_MATCH(Attr, '"$.CustomKeywordField"=''keywordCustomized''') or JSON_MATCH(Attr, '"$.CustomKeywordField[*]"=''keywordCustomized''')) and (JSON_MATCH(Attr, '"$.CustomIntField" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10) and JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*String field is for text.*'')')
Order by DomainID Desc
LIMIT 11, 10
`, testTableName),
Expand Down
6 changes: 3 additions & 3 deletions common/pinot/pinotQueryValidator.go
Original file line number Diff line number Diff line change
Expand Up @@ -462,13 +462,13 @@ func processCustomString(operator string, colNameStr string, colValStr string) s
}

// createCustomStringQuery builds the filter fragment used when a query compares
// a custom string attribute, stored under the Attr JSON column, to a value.
//
// The returned fragment has two parts joined by AND:
//  1. a JSON_MATCH check that "$.<colNameStr>" is not null, and
//  2. a regular-expression match on the attribute's value.
//
// notEqual is written verbatim immediately before the second part.
// NOTE(review): judging by the expected strings in the tests, it is "NOT " for
// a != comparison and empty for =; confirm against the caller.
//
// NOTE(review): this is a diff rendering, so each format-string line below
// appears twice: the previous form, REGEXP_LIKE(JSON_EXTRACT_SCALAR(...)),
// followed directly by its replacement, JSON_MATCH(Attr, 'REGEXP_LIKE(...)').
// The replacement also changes the non-empty pattern from '<value>*' to
// '.*<value>.*'.
//
// NOTE(review): colValStr and colNameStr are interpolated with %s and no
// escaping. A value containing a single quote or regex metacharacters changes
// the generated expression; verify that callers sanitize the input.
func createCustomStringQuery(colNameStr string, colValStr string, notEqual string) string {
// Edge case: an empty value is matched with the anchored pattern ^$ instead of
// being wrapped in wildcards like a non-empty value.
if colValStr == "" {
return fmt.Sprintf("JSON_MATCH(Attr, '\"$.%s\" is not null') "+
"AND %sREGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.%s', 'string'), '^$')", colNameStr, notEqual, colNameStr)
"AND %sJSON_MATCH(Attr, 'REGEXP_LIKE(\"$.%s\", ''^$'')')", colNameStr, notEqual, colNameStr)
}

return fmt.Sprintf("JSON_MATCH(Attr, '\"$.%s\" is not null') "+
"AND %sREGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.%s', 'string'), '%s*')", colNameStr, notEqual, colNameStr, colValStr)
"AND %sJSON_MATCH(Attr, 'REGEXP_LIKE(\"$.%s\", ''.*%s.*'')')", colNameStr, notEqual, colNameStr, colValStr)
}

func trimTimeFieldValueFromNanoToMilliSeconds(original *sqlparser.SQLVal) (*sqlparser.SQLVal, error) {
Expand Down
48 changes: 28 additions & 20 deletions common/pinot/pinotQueryValidator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,23 +55,31 @@ func TestValidateQuery(t *testing.T) {
err: "right comparison is invalid: &{<nil> wid { }}"},
"Case3-1: query with custom field": {
query: "CustomStringField = 'custom'",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'custom*')",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*custom.*'')')",
},
"Case3-2: query with custom field value is empty": {
"Case3-2: query with custom field not equal": {
query: "CustomStringField != 'custom'",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*custom.*'')')",
},
"Case3-3: query with custom field value is empty": {
query: "CustomStringField = ''",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), '^$')",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''^$'')')",
},
"Case3-4: query with custom field not equal to empty": {
query: "CustomStringField != ''",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''^$'')')",
},
"Case4: custom field query with or in string": {
query: "CustomStringField='Or'",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'Or*')",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*Or.*'')')",
},
"Case5: custom keyword field query": {
query: "CustomKeywordField = 'custom'",
validated: "(JSON_MATCH(Attr, '\"$.CustomKeywordField\"=''custom''') or JSON_MATCH(Attr, '\"$.CustomKeywordField[*]\"=''custom'''))",
},
"Case6-1: complex query I: with parenthesis": {
query: "(CustomStringField = 'custom and custom2 or custom3 order by') or CustomIntField between 1 and 10",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'custom and custom2 or custom3 order by*') or (JSON_MATCH(Attr, '\"$.CustomIntField\" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*custom and custom2 or custom3 order by.*'')') or (JSON_MATCH(Attr, '\"$.CustomIntField\" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))",
},
"Case6-2: complex query II: with only system keys": {
query: "DomainID = 'd-id' and (RunID = 'run-id' or WorkflowID = 'wid')",
Expand All @@ -83,7 +91,7 @@ func TestValidateQuery(t *testing.T) {
},
"Case6-4: complex query IV": {
query: "WorkflowID = 'wid' and (CustomStringField = 'custom and custom2 or custom3 order by' or CustomIntField between 1 and 10)",
validated: "WorkflowID = 'wid' and (JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'custom and custom2 or custom3 order by*') or (JSON_MATCH(Attr, '\"$.CustomIntField\" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))",
validated: "WorkflowID = 'wid' and (JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*custom and custom2 or custom3 order by.*'')') or (JSON_MATCH(Attr, '\"$.CustomIntField\" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))",
},
"Case6-5: complex query with partial match": {
query: "RunID like '123' or WorkflowID like '123'",
Expand Down Expand Up @@ -303,11 +311,11 @@ func TestValidateQuery(t *testing.T) {
},
"case22-1: test not equal to a string field": {
query: "CustomStringField != 'abc'",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*')",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')')",
},
"case22-2: test not equal to an empty string": {
query: "CustomStringField != ''",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), '^$')",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''^$'')')",
},
// ES also doesn't support this kind of query
"case22-3: custom string is missing": {
Expand All @@ -321,35 +329,35 @@ func TestValidateQuery(t *testing.T) {
},
"case22-5: 2 custom string not equal with and clause": {
query: "CustomStringField != 'abc' AND CustomStringField != 'def'",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*')",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')')",
},
"case22-6: 2 custom string , equal and not equal with and clause": {
"case22-6: 2 custom string, equal and not equal with and clause": {
query: "CustomStringField = 'abc' AND CustomStringField != 'def'",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*')",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')')",
},
"case22-7: 2 custom string , not equal and equal with and clause": {
"case22-7: 2 custom string, not equal and equal with and clause": {
query: "CustomStringField != 'abc' AND CustomStringField = 'def'",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*')",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')')",
},
"case22-8: 2 custom string equal with and clause": {
query: "CustomStringField = 'abc' AND CustomStringField = 'def'",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*')",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')')",
},
"case22-9: 2 custom string not equal with or clause": {
query: "CustomStringField != 'abc' OR CustomStringField != 'def'",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*'))",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')'))",
},
"case22-10: 2 custom string , equal and not equal with or clause": {
"case22-10: 2 custom string, equal and not equal with or clause": {
query: "CustomStringField = 'abc' OR CustomStringField != 'def'",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*'))",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')'))",
},
"case22-11: 2 custom string , not equal and equal with or clause": {
"case22-11: 2 custom string, not equal and equal with or clause": {
query: "CustomStringField != 'abc' OR CustomStringField = 'def'",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*'))",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')'))",
},
"case22-12: 2 custom string equal with or clause": {
query: "CustomStringField = 'abc' OR CustomStringField = 'def'",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*'))",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')'))",
},
}

Expand Down
Loading