Skip to content

Commit

Permalink
Refactor pinot custom string query in pinot_query_validator (#6298)
Browse files Browse the repository at this point in the history
* refactor pinot custom string query

* add comment
  • Loading branch information
bowenxia authored Sep 23, 2024
1 parent c8abc2a commit 75079f7
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 27 deletions.
8 changes: 4 additions & 4 deletions common/persistence/pinot/pinot_visibility_store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1210,7 +1210,7 @@ LIMIT 0, 10
expectedOutput: fmt.Sprintf(`SELECT *
FROM %s
WHERE DomainID = 'bfd5c907-f899-4baf-a7b2-2ab85e623ebd'
AND (JSON_MATCH(Attr, '"$.CustomKeywordField"=''keywordCustomized''') or JSON_MATCH(Attr, '"$.CustomKeywordField[*]"=''keywordCustomized''')) and JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'String and or order by*')
AND (JSON_MATCH(Attr, '"$.CustomKeywordField"=''keywordCustomized''') or JSON_MATCH(Attr, '"$.CustomKeywordField[*]"=''keywordCustomized''')) and JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*String and or order by.*'')')
Order BY StartTime DESC
LIMIT 0, 10
`, testTableName),
Expand All @@ -1228,7 +1228,7 @@ LIMIT 0, 10
expectedOutput: fmt.Sprintf(`SELECT *
FROM %s
WHERE DomainID = 'bfd5c907-f899-4baf-a7b2-2ab85e623ebd'
AND (JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'Or*') or JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'and*'))
AND (JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*Or.*'')') or JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*and.*'')'))
Order by StartTime DESC
LIMIT 0, 10
`, testTableName),
Expand All @@ -1246,7 +1246,7 @@ LIMIT 0, 10
expectedOutput: fmt.Sprintf(`SELECT *
FROM %s
WHERE DomainID = 'bfd5c907-f899-4baf-a7b2-2ab85e623ebd'
AND WorkflowID = 'wid' and (JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'custom and custom2 or custom3 order by*') or (JSON_MATCH(Attr, '"$.CustomIntField" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))
AND WorkflowID = 'wid' and (JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*custom and custom2 or custom3 order by.*'')') or (JSON_MATCH(Attr, '"$.CustomIntField" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))
Order BY StartTime DESC
LIMIT 0, 10
`, testTableName),
Expand Down Expand Up @@ -1300,7 +1300,7 @@ LIMIT 0, 10
expectedOutput: fmt.Sprintf(`SELECT *
FROM %s
WHERE DomainID = 'bfd5c907-f899-4baf-a7b2-2ab85e623ebd'
AND CloseStatus < 0 and (JSON_MATCH(Attr, '"$.CustomKeywordField"=''keywordCustomized''') or JSON_MATCH(Attr, '"$.CustomKeywordField[*]"=''keywordCustomized''')) and (JSON_MATCH(Attr, '"$.CustomIntField" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10) and JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'String field is for text*')
AND CloseStatus < 0 and (JSON_MATCH(Attr, '"$.CustomKeywordField"=''keywordCustomized''') or JSON_MATCH(Attr, '"$.CustomKeywordField[*]"=''keywordCustomized''')) and (JSON_MATCH(Attr, '"$.CustomIntField" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10) and JSON_MATCH(Attr, '"$.CustomStringField" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE("$.CustomStringField", ''.*String field is for text.*'')')
Order by DomainID Desc
LIMIT 11, 10
`, testTableName),
Expand Down
6 changes: 3 additions & 3 deletions common/pinot/pinotQueryValidator.go
Original file line number Diff line number Diff line change
Expand Up @@ -462,13 +462,13 @@ func processCustomString(operator string, colNameStr string, colValStr string) s
}

func createCustomStringQuery(colNameStr string, colValStr string, notEqual string) string {
// handle edge case
if colValStr == "" {
return fmt.Sprintf("JSON_MATCH(Attr, '\"$.%s\" is not null') "+
"AND %sREGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.%s', 'string'), '^$')", colNameStr, notEqual, colNameStr)
"AND %sJSON_MATCH(Attr, 'REGEXP_LIKE(\"$.%s\", ''^$'')')", colNameStr, notEqual, colNameStr)
}

return fmt.Sprintf("JSON_MATCH(Attr, '\"$.%s\" is not null') "+
"AND %sREGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.%s', 'string'), '%s*')", colNameStr, notEqual, colNameStr, colValStr)
"AND %sJSON_MATCH(Attr, 'REGEXP_LIKE(\"$.%s\", ''.*%s.*'')')", colNameStr, notEqual, colNameStr, colValStr)
}

func trimTimeFieldValueFromNanoToMilliSeconds(original *sqlparser.SQLVal) (*sqlparser.SQLVal, error) {
Expand Down
48 changes: 28 additions & 20 deletions common/pinot/pinotQueryValidator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,23 +55,31 @@ func TestValidateQuery(t *testing.T) {
err: "right comparison is invalid: &{<nil> wid { }}"},
"Case3-1: query with custom field": {
query: "CustomStringField = 'custom'",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'custom*')",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*custom.*'')')",
},
"Case3-2: query with custom field value is empty": {
"Case3-2: query with custom field not equal": {
query: "CustomStringField != 'custom'",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*custom.*'')')",
},
"Case3-3: query with custom field value is empty": {
query: "CustomStringField = ''",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), '^$')",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''^$'')')",
},
"Case3-4: query with custom field not equal to empty": {
query: "CustomStringField != ''",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''^$'')')",
},
"Case4: custom field query with or in string": {
query: "CustomStringField='Or'",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'Or*')",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*Or.*'')')",
},
"Case5: custom keyword field query": {
query: "CustomKeywordField = 'custom'",
validated: "(JSON_MATCH(Attr, '\"$.CustomKeywordField\"=''custom''') or JSON_MATCH(Attr, '\"$.CustomKeywordField[*]\"=''custom'''))",
},
"Case6-1: complex query I: with parenthesis": {
query: "(CustomStringField = 'custom and custom2 or custom3 order by') or CustomIntField between 1 and 10",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'custom and custom2 or custom3 order by*') or (JSON_MATCH(Attr, '\"$.CustomIntField\" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*custom and custom2 or custom3 order by.*'')') or (JSON_MATCH(Attr, '\"$.CustomIntField\" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))",
},
"Case6-2: complex query II: with only system keys": {
query: "DomainID = 'd-id' and (RunID = 'run-id' or WorkflowID = 'wid')",
Expand All @@ -83,7 +91,7 @@ func TestValidateQuery(t *testing.T) {
},
"Case6-4: complex query IV": {
query: "WorkflowID = 'wid' and (CustomStringField = 'custom and custom2 or custom3 order by' or CustomIntField between 1 and 10)",
validated: "WorkflowID = 'wid' and (JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'custom and custom2 or custom3 order by*') or (JSON_MATCH(Attr, '\"$.CustomIntField\" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))",
validated: "WorkflowID = 'wid' and (JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*custom and custom2 or custom3 order by.*'')') or (JSON_MATCH(Attr, '\"$.CustomIntField\" is not null') AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) >= 1 AND CAST(JSON_EXTRACT_SCALAR(Attr, '$.CustomIntField') AS INT) <= 10))",
},
"Case6-5: complex query with partial match": {
query: "RunID like '123' or WorkflowID like '123'",
Expand Down Expand Up @@ -303,11 +311,11 @@ func TestValidateQuery(t *testing.T) {
},
"case22-1: test not equal to a string field": {
query: "CustomStringField != 'abc'",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*')",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')')",
},
"case22-2: test not equal to an empty string": {
query: "CustomStringField != ''",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), '^$')",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''^$'')')",
},
// ES also doesn't support this kind of query
"case22-3: custom string is missing": {
Expand All @@ -321,35 +329,35 @@ func TestValidateQuery(t *testing.T) {
},
"case22-5: 2 custom string not equal with and clause": {
query: "CustomStringField != 'abc' AND CustomStringField != 'def'",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*')",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')')",
},
"case22-6: 2 custom string , equal and not equal with and clause": {
"case22-6: 2 custom string, equal and not equal with and clause": {
query: "CustomStringField = 'abc' AND CustomStringField != 'def'",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*')",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')')",
},
"case22-7: 2 custom string , not equal and equal with and clause": {
"case22-7: 2 custom string, not equal and equal with and clause": {
query: "CustomStringField != 'abc' AND CustomStringField = 'def'",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*')",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')')",
},
"case22-8: 2 custom string equal with and clause": {
query: "CustomStringField = 'abc' AND CustomStringField = 'def'",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*')",
validated: "JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') and JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')')",
},
"case22-9: 2 custom string not equal with or clause": {
query: "CustomStringField != 'abc' OR CustomStringField != 'def'",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*'))",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')'))",
},
"case22-10: 2 custom string , equal and not equal with or clause": {
"case22-10: 2 custom string, equal and not equal with or clause": {
query: "CustomStringField = 'abc' OR CustomStringField != 'def'",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*'))",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')'))",
},
"case22-11: 2 custom string , not equal and equal with or clause": {
"case22-11: 2 custom string, not equal and equal with or clause": {
query: "CustomStringField != 'abc' OR CustomStringField = 'def'",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*'))",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND NOT JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')'))",
},
"case22-12: 2 custom string equal with or clause": {
query: "CustomStringField = 'abc' OR CustomStringField = 'def'",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'abc*') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND REGEXP_LIKE(JSON_EXTRACT_SCALAR(Attr, '$.CustomStringField', 'string'), 'def*'))",
validated: "(JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*abc.*'')') or JSON_MATCH(Attr, '\"$.CustomStringField\" is not null') AND JSON_MATCH(Attr, 'REGEXP_LIKE(\"$.CustomStringField\", ''.*def.*'')'))",
},
}

Expand Down

0 comments on commit 75079f7

Please sign in to comment.