Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add implementation for simple_query_string relevance search function in SQL and PPL #635

Merged
Merged
Prev Previous commit
Next Next commit
Merge branch 'dev-simple_query_string-#192' into dev-simple_query_str…
…ing-#192-impl3

Signed-off-by: Yury Fridlyand <[email protected]>
  • Loading branch information
Yury-Fridlyand committed Jun 3, 2022
commit 5564fd05f0173c8ae46025e972a1e938a0d9682d
11 changes: 9 additions & 2 deletions core/src/main/java/org/opensearch/sql/expression/DSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -654,8 +654,15 @@ public FunctionExpression match(Expression... args) {
return compile(BuiltinFunctionName.MATCH, args);
}

/**
 * Compiles a {@code match_phrase} function expression from the given arguments.
 *
 * @param args argument expressions handed to {@code compile}
 * @return the expression compiled for {@code BuiltinFunctionName.MATCH_PHRASE}
 */
public FunctionExpression match_phrase(Expression... args) {
return compile(BuiltinFunctionName.MATCH_PHRASE, args);
}

public FunctionExpression simple_query_string(Expression... args) {
return (FunctionExpression) repository
.compile(BuiltinFunctionName.SIMPLE_QUERY_STRING.getName(), Arrays.asList(args.clone()));
return compile(BuiltinFunctionName.SIMPLE_QUERY_STRING, args);
}

/**
 * Compiles a built-in function through the repository.
 *
 * @param bfn built-in function whose {@code getName()} is passed to the repository
 * @param args argument expressions; the array is cloned and wrapped in a list before compiling
 * @return the repository's compile result, cast to {@code FunctionExpression}
 */
private FunctionExpression compile(BuiltinFunctionName bfn, Expression... args) {
return (FunctionExpression) repository.compile(bfn.getName(), Arrays.asList(args.clone()));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ public enum BuiltinFunctionName {
*/
MATCH(FunctionName.of("match")),
SIMPLE_QUERY_STRING(FunctionName.of("simple_query_string")),
MATCH_PHRASE(FunctionName.of("match_phrase")),
MATCHPHRASE(FunctionName.of("matchphrase")),

/**
* Legacy Relevance Function.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,30 +26,36 @@
@UtilityClass
public class OpenSearchFunctions {

public static final int MATCH_MAX_NUM_PARAMETERS = 12;
public static final int MATCH_PHRASE_MAX_NUM_PARAMETERS = 3;
public static final int MATCH_MAX_NUM_PARAMETERS = 14;
public static final int MATCH_PHRASE_MAX_NUM_PARAMETERS = 5;
public static final int MIN_NUM_PARAMETERS = 2;
public static final int SIMPLE_QUERY_STRING_MAX_NUM_PARAMETERS = 14;

/**
 * Add functions specific to OpenSearch to repository.
 *
 * <p>Registers the resolvers for {@code match} and {@code simple_query_string}, plus the
 * {@code match_phrase} resolver under both the MATCH_PHRASE and MATCHPHRASE names.
 *
 * @param repository function repository that receives the resolvers
 */
public void register(BuiltinFunctionRepository repository) {
repository.register(match());
repository.register(simple_query_string());
// Register MATCHPHRASE as MATCH_PHRASE as well for backwards
// compatibility.
repository.register(match_phrase(BuiltinFunctionName.MATCH_PHRASE));
repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASE));
}

private static FunctionResolver match() {
FunctionName funcName = BuiltinFunctionName.MATCH.getName();
// At most field, query, and all optional parameters
final int matchMaxNumParameters = 14;
return getRelevanceFunctionResolver(funcName, matchMaxNumParameters, STRING);
return getRelevanceFunctionResolver(funcName, MATCH_MAX_NUM_PARAMETERS, STRING);
}

/**
 * Builds the function resolver for a match_phrase style function under the given name.
 *
 * @param matchPhrase built-in function whose name the resolver is created for
 * @return the result of {@code getRelevanceFunctionResolver} called with that name,
 *     MATCH_PHRASE_MAX_NUM_PARAMETERS as the maximum parameter count, and STRING as
 *     the first-argument type
 */
private static FunctionResolver match_phrase(BuiltinFunctionName matchPhrase) {
FunctionName funcName = matchPhrase.getName();
return getRelevanceFunctionResolver(funcName, MATCH_PHRASE_MAX_NUM_PARAMETERS, STRING);
}

private static FunctionResolver simple_query_string() {
FunctionName funcName = BuiltinFunctionName.SIMPLE_QUERY_STRING.getName();
// At most field, query, and all optional parameters
final int simpleQueryStringMaxNumParameters = 12;
return getRelevanceFunctionResolver(funcName, simpleQueryStringMaxNumParameters, STRUCT);
return getRelevanceFunctionResolver(funcName, SIMPLE_QUERY_STRING_MAX_NUM_PARAMETERS, STRUCT);
}

private static FunctionResolver getRelevanceFunctionResolver(
Expand All @@ -60,10 +66,10 @@ private static FunctionResolver getRelevanceFunctionResolver(

private static Map<FunctionSignature, FunctionBuilder> getRelevanceFunctionSignatureMap(
FunctionName funcName, int maxNumParameters, ExprCoreType firstArgType) {
final int minNumParameters = 2;
FunctionBuilder buildFunction = args -> new OpenSearchFunction(funcName, args);
var signatureMapBuilder = ImmutableMap.<FunctionSignature, FunctionBuilder>builder();
for (int numParameters = minNumParameters; numParameters <= maxNumParameters; numParameters++) {
for (int numParameters = MIN_NUM_PARAMETERS;
numParameters <= maxNumParameters; numParameters++) {
List<ExprType> args = new ArrayList<>(Collections.nCopies(numParameters - 1, STRING));
args.add(0, firstArgType);
signatureMapBuilder.put(new FunctionSignature(funcName, args), buildFunction);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import com.google.common.collect.ImmutableMap;
import java.util.LinkedHashMap;
import java.util.List;
import org.junit.jupiter.api.Test;
import org.opensearch.sql.data.model.ExprTupleValue;
import org.opensearch.sql.data.model.ExprValueUtils;
Expand Down
43 changes: 39 additions & 4 deletions docs/user/dql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2183,8 +2183,6 @@ Example with only ``field`` and ``query`` expressions, and all other parameters
| Bates | 789 Madison Street |
+------------+--------------------+



Another example to show how to set custom values for the optional parameters::

os> SELECT lastname FROM accounts WHERE match(firstname, 'Hattie', operator='AND', boost=2.0);
Expand All @@ -2196,6 +2194,45 @@ Another example to show how to set custom values for the optional parameters::
+------------+


MATCH_PHRASE
------------

Description
>>>>>>>>>>>

``match_phrase(field_expression, query_expression[, option=<option_value>]*)``

The match_phrase function maps to the match_phrase query used in the search engine, and returns the documents in which a given field matches the provided text. Available parameters include:

- analyzer
- slop
- zero_terms_query

For backward compatibility, matchphrase is also supported and is mapped to the match_phrase query as well.

Example with only ``field`` and ``query`` expressions, with all other parameters set to their default values::

os> SELECT author, title FROM books WHERE match_phrase(author, 'Alexander Milne');
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+

Another example to show how to set custom values for the optional parameters::

os> SELECT author, title FROM books WHERE match_phrase(author, 'Alan Milne', slop = 2);
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+


SIMPLE_QUERY_STRING
-------------------

Expand Down Expand Up @@ -2237,8 +2274,6 @@ Example with only ``fields`` and ``query`` expressions, and all other parameters
| Nanette | Bates | Nogal | 789 Madison Street |
+-------------+------------+--------+--------------------+



Another example to show how to set custom values for the optional parameters::

os> select firstname, lastname, city, address from accounts where simple_query_string(['firstname', city ^ 2], 'Amber Nogal', analyzer=keyword, default_operator='AND');
Expand Down
44 changes: 42 additions & 2 deletions docs/user/ppl/functions/relevance.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,47 @@ Another example to show how to set custom values for the optional parameters::
+------------+


MATCH_PHRASE
------------

Description
>>>>>>>>>>>

``match_phrase(field_expression, query_expression[, option=<option_value>]*)``

The match_phrase function maps to the match_phrase query used in the search engine, and returns the documents in which a given field matches the provided text. Available parameters include:

- analyzer
- slop
- zero_terms_query

For backward compatibility, matchphrase is also supported and is mapped to the match_phrase query as well.

Example with only ``field`` and ``query`` expressions, with all other parameters set to their default values::

os> source=books | where match_phrase(author, 'Alexander Milne') | fields author, title
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+



Another example to show how to set custom values for the optional parameters::

os> source=books | where match_phrase(author, 'Alan Milne', slop = 2) | fields author, title
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+


SIMPLE_QUERY_STRING
-------------------

Expand Down Expand Up @@ -99,8 +140,6 @@ Example with only ``fields`` and ``query`` expressions, and all other parameters
| Nanette | Bates | Nogal | 789 Madison Street |
+-------------+------------+--------+--------------------+



Another example to show how to set custom values for the optional parameters::

os> source=accounts | where simple_query_string(['firstname', city ^ 2], 'Amber Nogal', analyzer=keyword, default_operator='AND') | fields firstname, lastname, city, address;
Expand All @@ -110,6 +149,7 @@ Another example to show how to set custom values for the optional parameters::
|-------------+------------+--------+-----------|
+-------------+------------+--------+-----------+


Limitations
>>>>>>>>>>>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

package org.opensearch.sql.opensearch.storage.script.filter;

import static org.junit.Assert.assertEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
Expand Down Expand Up @@ -405,6 +405,25 @@ void should_build_simple_query_string_query_with_default_parameters() {
dsl.namedArgument("query", literal("search query")))));
}

@Test
// With only the field and query arguments supplied, the built match_phrase query is
// expected to carry slop 0, zero_terms_query NONE and boost 1.0.
void should_build_match_phrase_query_with_default_parameters() {
assertJsonEquals(
"{\n"
+ " \"match_phrase\" : {\n"
+ " \"message\" : {\n"
+ " \"query\" : \"search query\",\n"
+ " \"slop\" : 0,\n"
+ " \"zero_terms_query\" : \"NONE\",\n"
+ " \"boost\" : 1.0\n"
+ " }\n"
+ " }\n"
+ "}",
buildQuery(
dsl.match_phrase(
dsl.namedArgument("field", literal("message")),
dsl.namedArgument("query", literal("search query")))));
}

@Test
// Test is disabled because of:
// 1) DSL reverses order of the `fields`
Expand Down Expand Up @@ -464,6 +483,96 @@ void simple_query_string_invalid_parameter() {
"Parameter invalid_parameter is invalid for match function.");
}

@Test
// Custom analyzer, slop and zero_terms_query arguments are expected to show up in the
// built query. The slop is passed as the string "2" and expected back as the number 2.
void should_build_match_phrase_query_with_custom_parameters() {
assertJsonEquals(
"{\n"
+ " \"match_phrase\" : {\n"
+ " \"message\" : {\n"
+ " \"query\" : \"search query\",\n"
+ " \"analyzer\" : \"keyword\","
+ " \"slop\" : 2,\n"
+ " \"zero_terms_query\" : \"ALL\",\n"
+ " \"boost\" : 1.0\n"
+ " }\n"
+ " }\n"
+ "}",
buildQuery(
dsl.match_phrase(
dsl.namedArgument("field", literal("message")),
dsl.namedArgument("query", literal("search query")),
dsl.namedArgument("analyzer", literal("keyword")),
dsl.namedArgument("slop", literal("2")),
dsl.namedArgument("zero_terms_query", literal("ALL")))));
}

@Test
// An unrecognized argument name is expected to make buildQuery throw a
// SemanticCheckException that names both the parameter and the match_phrase function.
void match_phrase_invalid_parameter() {
FunctionExpression expr = dsl.match_phrase(
dsl.namedArgument("field", literal("message")),
dsl.namedArgument("query", literal("search query")),
dsl.namedArgument("invalid_parameter", literal("invalid_value")));
var msg = assertThrows(SemanticCheckException.class, () -> buildQuery(expr)).getMessage();
assertEquals("Parameter invalid_parameter is invalid for match_phrase function.", msg);
}

@Test
// A non-integer slop value ("1.5") is expected to surface from buildQuery as a
// NumberFormatException whose message quotes the offending input.
void match_phrase_invalid_value_slop() {
FunctionExpression expr = dsl.match_phrase(
dsl.namedArgument("field", literal("message")),
dsl.namedArgument("query", literal("search query")),
dsl.namedArgument("slop", literal("1.5")));
var msg = assertThrows(NumberFormatException.class, () -> buildQuery(expr)).getMessage();
assertEquals("For input string: \"1.5\"", msg);
}

@Test
// An unknown zero_terms_query value ("meow") is expected to surface from buildQuery as an
// IllegalArgumentException reporting a missing ZeroTermsQuery enum constant.
void match_phrase_invalid_value_ztq() {
FunctionExpression expr = dsl.match_phrase(
dsl.namedArgument("field", literal("message")),
dsl.namedArgument("query", literal("search query")),
dsl.namedArgument("zero_terms_query", literal("meow")));
var msg = assertThrows(IllegalArgumentException.class, () -> buildQuery(expr)).getMessage();
assertEquals("No enum constant org.opensearch.index.search.MatchQuery.ZeroTermsQuery.meow",
msg);
}

@Test
// Only the query argument is supplied, so the single-argument call matches none of the
// listed signatures and dsl.match_phrase itself is expected to throw.
void match_phrase_missing_field() {
var msg = assertThrows(ExpressionEvaluationException.class, () ->
dsl.match_phrase(
dsl.namedArgument("query", literal("search query")))).getMessage();
assertEquals("match_phrase function expected {[STRING,STRING],[STRING,STRING,STRING],"
+ "[STRING,STRING,STRING,STRING],[STRING,STRING,STRING,STRING,STRING]}, but get [STRING]",
msg);
}

@Test
// Only the field argument is supplied, so the single-argument call matches none of the
// listed signatures and dsl.match_phrase itself is expected to throw.
void match_phrase_missing_query() {
var msg = assertThrows(ExpressionEvaluationException.class, () ->
dsl.match_phrase(
dsl.namedArgument("field", literal("message")))).getMessage();
assertEquals("match_phrase function expected {[STRING,STRING],[STRING,STRING,STRING],"
+ "[STRING,STRING,STRING,STRING],[STRING,STRING,STRING,STRING,STRING]}, but get [STRING]",
msg);
}

@Test
// Six arguments exceed every accepted match_phrase signature (the longest listed one has
// five), so dsl.match_phrase is expected to throw and report the six STRING types it got.
// The argument names are placeholders; only the argument count matters for this check.
void match_phrase_too_many_args() {
var msg = assertThrows(ExpressionEvaluationException.class, () ->
dsl.match_phrase(
dsl.namedArgument("one", literal("1")),
dsl.namedArgument("two", literal("2")),
dsl.namedArgument("three", literal("3")),
dsl.namedArgument("four", literal("4")),
dsl.namedArgument("five", literal("5")),
dsl.namedArgument("six", literal("6"))
)).getMessage();
assertEquals("match_phrase function expected {[STRING,STRING],[STRING,STRING,STRING],"
+ "[STRING,STRING,STRING,STRING],[STRING,STRING,STRING,STRING,STRING]}, but get "
+ "[STRING,STRING,STRING,STRING,STRING,STRING]", msg);
}

@Test
void simple_query_string_missing_fields() {
var msg = assertThrows(ExpressionEvaluationException.class, () ->
Expand All @@ -476,7 +585,10 @@ void simple_query_string_missing_fields() {
+ "STRING,STRING,STRING,STRING,STRING,STRING],[STRUCT,STRING,STRING,STRING,STRING,"
+ "STRING,STRING,STRING,STRING,STRING],[STRUCT,STRING,STRING,STRING,STRING,STRING,"
+ "STRING,STRING,STRING,STRING,STRING],[STRUCT,STRING,STRING,STRING,STRING,STRING,"
+ "STRING,STRING,STRING,STRING,STRING,STRING]}, but get [STRING]",
+ "STRING,STRING,STRING,STRING,STRING,STRING],[STRUCT,STRING,STRING,STRING,STRING,"
+ "STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING],[STRUCT,STRING,STRING,"
+ "STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING]},"
+ " but get [STRING]",
msg);
}

Expand All @@ -495,7 +607,10 @@ void simple_query_string_missing_query() {
+ "STRING,STRING,STRING,STRING,STRING,STRING],[STRUCT,STRING,STRING,STRING,STRING,"
+ "STRING,STRING,STRING,STRING,STRING],[STRUCT,STRING,STRING,STRING,STRING,STRING,"
+ "STRING,STRING,STRING,STRING,STRING],[STRUCT,STRING,STRING,STRING,STRING,STRING,"
+ "STRING,STRING,STRING,STRING,STRING,STRING]}, but get [STRUCT]",
+ "STRING,STRING,STRING,STRING,STRING,STRING],[STRUCT,STRING,STRING,STRING,STRING,"
+ "STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING],[STRUCT,STRING,STRING,"
+ "STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING]},"
+ " but get [STRUCT]",
msg);
}

Expand Down
2 changes: 2 additions & 0 deletions ppl/src/main/antlr/OpenSearchPPLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,9 @@ IF: 'IF';

// RELEVANCE FUNCTIONS AND PARAMETERS
MATCH: 'MATCH';
MATCH_PHRASE: 'MATCH_PHRASE';
SIMPLE_QUERY_STRING: 'SIMPLE_QUERY_STRING';

ALLOW_LEADING_WILDCARD: 'ALLOW_LEADING_WILDCARD';
ANALYZE_WILDCARD: 'ANALYZE_WILDCARD';
ANALYZER: 'ANALYZER';
Expand Down
Loading
You are viewing a condensed version of this merge commit. You can view the full changes here.