Skip to content

Commit

Permalink
Add multi_match to SQL plugin (#649)
Browse files Browse the repository at this point in the history
Signed-off-by: Yury Fridlyand <[email protected]>

Co-authored-by: Yury Fridlyand <[email protected]>
  • Loading branch information
MaxKsyunz and Yury-Fridlyand authored Jun 23, 2022
1 parent 127b662 commit 90ccc3e
Show file tree
Hide file tree
Showing 22 changed files with 4,718 additions and 3,865 deletions.
4 changes: 4 additions & 0 deletions core/src/main/java/org/opensearch/sql/expression/DSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,10 @@ public FunctionExpression match_phrase(Expression... args) {
return compile(BuiltinFunctionName.MATCH_PHRASE, args);
}

/**
 * Builds a {@code multi_match} function expression.
 *
 * @param args arguments passed through unchanged to {@code compile}
 * @return the expression compiled under {@code BuiltinFunctionName.MULTI_MATCH}
 */
public FunctionExpression multi_match(Expression... args) {
  FunctionExpression multiMatch = compile(BuiltinFunctionName.MULTI_MATCH, args);
  return multiMatch;
}

/**
 * Builds a {@code simple_query_string} function expression.
 *
 * @param args arguments passed through unchanged to {@code compile}
 * @return the expression compiled under {@code BuiltinFunctionName.SIMPLE_QUERY_STRING}
 */
public FunctionExpression simple_query_string(Expression... args) {
  FunctionExpression simpleQueryString = compile(BuiltinFunctionName.SIMPLE_QUERY_STRING, args);
  return simpleQueryString;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,8 @@ public enum BuiltinFunctionName {
*/
QUERY(FunctionName.of("query")),
MATCH_QUERY(FunctionName.of("match_query")),
MATCHQUERY(FunctionName.of("matchquery"));
MATCHQUERY(FunctionName.of("matchquery")),
MULTI_MATCH(FunctionName.of("multi_match"));

private final FunctionName name;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,15 @@ public class OpenSearchFunctions {
public static final int MATCH_MAX_NUM_PARAMETERS = 14;
public static final int MATCH_PHRASE_MAX_NUM_PARAMETERS = 5;
public static final int MIN_NUM_PARAMETERS = 2;
public static final int MULTI_MATCH_MAX_NUM_PARAMETERS = 17;
public static final int SIMPLE_QUERY_STRING_MAX_NUM_PARAMETERS = 14;

/**
* Add functions specific to OpenSearch to repository.
*/
public void register(BuiltinFunctionRepository repository) {
repository.register(match());
repository.register(multi_match());
repository.register(simple_query_string());
// Register MATCHPHRASE as MATCH_PHRASE as well for backwards
// compatibility.
Expand All @@ -53,6 +55,11 @@ private static FunctionResolver match_phrase(BuiltinFunctionName matchPhrase) {
return getRelevanceFunctionResolver(funcName, MATCH_PHRASE_MAX_NUM_PARAMETERS, STRING);
}

/**
 * Creates the resolver for the {@code multi_match} relevance function.
 *
 * <p>Delegates to {@code getRelevanceFunctionResolver} with the function's registered name,
 * the {@code MULTI_MATCH_MAX_NUM_PARAMETERS} limit and {@code STRUCT}.
 * NOTE(review): STRUCT is presumably the type of the leading "fields" argument - confirm
 * against getRelevanceFunctionResolver.
 *
 * @return the resolver returned by {@code getRelevanceFunctionResolver}
 */
private static FunctionResolver multi_match() {
  return getRelevanceFunctionResolver(
      BuiltinFunctionName.MULTI_MATCH.getName(), MULTI_MATCH_MAX_NUM_PARAMETERS, STRUCT);
}

private static FunctionResolver simple_query_string() {
FunctionName funcName = BuiltinFunctionName.SIMPLE_QUERY_STRING.getName();
return getRelevanceFunctionResolver(funcName, SIMPLE_QUERY_STRING_MAX_NUM_PARAMETERS, STRUCT);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,51 @@ void visit_in() {
() -> analyze(AstDSL.in(field("integer_value"), Collections.emptyList())));
}

// Minimal multi_match call: one field ("field") with weight 1.0 plus a query string.
// The first argument is the expression built through the DSL from named arguments;
// the second is the unresolved AST for the same call.
// NOTE(review): assertAnalyzeEqual is defined outside this view - presumably it analyzes
// the AST and compares the result with the DSL expression; confirm.
@Test
void multi_match_expression() {
assertAnalyzeEqual(
dsl.multi_match(
dsl.namedArgument("fields", DSL.literal(
new ExprTupleValue(new LinkedHashMap<>(ImmutableMap.of(
"field", ExprValueUtils.floatValue(1.F)))))),
dsl.namedArgument("query", DSL.literal("sample query"))),
AstDSL.function("multi_match",
AstDSL.unresolvedArg("fields", new RelevanceFieldList(Map.of(
"field", 1.F))),
AstDSL.unresolvedArg("query", stringLiteral("sample query"))));
}

// multi_match call with one weighted field, a query string and one extra named option
// (analyzer="keyword"). The DSL expression and the unresolved AST list the same three
// arguments in the same order: fields, query, analyzer.
// NOTE(review): assertAnalyzeEqual is defined outside this view - presumably it analyzes
// the AST and compares the result with the DSL expression; confirm.
@Test
void multi_match_expression_with_params() {
assertAnalyzeEqual(
dsl.multi_match(
dsl.namedArgument("fields", DSL.literal(
new ExprTupleValue(new LinkedHashMap<>(ImmutableMap.of(
"field", ExprValueUtils.floatValue(1.F)))))),
dsl.namedArgument("query", DSL.literal("sample query")),
dsl.namedArgument("analyzer", DSL.literal("keyword"))),
AstDSL.function("multi_match",
AstDSL.unresolvedArg("fields", new RelevanceFieldList(Map.of(
"field", 1.F))),
AstDSL.unresolvedArg("query", stringLiteral("sample query")),
AstDSL.unresolvedArg("analyzer", stringLiteral("keyword"))));
}

// multi_match call over two fields with different weights (field1 -> 1.0, field2 -> 0.3).
// Both sides are built from ImmutableMap.of with the entries in the same order
// (field1, then field2); the DSL side copies them into a LinkedHashMap.
// NOTE(review): assertAnalyzeEqual is defined outside this view - presumably it analyzes
// the AST and compares the result with the DSL expression; confirm.
@Test
void multi_match_expression_two_fields() {
assertAnalyzeEqual(
dsl.multi_match(
dsl.namedArgument("fields", DSL.literal(
new ExprTupleValue(new LinkedHashMap<>(ImmutableMap.of(
"field1", ExprValueUtils.floatValue(1.F),
"field2", ExprValueUtils.floatValue(.3F)))))),
dsl.namedArgument("query", DSL.literal("sample query"))),
AstDSL.function("multi_match",
AstDSL.unresolvedArg("fields", new RelevanceFieldList(ImmutableMap.of(
"field1", 1.F, "field2", .3F))),
AstDSL.unresolvedArg("query", stringLiteral("sample query"))));
}

@Test
void simple_query_string_expression() {
assertAnalyzeEqual(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,11 +154,19 @@ void match_to_string() {
assertEquals("match(field=\"message\", query=\"search query\")", expr.toString());
}

// The string form of a multi_match expression is expected to be
// "multi_match(fields=<fields value>, query=<query value>)".
@Test
void multi_match() {
  FunctionExpression multiMatch = dsl.multi_match(fields, query);
  String expected = String.format(
      "multi_match(fields=%s, query=%s)", fields.getValue(), query.getValue());
  assertEquals(expected, multiMatch.toString());
}

@Test
void simple_query_string() {
FunctionExpression expr = dsl.simple_query_string(fields, query);
assertEquals(String.format("simple_query_string(fields=%s, query=%s)",
fields.getValue().toString(), query.getValue().toString()),
fields.getValue(), query.getValue()),
expr.toString());
}
}
79 changes: 66 additions & 13 deletions docs/user/dql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2233,6 +2233,58 @@ Another example to show how to set custom values for the optional parameters::
+----------------------+--------------------------+


MULTI_MATCH
-----------

Description
>>>>>>>>>>>

``multi_match([field_expression+], query_expression[, option=<option_value>]*)``

The multi_match function maps to the multi_match query used in the search engine. It returns the documents that match a provided text, number, date or boolean value in a given field or fields.
The **^** lets you *boost* certain fields. Boosts are multipliers that weigh matches in one field more heavily than matches in other fields. Field names can be wrapped in double quotes, single quotes, or backticks, or written without any wrapping. Searching all fields with the star ``"*"`` is also available (the star symbol should be wrapped in quotes). The weight is optional and is specified after the field name; it can be delimited by the `caret` character or by whitespace. Refer to the examples below:

| ``multi_match(["Tags" ^ 2, 'Title' 3.4, `Body`, Comments ^ 0.3], ...)``
| ``multi_match(["*"], ...)``

Available parameters include:

- analyzer
- auto_generate_synonyms_phrase_query
- cutoff_frequency
- fuzziness
- fuzzy_transpositions
- lenient
- max_expansions
- minimum_should_match
- operator
- prefix_length
- tie_breaker
- type
- slop
- boost
- zero_terms_query

Example with only the ``fields`` and ``query`` expressions, with all other parameters set to their default values::

os> select * from books where multi_match(['title'], 'Pooh House');
fetched rows / total rows = 2/2
+------+--------------------------+----------------------+
| id | title | author |
|------+--------------------------+----------------------|
| 1 | The House at Pooh Corner | Alan Alexander Milne |
| 2 | Winnie-the-Pooh | Alan Alexander Milne |
+------+--------------------------+----------------------+

Another example to show how to set custom values for the optional parameters::

os> select * from books where multi_match(['title'], 'Pooh House', operator='AND', analyzer=default);
fetched rows / total rows = 1/1
+------+--------------------------+----------------------+
| id | title | author |
|------+--------------------------+----------------------|
| 1 | The House at Pooh Corner | Alan Alexander Milne |
+------+--------------------------+----------------------+

SIMPLE_QUERY_STRING
-------------------

Expand Down Expand Up @@ -2265,20 +2317,21 @@ Available parameters include:

Example with only the ``fields`` and ``query`` expressions, with all other parameters set to their default values::

os> select firstname, lastname, city, address from accounts where simple_query_string(['firstname', city ^ 2], 'Amber | Nogal');
os> select * from books where simple_query_string(['title'], 'Pooh House');
fetched rows / total rows = 2/2
+-------------+------------+--------+--------------------+
| firstname | lastname | city | address |
|-------------+------------+--------+--------------------|
| Amber | Duke | Brogan | 880 Holmes Lane |
| Nanette | Bates | Nogal | 789 Madison Street |
+-------------+------------+--------+--------------------+
+------+--------------------------+----------------------+
| id | title | author |
|------+--------------------------+----------------------|
| 1 | The House at Pooh Corner | Alan Alexander Milne |
| 2 | Winnie-the-Pooh | Alan Alexander Milne |
+------+--------------------------+----------------------+

Another example to show how to set custom values for the optional parameters::

os> select firstname, lastname, city, address from accounts where simple_query_string(['firstname', city ^ 2], 'Amber Nogal', analyzer=keyword, default_operator='AND');
fetched rows / total rows = 0/0
+-------------+------------+--------+-----------+
| firstname | lastname | city | address |
|-------------+------------+--------+-----------|
+-------------+------------+--------+-----------+
os> select * from books where simple_query_string(['title'], 'Pooh House', flags='ALL', default_operator='AND');
fetched rows / total rows = 1/1
+------+--------------------------+----------------------+
| id | title | author |
|------+--------------------------+----------------------|
| 1 | The House at Pooh Corner | Alan Alexander Milne |
+------+--------------------------+----------------------+
82 changes: 68 additions & 14 deletions docs/user/ppl/functions/relevance.rst
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,60 @@ Another example to show how to set custom values for the optional parameters::
+----------------------+--------------------------+


MULTI_MATCH
-----------

Description
>>>>>>>>>>>

``multi_match([field_expression+], query_expression[, option=<option_value>]*)``

The multi_match function maps to the multi_match query used in the search engine. It returns the documents that match a provided text, number, date or boolean value in a given field or fields.
The **^** lets you *boost* certain fields. Boosts are multipliers that weigh matches in one field more heavily than matches in other fields. Field names can be wrapped in double quotes, single quotes, or backticks, or written without any wrapping. Searching all fields with the star ``"*"`` is also available (the star symbol should be wrapped in quotes). The weight is optional and is specified after the field name; it can be delimited by the `caret` character or by whitespace. Refer to the examples below:

| ``multi_match(["Tags" ^ 2, 'Title' 3.4, `Body`, Comments ^ 0.3], ...)``
| ``multi_match(["*"], ...)``

Available parameters include:

- analyzer
- auto_generate_synonyms_phrase_query
- cutoff_frequency
- fuzziness
- fuzzy_transpositions
- lenient
- max_expansions
- minimum_should_match
- operator
- prefix_length
- tie_breaker
- type
- slop
- boost
- zero_terms_query

Example with only the ``fields`` and ``query`` expressions, with all other parameters set to their default values::

os> source=books | where multi_match(['title'], 'Pooh House');
fetched rows / total rows = 2/2
+------+--------------------------+----------------------+
| id | title | author |
|------+--------------------------+----------------------|
| 1 | The House at Pooh Corner | Alan Alexander Milne |
| 2 | Winnie-the-Pooh | Alan Alexander Milne |
+------+--------------------------+----------------------+

Another example to show how to set custom values for the optional parameters::

os> source=books | where multi_match(['title'], 'Pooh House', operator='AND', analyzer=default);
fetched rows / total rows = 1/1
+------+--------------------------+----------------------+
| id | title | author |
|------+--------------------------+----------------------|
| 1 | The House at Pooh Corner | Alan Alexander Milne |
+------+--------------------------+----------------------+


SIMPLE_QUERY_STRING
-------------------

Expand Down Expand Up @@ -131,24 +185,24 @@ Available parameters include:

Example with only the ``fields`` and ``query`` expressions, with all other parameters set to their default values::

os> source=accounts | where simple_query_string(['firstname', city ^ 2], 'Amber | Nogal') | fields firstname, lastname, city, address;
os> source=books | where simple_query_string(['title'], 'Pooh House');
fetched rows / total rows = 2/2
+-------------+------------+--------+--------------------+
| firstname | lastname | city | address |
|-------------+------------+--------+--------------------|
| Amber | Duke | Brogan | 880 Holmes Lane |
| Nanette | Bates | Nogal | 789 Madison Street |
+-------------+------------+--------+--------------------+
+------+--------------------------+----------------------+
| id | title | author |
|------+--------------------------+----------------------|
| 1 | The House at Pooh Corner | Alan Alexander Milne |
| 2 | Winnie-the-Pooh | Alan Alexander Milne |
+------+--------------------------+----------------------+

Another example to show how to set custom values for the optional parameters::

os> source=accounts | where simple_query_string(['firstname', city ^ 2], 'Amber Nogal', analyzer=keyword, default_operator='AND') | fields firstname, lastname, city, address;
fetched rows / total rows = 0/0
+-------------+------------+--------+-----------+
| firstname | lastname | city | address |
|-------------+------------+--------+-----------|
+-------------+------------+--------+-----------+

os> source=books | where simple_query_string(['title'], 'Pooh House', flags='ALL', default_operator='AND');
fetched rows / total rows = 1/1
+------+--------------------------+----------------------+
| id | title | author |
|------+--------------------------+----------------------|
| 1 | The House at Pooh Corner | Alan Alexander Milne |
+------+--------------------------+----------------------+

Limitations
>>>>>>>>>>>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import static org.opensearch.sql.util.MatcherUtils.verifySome;

import java.io.IOException;
import org.junit.jupiter.api.Test;
import org.junit.Test;

public class RelevanceFunctionIT extends PPLIntegTestCase {
@Override
Expand All @@ -22,20 +22,63 @@ public void init() throws IOException {
}

@Test
public void test1() throws IOException {
public void test_multi_match() throws IOException {
String query = "SOURCE=" + TEST_INDEX_BEER
+ " | WHERE simple_query_string([\\\"Tags\\\" ^ 1.5, Title, `Body` 4.2], 'taste')";
+ " | WHERE multi_match([\\\"Tags\\\" ^ 1.5, Title, `Body` 4.2], 'taste') | fields Id";
var result = executeQuery(query);
assertNotEquals(0, result.getInt("total"));
assertEquals(713, result.getInt("total"));
}

@Test
public void verify_wildcard_test() throws IOException {
public void test_simple_query_string() throws IOException {
String query = "SOURCE=" + TEST_INDEX_BEER
+ " | WHERE simple_query_string([\\\"Tags\\\" ^ 1.5, Title, `Body` 4.2], 'taste') | fields Id";
var result = executeQuery(query);
assertEquals(713, result.getInt("total"));
}

// Runs multi_match with every optional parameter set and expects 507 total hits.
@Test
public void test_multi_match_all_params() throws IOException {
  final String whereClause =
      "multi_match(['Body', Tags], 'taste beer', operator='and', analyzer=english,"
          + "auto_generate_synonyms_phrase_query=true, boost = 0.77, cutoff_frequency=0.33,"
          + "fuzziness = 14, fuzzy_transpositions = false, lenient = true, max_expansions = 25,"
          + "minimum_should_match = '2<-25% 9<-3', prefix_length = 7, tie_breaker = 0.3,"
          + "type = most_fields, slop = 2, zero_terms_query = 'ALL') | fields Id";
  final String ppl = "SOURCE=" + TEST_INDEX_BEER + " | WHERE " + whereClause;
  assertEquals(507, executeQuery(ppl).getInt("total"));
}

// Runs simple_query_string with every optional parameter set and expects 1990 total hits.
@Test
public void test_simple_query_string_all_params() throws IOException {
  final String whereClause =
      "simple_query_string(['Body', Tags, Title], 'taste beer', default_operator='or',"
          + "analyzer=english, analyze_wildcard = false, quote_field_suffix = '.exact',"
          + "auto_generate_synonyms_phrase_query=true, boost = 0.77, flags='PREFIX',"
          + "fuzzy_transpositions = false, lenient = true, fuzzy_max_expansions = 25,"
          + "minimum_should_match = '2<-25% 9<-3', fuzzy_prefix_length = 7) | fields Id";
  final String ppl = "SOURCE=" + TEST_INDEX_BEER + " | WHERE " + whereClause;
  assertEquals(1990, executeQuery(ppl).getInt("total"));
}

// Searching the exact field 'Tags' and the wildcard pattern 'T*' must report different totals.
@Test
public void test_wildcard_multi_match() throws IOException {
  final String source = "SOURCE=" + TEST_INDEX_BEER;
  var exactFieldResponse =
      executeQuery(source + " | WHERE multi_match(['Tags'], 'taste') | fields Id");
  var wildcardResponse =
      executeQuery(source + " | WHERE multi_match(['T*'], 'taste') | fields Id");
  assertNotEquals(wildcardResponse.getInt("total"), exactFieldResponse.getInt("total"));
}

@Test
public void test_wildcard_simple_query_string() throws IOException {
String query1 = "SOURCE=" + TEST_INDEX_BEER
+ " | WHERE simple_query_string(['Tags'], 'taste')";
+ " | WHERE simple_query_string(['Tags'], 'taste') | fields Id";
var result1 = executeQuery(query1);
String query2 = "SOURCE=" + TEST_INDEX_BEER
+ " | WHERE simple_query_string(['T*'], 'taste')";
+ " | WHERE simple_query_string(['T*'], 'taste') | fields Id";
var result2 = executeQuery(query2);
assertNotEquals(result2.getInt("total"), result1.getInt("total"));
}
Expand Down
Loading

0 comments on commit 90ccc3e

Please sign in to comment.