Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove unused simple query string features #3360

Merged
merged 9 commits into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions api/api/controllers/search_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import logging
import logging as log
import re
from math import ceil
from typing import TYPE_CHECKING

Expand Down Expand Up @@ -58,6 +59,13 @@
QUERY_SPECIAL_CHARACTER_ERROR = "Unescaped special characters are not allowed."
DEFAULT_BOOST = 10000
DEFAULT_SEARCH_FIELDS = ["title", "description", "tags.name"]
DEFAULT_SQS_FLAGS = "AND|NOT|PHRASE|WHITESPACE"
UNUSED_SQS_FLAGS = [
("PRECEDENCE", r"\(.*\)"),
("ESCAPE", r"\\"),
("FUZZY|SLOP", r"~\d"),
("PREFIX", r"\*"),
]


def _quote_escape(query_string):
Expand Down Expand Up @@ -287,8 +295,11 @@ def build_search_query(
# individual field-level queries specified.
if "q" in search_params.data:
query = _quote_escape(search_params.data["q"])
log_query_features(query, query_name="q")

base_query_kwargs = {
"query": query,
"flags": DEFAULT_SQS_FLAGS,
"fields": DEFAULT_SEARCH_FIELDS,
"default_operator": "AND",
}
Expand All @@ -301,6 +312,7 @@ def build_search_query(
quotes_stripped = query.replace('"', "")
exact_match_boost = Q(
"simple_query_string",
flags=DEFAULT_SQS_FLAGS,
fields=["title"],
query=f"{quotes_stripped}",
boost=10000,
Expand All @@ -313,9 +325,11 @@ def build_search_query(
("tags", "tags.name"),
]:
if field_value := search_params.data.get(field):
log_query_features(field_value, query_name="field")
search_queries["must"].append(
Q(
"simple_query_string",
flags=DEFAULT_SQS_FLAGS,
query=_quote_escape(field_value),
fields=[field_name],
)
Expand All @@ -339,6 +353,22 @@ def build_search_query(
)


def log_query_features(query: str, query_name) -> None:
query_flags = []
for flag, pattern in UNUSED_SQS_FLAGS:
if bool(re.search(pattern, query)):
query_flags.append(flag)
if query_flags:
log.info(
{
"log_message": "Special features present in query",
"query_name": query_name,
"query": query,
"flags": query_flags,
}
)


def build_collection_query(
search_params: MediaListRequestSerializer,
collection_params: dict[str, str],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from api.controllers import search_controller
from api.controllers.search_controller import (
DEFAULT_SQS_FLAGS,
FILTERED_PROVIDERS_CACHE_KEY,
FILTERED_PROVIDERS_CACHE_VERSION,
)
Expand Down Expand Up @@ -71,6 +72,7 @@ def test_create_search_query_q_search_no_filters(media_type_config):
"default_operator": "AND",
"fields": ["title", "description", "tags.name"],
"query": "cat",
"flags": DEFAULT_SQS_FLAGS,
}
}
],
Expand All @@ -80,14 +82,15 @@ def test_create_search_query_q_search_no_filters(media_type_config):
"boost": 10000,
"fields": ["title"],
"query": "cat",
"flags": DEFAULT_SQS_FLAGS,
}
},
{"rank_feature": {"boost": 10000, "field": "standardized_popularity"}},
],
}


def test_create_search_query_q_search_with_quotes_adds_exact_suffix(media_type_config):
def test_create_search_query_q_search_with_quotes_adds_raw_suffix(media_type_config):
serializer = media_type_config.search_request_serializer(
data={"q": '"The cutest cat"'}
)
Expand All @@ -104,6 +107,7 @@ def test_create_search_query_q_search_with_quotes_adds_exact_suffix(media_type_c
"fields": ["title", "description", "tags.name"],
"query": '"The cutest cat"',
"quote_field_suffix": ".raw",
"flags": DEFAULT_SQS_FLAGS,
}
}
],
Expand All @@ -113,6 +117,7 @@ def test_create_search_query_q_search_with_quotes_adds_exact_suffix(media_type_c
"boost": 10000,
"fields": ["title"],
"query": "The cutest cat",
"flags": DEFAULT_SQS_FLAGS,
}
},
{"rank_feature": {"boost": 10000, "field": "standardized_popularity"}},
Expand Down Expand Up @@ -152,6 +157,7 @@ def test_create_search_query_q_search_with_filters(image_media_type_config):
"default_operator": "AND",
"fields": ["title", "description", "tags.name"],
"query": "cat",
"flags": DEFAULT_SQS_FLAGS,
}
}
],
Expand All @@ -161,6 +167,7 @@ def test_create_search_query_q_search_with_filters(image_media_type_config):
"boost": 10000,
"fields": ["title"],
"query": "cat",
"flags": DEFAULT_SQS_FLAGS,
}
},
{"rank_feature": {"boost": 10000, "field": "standardized_popularity"}},
Expand Down Expand Up @@ -188,10 +195,23 @@ def test_create_search_query_non_q_query(image_media_type_config):
"simple_query_string": {
"fields": ["creator"],
"query": "Artist From Openverse",
"flags": DEFAULT_SQS_FLAGS,
}
},
{
"simple_query_string": {
"fields": ["title"],
"query": "kitten🐱",
"flags": DEFAULT_SQS_FLAGS,
}
},
{
"simple_query_string": {
"fields": ["tags.name"],
"query": "cute",
"flags": DEFAULT_SQS_FLAGS,
}
},
{"simple_query_string": {"fields": ["title"], "query": "kitten🐱"}},
{"simple_query_string": {"fields": ["tags.name"], "query": "cute"}},
],
"should": [
{"rank_feature": {"boost": 10000, "field": "standardized_popularity"}},
Expand Down
72 changes: 8 additions & 64 deletions frontend/src/locales/scripts/en.json5
Original file line number Diff line number Diff line change
Expand Up @@ -177,70 +177,14 @@
title: "Search for an exact match",
ariaLabel: "quote unquote Claude Monet",
claudeMonet: '"Claude Monet"',
content: "Put a word or phrase inside quotes. For example, {link}.",
},
combine: {
title: "Combining terms",
description: "If you want to combine terms, you can use the following operators to perform more complex queries",
and: "{symbol} signifies AND operation",
or: "{symbol} signifies OR operation",
not: "{symbol} negates a single token",
prefix: "{symbol} at the end of a term signifies a prefix query",
precedence: "{symbol} signify precedence",
fuzziness: "{symbol} after a word signifies edit distance (fuzziness)",
ariaLabels: {
fuzziness: "tilde N",
open: "open parenthesis",
close: "close parenthesis",
star: "star symbol",
not: "minus symbol",
and: "plus symbol",
or: "vertical bar symbol",
prefix: "asterisk symbol",
verticalBar: "vertical bar symbol",
precedence: "parentheses",
},
},
example: {
and: {
description: "Example: {link}{br} This will search for images related to both dog and cat.",
ariaLabel: "dog plus cat",
example: "dog+cat",
},
or: {
description: "Example: {link}{br} This will search for images related to dog or cat, but not necessarily both.",
ariaLabel: "dog vertical bar cat",
example: "dog|cat",
},
negate: {
description: "You can use the {operator} to exclude a search term from the results.",
operatorName: "operator (signifies NOT)",
operatorAriaLabel: "minus operator (signifies NOT)",
ariaLabel: "dog minus pug",
example: "dog -pug",
content: "Example: {link}{br} This will search for images related to dog but won't include results related to 'pug'",
},
prefix: {
description: "You can use the {operatorName} to mark a prefix term. This will match anything after the *.",
operatorName: "operator (wildcard)",
operatorAriaLabel: "star operator (wildcard)",
ariaLabel: "net star symbol",
example: "net*",
content: "Example: {link}{br} This will search for images matching anything with 'net'. This might include 'network', 'Netflix', 'Netherlands', etc.",
},
precedence: {
description: "You can use parentheses {highlight} to specify precedence of terms or combine more complex queries.",
ariaLabel: "dogs plus open parenthesis corgis vertical bar labrador close parenthesis",
example: "dogs + (corgis | labrador)",
content: "Example: {link}{br} This will search for images that match dogs that are either corgis or labrador.",
},
fuzziness: {
description: "You can use {highlight} to specify some fuzzy logic to the term according to the {link} — the number of one character changes that need to be made to one string to make it the same as another string.",
linkText: "Levenshtein Edit Distance",
ariaLabel: "theatre tilde 1",
example: "theatre~1",
content: "Example: {link}{br} This will search for images that match strings close to the term 'theatre' with a difference of one character. Results might include terms with different spellings like 'theater'.",
},
content: "To search for an exact word or phrase, put it inside quotes. For example, {link}.",
},
negate: {
title: "Excluding terms",
operatorName: "minus operator",
ariaLabel: "dog minus pug",
example: "dog -pug",
content: 'To exclude a term from your results, put the {operator} in front of it. Example: {link}{br} This will search for media related to "dog" but won\'t include results related to "pug".',
},
},
feedback: {
Expand Down
Loading
Loading