Skip to content

Commit

Permalink
[#136] Number of matching records for term-based search (#217)
Browse files Browse the repository at this point in the history
* [#136] number of matching records for term-based search

* [#136] get search matching record count for filter-based search

* fix: use facet stage to compute total count

* fix: handle empty search results

* refactor: base search aggregate stages

* [#136] code formatting

* chore: code cleanup

---------

Co-authored-by: Maurier <[email protected]>
  • Loading branch information
pkdash and Maurier authored Sep 4, 2024
1 parent 3954b7a commit 2c7fcb0
Showing 1 changed file with 22 additions and 25 deletions.
47 changes: 22 additions & 25 deletions dspback/routers/discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from fuzzywuzzy import fuzz

from dspback.config import get_settings
from dspback.schemas.discovery import DiscoveryResult, PathEnum, TypeAhead
from dspback.schemas.discovery import PathEnum, TypeAhead

router = APIRouter()

Expand All @@ -33,6 +33,20 @@ def is_one_char_off(str1, str2):
return True


async def aggregate_stages(request, stages, pageNumber=1, pageSize=30):
    """Run a search pipeline and return one page of documents plus the total match count.

    Args:
        request: Incoming request; the database handle is read from ``request.app.db``.
        stages: Aggregation stages that select (and optionally sort) the matching
            documents. The list is NOT modified, so callers can adjust it and call
            this function again (as the fuzzy-feedback fallback does).
        pageNumber: 1-based page index used to compute the ``$skip`` offset.
        pageSize: Maximum number of documents returned in ``docs``.

    Returns:
        ``{"docs": [...], "meta": {"count": {"total": N}}}`` when the facet reports
        a total count, otherwise just ``{"docs": [...]}``.
    """
    # A single `$facet` stage both paginates the documents and counts every match,
    # so the total is obtained in the same round trip as the page itself.
    facet_stage = {
        "$facet": {
            "docs": [{"$skip": (pageNumber - 1) * pageSize}, {"$limit": pageSize}],
            "totalCount": [{"$count": 'count'}],
        }
    }

    # Build a fresh pipeline instead of appending to `stages`: appending in place
    # would leave the `$facet` stage in the caller's list, and a second call with
    # the same list would then run two `$facet` stages and return garbage.
    pipeline = [*stages, facet_stage]

    collection = request.app.db[get_settings().mongo_database]["discovery"]
    aggregation = await collection.aggregate(pipeline).to_list(None)
    facet_result = aggregation[0]

    # `totalCount` is empty when nothing matched; in that case no `meta` is reported.
    count_docs = facet_result["totalCount"]
    total_count = count_docs[0]["count"] if count_docs else None

    if total_count is None:
        return {"docs": facet_result["docs"]}

    return {"docs": facet_result["docs"], "meta": {"count": {"total": total_count}}}


@router.get("/search")
async def search(
request: Request,
Expand All @@ -55,13 +69,10 @@ async def search(
creatorName,
dataCoverageEnd,
dataCoverageStart,
pageNumber,
pageSize,
providerName,
publishedEnd,
publishedStart,
sortBy,
term,
)

compound = {}
Expand Down Expand Up @@ -91,8 +102,7 @@ async def search(
score_threshold = get_settings().search_relevance_score_threshold
stages.append({'$match': {'score': {'$gt': score_threshold}}})

results = await request.app.db[get_settings().mongo_database]["discovery"].aggregate(stages).to_list(pageSize)
return results
return await aggregate_stages(request, stages, pageNumber, pageSize)


@router.get("/search/fuzzy")
Expand All @@ -117,13 +127,10 @@ async def search(
creatorName,
dataCoverageEnd,
dataCoverageStart,
pageNumber,
pageSize,
providerName,
publishedEnd,
publishedStart,
sortBy,
term,
)

should = [{'autocomplete': {'query': term, 'path': key, 'fuzzy': {'maxEdits': 1}}} for key in search_paths]
Expand All @@ -139,9 +146,8 @@ async def search(
},
)

results = await request.app.db[get_settings().mongo_database]["discovery"].aggregate(stages).to_list(pageSize)
return await aggregate_stages(request, stages, pageNumber, pageSize)

return results


@router.get("/search/fuzzy/feedback")
Expand All @@ -166,13 +172,10 @@ async def search_fuzzy_feedback(
creatorName,
dataCoverageEnd,
dataCoverageStart,
pageNumber,
pageSize,
providerName,
publishedEnd,
publishedStart,
sortBy,
term,
)

should = [{'autocomplete': {'query': term, 'path': key}} for key in search_paths]
Expand All @@ -188,15 +191,15 @@ async def search_fuzzy_feedback(
},
)

results = await request.app.db[get_settings().mongo_database]["discovery"].aggregate(stages).to_list(pageSize)
results = await aggregate_stages(request, stages, pageNumber, pageSize)

if len(results) == 0:
if len(results["docs"]) == 0:
fuzzy_should = [
{'autocomplete': {'query': term, 'path': key, 'fuzzy': {'maxEdits': 1}}} for key in search_paths
]
stages[0]['$search']['compound']['should'] = fuzzy_should
results = await request.app.db[get_settings().mongo_database]["discovery"].aggregate(stages).to_list(pageSize)
result_hits = await determine_fuzzy_result_terms(results, term)
results = await aggregate_stages(request, stages, pageNumber, pageSize)
result_hits = await determine_fuzzy_result_terms(results["docs"], term)
return {"results": results, "fuzzy_search_terms": result_hits}

return {"results": results, "fuzzy_search_terms": {}}
Expand All @@ -208,13 +211,10 @@ async def base_search(
creatorName,
dataCoverageEnd,
dataCoverageStart,
pageNumber,
pageSize,
providerName,
publishedEnd,
publishedStart,
sortBy,
term,
):
search_paths = PathEnum.values()
must = []
Expand Down Expand Up @@ -258,10 +258,7 @@ async def base_search(
stages.append({'$sort': {"name": 1}})
if sortBy == "dateCreated":
stages.append({'$sort': {"dateCreated": -1}})
stages.append({'$skip': (pageNumber - 1) * pageSize})
stages.append(
{'$limit': pageSize},
)

stages.append({'$unset': ['_id']})
stages.append(
{'$set': {'score': {'$meta': 'searchScore'}, 'highlights': {'$meta': 'searchHighlights'}}},
Expand Down

0 comments on commit 2c7fcb0

Please sign in to comment.