Skip to content

Commit

Permalink
feat: prioritize kana matches when searching with kana
Browse files Browse the repository at this point in the history
Fixes #1610 and fixes #1657.
  • Loading branch information
birtles committed Apr 2, 2024
1 parent 9d1b72e commit 6f4cae8
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 39 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ app.
elements
thanks to [@StarScape](https://github.com/StarScape)
([#1678](https://github.com/birchill/10ten-ja-reader/issues/1678)).
- When searching using just kana (e.g. し and ヤク), kana matches are now
prioritized
([#1610](https://github.com/birchill/10ten-ja-reader/issues/1610)
[#1657](https://github.com/birchill/10ten-ja-reader/issues/1657)).

## [1.18.0] - 2024-02-26

Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
"dependencies": {
"@birchill/bugsnag-zero": "0.6.8",
"@birchill/discriminator": "0.3.0",
"@birchill/jpdict-idb": "2.3.2",
"@birchill/jpdict-idb": "2.4.0",
"@birchill/normal-jp": "1.5.0",
"classname-variants": "1.3.3",
"husky": "9.0.11",
Expand Down
4 changes: 2 additions & 2 deletions src/background/flat-file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import { stripFields } from '../utils/strip-fields';
import { Overwrite } from '../utils/type-helpers';

import { DictionaryWordResult, Sense } from './search-result';
import { sortMatchesByPriority } from './word-match-sorting';
import { sortWordResults } from './word-match-sorting';

interface FlatFileDatabaseOptions {
// Although the v7 API of bugsnag-js can operate on a singleton client we
Expand Down Expand Up @@ -199,7 +199,7 @@ class FlatFileDatabase {
}

// Sort before capping the number of results
sortMatchesByPriority(result);
sortWordResults(result);
result.splice(maxResults);

return result;
Expand Down
77 changes: 73 additions & 4 deletions src/background/word-match-sorting.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,82 @@ import { WordResult } from './search-result';

// As with Array.prototype.sort, sorts `results` in-place, but returns the
// result to support chaining.
export function sortMatchesByPriority(
results: Array<WordResult>
): Array<WordResult> {
results.sort((a, b) => getPriority(b) - getPriority(a));
/**
 * Orders word results so the most relevant headword matches come first.
 *
 * Entries are ranked by headword match type first and then, within the same
 * type, by descending priority score.
 *
 * Headword match types:
 *
 *   1 = match on a kanji, or kana which is not just the reading for a kanji
 *   2 = match on a kana reading for a kanji
 *
 * @param results - The entries to sort. The array is sorted in-place.
 * @returns The same `results` array, to support chaining.
 */
export function sortWordResults(results: Array<WordResult>): Array<WordResult> {
  // Work out the rank of every entry once up-front so that the comparator only
  // has to perform a lookup.
  const ranking = new Map<number, { priority: number; type: number }>();

  results.forEach((entry) => {
    // An entry with no matched kana reading is treated as type 1.
    const matchedReading = entry.r.find(
      (reading: WordResult['r'][number]) => !!reading.matchRange
    );
    const type = matchedReading
      ? getKanaHeadwordType(matchedReading, entry)
      : 1;

    ranking.set(entry.id, { priority: getPriority(entry), type });
  });

  return results.sort((a, b) => {
    const { type: typeA, priority: priorityA } = ranking.get(a.id)!;
    const { type: typeB, priority: priorityB } = ranking.get(b.id)!;

    // Lower type first; ties are broken by higher priority first.
    return typeA - typeB || priorityB - priorityA;
  });
}

/**
 * Classifies a matched kana reading as either a primary headword in its own
 * right or merely the reading of a kanji headword.
 *
 * @param r - The matched reading to classify.
 * @param result - The entry that `r` belongs to.
 * @returns 1 if the reading should be treated as a primary headword, 2 if it
 * is just a reading for a kanji headword.
 */
function getKanaHeadwordType(
  r: WordResult['r'][number],
  result: WordResult
): 1 | 2 {
  const obscureReadingTags: ReadonlyArray<string> = ['ok', 'rk', 'sk', 'ik'];
  const obscureKanjiTags: ReadonlyArray<string> = ['rK', 'sK', 'iK'];

  // Readings tagged `ok`, `rk`, `sk` or `ik` are never promoted. Otherwise a
  // search for `ひ` would end up prioritizing words like `檜` and `羆`.
  if (r.i?.some((tag: string) => obscureReadingTags.includes(tag))) {
    return 2;
  }

  // From here on, the kana headword is type 1 (a primary headword) when any of
  // the following hold.

  // (a) The entry has no kanji headwords, or every kanji headword is tagged
  //     `rK`, `sK` or `iK`.
  const hasOnlyObscureKanji =
    !result.k.length ||
    result.k.every((kanji: WordResult['k'][number]) =>
      kanji.i?.some((tag: string) => obscureKanjiTags.includes(tag))
    );
  if (hasOnlyObscureKanji) {
    return 1;
  }

  // (b) Every sense of the entry carries a `uk` (usually kana) `misc` field.
  //
  //     Requiring just one `uk` sense would be too lax: words like `梓` would
  //     be prioritized when searching for `し` because one sense out of many
  //     is usually written in kana.
  const everySenseIsUsuallyKana = result.s.every(
    (sense: WordResult['s'][number]) => sense.misc?.includes('uk')
  );
  if (everySenseIsUsuallyKana) {
    return 1;
  }

  // (c) The headword is marked as `nokanji` (i.e. `app` is 0).
  if (r.app === 0) {
    return 1;
  }

  return 2;
}

function getPriority(result: WordResult): number {
const scores: Array<number> = [0];

Expand Down
39 changes: 7 additions & 32 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,10 @@
resolved "https://registry.yarnpkg.com/@birchill/discriminator/-/discriminator-0.3.0.tgz#77d09d754e1ed14fc6fbc94af16724bc4f46d97b"
integrity sha512-IXkLc1FZLSjFUcPYUnAdg+AWPHC9vPz8hS2IDwPAvnAipeNKIkzQ/B4WY5z14UO1Ex0aOdlHtgwUUXc/Ngkhyg==

"@birchill/jpdict-idb@2.3.2":
version "2.3.2"
resolved "https://registry.yarnpkg.com/@birchill/jpdict-idb/-/jpdict-idb-2.3.2.tgz#856fd9ee3b658d011d26e614214dfd92b6aa82b5"
integrity sha512-2qryn/VIVw3Aj2XUTY7wjDwpMw5ZRmtPC5lb6XEew2eOOXT8VYX4jKkK1Ebe0S7qMVA4Fc/O+XdLF8YBMvqswA==
"@birchill/jpdict-idb@2.4.0":
version "2.4.0"
resolved "https://registry.yarnpkg.com/@birchill/jpdict-idb/-/jpdict-idb-2.4.0.tgz#565356d98a2cd57c263d5fe2a0a7f62b6a4ad302"
integrity sha512-9pKQGLvB7rdtV8CZNClG6z50kU6NnJ/4gRSCQCDI/r3K7pxv2SVjtTbiXlGr3PJt2NZ6zVrneX1qp9xO/prwow==
dependencies:
"@birchill/json-equalish" "^1.1.2"
"@birchill/normal-jp" "^1.5.0"
Expand Down Expand Up @@ -8796,16 +8796,7 @@ string-template@^1.0.0:
resolved "https://registry.yarnpkg.com/string-template/-/string-template-1.0.0.tgz#9e9f2233dc00f218718ec379a28a5673ecca8b96"
integrity sha512-SLqR3GBUXuoPP5MmYtD7ompvXiG87QjT6lzOszyXjTM86Uu7At7vNnt2xgyTLq5o9T4IxTYFyGxcULqpsmsfdg==

"string-width-cjs@npm:string-width@^4.2.0":
version "4.2.3"
resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
dependencies:
emoji-regex "^8.0.0"
is-fullwidth-code-point "^3.0.0"
strip-ansi "^6.0.1"

string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3:
"string-width-cjs@npm:string-width@^4.2.0", string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3:
version "4.2.3"
resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
Expand Down Expand Up @@ -8887,14 +8878,7 @@ string_decoder@~1.1.1:
dependencies:
safe-buffer "~5.1.0"

"strip-ansi-cjs@npm:strip-ansi@^6.0.1":
version "6.0.1"
resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9"
integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==
dependencies:
ansi-regex "^5.0.1"

strip-ansi@^6.0.0, strip-ansi@^6.0.1:
"strip-ansi-cjs@npm:strip-ansi@^6.0.1", strip-ansi@^6.0.0, strip-ansi@^6.0.1:
version "6.0.1"
resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9"
integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==
Expand Down Expand Up @@ -10136,7 +10120,7 @@ [email protected]:
resolved "https://registry.yarnpkg.com/workerpool/-/workerpool-6.2.1.tgz#46fc150c17d826b86a008e5a4508656777e9c343"
integrity sha512-ILEIE97kDZvF9Wb9f6h5aXK4swSlKGUcOEGiIYb2OOu/IrDU9iwj0fD//SsA6E5ibwJxpEvhullJY4Sl4GcpAw==

"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0":
"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0:
version "7.0.0"
resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
Expand All @@ -10154,15 +10138,6 @@ wrap-ansi@^6.2.0:
string-width "^4.1.0"
strip-ansi "^6.0.0"

wrap-ansi@^7.0.0:
version "7.0.0"
resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
dependencies:
ansi-styles "^4.0.0"
string-width "^4.1.0"
strip-ansi "^6.0.0"

wrap-ansi@^8.1.0:
version "8.1.0"
resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-8.1.0.tgz#56dc22368ee570face1b49819975d9b9a5ead214"
Expand Down

0 comments on commit 6f4cae8

Please sign in to comment.