From 4407e62dcb495fdc38e897f559212d4c00c7b66a Mon Sep 17 00:00:00 2001
From: aidenlx
Date: Sat, 30 Apr 2022 10:33:12 +0800
Subject: [PATCH] Improve simplified Chinese text search result

---
 src/search.ts | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

NOTE(review): the generic type arguments in the context lines below
(`Record = {}`, `Promise {`, `MiniSearch`) and the author e-mail address
look like they were stripped when this patch was extracted. Restore them
from the target src/search.ts before applying. The post-image blob hash on
the `index` line still refers to the originally committed change.

diff --git a/src/search.ts b/src/search.ts
index 94cb4de..087e09a 100644
--- a/src/search.ts
+++ b/src/search.ts
@@ -12,6 +12,30 @@ let minisearchInstance: MiniSearch
 
 let indexedNotes: Record = {}
 
+/** Matches any character in the range U+4E00-U+9FA5 (CJK unified ideographs). */
+const chsPattern = /[\u4e00-\u9fa5]/
+
+/**
+ * Tokenizer handed to MiniSearch: splits `text` on SPACE_OR_PUNCTUATION and,
+ * when the `cm-chs-patch` plugin exposes a `cut` function, re-segments every
+ * token that contains a character matched by `chsPattern`. All other tokens,
+ * and every token when the plugin is unavailable, are returned unchanged.
+ */
+const tokenize = (text: string): string[] => {
+  const tokens = text.split(SPACE_OR_PUNCTUATION)
+  // Resolved on each call instead of once at module load.
+  const chsSegmenter = (app as any).plugins.plugins['cm-chs-patch']
+
+  // A plugin entry without a callable `cut` must not break indexing.
+  if (!chsSegmenter || typeof chsSegmenter.cut !== 'function') {
+    return tokens
+  }
+
+  return tokens.flatMap(word =>
+    chsPattern.test(word) ? chsSegmenter.cut(word) : [word],
+  )
+}
+
 /**
  * Initializes the MiniSearch instance,
  * and adds all the notes to the index
@@ -19,7 +43,7 @@ let indexedNotes: Record = {}
 export async function initGlobalSearchIndex(): Promise {
   indexedNotes = {}
   minisearchInstance = new MiniSearch({
-    tokenize: text => text.split(SPACE_OR_PUNCTUATION),
+    tokenize,
     idField: 'path',
     fields: ['basename', 'content', 'headings1', 'headings2', 'headings3'],
   })