From dd2768c3176c72a2bbc5a8cbf18f868ffe1c6910 Mon Sep 17 00:00:00 2001
From: Jason Dent
Date: Sun, 12 Jan 2025 12:07:07 +0100
Subject: [PATCH] fix: improve how words are sorted

fixes #4033
---
 .../client/src/settings/DictionaryTarget.mts  |   2 +-
 .../client/src/settings/configUpdaters.mts    |  25 ++-
 packages/client/src/settings/wordList.mts     | 165 ++++++++++++++++++
 .../client/src/settings/wordList.test.mts     | 105 +++++++++++
 samples/custom-dictionary/README.md           |   7 +
 samples/custom-dictionary/words.txt           |  35 ++++
 6 files changed, 324 insertions(+), 15 deletions(-)
 create mode 100644 packages/client/src/settings/wordList.mts
 create mode 100644 packages/client/src/settings/wordList.test.mts

diff --git a/packages/client/src/settings/DictionaryTarget.mts b/packages/client/src/settings/DictionaryTarget.mts
index ef2cf82a10..3706704889 100644
--- a/packages/client/src/settings/DictionaryTarget.mts
+++ b/packages/client/src/settings/DictionaryTarget.mts
@@ -96,7 +96,7 @@ async function updateWordInCustomDictionary(updateFn: (words: string[]) => strin
     await ensureFileExists(dict.uri);
     const doc = await workspace.openTextDocument(dict.uri);
     const data = doc.getText();
-    const lines = updateFn(data.split(/\r?\n/g).filter((a) => !!a));
+    const lines = updateFn(data.split(/\r?\n/g));
     const text = lines.join('\n').trim() + '\n';
     const success = await replaceDocText(doc, text);
     if (!success) {
diff --git a/packages/client/src/settings/configUpdaters.mts b/packages/client/src/settings/configUpdaters.mts
index 307bb2d5bc..d945444fec 100644
--- a/packages/client/src/settings/configUpdaters.mts
+++ b/packages/client/src/settings/configUpdaters.mts
@@ -1,5 +1,6 @@
 import type { ConfigUpdater } from './configUpdater.mjs';
 import { configUpdaterForKey } from './configUpdater.mjs';
+import { createWordListFromLines } from './wordList.mjs';
 
 export function updaterAddWords(words: string[]): ConfigUpdater<'words'> {
     return configUpdaterForKey('words', addWordsFn(words));
@@ -10,23 +11,19 @@ export function updaterRemoveWords(words: string[]): ConfigUpdater<'words'> {
 }
 
 export function addWordsFn(words: string[] | undefined = []): (lines: string[] | undefined) => string[] {
-    return (lines) => sortWords([...new Set((lines || []).concat(words))]);
+    return (lines) => {
+        const wordList = createWordListFromLines(lines || []);
+        wordList.addWords(words);
+        wordList.sort();
+        return wordList.toString().split('\n').slice(0, -1);
+    };
 }
 
 export function removeWordsFn(words: string[]): (lines: string[] | undefined) => string[] {
     return (lines) => {
-        const current = new Set(lines || []);
-        for (const w of words) {
-            current.delete(w);
-        }
-        return sortWords([...current]);
+        const wordList = createWordListFromLines(lines || []);
+        wordList.removeWords(words);
+        wordList.sort();
+        return wordList.toString().split('\n').slice(0, -1);
     };
 }
-
-function sortWords(words: string[]): string[] {
-    return words.sort(compare);
-}
-
-function compare(a: string, b: string): number {
-    return a.localeCompare(b);
-}
diff --git a/packages/client/src/settings/wordList.mts b/packages/client/src/settings/wordList.mts
new file mode 100644
--- /dev/null
+++ b/packages/client/src/settings/wordList.mts
@@ -0,0 +1,165 @@
+export const commentPrefix = '#';
+
+export interface WordListEntry {
+    word?: string | undefined;
+    comment?: string | undefined;
+}
+
+export interface WordListWordEntry extends WordListEntry {
+    word: string;
+}
+
+export interface WordListHeaderEntry extends WordListEntry {
+    /** the word is empty, either '' or undefined */
+    word?: string | undefined;
+    comment: string;
+}
+
+export interface WordList {
+    addWords(words: (string | WordListEntry)[]): void;
+    removeWords(words: string[]): void;
+    readonly words: string[];
+    readonly entries: WordListEntry[];
+    /**
+     * Sort the words in the list and removes duplicates.
+     * Sections are separated by headers.
+     */
+    sort(): void;
+    toString(): string;
+}
+
+export function createWordList(content: string): WordList {
+    const lines = content.split('\n');
+    return createWordListFromLines(lines);
+}
+
+export function createWordListFromLines(lines: string[]): WordList {
+    return new WordListImpl(lines);
+}
+
+class WordListImpl implements WordList {
+    private _entries: WordListEntry[] = [];
+
+    constructor(lines: string[]) {
+        this._entries = lines.map(lineToEntry);
+    }
+
+    get words(): string[] {
+        return this._entries.filter(isWordListWord).map((e) => e.word);
+    }
+
+    get entries(): (WordListEntry | WordListHeaderEntry)[] {
+        return this._entries;
+    }
+
+    addWords(words: (string | WordListEntry | WordListHeaderEntry)[]): void {
+        const entries = words.map((w) => (typeof w === 'string' ? lineToEntry(w) : w));
+        this._entries.push(...entries);
+    }
+
+    removeWords(words: string[]): void {
+        const toRemove = new Set(words);
+        this._entries = this._entries.filter((w) => !w.word || !toRemove.has(w.word));
+    }
+
+    sort(): void {
+        // Shared across sections: a word without a comment is kept only the first time it is seen.
+        const knownWords = new Set<string>();
+
+        const sections = wordListEntriesToSections(this._entries);
+
+        for (const section of sections) {
+            section.words.sort((a, b) => a.word.localeCompare(b.word));
+            section.words = section.words.filter((w) => {
+                if (knownWords.has(w.word) && !w.comment) return false;
+                knownWords.add(w.word);
+                return true;
+            });
+        }
+
+        this._entries = sectionsToEntries(sections);
+    }
+
+    toString(): string {
+        const sections = wordListEntriesToSections(this._entries);
+        const s = sections.map((s) => sectionToString(s)).join('');
+        return s.endsWith('\n\n') ? s.slice(0, -1) : s;
+    }
+}
+
+/**
+ * Parse a single line into an entry.
+ * Everything from the first `#` to the end of the line is the comment, including any later `#`
+ * (for example `# see #4033`). `split('#', 2)` would silently drop the text after a second `#`.
+ */
+function lineToEntry(line: string): WordListWordEntry | WordListHeaderEntry {
+    const idx = line.indexOf(commentPrefix);
+    if (idx < 0) {
+        return { word: line.trim() };
+    }
+    const text = line.slice(0, idx);
+    const comment = commentPrefix + line.slice(idx + commentPrefix.length).trimEnd();
+    if (!text) {
+        return { comment };
+    }
+    // An indented comment has no word; ' ' makes it a word entry rather than a section header.
+    return { word: text.trim() || ' ', comment };
+}
+
+interface WordListSection {
+    header: WordListHeaderEntry | undefined;
+    words: WordListWordEntry[];
+    hasEmptyLines: boolean;
+}
+
+function wordListEntriesToSections(entries: WordListEntry[]): WordListSection[] {
+    const sections: WordListSection[] = [];
+    let currentSection: WordListSection = { header: undefined, words: [], hasEmptyLines: false };
+    for (const entry of entries) {
+        if (isWordListHeader(entry)) {
+            // Keep words that appear before the first header; only an empty leading section is dropped.
+            if (currentSection.header || currentSection.words.length) {
+                sections.push(currentSection);
+            }
+            currentSection = { header: entry, words: [], hasEmptyLines: false };
+            continue;
+        }
+        if (!isWordListWord(entry)) {
+            currentSection.hasEmptyLines = true;
+            continue; // skip empty lines.
+        }
+        currentSection.words.push(entry);
+    }
+    sections.push(currentSection);
+    return sections;
+}
+
+function sectionsToEntries(sections: WordListSection[]): WordListEntry[] {
+    return sections.flatMap((s) => (s.header ? [s.header, ...s.words, ...(s.hasEmptyLines ? [{ word: '' }] : [])] : s.words));
+}
+
+function isWordListHeader(entry: WordListEntry): entry is WordListHeaderEntry {
+    return !!entry.comment && !entry.word;
+}
+
+function isWordListWord(entry: WordListEntry): entry is WordListWordEntry {
+    return !!entry.word;
+}
+
+function sectionToString(section: WordListSection): string {
+    const header = wordListHeaderToString(section.header);
+    const words = section.words.map((w) => wordEntryToString(w)).join('');
+    const sep = section.hasEmptyLines || section.words.length ? '\n' : '';
+    return header + words + sep;
+}
+
+function wordListHeaderToString(header: WordListHeaderEntry | undefined): string {
+    if (header?.comment) {
+        return header.comment + '\n';
+    }
+    return '';
+}
+
+function wordEntryToString(entry: WordListWordEntry): string {
+    return entry.word + (entry.comment ? ' ' + entry.comment : '') + '\n';
+}
diff --git a/packages/client/src/settings/wordList.test.mts b/packages/client/src/settings/wordList.test.mts
new file mode 100644
--- /dev/null
+++ b/packages/client/src/settings/wordList.test.mts
@@ -0,0 +1,105 @@
+import { describe, expect, test } from 'vitest';
+
+import { createWordList } from './wordList.mjs';
+
+describe('wordList', () => {
+    test('createWordList sampleWordListFormatted does not change.', () => {
+        const wordList = createWordList(sampleWordListFormatted());
+        wordList.sort();
+        expect(wordList.toString()).toBe(sampleWordListFormatted());
+    });
+
+    test('sort', () => {
+        const wordList = createWordList(sampleUnsortedWordListFormatted());
+        wordList.sort();
+        expect(wordList.toString()).toBe(sampleWordListFormatted());
+    });
+
+    test('addWords', () => {
+        const wordList = createWordList(sampleWordListFormatted());
+        wordList.addWords(['red', 'blue', 'green', 'white']);
+        wordList.sort();
+        expect(wordList.toString()).toBe(sampleWordListFormatted() + 'white\n');
+        expect(wordList.words).toContain('white');
+        expect(wordList.entries).toEqual(expect.arrayContaining([{ word: 'white' }, { word: 'cherry', comment: '# a small fruit' }]));
+    });
+
+    test('removeWords', () => {
+        const wordList = createWordList(sampleWordListFormatted());
+        wordList.removeWords(['red', 'blue', 'orange']);
+        wordList.sort();
+        expect(wordList.toString()).toBe(sampleWordListFormatted().replaceAll(/^(orange|red|blue)\b.*\n/gm, ''));
+    });
+
+    test('words before the first header are kept', () => {
+        const wordList = createWordList('banana\napple\n# Colors\nred\n');
+        wordList.sort();
+        expect(wordList.toString()).toBe('apple\nbanana\n\n# Colors\nred\n');
+    });
+
+    test('comments containing # are kept intact', () => {
+        const wordList = createWordList('# see issue #4033\nterm # C# keyword\n');
+        wordList.sort();
+        expect(wordList.toString()).toBe('# see issue #4033\nterm # C# keyword\n');
+    });
+});
+
+function sampleWordListFormatted() {
+    return `\
+# This is a list of terms used by our project.
+# Please add terms into the appropriate section.
+# they will get automatically sorted and deduplicated.
+
+# Fruit
+apple
+banana
+cherry # a small fruit
+orange # both a fruit and a color
+
+# Colors
+blue
+green
+orange # both a fruit and a color
+red
+yellow
+
+# API terms
+# none yet
+
+# People
+Alice
+Bob
+Charlie
+
+# New terms not yet placed
+`;
+}
+
+function sampleUnsortedWordListFormatted() {
+    return `\
+# This is a list of terms used by our project.
+# Please add terms into the appropriate section.
+# they will get automatically sorted and deduplicated.
+
+# Fruit
+banana
+cherry # a small fruit
+orange # both a fruit and a color
+apple
+# Colors
+red
+yellow
+blue
+green
+orange # both a fruit and a color
+# API terms
+# none yet
+
+# People
+Charlie
+Alice
+Bob
+
+# New terms not yet placed
+`;
+}
diff --git a/samples/custom-dictionary/README.md b/samples/custom-dictionary/README.md
index c873d14250..c5746c0699 100644
--- a/samples/custom-dictionary/README.md
+++ b/samples/custom-dictionary/README.md
@@ -8,6 +8,13 @@ wordc
 wordd
 worde
 
+terma
+termb
+termc
+aterm
+
+white
+
 Here is text from `dict`:
 
 Compaknee.
diff --git a/samples/custom-dictionary/words.txt b/samples/custom-dictionary/words.txt
index 13f7a2e4e6..9002d1daf8 100644
--- a/samples/custom-dictionary/words.txt
+++ b/samples/custom-dictionary/words.txt
@@ -1,6 +1,41 @@
+# This is a list of terms used by our project.
+# Please add terms into the appropriate section.
+# they will get automatically sorted and deduplicated.
+
+# Fruit
+apple
+banana
+cherry # a small fruit
+orange # both a fruit and a color
+
+# Colors
+blue
+green
+orange # both a fruit and a color
+red
+yellow
+
+# API terms
+# none yet
+
+# People
+Alice
+Bob
+Charlie
+
+# Word variants
 word
 worda
 wordb
 wordc
 wordd
 worde
+
+# More terms
+aterm
+termA # with comment
+termb
+termc
+
+# New Terms
+white