Skip to content

Commit

Permalink
fix: improve how words are sorted (#4041)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S authored Jan 12, 2025
1 parent 76f77d0 commit 37b3ede
Show file tree
Hide file tree
Showing 6 changed files with 302 additions and 15 deletions.
2 changes: 1 addition & 1 deletion packages/client/src/settings/DictionaryTarget.mts
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ async function updateWordInCustomDictionary(updateFn: (words: string[]) => strin
await ensureFileExists(dict.uri);
const doc = await workspace.openTextDocument(dict.uri);
const data = doc.getText();
const lines = updateFn(data.split(/\r?\n/g).filter((a) => !!a));
const lines = updateFn(data.split(/\r?\n/g));
const text = lines.join('\n').trim() + '\n';
const success = await replaceDocText(doc, text);
if (!success) {
Expand Down
25 changes: 11 additions & 14 deletions packages/client/src/settings/configUpdaters.mts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import type { ConfigUpdater } from './configUpdater.mjs';
import { configUpdaterForKey } from './configUpdater.mjs';
import { createWordListFromLines } from './wordList.mjs';

export function updaterAddWords(words: string[]): ConfigUpdater<'words'> {
return configUpdaterForKey('words', addWordsFn(words));
Expand All @@ -10,23 +11,19 @@ export function updaterRemoveWords(words: string[]): ConfigUpdater<'words'> {
}

/**
 * Create an updater that adds `words` to an existing list of lines.
 *
 * The lines are parsed into a word list so comments and section headers are
 * kept, the new words are appended, and the list is sorted and de-duplicated
 * before being turned back into lines.
 *
 * @param words - the words to add; defaults to none.
 * @returns a function that takes the current lines (or `undefined`) and returns the updated lines.
 */
export function addWordsFn(words: string[] | undefined = []): (lines: string[] | undefined) => string[] {
    return (lines) => {
        const wordList = createWordListFromLines(lines ?? []);
        wordList.addWords(words);
        wordList.sort();
        // The serialized text ends with a newline; drop the empty element that `split` leaves behind.
        return wordList.toString().split('\n').slice(0, -1);
    };
}

/**
 * Create an updater that removes `words` from an existing list of lines.
 *
 * The lines are parsed into a word list so comments and section headers are
 * kept, matching words are removed, and the list is sorted and de-duplicated
 * before being turned back into lines.
 *
 * @param words - the words to remove.
 * @returns a function that takes the current lines (or `undefined`) and returns the updated lines.
 */
export function removeWordsFn(words: string[]): (lines: string[] | undefined) => string[] {
    return (lines) => {
        const wordList = createWordListFromLines(lines ?? []);
        wordList.removeWords(words);
        wordList.sort();
        // The serialized text ends with a newline; drop the empty element that `split` leaves behind.
        return wordList.toString().split('\n').slice(0, -1);
    };
}

/**
 * Return the words in sorted order.
 *
 * A copy is sorted so the caller's array is left untouched
 * (`Array.prototype.sort` sorts in place).
 *
 * @param words - the words to sort.
 * @returns a new array holding the same words, ordered by `compare`.
 */
function sortWords(words: string[]): string[] {
    return [...words].sort(compare);
}

/**
 * Comparator for sorting words, based on `String.prototype.localeCompare`.
 *
 * @param a - left-hand word.
 * @param b - right-hand word.
 * @returns a negative number when `a` sorts first, a positive number when `b` sorts first, `0` when they are equal.
 */
function compare(a: string, b: string): number {
    const order = a.localeCompare(b);
    return order;
}
155 changes: 155 additions & 0 deletions packages/client/src/settings/wordList.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
/** Marker that introduces a comment in a word list; `lineToEntry` prepends it to the comment text it stores. */
export const commentPrefix = '#';

/**
 * One line of a word list: a word, a comment, both, or neither (an empty line).
 */
export interface WordListEntry {
/** The word on the line; empty or missing when the line holds no word. */
word?: string | undefined;
/** The comment on the line, stored with its leading `#`; missing when the line has no comment. */
comment?: string | undefined;
}

/** An entry that carries a word, optionally followed by a comment on the same line. */
export interface WordListWordEntry extends WordListEntry {
/** The word text. */
word: string;
}

/**
 * A comment-only entry. When the list is split into sections, each header
 * entry starts a new section.
 */
export interface WordListHeaderEntry extends WordListEntry {
/** The word is empty: either `''` or `undefined`. */
word?: string | undefined;
/** The comment text, stored with its leading `#`. */
comment: string;
}

/**
 * An editable list of words that keeps the comments and section headers of
 * the text it was created from.
 */
export interface WordList {
/** Append words or entries to the end of the list. */
addWords(words: (string | WordListEntry)[]): void;
/** Remove the entries whose word is one of `words`. */
removeWords(words: string[]): void;
/** The words of all entries that have a non-empty word. */
readonly words: string[];
/** The entries currently in the list. */
readonly entries: WordListEntry[];
/**
 * Sorts the words within each section and removes duplicates.
 * Sections are delimited by header (comment-only) entries.
 */
sort(): void;
/** Serialize the list back to text. */
toString(): string;
}

/**
 * Build a word list from the full text of a word list file.
 *
 * The text is split on `\n` and each resulting line becomes one entry.
 *
 * @param content - the raw text of the word list.
 * @returns the word list built from those lines.
 */
export function createWordList(content: string): WordList {
    return createWordListFromLines(content.split('\n'));
}

/**
 * Build a word list from lines that have already been split.
 *
 * @param lines - one string per line of the word list.
 * @returns the word list built from those lines.
 */
export function createWordListFromLines(lines: string[]): WordList {
    const wordList: WordList = new WordListImpl(lines);
    return wordList;
}

/**
 * Default `WordList` implementation. Every input line is kept as an entry
 * (word, comment, both, or empty) so the text can be rebuilt after edits.
 */
class WordListImpl implements WordList {
    private _entries: WordListEntry[] = [];

    /**
     * @param lines - one string per line of the word list.
     */
    constructor(lines: string[]) {
        this._entries = lines.map((line) => lineToEntry(line));
    }

    /** The words of all entries that have a non-empty word, in list order. */
    get words(): string[] {
        const result: string[] = [];
        for (const entry of this._entries) {
            if (isWordListWord(entry)) {
                result.push(entry.word);
            }
        }
        return result;
    }

    /** The entries currently in the list (the internal array, not a copy). */
    get entries(): (WordListEntry | WordListHeaderEntry)[] {
        return this._entries;
    }

    /**
     * Append to the end of the list. Strings are parsed like a line of the
     * word list; entry objects are stored as given.
     */
    addWords(words: (string | WordListEntry | WordListHeaderEntry)[]): void {
        for (const item of words) {
            this._entries.push(typeof item === 'string' ? lineToEntry(item) : item);
        }
    }

    /** Drop every entry whose word is in `words`; entries without a word are kept. */
    removeWords(words: string[]): void {
        const unwanted = new Set(words);
        this._entries = this._entries.filter((entry) => !(entry.word && unwanted.has(entry.word)));
    }

    /**
     * Sort the words of each section with `localeCompare` and remove duplicates.
     * A word already seen in this or an earlier section is dropped unless the
     * entry carries a comment.
     */
    sort(): void {
        const seen = new Set<string>();
        const sections = wordListEntriesToSections(this._entries);

        for (const section of sections) {
            section.words.sort((left, right) => left.word.localeCompare(right.word));

            const kept: WordListWordEntry[] = [];
            for (const entry of section.words) {
                const isPlainDuplicate = seen.has(entry.word) && !entry.comment;
                if (isPlainDuplicate) continue;
                seen.add(entry.word);
                kept.push(entry);
            }
            section.words = kept;
        }

        this._entries = sectionsToEntries(sections);
    }

    /**
     * Serialize the list, section by section. When the text ends with two
     * newlines, the last one is removed.
     */
    toString(): string {
        let text = '';
        for (const section of wordListEntriesToSections(this._entries)) {
            text += sectionToString(section);
        }
        if (text.endsWith('\n\n')) {
            return text.slice(0, -1);
        }
        return text;
    }
}

/**
 * Parse one line of a word list into an entry.
 *
 * - No comment marker: the trimmed line is the word (empty for a blank line).
 * - The line starts with the comment marker: a comment-only (header) entry.
 * - Otherwise: a word followed by a comment. When only whitespace precedes the
 *   marker, the word is a single space so the entry is not treated as a header.
 *
 * The comment is everything after the first marker, with trailing whitespace
 * removed, so a later `#` inside the comment is kept.
 *
 * @param line - a single line of the word list, without its line terminator.
 * @returns the parsed entry.
 */
function lineToEntry(line: string): WordListWordEntry | WordListHeaderEntry {
    const markerIndex = line.indexOf(commentPrefix);
    if (markerIndex < 0) {
        return { word: line.trim() };
    }
    const beforeMarker = line.slice(0, markerIndex);
    // Keep the whole remainder of the line; `split('#', 2)` would drop text after a second `#`.
    const comment = commentPrefix + line.slice(markerIndex + commentPrefix.length).trimEnd();
    if (!beforeMarker) {
        return { comment };
    }
    return { word: beforeMarker.trim() || ' ', comment };
}

/** A run of word entries grouped under an optional header comment. */
interface WordListSection {
/** The comment-only entry that opened the section; `undefined` until a header has been seen. */
header: WordListHeaderEntry | undefined;
/** The word entries that belong to the section. */
words: WordListWordEntry[];
/** True when at least one empty line was seen in the section; the lines themselves are not stored. */
hasEmptyLines: boolean;
}

/**
 * Group entries into sections. Each header (comment-only) entry starts a new
 * section; word entries are collected into the current section; empty lines
 * are not stored, only recorded through `hasEmptyLines`.
 *
 * Words that appear before the first header form a leading section without a
 * header.
 *
 * @param entries - the entries in list order.
 * @returns the sections in order; always at least one section.
 */
function wordListEntriesToSections(entries: WordListEntry[]): WordListSection[] {
    const sections: WordListSection[] = [];
    let currentSection: WordListSection = { header: undefined, words: [], hasEmptyLines: false };
    for (const entry of entries) {
        if (isWordListHeader(entry)) {
            // Keep a header-less leading section when it holds words; otherwise
            // words placed before the first header would be lost.
            if (currentSection.header || currentSection.words.length > 0) {
                sections.push(currentSection);
            }
            currentSection = { header: entry, words: [], hasEmptyLines: false };
            continue;
        }
        if (!isWordListWord(entry)) {
            // Empty line: remember that it was there, but do not store it.
            currentSection.hasEmptyLines = true;
            continue;
        }
        currentSection.words.push(entry);
    }
    sections.push(currentSection);
    return sections;
}

/**
 * Flatten sections back into a list of entries.
 *
 * A section with a header yields the header, its words, and one empty-word
 * entry when the section had empty lines. A section without a header yields
 * only its words.
 *
 * @param sections - the sections in order.
 * @returns the flattened entries.
 */
function sectionsToEntries(sections: WordListSection[]): WordListEntry[] {
    const entries: WordListEntry[] = [];
    for (const section of sections) {
        if (!section.header) {
            entries.push(...section.words);
            continue;
        }
        entries.push(section.header, ...section.words);
        if (section.hasEmptyLines) {
            entries.push({ word: '' });
        }
    }
    return entries;
}

/**
 * Type guard for header entries: a non-empty comment and no word.
 *
 * @param entry - the entry to test.
 * @returns true when the entry is comment-only.
 */
function isWordListHeader(entry: WordListEntry): entry is WordListHeaderEntry {
    if (entry.word) {
        return false;
    }
    return Boolean(entry.comment);
}

/**
 * Type guard for word entries: the entry has a non-empty word.
 *
 * @param entry - the entry to test.
 * @returns true when the entry carries a word.
 */
function isWordListWord(entry: WordListEntry): entry is WordListWordEntry {
    const { word } = entry;
    return Boolean(word);
}

/**
 * Serialize one section: the header line (if any), one line per word entry,
 * then a blank separator line when the section has words or had empty lines.
 *
 * @param section - the section to serialize.
 * @returns the section text.
 */
function sectionToString(section: WordListSection): string {
    const needsSeparator = section.hasEmptyLines || section.words.length > 0;
    let text = wordListHeaderToString(section.header);
    for (const entry of section.words) {
        text += wordEntryToString(entry);
    }
    return needsSeparator ? text + '\n' : text;
}

/**
 * Serialize a section header.
 *
 * @param header - the header entry, or `undefined` for a section without one.
 * @returns the comment followed by a newline, or `''` when there is no comment.
 */
function wordListHeaderToString(header: WordListHeaderEntry | undefined): string {
    const comment = header?.comment;
    return comment ? comment + '\n' : '';
}

/**
 * Serialize a word entry as one line.
 *
 * @param entry - the word entry.
 * @returns the word, then a space and the comment when there is one, then a newline.
 */
function wordEntryToString(entry: WordListWordEntry): string {
    const { word, comment } = entry;
    if (!comment) {
        return word + '\n';
    }
    return word + ' ' + comment + '\n';
}
93 changes: 93 additions & 0 deletions packages/client/src/settings/wordList.test.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import { describe, expect, test } from 'vitest';

import { createWordList } from './wordList.mjs';

// Tests for the word list: formatting stability, sorting, and adding/removing words.
describe('wordList', () => {
    test('createWordList sampleWordListFormatted does not change.', () => {
        // Sorting an already sorted list must leave the text unchanged.
        const formatted = sampleWordListFormatted();
        const list = createWordList(formatted);
        list.sort();
        expect(list.toString()).toBe(formatted);
    });

    test('sort', () => {
        const list = createWordList(sampleUnsortedWordListFormatted());
        list.sort();
        const expectedText = sampleWordListFormatted();
        expect(list.toString()).toBe(expectedText);
    });

    test('addWords', () => {
        const list = createWordList(sampleWordListFormatted());
        list.addWords(['red', 'blue', 'green', 'white']);
        list.sort();
        // Only `white` is new; the other words already exist in the sample.
        const expectedText = sampleWordListFormatted() + 'white\n';
        expect(list.toString()).toBe(expectedText);
        expect(list.words).toContain('white');
        const expectedEntries = [{ word: 'white' }, { word: 'cherry', comment: '# a small fruit' }];
        expect(list.entries).toEqual(expect.arrayContaining(expectedEntries));
    });

    test('removeWords', () => {
        const list = createWordList(sampleWordListFormatted());
        list.removeWords(['red', 'blue', 'orange']);
        list.sort();
        const expectedText = sampleWordListFormatted().replaceAll(/^(orange|red|blue)\b.*\n/gm, '');
        expect(list.toString()).toBe(expectedText);
    });
});

/** Sample word list text in which the words of every section are already sorted. */
function sampleWordListFormatted(): string {
    const formatted = `\
# This is a list of terms used by our project.
# Please add terms into the appropriate section.
# they will get automatically sorted and deduplicated.
# Fruit
apple
banana
cherry # a small fruit
orange # both a fruit and a color
# Colors
blue
green
orange # both a fruit and a color
red
yellow
# API terms
# none yet
# People
Alice
Bob
Charlie
# New terms not yet placed
`;
    return formatted;
}

/** Sample word list text with the same sections and words as the formatted sample, but with the words out of order. */
function sampleUnsortedWordListFormatted(): string {
    const unsorted = `\
# This is a list of terms used by our project.
# Please add terms into the appropriate section.
# they will get automatically sorted and deduplicated.
# Fruit
banana
cherry # a small fruit
orange # both a fruit and a color
apple
# Colors
red
yellow
blue
green
orange # both a fruit and a color
# API terms
# none yet
# People
Charlie
Alice
Bob
# New terms not yet placed
`;
    return unsorted;
}
7 changes: 7 additions & 0 deletions samples/custom-dictionary/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ wordc
wordd
worde

terma
termb
termc
aterm

white

Here is text from `dict`:

Compaknee.
35 changes: 35 additions & 0 deletions samples/custom-dictionary/words.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,41 @@
# This is a list of terms used by our project.
# Please add terms into the appropriate section.
# they will get automatically sorted and deduplicated.

# Fruit
apple
banana
cherry # a small fruit
orange # both a fruit and a color

# Colors
blue
green
orange # both a fruit and a color
red
yellow

# API terms
# none yet

# People
Alice
Bob
Charlie

# Word variants
word
worda
wordb
wordc
wordd
worde

# More terms
aterm
termA # with comment
termb
termc

# New Terms
white

0 comments on commit 37b3ede

Please sign in to comment.