-
-
Notifications
You must be signed in to change notification settings - Fork 134
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fixes #4033
- Loading branch information
Showing
6 changed files
with
302 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
/** Marker that introduces a comment on a word list line. */
export const commentPrefix = '#';
|
||
/**
 * One line of a word list: a word, a comment, or both.
 * An entry whose word is empty and that has no comment stands for a blank line.
 */
export interface WordListEntry {
    /** Comment text for the line; the parser in this module stores it with the leading comment prefix. */
    comment?: string | undefined;
    /** The word on the line; empty or absent for comment-only and blank lines. */
    word?: string | undefined;
}
|
||
/** An entry that carries a word, optionally accompanied by a trailing comment. */
export interface WordListWordEntry extends WordListEntry {
    /** The word itself; required for this kind of entry. */
    word: string;
}
|
||
/** A comment-only line. Such lines act as section headers when the list is split into sections. */
export interface WordListHeaderEntry extends WordListEntry {
    /** The comment text; required for a header. */
    comment: string;
    /** Always empty for a header: either '' or undefined. */
    word?: string | undefined;
}
|
||
/** An editable list of words with optional comments and comment-line section headers. */
export interface WordList {
    /** Every entry in the list, in order: words, headers and blank lines. */
    readonly entries: WordListEntry[];
    /** Only the words of the list, in order. */
    readonly words: string[];
    /**
     * Appends words to the end of the list.
     * @param words - plain text lines (parsed into entries) or ready-made entries.
     */
    addWords(words: (string | WordListEntry)[]): void;
    /**
     * Removes every entry whose word is one of the given words.
     * @param words - the words to remove.
     */
    removeWords(words: string[]): void;
    /**
     * Sorts the words in the list and removes duplicates.
     * Sections are separated by headers.
     */
    sort(): void;
    /** Serializes the list back to text. */
    toString(): string;
}
|
||
/**
 * Builds a word list from the text of a word list file.
 * @param content - the file content; it is split into lines on '\n'.
 * @returns the word list holding one entry per line.
 */
export function createWordList(content: string): WordList {
    return createWordListFromLines(content.split('\n'));
}
|
||
/**
 * Builds a word list from lines that have already been split.
 * @param lines - one string per line of the word list.
 * @returns the word list holding one entry per line.
 */
export function createWordListFromLines(lines: string[]): WordList {
    const wordList: WordList = new WordListImpl(lines);
    return wordList;
}
|
||
/**
 * Array-backed implementation of `WordList`.
 * Entries are kept in file order; comment-only entries act as section headers.
 */
class WordListImpl implements WordList {
    private _entries: WordListEntry[];

    /**
     * @param lines - the lines of a word list; each one is parsed into an entry.
     */
    constructor(lines: string[]) {
        this._entries = lines.map(lineToEntry);
    }

    /** The words of all word-bearing entries, in their current order. */
    get words(): string[] {
        return this._entries.filter(isWordListWord).map((e) => e.word);
    }

    /** The internal entry array itself (not a copy). */
    get entries(): (WordListEntry | WordListHeaderEntry)[] {
        return this._entries;
    }

    /**
     * Appends entries to the end of the list.
     * @param words - strings are parsed like lines of a word list; entry objects are stored as given.
     */
    addWords(words: (string | WordListEntry | WordListHeaderEntry)[]): void {
        // Push one at a time: spreading a very large array into `push(...)` can exceed
        // the engine's argument limit and throw a RangeError.
        for (const w of words) {
            this._entries.push(typeof w === 'string' ? lineToEntry(w) : w);
        }
    }

    /**
     * Removes every entry whose word is in `words`.
     * Entries without a word (headers, blank lines) are always kept.
     * @param words - the words to remove.
     */
    removeWords(words: string[]): void {
        const toRemove = new Set(words);
        this._entries = this._entries.filter((w) => !w.word || !toRemove.has(w.word));
    }

    /**
     * Sorts the words inside each section and removes duplicates.
     * Duplicates are tracked across the whole list, not per section: a word already seen
     * is dropped unless the entry carries a comment.
     */
    sort(): void {
        const knownWords = new Set<string>();

        const sections = wordListEntriesToSections(this._entries);

        for (const section of sections) {
            section.words.sort((a, b) => a.word.localeCompare(b.word));
            section.words = section.words.filter((w) => {
                if (knownWords.has(w.word) && !w.comment) return false;
                knownWords.add(w.word);
                return true;
            });
        }

        this._entries = sectionsToEntries(sections);
    }

    /**
     * Serializes the list section by section.
     * @returns the text; when it would end in a blank line, that last newline is removed.
     */
    toString(): string {
        const sections = wordListEntriesToSections(this._entries);
        const text = sections.map((section) => sectionToString(section)).join('');
        return text.endsWith('\n\n') ? text.slice(0, -1) : text;
    }
}
|
||
/**
 * Parses one line of a word list into an entry.
 *
 * - No comment prefix: a word entry holding the trimmed line (an empty word for a blank line).
 * - Line starts with the comment prefix: a header entry holding only the comment.
 * - Otherwise: a word entry with a trailing comment. If only whitespace precedes the comment,
 *   the word is a single space, so the line is kept as a word entry rather than a header.
 *
 * The comment runs from the FIRST comment prefix to the end of the line (trailing whitespace
 * removed), so later prefix characters inside the comment are preserved.
 *
 * @param line - a single line, without its line terminator.
 * @returns the parsed entry.
 */
function lineToEntry(line: string): WordListWordEntry | WordListHeaderEntry {
    const commentStart = line.indexOf(commentPrefix);
    if (commentStart < 0) {
        return { word: line.trim() };
    }
    // Slice instead of `split(prefix, 2)`: split's limit drops everything after a second prefix.
    const comment = line.slice(commentStart).trimEnd();
    const beforeComment = line.slice(0, commentStart);
    if (!beforeComment) {
        return { comment };
    }
    return { word: beforeComment.trim() || ' ', comment };
}
|
||
/** A header together with the word entries that follow it, up to the next header. */
interface WordListSection {
    /** True when at least one blank line was seen inside the section. */
    hasEmptyLines: boolean;
    /** The header that opens the section; undefined for entries that precede any header. */
    header: WordListHeaderEntry | undefined;
    /** The word entries of the section, in order. */
    words: WordListWordEntry[];
}
|
||
/**
 * Groups entries into sections. Every header entry starts a new section; word entries are
 * added to the current section; blank entries only set the section's `hasEmptyLines` flag.
 *
 * Entries that appear before the first header form a leading section with no header.
 * That section is kept whenever it contains words; a leading section made of blank lines
 * only is discarded when the first header is reached.
 *
 * @param entries - the entries of a word list, in order.
 * @returns the sections in order; the section still open at the end is always included.
 */
function wordListEntriesToSections(entries: WordListEntry[]): WordListSection[] {
    const sections: WordListSection[] = [];
    let currentSection: WordListSection = { header: undefined, words: [], hasEmptyLines: false };
    for (const entry of entries) {
        if (isWordListHeader(entry)) {
            // Also keep a header-less section that holds words; skipping it would silently lose them.
            if (currentSection.header || currentSection.words.length > 0) {
                sections.push(currentSection);
            }
            currentSection = { header: entry, words: [], hasEmptyLines: false };
            continue;
        }
        if (!isWordListWord(entry)) {
            currentSection.hasEmptyLines = true;
            continue; // skip empty lines.
        }
        currentSection.words.push(entry);
    }
    sections.push(currentSection);
    return sections;
}
|
||
/**
 * Flattens sections back into a single entry list.
 * A section with a header contributes its header, its words and, when it had blank lines,
 * one blank entry. A section without a header contributes only its words.
 *
 * @param sections - the sections to flatten, in order.
 * @returns a new array of entries.
 */
function sectionsToEntries(sections: WordListSection[]): WordListEntry[] {
    const entries: WordListEntry[] = [];
    for (const section of sections) {
        if (section.header) {
            entries.push(section.header);
        }
        for (const wordEntry of section.words) {
            entries.push(wordEntry);
        }
        if (section.header && section.hasEmptyLines) {
            entries.push({ word: '' });
        }
    }
    return entries;
}
|
||
/**
 * Type guard for header entries.
 * @param entry - the entry to test.
 * @returns true when the entry has a non-empty comment and no (or an empty) word.
 */
function isWordListHeader(entry: WordListEntry): entry is WordListHeaderEntry {
    const hasComment = Boolean(entry.comment);
    const hasWord = Boolean(entry.word);
    return hasComment && !hasWord;
}
|
||
/**
 * Type guard for word entries.
 * @param entry - the entry to test.
 * @returns true when the entry has a non-empty word.
 */
function isWordListWord(entry: WordListEntry): entry is WordListWordEntry {
    return Boolean(entry.word);
}
|
||
/**
 * Serializes one section: its header line, one line per word, and then a blank line
 * when the section has words or contained blank lines.
 *
 * @param section - the section to serialize.
 * @returns the text of the section.
 */
function sectionToString(section: WordListSection): string {
    let text = wordListHeaderToString(section.header);
    for (const wordEntry of section.words) {
        text += wordEntryToString(wordEntry);
    }
    if (section.hasEmptyLines || section.words.length) {
        text += '\n';
    }
    return text;
}
|
||
/**
 * Serializes a section header.
 * @param header - the header entry, or undefined for a section without one.
 * @returns the comment followed by a newline, or '' when there is no header or its comment is empty.
 */
function wordListHeaderToString(header: WordListHeaderEntry | undefined): string {
    const comment = header?.comment;
    return comment ? comment + '\n' : '';
}
|
||
/**
 * Serializes a word entry as one line.
 * @param entry - the word entry.
 * @returns the word, then a space and the comment when there is one, then a newline.
 */
function wordEntryToString(entry: WordListWordEntry): string {
    const parts = [entry.word];
    if (entry.comment) {
        parts.push(entry.comment);
    }
    return parts.join(' ') + '\n';
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
import { describe, expect, test } from 'vitest'; | ||
|
||
import { createWordList } from './wordList.mjs'; | ||
|
||
describe('wordList', () => {
    // An already formatted list must survive a sort/serialize round trip unchanged.
    test('createWordList sampleWordListFormatted does not change.', () => {
        const formatted = sampleWordListFormatted();
        const wordList = createWordList(formatted);
        wordList.sort();
        expect(wordList.toString()).toBe(formatted);
    });

    // Sorting the unsorted sample must produce the formatted sample.
    test('sort', () => {
        const wordList = createWordList(sampleUnsortedWordListFormatted());
        wordList.sort();
        const sorted = wordList.toString();
        expect(sorted).toBe(sampleWordListFormatted());
    });

    // Words that already exist elsewhere are dropped by sort(); only the new word remains.
    test('addWords', () => {
        const formatted = sampleWordListFormatted();
        const wordList = createWordList(formatted);
        wordList.addWords(['red', 'blue', 'green', 'white']);
        wordList.sort();
        expect(wordList.toString()).toBe(formatted + 'white\n');
        expect(wordList.words).toContain('white');
        const expectedEntries = [{ word: 'white' }, { word: 'cherry', comment: '# a small fruit' }];
        expect(wordList.entries).toEqual(expect.arrayContaining(expectedEntries));
    });

    // Removing words deletes their lines, including ones that carry a comment.
    test('removeWords', () => {
        const formatted = sampleWordListFormatted();
        const wordList = createWordList(formatted);
        wordList.removeWords(['red', 'blue', 'orange']);
        wordList.sort();
        const expected = formatted.replaceAll(/^(orange|red|blue)\b.*\n/gm, '');
        expect(wordList.toString()).toBe(expected);
    });
});
|
||
/**
 * A word list that is already sorted and formatted the way the word list serializer writes it.
 *
 * The serializer ends every section that contains words with a blank line, so the blank
 * lines after the Fruit, Colors and People sections are required for the round-trip
 * expectations that use this sample.
 * NOTE(review): the blank lines were missing from the extracted text and have been
 * reconstructed from the serializer's behavior; confirm against the committed file.
 */
function sampleWordListFormatted() {
    return `\
# This is a list of terms used by our project.
# Please add terms into the appropriate section.
# they will get automatically sorted and deduplicated.
# Fruit
apple
banana
cherry # a small fruit
orange # both a fruit and a color

# Colors
blue
green
orange # both a fruit and a color
red
yellow

# API terms
# none yet
# People
Alice
Bob
Charlie

# New terms not yet placed
`;
}
|
||
/**
 * The sample word list with the words of each section out of order.
 * Used as the input for the sort test.
 */
function sampleUnsortedWordListFormatted() {
    const lines = [
        '# This is a list of terms used by our project.',
        '# Please add terms into the appropriate section.',
        '# they will get automatically sorted and deduplicated.',
        '# Fruit',
        'banana',
        'cherry # a small fruit',
        'orange # both a fruit and a color',
        'apple',
        '# Colors',
        'red',
        'yellow',
        'blue',
        'green',
        'orange # both a fruit and a color',
        '# API terms',
        '# none yet',
        '# People',
        'Charlie',
        'Alice',
        'Bob',
        '# New terms not yet placed',
        '', // the text ends with a newline
    ];
    return lines.join('\n');
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,13 @@ wordc | |
wordd | ||
worde | ||
|
||
terma | ||
termb | ||
termc | ||
aterm | ||
|
||
white | ||
|
||
Here is text from `dict`: | ||
|
||
Compaknee. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,41 @@ | ||
# This is a list of terms used by our project. | ||
# Please add terms into the appropriate section. | ||
# they will get automatically sorted and deduplicated. | ||
|
||
# Fruit | ||
apple | ||
banana | ||
cherry # a small fruit | ||
orange # both a fruit and a color | ||
|
||
# Colors | ||
blue | ||
green | ||
orange # both a fruit and a color | ||
red | ||
yellow | ||
|
||
# API terms | ||
# none yet | ||
|
||
# People | ||
Alice | ||
Bob | ||
Charlie | ||
|
||
# Word variants | ||
word | ||
worda | ||
wordb | ||
wordc | ||
wordd | ||
worde | ||
|
||
# More terms | ||
aterm | ||
termA # with comment | ||
termb | ||
termc | ||
|
||
# New Terms | ||
white |