diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index a7156b71d963..3212ac56f5b3 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -18,6 +18,7 @@ on: - 'packages/**/package.json' - 'packages/**/*-lock.yaml' - 'packages/**/*.ts' + - 'packages/**/*.mts' - 'integration-tests/**' - '!integration-tests/perf/**' - 'package.json' @@ -149,9 +150,14 @@ jobs: 'integration-tests/config/repositories/${{matrix.repo}}/**', 'integration-tests/snapshots/${{ matrix.repo }}/*', 'integration-tests/repositories/*', - 'integration-tests/src/**/*.ts', 'integration-tests/tsconfig.json', - 'packages/*/src/**/*.ts', 'packages/*/tsconfig.json', + 'integration-tests/src/**/*.ts', + 'integration-tests/src/**/*.mts', + 'integration-tests/tsconfig.json', + 'packages/*/src/**/*.ts', + 'packages/*/src/**/*.mts', + 'packages/*/tsconfig.json', 'packages/*/*.ts', + 'packages/*/*.mts', 'tools/perf-chart/lib/app.cjs', '*-lock.yaml' ) }} diff --git a/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryFromTrie.ts b/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryFromTrie.ts index ab9822d3ae31..91f779f4e8f4 100644 --- a/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryFromTrie.ts +++ b/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryFromTrie.ts @@ -211,18 +211,25 @@ function* outerWordForms(word: string, mapWord: (word: string) => string[]): Ite // Only generate the needed forms. 
const sent = new Set(); let w = word; + const ww = w; yield w; sent.add(w); w = word.normalize('NFC'); - if (!sent.has(w)) yield w; - sent.add(w); + if (w !== ww) { + yield w; + sent.add(w); + } w = word.normalize('NFD'); - if (!sent.has(w)) yield w; - sent.add(w); - for (const f of [...sent]) { + if (w !== ww && !sent.has(w)) { + yield w; + sent.add(w); + } + for (const f of sent) { for (const m of mapWord(f)) { - if (!sent.has(m)) yield m; - sent.add(m); + if (m !== ww && !sent.has(m)) { + yield m; + sent.add(m); + } } } return; diff --git a/packages/cspell-dictionary/src/perf/has.perf.ts b/packages/cspell-dictionary/src/perf/has.perf.ts index 66687336d324..03c6190c6330 100644 --- a/packages/cspell-dictionary/src/perf/has.perf.ts +++ b/packages/cspell-dictionary/src/perf/has.perf.ts @@ -18,19 +18,20 @@ suite('dictionary has', async (test) => { const dict3 = createSpellingDictionary(words3, 'test3', import.meta.url); const dictCol = createCollection([dict, dict2, dict3], 'test-collection'); + const dictColRev = createCollection([dict3, dict2, dict], 'test-collection-reverse'); test('dictionary has 100k words', () => { checkWords(dict, words); }); - test('dictionary has 100k words (2nd time)', () => { - checkWords(dict, words); - }); - test('collection has 100k words', () => { checkWords(dictCol, words); }); + test('collection reverse has 100k words', () => { + checkWords(dictColRev, words); + }); + test('iTrie has 100k words', () => { checkWords(iTrie, words); }); @@ -61,10 +62,6 @@ suite('dictionary has Not', async (test) => { checkWords(dict, missingWords, false); }); - test('dictionary has not 100k words (2nd time)', () => { - checkWords(dict, missingWords, false); - }); - test('collection has not 100k words', () => { checkWords(dictCol, missingWords, false); }); diff --git a/packages/cspell-lib/api/api.d.ts b/packages/cspell-lib/api/api.d.ts index 4b64e85130e1..5022a8f27f34 100644 --- a/packages/cspell-lib/api/api.d.ts +++ b/packages/cspell-lib/api/api.d.ts @@ 
-533,6 +533,11 @@ interface ExtendedSuggestion { * The suggested word adjusted to match the original case. */ wordAdjustedToMatchCase?: string; + /** + * The cost of using this word. + * The lower the cost, the better the suggestion. + */ + cost?: number; } interface ValidationResult extends TextOffset, Pick { diff --git a/packages/cspell-lib/src/lib/Models/Suggestion.ts b/packages/cspell-lib/src/lib/Models/Suggestion.ts index 9d4304279035..7c01deac0cde 100644 --- a/packages/cspell-lib/src/lib/Models/Suggestion.ts +++ b/packages/cspell-lib/src/lib/Models/Suggestion.ts @@ -11,4 +11,9 @@ export interface ExtendedSuggestion { * The suggested word adjusted to match the original case. */ wordAdjustedToMatchCase?: string; + /** + * The cost of using this word. + * The lower the cost, the better the suggestion. + */ + cost?: number; } diff --git a/packages/cspell-lib/src/lib/suggestions.test.ts b/packages/cspell-lib/src/lib/suggestions.test.ts index 89a3ae879dac..d9dbcd9cfd53 100644 --- a/packages/cspell-lib/src/lib/suggestions.test.ts +++ b/packages/cspell-lib/src/lib/suggestions.test.ts @@ -1,6 +1,6 @@ import { describe, expect, test } from 'vitest'; -import type { SuggestionOptions } from './suggestions.js'; +import type { SuggestedWord, SuggestionOptions } from './suggestions.js'; import { SuggestionError, suggestionsForWord, suggestionsForWords } from './suggestions.js'; import { asyncIterableToArray } from './util/util.js'; @@ -22,6 +22,7 @@ describe('suggestions', () => { ${'apple'} | ${undefined} | ${{ dictionaries: ['en-gb'] }} | ${ac([sug('apple', 0, ['en_us', 'en-gb']), sug('Apple', 1, ['en_us', 'companies'])])} `( 'suggestionsForWord default settings word: "$word", opts: $options, settings: $settings', + { timeout }, async ({ word, options, settings, expected }) => { const results = await suggestionsForWord(word, options, settings); expect(results.word).toEqual(word); @@ -32,7 +33,6 @@ describe('suggestions', () => { expect(resultsAsync[0].word).toEqual(word); 
expect(resultsAsync[0].suggestions).toEqual(expected); }, - { timeout }, ); test.each` @@ -40,18 +40,42 @@ describe('suggestions', () => { ${'apple'} | ${opt({ dictionaries: ['unknown'] })} | ${undefined} `( 'suggestionsForWord ERRORS word: "$word", opts: $options, settings: $settings', + { timeout }, async ({ word, options, settings }) => { await expect(suggestionsForWord(word, options, settings)).rejects.toThrow(SuggestionError); }, - { timeout }, ); +}); - function opt(opt: Partial): SuggestionOptions { - return opt; - } +describe('Suggestions English', async () => { + // const configLoader = getDefaultConfigLoaderInternal(); + // const settings = await configLoader.getGlobalSettingsAsync(); - function sug(word: string, cost: number, dicts: string[]) { - const dictionaries = [...dicts].sort(); - return oc({ word, cost, dictionaries }); - } + // cspell:ignore orangges + test('Orangges', async () => { + const results = await suggestionsForWord('orangges', { languageId: 'typescript', locale: 'en-US' }, {}); + expect(results.suggestions).toEqual([ + sug('oranges', 100), + sug('ranges', 185), + sug('orangs', 190), + sug('orange', 200), + sug('orangey', 200), + sug('orangier', 200), + sug('orangiest'), + sug('Orange', 201), + ]); + }); }); + +function opt(opt: Partial): SuggestionOptions { + return opt; +} + +function sug(word: string, cost?: number, dicts?: string[]) { + const suggestedWord: Partial = { word }; + if (cost !== undefined) suggestedWord.cost = cost; + if (dicts) { + suggestedWord.dictionaries = [...dicts].sort(); + } + return oc(suggestedWord); +} diff --git a/packages/cspell-lib/src/lib/textValidation/__snapshots__/docValidator.test.ts.snap b/packages/cspell-lib/src/lib/textValidation/__snapshots__/docValidator.test.ts.snap new file mode 100644 index 000000000000..31eb2e16e66f --- /dev/null +++ b/packages/cspell-lib/src/lib/textValidation/__snapshots__/docValidator.test.ts.snap @@ -0,0 +1,34 @@ +// Vitest Snapshot v1, 
https://vitest.dev/guide/snapshot.html + +exports[`docValidator suggestions > suggestions 1`] = ` +[ + { + "word": "oranges", + "wordAdjustedToMatchCase": "Oranges", + }, + { + "word": "orange", + }, + { + "word": "Orange", + }, + { + "word": "orangs", + "wordAdjustedToMatchCase": "Orangs", + }, + { + "word": "orange's", + }, + { + "word": "Orange's", + }, + { + "word": "ranges", + "wordAdjustedToMatchCase": "Ranges", + }, + { + "word": "orangier", + "wordAdjustedToMatchCase": "Orangier", + }, +] +`; diff --git a/packages/cspell-lib/src/lib/textValidation/docValidator.test.ts b/packages/cspell-lib/src/lib/textValidation/docValidator.test.ts index 17b6075081b3..78bafe9b98b8 100644 --- a/packages/cspell-lib/src/lib/textValidation/docValidator.test.ts +++ b/packages/cspell-lib/src/lib/textValidation/docValidator.test.ts @@ -283,6 +283,27 @@ describe('docValidator trace', () => { }); }); +describe('docValidator suggestions', () => { + test('suggestions', async () => { + const doc = td(__filename, sampleCode()); + const dVal = new DocumentValidator(doc, { generateSuggestions: true, numSuggestions: 8 }, {}); + await dVal.prepare(); + const issues = dVal.checkDocument(); + expect(issues).toHaveLength(1); + expect(issues[0].suggestionsEx).toMatchSnapshot(); + }); +}); + +function sampleCode() { + // cspell:ignore Orangges + const text = ` +export function remainingOrangges(count: number): number { + return count % 42; +} +`; + return text; +} + function extractRawText(text: string, issues: ValidationIssue[]): string[] { return issues.map((issue) => { const start = issue.offset; diff --git a/packages/cspell-tools/package.json b/packages/cspell-tools/package.json index 244871c8084c..26eb222cc27a 100644 --- a/packages/cspell-tools/package.json +++ b/packages/cspell-tools/package.json @@ -63,7 +63,6 @@ "node": ">=18" }, "devDependencies": { - "@types/glob": "^8.1.0", "lorem-ipsum": "^2.0.8", "ts-json-schema-generator": "^2.3.0" }, diff --git 
a/packages/cspell-trie-lib/api/api.d.ts b/packages/cspell-trie-lib/api/api.d.ts index 2fad4e472914..94e18917617d 100644 --- a/packages/cspell-trie-lib/api/api.d.ts +++ b/packages/cspell-trie-lib/api/api.d.ts @@ -80,6 +80,11 @@ interface TrieInfo { forbiddenWordPrefix: string; isCaseAware: boolean; } +interface TrieCharacteristics { + hasForbiddenWords: boolean; + hasCompoundWords: boolean; + hasNonStrictWords: boolean; +} type PartialTrieInfo = PartialWithUndefined | undefined; interface FindResult$1 { @@ -117,18 +122,16 @@ interface ITrieNode { readonly id: ITrieNodeId; /** flag End of Word */ readonly eow: boolean; - /** number of children */ - readonly size: number; /** get keys to children */ - keys(): readonly string[]; + keys(): Iterable; /** get keys to children */ - values(): readonly ITrieNode[]; + values(): Iterable; /** get the children as key value pairs */ - entries(): readonly Entry[]; + entries(): Iterable; /** get child ITrieNode */ get(char: string): ITrieNode | undefined; - /** get a child by the key index */ - child(idx: number): ITrieNode; + /** get a nested child ITrieNode */ + getNode?: (chars: string) => ITrieNode | undefined; /** has child */ has(char: string): boolean; /** `true` iff this node has children */ @@ -137,13 +140,13 @@ interface ITrieNode { findExact?: ((word: string) => boolean) | undefined; } interface ITrieNodeRoot extends ITrieNode { - info: Readonly; + readonly info: Readonly; /** * converts an `id` into a node. * @param id an of a ITrieNode in this Trie */ resolveId(id: ITrieNodeId): ITrieNode; - findExact?: ((word: string) => boolean) | undefined; + findExact: ((word: string) => boolean) | undefined; /** * Try to find a word. * @param word - the normalized word to look up. 
@@ -152,9 +155,12 @@ interface ITrieNodeRoot extends ITrieNode { */ find?: ((word: string, strict: boolean) => FindResult$1 | undefined) | undefined; isForbidden?: ((word: string) => boolean) | undefined; - forbidPrefix: string; - compoundFix: string; - caseInsensitivePrefix: string; + readonly forbidPrefix: string; + readonly compoundFix: string; + readonly caseInsensitivePrefix: string; + readonly hasForbiddenWords: boolean; + readonly hasCompoundWords: boolean; + readonly hasNonStrictWords: boolean; } declare const FLAG_WORD = 1; @@ -376,8 +382,8 @@ declare function suggestionCollector(wordToMatch: string, options: SuggestionCol */ declare function impersonateCollector(collector: SuggestionCollector, word: string): SuggestionCollector; -interface TrieData { - info: Readonly; +interface TrieData extends Readonly { + readonly info: Readonly; /** Method used to split words into individual characters. */ wordToCharacters(word: string): readonly string[]; /** get an iterable for all the words in the dictionary. */ @@ -386,8 +392,10 @@ interface TrieData { getNode(prefix: string): ITrieNode | undefined; has(word: string): boolean; isForbiddenWord(word: string): boolean; - hasForbiddenWords(): boolean; - size: number; + readonly hasForbiddenWords: boolean; + readonly hasCompoundWords: boolean; + readonly hasNonStrictWords: boolean; + readonly size: number; } interface ITrie { @@ -463,12 +471,16 @@ interface ITrie { * On the returned Iterator, calling .next(goDeeper: boolean), allows for controlling the depth. 
*/ iterate(): WalkerIterator; - weightMap: WeightMap | undefined; - get isCaseAware(): boolean; + readonly weightMap: WeightMap | undefined; + readonly isCaseAware: boolean; + readonly hasForbiddenWords: boolean; + readonly hasCompoundWords: boolean; + readonly hasNonStrictWords: boolean; } interface FindWordOptions$1 { caseSensitive?: boolean; useLegacyWordCompounds?: boolean | number; + checkForbidden?: boolean; } declare function buildITrieFromWords(words: Iterable, info?: PartialTrieInfo): ITrie; diff --git a/packages/cspell-trie-lib/src/lib/ITrie.ts b/packages/cspell-trie-lib/src/lib/ITrie.ts index a96c0649a63c..847e03ecbaea 100644 --- a/packages/cspell-trie-lib/src/lib/ITrie.ts +++ b/packages/cspell-trie-lib/src/lib/ITrie.ts @@ -108,24 +108,33 @@ export interface ITrie { */ iterate(): WalkerIterator; - weightMap: WeightMap | undefined; - - get isCaseAware(): boolean; + readonly weightMap: WeightMap | undefined; + readonly isCaseAware: boolean; + readonly hasForbiddenWords: boolean; + readonly hasCompoundWords: boolean; + readonly hasNonStrictWords: boolean; } export class ITrieImpl implements ITrie { private _info: TrieInfo; - private hasForbidden: boolean; private root: ITrieNodeRoot; private count?: number; weightMap: WeightMap | undefined; + #optionsCompound = this.createFindOptions({ compoundMode: 'compound' }); + + readonly hasForbiddenWords: boolean; + readonly hasCompoundWords: boolean; + readonly hasNonStrictWords: boolean; + constructor( readonly data: TrieData, private numNodes?: number, ) { this.root = data.getRoot(); this._info = mergeOptionalWithDefaults(data.info); - this.hasForbidden = data.hasForbiddenWords(); + this.hasForbiddenWords = data.hasForbiddenWords; + this.hasCompoundWords = data.hasCompoundWords; + this.hasNonStrictWords = data.hasNonStrictWords; } /** @@ -157,8 +166,7 @@ export class ITrieImpl implements ITrie { * @param text - text to find in the Trie */ find(text: string): ITrieNode | undefined { - const options = 
this.createFindOptions({ compoundMode: 'compound' }); - return findWordNode(this.data.getRoot(), text, options).node; + return findWordNode(this.data.getRoot(), text, this.#optionsCompound).node; } has(word: string, minLegacyCompoundLength?: boolean | number): boolean { @@ -177,8 +185,8 @@ export class ITrieImpl implements ITrie { * @returns true if the word was found and is not forbidden. */ hasWord(word: string, caseSensitive: boolean): boolean { - const f = this.findWord(word, { caseSensitive }); - return !!f.found && !f.forbidden; + const f = this.findWord(word, { caseSensitive, checkForbidden: false }); + return !!f.found; } findWord(word: string, options?: FindWordOptions): FindFullResult { @@ -193,8 +201,10 @@ export class ITrieImpl implements ITrie { }); return findLegacyCompound(this.root, word, findOptions); } - const findOptions = this.createFindOptionsMatchCase(options?.caseSensitive); - return findWord(this.root, word, findOptions); + return findWord(this.root, word, { + matchCase: options?.caseSensitive, + checkForbidden: options?.checkForbidden, + }); } /** @@ -202,7 +212,7 @@ export class ITrieImpl implements ITrie { * @param word the word to lookup. 
*/ isForbiddenWord(word: string): boolean { - return this.hasForbidden && isForbiddenWord(this.root, word, this.info.forbiddenWordPrefix); + return this.hasForbiddenWords && isForbiddenWord(this.root, word, this.info.forbiddenWordPrefix); } /** @@ -288,17 +298,9 @@ export class ITrieImpl implements ITrie { const findOptions = createFindOptions(options); return findOptions; } - - private lastCreateFindOptionsMatchCaseMap = new Map(); - private createFindOptionsMatchCase(matchCase: boolean | undefined) { - const f = this.lastCreateFindOptionsMatchCaseMap.get(matchCase); - if (f !== undefined) return f; - const findOptions = this.createFindOptions({ matchCase }); - this.lastCreateFindOptionsMatchCaseMap.set(matchCase, findOptions); - return findOptions; - } } export interface FindWordOptions { caseSensitive?: boolean; useLegacyWordCompounds?: boolean | number; + checkForbidden?: boolean; } diff --git a/packages/cspell-trie-lib/src/lib/ITrieNode/FindOptions.ts b/packages/cspell-trie-lib/src/lib/ITrieNode/FindOptions.ts index b5063322f11f..eb0869408260 100644 --- a/packages/cspell-trie-lib/src/lib/ITrieNode/FindOptions.ts +++ b/packages/cspell-trie-lib/src/lib/ITrieNode/FindOptions.ts @@ -4,7 +4,8 @@ import type { CompoundModes } from './CompoundModes.js'; export interface FindOptions { matchCase: boolean; compoundMode: CompoundModes; - legacyMinCompoundLength?: number; + legacyMinCompoundLength?: number | undefined; + checkForbidden?: boolean | undefined; } export type PartialFindOptions = PartialWithUndefined | undefined; diff --git a/packages/cspell-trie-lib/src/lib/ITrieNode/ITrieNode.ts b/packages/cspell-trie-lib/src/lib/ITrieNode/ITrieNode.ts index 3d7f002b9e87..7d544515a74f 100644 --- a/packages/cspell-trie-lib/src/lib/ITrieNode/ITrieNode.ts +++ b/packages/cspell-trie-lib/src/lib/ITrieNode/ITrieNode.ts @@ -39,18 +39,16 @@ export interface ITrieNode { readonly id: ITrieNodeId; /** flag End of Word */ readonly eow: boolean; - /** number of children */ - readonly 
size: number; /** get keys to children */ - keys(): readonly string[]; + keys(): Iterable; /** get keys to children */ - values(): readonly ITrieNode[]; + values(): Iterable; /** get the children as key value pairs */ - entries(): readonly Entry[]; + entries(): Iterable; /** get child ITrieNode */ get(char: string): ITrieNode | undefined; - /** get a child by the key index */ - child(idx: number): ITrieNode; + /** get a nested child ITrieNode */ + getNode?: (chars: string) => ITrieNode | undefined; /** has child */ has(char: string): boolean; /** `true` iff this node has children */ @@ -60,14 +58,14 @@ export interface ITrieNode { } export interface ITrieNodeRoot extends ITrieNode { - info: Readonly; + readonly info: Readonly; /** * converts an `id` into a node. * @param id an of a ITrieNode in this Trie */ resolveId(id: ITrieNodeId): ITrieNode; - findExact?: ((word: string) => boolean) | undefined; + findExact: ((word: string) => boolean) | undefined; /** * Try to find a word. * @param word - the normalized word to look up. 
@@ -78,7 +76,11 @@ export interface ITrieNodeRoot extends ITrieNode { isForbidden?: ((word: string) => boolean) | undefined; - forbidPrefix: string; - compoundFix: string; - caseInsensitivePrefix: string; + readonly forbidPrefix: string; + readonly compoundFix: string; + readonly caseInsensitivePrefix: string; + + readonly hasForbiddenWords: boolean; + readonly hasCompoundWords: boolean; + readonly hasNonStrictWords: boolean; } diff --git a/packages/cspell-trie-lib/src/lib/ITrieNode/TrieInfo.ts b/packages/cspell-trie-lib/src/lib/ITrieNode/TrieInfo.ts index 38b5eff116a2..6f5330d96a31 100644 --- a/packages/cspell-trie-lib/src/lib/ITrieNode/TrieInfo.ts +++ b/packages/cspell-trie-lib/src/lib/ITrieNode/TrieInfo.ts @@ -6,4 +6,11 @@ export interface TrieInfo { forbiddenWordPrefix: string; isCaseAware: boolean; } + +export interface TrieCharacteristics { + hasForbiddenWords: boolean; + hasCompoundWords: boolean; + hasNonStrictWords: boolean; +} + export type PartialTrieInfo = PartialWithUndefined | undefined; diff --git a/packages/cspell-trie-lib/src/lib/ITrieNode/find.test.ts b/packages/cspell-trie-lib/src/lib/ITrieNode/find.test.ts index 33722dae6078..62a124cd0d86 100644 --- a/packages/cspell-trie-lib/src/lib/ITrieNode/find.test.ts +++ b/packages/cspell-trie-lib/src/lib/ITrieNode/find.test.ts @@ -27,12 +27,19 @@ describe('Validate findWord', () => { expect(findWord(trie, word, opts)).toEqual(expected); }); + const ncf = { checkForbidden: false }; + const tests: [string, PartialFindOptions, FindFullResult][] = [ [ 'errorCodes', - { matchCase: false, compoundMode: 'compound' }, + { matchCase: false, compoundMode: 'compound', checkForbidden: true }, frCompoundFound('errorCodes', { forbidden: false }), ], + [ + 'errorCodes', + { matchCase: false, compoundMode: 'compound', checkForbidden: false }, + frCompoundFound('errorCodes', { forbidden: undefined }), + ], [ 'errorcodes', { matchCase: false, compoundMode: 'compound' }, @@ -43,6 +50,12 @@ describe('Validate findWord', () 
=> { ['cafe', { matchCase: true, compoundMode: 'none' }, frNotFound({ forbidden: false })], ['café', { matchCase: true, compoundMode: 'none' }, frFound('café', { forbidden: false })], + // Do not check forbidden words. + ['Code', { matchCase: true, compoundMode: 'none', ...ncf }, frNotFound()], + ['code', { matchCase: true, compoundMode: 'none', ...ncf }, frFound('code')], + ['cafe', { matchCase: true, compoundMode: 'none', ...ncf }, frNotFound()], + ['café', { matchCase: true, compoundMode: 'none', ...ncf }, frFound('café')], + // non-normalized words ['café', { matchCase: false, compoundMode: 'none' }, frFound('café')], ['Café', { matchCase: false, compoundMode: 'none' }, frNotFound()], @@ -156,12 +169,8 @@ describe('Validate Legacy Compound lookup', () => { type PartialFindFullResult = Partial; -function fr({ - found = false, - forbidden = undefined, - compoundUsed = false, - caseMatched = true, -}: PartialFindFullResult): FindFullResult { +function fr(r: PartialFindFullResult): FindFullResult { + const { found = false, forbidden = undefined, compoundUsed = false, caseMatched = true } = r; return { found, forbidden, diff --git a/packages/cspell-trie-lib/src/lib/ITrieNode/find.ts b/packages/cspell-trie-lib/src/lib/ITrieNode/find.ts index e31f1d828e98..75c8d4c4554f 100644 --- a/packages/cspell-trie-lib/src/lib/ITrieNode/find.ts +++ b/packages/cspell-trie-lib/src/lib/ITrieNode/find.ts @@ -1,5 +1,4 @@ import { memorizeLastCall } from '../utils/memorizeLastCall.js'; -import { mergeDefaults } from '../utils/mergeDefaults.js'; import type { CompoundModes } from './CompoundModes.js'; import type { FindOptions, PartialFindOptions } from './FindOptions.js'; import type { FindFullNodeResult } from './FindTypes.js'; @@ -20,15 +19,8 @@ Object.freeze(_defaultFindOptions); const arrayCompoundModes: CompoundModes[] = ['none', 'compound', 'legacy']; const knownCompoundModes = new Map(arrayCompoundModes.map((a) => [a, a])); -/** - * - * @param root Trie root node. 
root.c contains the compound root and forbidden root. - * @param word A pre normalized word use `normalizeWord` or `normalizeWordToLowercase` - * @param options - */ -export function findWord(root: Root, word: string, options?: PartialFindOptions): FindFullResult { - return _findWord(root, word, options); -} +const notFound: FindFullResult = { found: false, compoundUsed: false, caseMatched: false, forbidden: undefined }; +Object.freeze(notFound); /** * @@ -46,13 +38,22 @@ export function findWordNode(root: Root, word: string, options?: PartialFindOpti * @param word A pre normalized word use `normalizeWord` or `normalizeWordToLowercase` * @param options */ -function _findWord(root: Root, word: string, options: PartialFindOptions): FindFullResult { +export function findWord(root: Root, word: string, options?: PartialFindOptions): FindFullResult { if (root.find) { const found = root.find(word, options?.matchCase || false); if (found) return found as FindFullResult; + if (!root.hasCompoundWords) { + return notFound; + } } - const { node: _, ...result } = _findWordNode(root, word, options); - return result; + // return { found: false, compoundUsed: false, caseMatched: false, forbidden: false }; + const v = _findWordNode(root, word, options); + return { + found: v.found, + compoundUsed: v.compoundUsed, + caseMatched: v.caseMatched, + forbidden: v.forbidden, + }; } /** @@ -67,6 +68,7 @@ function _findWordNode(root: Root, word: string, options: PartialFindOptions): F const compoundMode = knownCompoundModes.get(options?.compoundMode) || _defaultFindOptions.compoundMode; const compoundPrefix = compoundMode === 'compound' ? (trieInfo.compoundCharacter ?? root.compoundFix) : ''; const ignoreCasePrefix = matchCase ? '' : (trieInfo.stripCaseAndAccentsPrefix ?? root.caseInsensitivePrefix); + const checkForbidden = options?.checkForbidden ?? 
true; function __findCompound(): FindFullNodeResult { const f = findCompoundWord(root, word, compoundPrefix, ignoreCasePrefix); @@ -75,18 +77,18 @@ function _findWordNode(root: Root, word: string, options: PartialFindOptions): F // If case was ignored when searching for the word, then check the forbidden // in the ignore case forbidden list. const r = !f.caseMatched ? walk(root, root.caseInsensitivePrefix) : root; - result.forbidden = isForbiddenWord(r, word, root.forbidPrefix); + result.forbidden = checkForbidden ? isForbiddenWord(r, word, root.forbidPrefix) : undefined; } return result; } function __findExact(): FindFullNodeResult { - const n = walk(root, word); + const n = root.getNode ? root.getNode(word) : walk(root, word); const isFound = isEndOfWordNode(n); const result: FindFullNodeResult = { found: isFound && word, compoundUsed: false, - forbidden: isForbiddenWord(root, word, root.forbidPrefix), + forbidden: checkForbidden ? isForbiddenWord(root, word, root.forbidPrefix) : undefined, node: n, caseMatched: true, }; @@ -134,7 +136,8 @@ export function findCompoundNode( ]; const compoundPrefix = compoundCharacter || ignoreCasePrefix; const possibleCompoundPrefix = ignoreCasePrefix && compoundCharacter ? ignoreCasePrefix + compoundCharacter : ''; - const w = word.normalize(); + const nw = word.normalize(); + const w = [...nw]; function determineRoot(s: FindCompoundChain): FindCompoundChain { const prefix = s.compoundPrefix; @@ -161,7 +164,7 @@ export function findCompoundNode( const s = stack[i]; const h = w[i++]; const n = s.cr || s.n; - const c = n?.get(h); + const c = (h && n?.get(h)) || undefined; if (c && i < word.length) { // Go deeper. caseMatched = s.caseMatched; @@ -183,7 +186,7 @@ export function findCompoundNode( if (!r.cr) { break; } - if (!i && !r.caseMatched && w !== w.toLowerCase()) { + if (!i && !r.caseMatched && nw !== nw.toLowerCase()) { // It is not going to be found. 
break; } @@ -197,7 +200,7 @@ export function findCompoundNode( } } - const found = (i && i === word.length && word) || false; + const found = (i === word.length && word) || false; const result: FindFullNodeResult = { found, compoundUsed, node, forbidden: undefined, caseMatched }; return result; } @@ -355,7 +358,13 @@ export const createFindOptions = memorizeLastCall(_createFindOptions); function _createFindOptions(options: PartialFindOptions | undefined): FindOptions { if (!options) return _defaultFindOptions; - return mergeDefaults(options, _defaultFindOptions); + const d = _defaultFindOptions; + return { + matchCase: options.matchCase ?? d.matchCase, + compoundMode: options.compoundMode ?? d.compoundMode, + legacyMinCompoundLength: options.legacyMinCompoundLength ?? d.legacyMinCompoundLength, + checkForbidden: options.checkForbidden ?? d.checkForbidden, + }; } export const __testing__ = { diff --git a/packages/cspell-trie-lib/src/lib/ITrieNode/trie-util.ts b/packages/cspell-trie-lib/src/lib/ITrieNode/trie-util.ts index 5cb251a0fac3..bc2f99ed4979 100644 --- a/packages/cspell-trie-lib/src/lib/ITrieNode/trie-util.ts +++ b/packages/cspell-trie-lib/src/lib/ITrieNode/trie-util.ts @@ -42,8 +42,8 @@ export function countNodes(root: ITrieNode): number { function walk(n: ITrieNode) { if (seen.has(n.id)) return; seen.add(n.id); - for (let i = 0; i < n.size; ++i) { - walk(n.child(i)); + for (const c of n.values()) { + walk(c); } } @@ -64,9 +64,8 @@ export function countWords(root: ITrieNode): number { // add the node to the set to avoid getting stuck on circular references. 
visited.set(n, cnt); - const size = n.size; - for (let i = 0; i < size; ++i) { - cnt += walk(n.child(i)); + for (const c of n.values()) { + cnt += walk(c); } visited.set(n, cnt); return cnt; diff --git a/packages/cspell-trie-lib/src/lib/ITrieNode/walker/hintedWalker.ts b/packages/cspell-trie-lib/src/lib/ITrieNode/walker/hintedWalker.ts index 23660c9ed9ba..4b69646a9240 100644 --- a/packages/cspell-trie-lib/src/lib/ITrieNode/walker/hintedWalker.ts +++ b/packages/cspell-trie-lib/src/lib/ITrieNode/walker/hintedWalker.ts @@ -86,8 +86,8 @@ function* hintedWalkerNext( // We don't want to suggest the compound character. hints.add(compoundCharacter); // Then yield everything else. - yield* n - .entries() + const entries = n.entries(); + yield* (Array.isArray(entries) ? entries : [...entries]) .filter((a) => !hints.has(a[0])) .map(([letter, node]) => ({ letter, @@ -154,8 +154,8 @@ class ITrieNodeFiltered implements ITrieNode { readonly id: ITrieNodeId; readonly eow: boolean; readonly size: number; - private filtered: (readonly [string, number])[]; - private keyMap: Map; + private filtered: (readonly [string, ITrieNode])[]; + private keyMap: Map; constructor( private srcNode: ITrieNode, @@ -163,10 +163,10 @@ class ITrieNodeFiltered implements ITrieNode { ) { this.id = srcNode.id; this.eow = srcNode.eow; - const keys = srcNode.keys(); - this.filtered = keys - .map((key, idx) => [key, idx] as const) - .filter(([key, idx]) => predicate(key, idx, srcNode)); + const entries = srcNode.entries(); + this.filtered = (Array.isArray(entries) ? 
entries : [...entries]).filter(([key], idx) => + predicate(key, idx, srcNode), + ); this.keyMap = new Map(this.filtered); this.size = this.keyMap.size; } @@ -176,16 +176,11 @@ class ITrieNodeFiltered implements ITrieNode { } values(): readonly ITrieNode[] { - return this.filtered.map(([_, idx]) => this.srcNode.child(idx)); - } - - child(idx: number): ITrieNode { - const [_, srcIdx] = this.filtered[idx]; - return this.srcNode.child(srcIdx); + return this.filtered.map(([, node]) => node); } entries(): readonly (readonly [string, ITrieNode])[] { - return this.filtered.map(([key, idx]) => [key, this.srcNode.child(idx)] as const); + return this.filtered; } has(char: string): boolean { @@ -197,8 +192,8 @@ class ITrieNodeFiltered implements ITrieNode { } get(char: string): ITrieNode | undefined { - const idx = this.keyMap.get(char); - if (idx === undefined) return undefined; - return this.srcNode.child(idx); + const node = this.keyMap.get(char); + if (node === undefined) return undefined; + return node; } } diff --git a/packages/cspell-trie-lib/src/lib/ITrieNode/walker/walker.ts b/packages/cspell-trie-lib/src/lib/ITrieNode/walker/walker.ts index 6e8e4ed413c6..ba976e22a3f1 100644 --- a/packages/cspell-trie-lib/src/lib/ITrieNode/walker/walker.ts +++ b/packages/cspell-trie-lib/src/lib/ITrieNode/walker/walker.ts @@ -19,7 +19,8 @@ function* compoundWalker(root: ITrieNode, compoundingMethod: CompoundWordsMethod function children(n: ITrieNode): Children { if (n.hasChildren()) { - const c = n.keys().map((k, i) => [k, n.child(i)] as const); + const entries = n.entries(); + const c = Array.isArray(entries) ? entries : [...entries]; return n.eow && rc ? 
[...c, ...rc] : c; } if (n.eow) { @@ -54,25 +55,26 @@ function* compoundWalker(root: ITrieNode, compoundingMethod: CompoundWordsMethod * next(goDeeper: boolean): */ function* nodeWalker(root: ITrieNode): WalkerIterator { - type Children = Readonly>; + type Children = Readonly>; let depth = 0; const stack: { t: string; n: ITrieNode; c: Children; ci: number }[] = []; - stack[depth] = { t: '', n: root, c: root.keys(), ci: 0 }; + const entries = root.entries(); + stack[depth] = { t: '', n: root, c: Array.isArray(entries) ? entries : [...entries], ci: 0 }; while (depth >= 0) { let s = stack[depth]; let baseText = s.t; while (s.ci < s.c.length && s.n) { const idx = s.ci++; - const char = s.c[idx]; - const node = s.n.child(idx); + const [char, node] = s.c[idx]; const text = baseText + char; const goDeeper = yield { text, node, depth }; if (goDeeper !== false) { depth++; baseText = text; const s = stack[depth]; - const c = node.keys(); + const entries = node.entries(); + const c = Array.isArray(entries) ? entries : [...entries]; if (s) { s.t = text; s.n = node; @@ -103,7 +105,7 @@ export function walkerWords(root: ITrieNode): Iterable { * Walks the Trie and yields each word. */ export function* walkerWordsITrie(root: ITrieNode): Iterable { - type Children = readonly string[]; + type Children = readonly [string, ITrieNode][]; interface Stack { t: string; n: ITrieNode; @@ -113,19 +115,21 @@ export function* walkerWordsITrie(root: ITrieNode): Iterable { let depth = 0; const stack: Stack[] = []; - stack[depth] = { t: '', n: root, c: root.keys(), ci: 0 }; + const entries = root.entries(); + const c = Array.isArray(entries) ? 
entries : [...entries]; + stack[depth] = { t: '', n: root, c, ci: 0 }; while (depth >= 0) { let s = stack[depth]; let baseText = s.t; while (s.ci < s.c.length && s.n) { - const char = s.c[s.ci++]; - const node = s.n.get(char); + const [char, node] = s.c[s.ci++]; if (!node) continue; const text = baseText + char; if (node.eow) yield text; depth++; baseText = text; - const c = node.keys(); + const entries = node.entries(); + const c = Array.isArray(entries) ? entries : [...entries]; if (stack[depth]) { s = stack[depth]; s.t = text; diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts index c832cc5c8907..d9452e6cbb69 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts @@ -1,11 +1,15 @@ import type { ITrieNode, ITrieNodeRoot } from '../ITrieNode/ITrieNode.js'; import { findNode } from '../ITrieNode/trie-util.js'; -import type { PartialTrieInfo, TrieInfo } from '../ITrieNode/TrieInfo.js'; +import type { TrieInfo } from '../ITrieNode/TrieInfo.js'; import type { TrieData } from '../TrieData.js'; -import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js'; import { CharIndex, Utf8Seq } from './CharIndex.js'; import { extractInfo, type FastTrieBlobBitMaskInfo } from './FastTrieBlobBitMaskInfo.js'; -import { assertSorted, FastTrieBlobInternals, sortNodes } from './FastTrieBlobInternals.js'; +import { + assertSorted, + FastTrieBlobInternals, + FastTrieBlobInternalsAndMethods, + sortNodes, +} from './FastTrieBlobInternals.js'; import { FastTrieBlobIRoot } from './FastTrieBlobIRoot.js'; import { TrieBlob } from './TrieBlob.js'; import { Utf8Accumulator } from './Utf8.js'; @@ -16,24 +20,29 @@ const checkSorted = false; export class FastTrieBlob implements TrieData { private _readonly = false; - private _forbidIdx: number; - private _caseInsensitiveIdx: number; + #forbidIdx: number; + #compoundIdx: 
number; + #nonStrictIdx: number; private _iTrieRoot: ITrieNodeRoot | undefined; wordToCharacters: (word: string) => readonly string[]; - // private nodes8: Uint8Array[]; - - readonly info: Readonly; + readonly hasForbiddenWords: boolean; + readonly hasCompoundWords: boolean; + readonly hasNonStrictWords: boolean; private constructor( private nodes: FastTrieBlobNode[], private _charIndex: CharIndex, readonly bitMasksInfo: FastTrieBlobBitMaskInfo, - options?: PartialTrieInfo, + readonly info: Readonly, ) { - this.info = mergeOptionalWithDefaults(options); this.wordToCharacters = (word: string) => [...word]; - this._forbidIdx = this._searchNodeForChar(0, this.info.forbiddenWordPrefix); - this._caseInsensitiveIdx = this._searchNodeForChar(0, this.info.stripCaseAndAccentsPrefix); + this.#forbidIdx = this.#searchNodeForChar(0, this.info.forbiddenWordPrefix) || 0; + this.#compoundIdx = this.#searchNodeForChar(0, this.info.compoundCharacter) || 0; + this.#nonStrictIdx = this.#searchNodeForChar(0, this.info.stripCaseAndAccentsPrefix) || 0; + + this.hasForbiddenWords = !!this.#forbidIdx; + this.hasCompoundWords = !!this.#compoundIdx; + this.hasNonStrictWords = !!this.#nonStrictIdx; if (checkSorted) { assertSorted(this.nodes, bitMasksInfo.NodeMaskChildCharIndex); @@ -49,31 +58,43 @@ export class FastTrieBlob implements TrieData { } has(word: string): boolean { - return this._has(0, word); + return this.#has(0, word); } hasCaseInsensitive(word: string): boolean { - if (!this._caseInsensitiveIdx) return false; - return this._has(this._caseInsensitiveIdx, word); + if (!this.#nonStrictIdx) return false; + return this.#has(this.#nonStrictIdx, word); } - private _has(nodeIdx: number, word: string): boolean { + #has(nodeIdx: number, word: string): boolean { return this.#hasSorted(nodeIdx, word); } #hasSorted(nodeIdx: number, word: string): boolean { + const charIndexes = this.wordToUtf8Seq(word); + const found = this.#lookupNode(nodeIdx, charIndexes); + if (found === undefined) 
return false; + const node = this.nodes[found]; + return !!(node[0] & this.bitMasksInfo.NodeMaskEOW); + } + + /** + * Find the node index for the given Utf8 character sequence. + * @param nodeIdx - node index to start the search + * @param seq - the byte sequence of the character to look for + * @returns + */ + #lookupNode(nodeIdx: number, seq: readonly number[] | Readonly): number | undefined { const NodeMaskChildCharIndex = this.bitMasksInfo.NodeMaskChildCharIndex; const NodeChildRefShift = this.bitMasksInfo.NodeChildRefShift; - const NodeMaskEOW = this.bitMasksInfo.NodeMaskEOW; const nodes = this.nodes; - const charIndexes = this.wordToUtf8Seq(word); - const len = charIndexes.length; + const len = seq.length; let node = nodes[nodeIdx]; for (let p = 0; p < len; ++p, node = nodes[nodeIdx]) { - const letterIdx = charIndexes[p]; + const letterIdx = seq[p]; const count = node.length; // console.error('%o', { p, letterIdx, ...this.nodeInfo(nodeIdx) }); - if (count < 2) return false; + if (count < 2) return undefined; let i = 1; let j = count - 1; let c: number = -1; @@ -86,12 +107,12 @@ export class FastTrieBlob implements TrieData { j = m; } } - if (i >= count || (node[i] & NodeMaskChildCharIndex) !== letterIdx) return false; + if (i >= count || (node[i] & NodeMaskChildCharIndex) !== letterIdx) return undefined; nodeIdx = node[i] >>> NodeChildRefShift; - if (!nodeIdx) return false; + if (!nodeIdx) return undefined; } - return !!(node[0] & NodeMaskEOW); + return nodeIdx; } *words(): Iterable { @@ -190,19 +211,23 @@ export class FastTrieBlob implements TrieData { }; } - static create(data: FastTrieBlobInternals, options?: PartialTrieInfo) { - return new FastTrieBlob(data.nodes, data.charIndex, extractInfo(data), options); + static create(data: FastTrieBlobInternals) { + return new FastTrieBlob(data.nodes, data.charIndex, extractInfo(data), data.info); } static toITrieNodeRoot(trie: FastTrieBlob): ITrieNodeRoot { return new FastTrieBlobIRoot( - new 
FastTrieBlobInternals(trie.nodes, trie._charIndex, trie.bitMasksInfo), + new FastTrieBlobInternalsAndMethods(trie.nodes, trie._charIndex, trie.bitMasksInfo, trie.info, { + nodeFindNode: (idx: number, word: string) => trie.#lookupNode(idx, trie.wordToUtf8Seq(word)), + nodeFindExact: (idx: number, word: string) => trie.#has(idx, word), + nodeGetChild: (idx: number, letter: string) => trie.#searchNodeForChar(idx, letter), + isForbidden: (word: string) => trie.isForbiddenWord(word), + findExact: (word: string) => trie.has(word), + hasForbiddenWords: trie.hasForbiddenWords, + hasCompoundWords: trie.hasCompoundWords, + hasNonStrictWords: trie.hasNonStrictWords, + }), 0, - trie.info, - (word: string) => trie.has(word), - (word: string) => trie.isForbiddenWord(word), - (word: string) => trie.hasCaseInsensitive(word), - (idx: number, word: string) => trie._has(idx, word), ); } @@ -229,11 +254,7 @@ export class FastTrieBlob implements TrieData { } isForbiddenWord(word: string): boolean { - return !!this._forbidIdx && this._has(this._forbidIdx, word); - } - - hasForbiddenWords(): boolean { - return !!this._forbidIdx; + return !!this.#forbidIdx && this.#has(this.#forbidIdx, word); } nodeInfo(nodeIndex: number, accumulator?: Utf8Accumulator): TrieBlobNodeInfo { @@ -259,31 +280,10 @@ export class FastTrieBlob implements TrieData { return this.nodes.length; } - private _lookupCharIndexNode(nodeIdx: number, charIndex: number): number { - const NodeMaskChildCharIndex = this.bitMasksInfo.NodeMaskChildCharIndex; - const NodeChildRefShift = this.bitMasksInfo.NodeChildRefShift; - const nodes = this.nodes; - const node = nodes[nodeIdx]; - const letterIdx = charIndex; - const count = node.length; - let i = count - 1; - for (; i > 0; --i) { - if ((node[i] & NodeMaskChildCharIndex) === letterIdx) { - return node[i] >>> NodeChildRefShift; - } - } - return 0; - } - /** Search from nodeIdx for the node index representing the character. 
*/ - private _searchNodeForChar(nodeIdx: number, char: string): number { + #searchNodeForChar(nodeIdx: number, char: string): number | undefined { const charIndexes = this.letterToUtf8Seq(char); - let idx = nodeIdx; - for (let i = 0; i < charIndexes.length; ++i) { - idx = this._lookupCharIndexNode(idx, charIndexes[i]); - if (!idx) return 0; - } - return idx; + return this.#lookupNode(nodeIdx, charIndexes); } get charIndex(): readonly string[] { diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobBuilder.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobBuilder.ts index 8a09d596addc..858c40305eb7 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobBuilder.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobBuilder.ts @@ -285,8 +285,8 @@ export class FastTrieBlobBuilder implements TrieBuilder { ), this.charIndex.build(), this.bitMasksInfo, + this.options, ), - this.options, ); } diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobIRoot.test.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobIRoot.test.ts index cc024a1ff74e..f2414a8f2570 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobIRoot.test.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobIRoot.test.ts @@ -36,12 +36,9 @@ describe('FastTrieBlob', () => { const root = createTrieRootFromList(words); const ft = FastTrieBlobBuilder.fromTrieRoot(root); const iTrieRoot = FastTrieBlob.toITrieNodeRoot(ft); - const keys = iTrieRoot.keys(); - const values = iTrieRoot.values(); + const keys = [...iTrieRoot.keys()]; + const values = [...iTrieRoot.values()]; expect(values.length).toBe(keys.length); - const valueIds = values.map((v) => v.id); - const idsFromLookUp = keys.map((_, i) => iTrieRoot.child(i).id); - expect(valueIds).toEqual(idsFromLookUp); }); test('extended number of letters', () => { diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobIRoot.ts 
b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobIRoot.ts index 5eacce891240..1d1285fe014a 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobIRoot.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobIRoot.ts @@ -1,6 +1,5 @@ import type { FindResult, ITrieNode, ITrieNodeId, ITrieNodeRoot } from '../ITrieNode/ITrieNode.js'; -import type { TrieInfo } from '../ITrieNode/TrieInfo.js'; -import type { FastTrieBlobInternals } from './FastTrieBlobInternals.js'; +import type { FastTrieBlobInternalsAndMethods } from './FastTrieBlobInternals.js'; import { Utf8Accumulator } from './Utf8.js'; const EmptyKeys: readonly string[] = Object.freeze([]); @@ -24,15 +23,15 @@ class FastTrieBlobINode implements ITrieNode { protected charToIdx: Readonly> | undefined; constructor( - readonly trie: FastTrieBlobInternals, + readonly trie: FastTrieBlobInternalsAndMethods, readonly nodeIdx: NodeIndex, - protected nodeHas: (idx: number, word: string) => boolean, ) { const node = trie.nodes[nodeIdx]; this.node = node; this.eow = !!(node[0] & trie.NodeMaskEOW); this._count = node.length - 1; this.id = nodeIdx; + this.findExact = (word: string) => trie.nodeFindExact(nodeIdx, word); } /** get keys to children */ @@ -54,19 +53,25 @@ class FastTrieBlobINode implements ITrieNode { if (this._entries) return this._entries; if (!this._count) return EmptyEntries; const entries = this.getNodesEntries(); - this._entries = entries.map(([key, value]) => [key, new FastTrieBlobINode(this.trie, value, this.nodeHas)]); + this._entries = entries.map(([key, value]) => [key, new FastTrieBlobINode(this.trie, value)]); return this._entries; } /** get child ITrieNode */ get(char: string): ITrieNode | undefined { - const idx = this.getCharToIdxMap()[char]; + const idx = this.trie.nodeGetChild(this.id, char); if (idx === undefined) return undefined; - return this.child(idx); + return new FastTrieBlobINode(this.trie, idx); + } + + getNode(chars: string): ITrieNode | undefined { + const 
idx = this.trie.nodeFindNode(this.id, chars); + if (idx === undefined) return undefined; + return new FastTrieBlobINode(this.trie, idx); } has(char: string): boolean { - const idx = this.getCharToIdxMap()[char]; + const idx = this.trie.nodeGetChild(this.id, char); return idx !== undefined; } @@ -78,7 +83,7 @@ class FastTrieBlobINode implements ITrieNode { if (!this._values && !this.containsChainedIndexes()) { const n = this.node[keyIdx + 1]; const nodeIdx = n >>> this.trie.NodeChildRefShift; - return new FastTrieBlobINode(this.trie, nodeIdx, this.nodeHas); + return new FastTrieBlobINode(this.trie, nodeIdx); } return this.values()[keyIdx]; } @@ -96,7 +101,19 @@ class FastTrieBlobINode implements ITrieNode { } findExact(word: string): boolean { - return this.nodeHas(this.id, word); + return this.trie.nodeFindExact(this.id, word); + } + + isForbidden(word: string): boolean { + const n = this.trie.nodeGetChild(this.id, this.trie.info.forbiddenWordPrefix); + if (n === undefined) return false; + return this.trie.nodeFindExact(n, word); + } + + findCaseInsensitive(word: string): boolean { + const n = this.trie.nodeGetChild(this.id, this.trie.info.stripCaseAndAccentsPrefix); + if (n === undefined) return false; + return this.trie.nodeFindExact(n, word); } private containsChainedIndexes(): boolean { @@ -204,19 +221,18 @@ class FastTrieBlobINode implements ITrieNode { } export class FastTrieBlobIRoot extends FastTrieBlobINode implements ITrieNodeRoot { - constructor( - trie: FastTrieBlobInternals, - nodeIdx: number, - readonly info: Readonly, - readonly findExact: (word: string) => boolean, - readonly isForbidden: (word: string) => boolean, - readonly findCaseInsensitive: (word: string) => boolean, - nodeHas: (idx: number, word: string) => boolean, - ) { - super(trie, nodeIdx, nodeHas); + readonly hasForbiddenWords: boolean; + readonly hasCompoundWords: boolean; + readonly hasNonStrictWords: boolean; + + constructor(trie: FastTrieBlobInternalsAndMethods, nodeIdx: number) { + 
super(trie, nodeIdx); + this.hasForbiddenWords = trie.hasForbiddenWords; + this.hasCompoundWords = trie.hasCompoundWords; + this.hasNonStrictWords = trie.hasNonStrictWords; } resolveId(id: ITrieNodeId): ITrieNode { - return new FastTrieBlobINode(this.trie, id as number, this.nodeHas); + return new FastTrieBlobINode(this.trie, id as number); } find(word: string, strict: boolean): FindResult | undefined { @@ -229,15 +245,19 @@ export class FastTrieBlobIRoot extends FastTrieBlobINode implements ITrieNodeRoo return found ? { found: word, compoundUsed: false, caseMatched: false } : undefined; } + get info() { + return this.trie.info; + } + get forbidPrefix(): string { - return this.info.forbiddenWordPrefix; + return this.trie.info.forbiddenWordPrefix; } get compoundFix(): string { - return this.info.compoundCharacter; + return this.trie.info.compoundCharacter; } get caseInsensitivePrefix(): string { - return this.info.stripCaseAndAccentsPrefix; + return this.trie.info.stripCaseAndAccentsPrefix; } } diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobInternals.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobInternals.ts index 8e49fd2803d6..20b2ecc07ff8 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobInternals.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobInternals.ts @@ -1,3 +1,5 @@ +import { PartialTrieInfo, TrieCharacteristics, TrieInfo } from '../ITrieNode/TrieInfo.js'; +import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js'; import { CharIndex } from './CharIndex.js'; import type { FastTrieBlobBitMaskInfo } from './FastTrieBlobBitMaskInfo.js'; @@ -9,18 +11,58 @@ export class FastTrieBlobInternals implements FastTrieBlobBitMaskInfo { readonly NodeMaskChildCharIndex: number; readonly NodeChildRefShift: number; readonly isIndexDecoderNeeded: boolean; - readonly sorted = true; + readonly info: Readonly; constructor( readonly nodes: Nodes, readonly charIndex: CharIndex, maskInfo: 
FastTrieBlobBitMaskInfo, + info: Readonly, ) { const { NodeMaskEOW, NodeMaskChildCharIndex, NodeChildRefShift } = maskInfo; this.NodeMaskEOW = NodeMaskEOW; this.NodeMaskChildCharIndex = NodeMaskChildCharIndex; this.NodeChildRefShift = NodeChildRefShift; this.isIndexDecoderNeeded = charIndex.indexContainsMultiByteChars(); + + this.info = mergeOptionalWithDefaults(info); + } +} + +interface TrieMethods extends Readonly { + readonly nodeFindNode: (idx: number, word: string) => number | undefined; + readonly nodeFindExact: (idx: number, word: string) => boolean; + readonly nodeGetChild: (idx: number, letter: string) => number | undefined; + readonly isForbidden: (word: string) => boolean; + readonly findExact: (word: string) => boolean; +} + +export class FastTrieBlobInternalsAndMethods extends FastTrieBlobInternals implements TrieMethods { + readonly nodeFindNode: (idx: number, word: string) => number | undefined; + readonly nodeFindExact: (idx: number, word: string) => boolean; + readonly nodeGetChild: (idx: number, letter: string) => number | undefined; + readonly isForbidden: (word: string) => boolean; + readonly findExact: (word: string) => boolean; + readonly hasForbiddenWords: boolean; + readonly hasCompoundWords: boolean; + readonly hasNonStrictWords: boolean; + + constructor( + nodes: Nodes, + charIndex: CharIndex, + maskInfo: FastTrieBlobBitMaskInfo, + info: PartialTrieInfo, + trieMethods: Readonly, + ) { + super(nodes, charIndex, maskInfo, info); + this.nodeFindExact = trieMethods.nodeFindExact; + this.nodeGetChild = trieMethods.nodeGetChild; + this.isForbidden = trieMethods.isForbidden; + this.findExact = trieMethods.findExact; + this.nodeFindNode = trieMethods.nodeFindNode; + this.hasForbiddenWords = trieMethods.hasForbiddenWords; + this.hasCompoundWords = trieMethods.hasCompoundWords; + this.hasNonStrictWords = trieMethods.hasNonStrictWords; } } diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts 
b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts index 9bf3c9a478c6..7268cd406877 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts @@ -53,6 +53,9 @@ export class TrieBlob implements TrieData { #beAdj = endianness() === 'BE' ? 3 : 0; readonly wordToCharacters = (word: string) => [...word]; + readonly hasForbiddenWords: boolean; + readonly hasCompoundWords: boolean; + readonly hasNonStrictWords: boolean; constructor( protected nodes: Uint32Array, @@ -66,6 +69,9 @@ export class TrieBlob implements TrieData { this.#forbidIdx = this._lookupNode(0, this.info.forbiddenWordPrefix); this.#compoundIdx = this._lookupNode(0, this.info.compoundCharacter); this.#nonStrictIdx = this._lookupNode(0, this.info.stripCaseAndAccentsPrefix); + this.hasForbiddenWords = !!this.#forbidIdx; + this.hasCompoundWords = !!this.#compoundIdx; + this.hasNonStrictWords = !!this.#nonStrictIdx; } public wordToUtf8Seq(word: string): Utf8Seq { @@ -84,18 +90,6 @@ export class TrieBlob implements TrieData { return !!this.#forbidIdx && this.#hasWord(this.#forbidIdx, word); } - hasForbiddenWords(): boolean { - return !!this.#forbidIdx; - } - - hasCompoundWords(): boolean { - return !!this.#compoundIdx; - } - - hasNonStrictWords(): boolean { - return !!this.#nonStrictIdx; - } - /** * Try to find the word in the trie. The word must be normalized. * If `strict` is `true` the case and accents must match. @@ -105,7 +99,7 @@ export class TrieBlob implements TrieData { * @param strict - if `true` the case and accents must match. 
*/ find(word: string, strict: boolean): FindResult | undefined { - if (!this.hasCompoundWords()) { + if (!this.hasCompoundWords) { const found = this.#hasWord(0, word); if (found) return { found: word, compoundUsed: false, caseMatched: true }; if (strict || !this.#nonStrictIdx) return { found: false, compoundUsed: false, caseMatched: false }; @@ -132,8 +126,12 @@ export class TrieBlob implements TrieData { { nodeFindExact: (idx, word) => this.#hasWord(idx, word), nodeGetChild: (idx, letter) => this._lookupNode(idx, letter), + nodeFindNode: (idx, word) => this.#findNode(idx, word), isForbidden: (word) => this.isForbiddenWord(word), findExact: (word) => this.has(word), + hasCompoundWords: this.hasCompoundWords, + hasForbiddenWords: this.hasForbiddenWords, + hasNonStrictWords: this.hasNonStrictWords, }, ); return new TrieBlobIRoot(trieData, 0, this.info, { @@ -149,11 +147,16 @@ export class TrieBlob implements TrieData { * Check if the word is in the trie starting at the given node index. */ #hasWord(nodeIdx: number, word: string): boolean { - const wordIndexes = this.wordToUtf8Seq(word); - const nodeIdxFound = this.#lookupNode(nodeIdx, wordIndexes); - if (nodeIdxFound === undefined) return false; + const nodeIdxFound = this.#findNode(nodeIdx, word); + if (!nodeIdxFound) return false; const node = this.nodes[nodeIdxFound]; - return (node & TrieBlob.NodeMaskEOW) === TrieBlob.NodeMaskEOW; + const m = TrieBlob.NodeMaskEOW; + return (node & m) === m; + } + + #findNode(nodeIdx: number, word: string): number | undefined { + const wordIndexes = this.wordToUtf8Seq(word); + return this.#lookupNode(nodeIdx, wordIndexes); } /** @@ -386,8 +389,8 @@ export class TrieBlob implements TrieData { // } // } - static NodeMaskEOW = 0x0000_0100; - static NodeMaskNumChildren = (1 << NodeHeaderNumChildrenBits) - 1; + static NodeMaskEOW = 0x0000_0100 & 0xffff; + static NodeMaskNumChildren = ((1 << NodeHeaderNumChildrenBits) - 1) & 0xffff; static NodeMaskNumChildrenShift = 
NodeHeaderNumChildrenShift; static NodeChildRefShift = 8; /** diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobIRoot.test.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobIRoot.test.ts index d1ae2ed52ed4..f8ae57c96979 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobIRoot.test.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobIRoot.test.ts @@ -31,12 +31,9 @@ describe('FastTrieBlob', () => { test('toITrieNodeRoot.values', () => { const trie = createTrieBlob(words); const iTrieRoot = trie.getRoot(); - const keys = iTrieRoot.keys(); - const values = iTrieRoot.values(); + const keys = [...iTrieRoot.keys()]; + const values = [...iTrieRoot.values()]; expect(values.length).toBe(keys.length); - const valueIds = values.map((v) => v.id); - const idsFromLookUp = keys.map((_, i) => iTrieRoot.child(i).id); - expect(valueIds).toEqual(idsFromLookUp); }); test('toITrieNodeRoot with large number of characters', async () => { diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobIRoot.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobIRoot.ts index 09fd0dfb16c1..6284c37b4775 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobIRoot.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobIRoot.ts @@ -1,5 +1,5 @@ import type { ITrieNode, ITrieNodeId, ITrieNodeRoot } from '../ITrieNode/ITrieNode.js'; -import type { TrieInfo } from '../ITrieNode/TrieInfo.js'; +import type { TrieCharacteristics, TrieInfo } from '../ITrieNode/TrieInfo.js'; import { CharIndex } from './CharIndex.js'; import { Utf8Accumulator } from './Utf8.js'; @@ -13,14 +13,15 @@ interface BitMaskInfo { type Node = number; type NodeIndex = number; -interface TrieMethods { +interface TrieMethods extends Readonly { + readonly nodeFindNode: (idx: number, word: string) => number | undefined; readonly nodeFindExact: (idx: number, word: string) => boolean; readonly nodeGetChild: (idx: number, letter: string) => number | undefined; readonly isForbidden: 
(word: string) => boolean; readonly findExact: (word: string) => boolean; } -export class TrieBlobInternals implements BitMaskInfo { +export class TrieBlobInternals implements TrieMethods, BitMaskInfo { readonly NodeMaskEOW: number; readonly NodeMaskNumChildren: number; readonly NodeMaskChildCharIndex: number; @@ -30,6 +31,11 @@ export class TrieBlobInternals implements BitMaskInfo { readonly isForbidden: (word: string) => boolean; readonly findExact: (word: string) => boolean; readonly nodeGetChild: (idx: number, letter: string) => number | undefined; + readonly nodeFindNode: (idx: number, word: string) => number | undefined; + + readonly hasForbiddenWords: boolean; + readonly hasCompoundWords: boolean; + readonly hasNonStrictWords: boolean; constructor( readonly nodes: Uint32Array, @@ -47,6 +53,10 @@ export class TrieBlobInternals implements BitMaskInfo { this.isForbidden = methods.isForbidden; this.findExact = methods.findExact; this.nodeGetChild = methods.nodeGetChild; + this.nodeFindNode = methods.nodeFindNode; + this.hasForbiddenWords = methods.hasForbiddenWords; + this.hasCompoundWords = methods.hasCompoundWords; + this.hasNonStrictWords = methods.hasNonStrictWords; } } @@ -152,6 +162,11 @@ class TrieBlobINode implements ITrieNode { return map; } + getNode(word: string): ITrieNode | undefined { + const n = this.trie.nodeFindNode(this.nodeIdx, word); + return n === undefined ? 
undefined : new TrieBlobINode(this.trie, n); + } + findExact(word: string): boolean { return this.trie.nodeFindExact(this.nodeIdx, word); } @@ -264,6 +279,10 @@ export class TrieBlobIRoot extends TrieBlobINode implements ITrieNodeRoot { find: ITrieNodeRoot['find']; isForbidden: ITrieNodeRoot['isForbidden']; + readonly hasForbiddenWords: boolean; + readonly hasCompoundWords: boolean; + readonly hasNonStrictWords: boolean; + constructor( trie: TrieBlobInternals, nodeIdx: number, @@ -273,6 +292,9 @@ export class TrieBlobIRoot extends TrieBlobINode implements ITrieNodeRoot { super(trie, nodeIdx); this.find = methods.find; this.isForbidden = trie.isForbidden; + this.hasForbiddenWords = trie.hasForbiddenWords; + this.hasCompoundWords = trie.hasCompoundWords; + this.hasNonStrictWords = trie.hasNonStrictWords; } resolveId(id: ITrieNodeId): ITrieNode { return new TrieBlobINode(this.trie, id as number); diff --git a/packages/cspell-trie-lib/src/lib/TrieData.ts b/packages/cspell-trie-lib/src/lib/TrieData.ts index 77034cab6553..47e6ce0e4adf 100644 --- a/packages/cspell-trie-lib/src/lib/TrieData.ts +++ b/packages/cspell-trie-lib/src/lib/TrieData.ts @@ -1,8 +1,8 @@ import type { ITrieNode, ITrieNodeRoot } from './ITrieNode/ITrieNode.js'; -import type { TrieInfo } from './ITrieNode/TrieInfo.js'; +import type { TrieCharacteristics, TrieInfo } from './ITrieNode/TrieInfo.js'; -export interface TrieData { - info: Readonly; +export interface TrieData extends Readonly { + readonly info: Readonly; /** Method used to split words into individual characters. */ wordToCharacters(word: string): readonly string[]; /** get an iterable for all the words in the dictionary. 
*/ @@ -11,6 +11,8 @@ export interface TrieData { getNode(prefix: string): ITrieNode | undefined; has(word: string): boolean; isForbiddenWord(word: string): boolean; - hasForbiddenWords(): boolean; - size: number; + readonly hasForbiddenWords: boolean; + readonly hasCompoundWords: boolean; + readonly hasNonStrictWords: boolean; + readonly size: number; } diff --git a/packages/cspell-trie-lib/src/lib/TrieNode/TrieNodeTrie.ts b/packages/cspell-trie-lib/src/lib/TrieNode/TrieNodeTrie.ts index 9a2743545a47..07182c8a1259 100644 --- a/packages/cspell-trie-lib/src/lib/TrieNode/TrieNodeTrie.ts +++ b/packages/cspell-trie-lib/src/lib/TrieNode/TrieNodeTrie.ts @@ -13,8 +13,15 @@ export class TrieNodeTrie implements TrieData { private _iTrieRoot: ITrieNodeRoot | undefined; readonly info: TrieOptions; private _size: number | undefined; + readonly hasForbiddenWords: boolean; + readonly hasCompoundWords: boolean; + readonly hasNonStrictWords: boolean; + constructor(readonly root: TrieRoot) { this.info = mergeOptionalWithDefaults(root); + this.hasForbiddenWords = !!root.c[root.forbiddenWordPrefix]; + this.hasCompoundWords = !!root.c[root.compoundCharacter]; + this.hasNonStrictWords = !!root.c[root.stripCaseAndAccentsPrefix]; } wordToCharacters = (word: string): string[] => [...word]; @@ -43,11 +50,6 @@ export class TrieNodeTrie implements TrieData { return findWordExact(this.root.c[this.root.forbiddenWordPrefix], word); } - hasForbiddenWords(): boolean { - const root = this.root; - return !!root.c[root.forbiddenWordPrefix]; - } - get size() { return (this._size ??= countNodes(this.root)); } diff --git a/packages/cspell-trie-lib/src/lib/TrieNode/find.ts b/packages/cspell-trie-lib/src/lib/TrieNode/find.ts index 29276a8ba8a7..0b9562c93e18 100644 --- a/packages/cspell-trie-lib/src/lib/TrieNode/find.ts +++ b/packages/cspell-trie-lib/src/lib/TrieNode/find.ts @@ -179,7 +179,8 @@ export function findCompoundNode( ]; const compoundPrefix = compoundCharacter || ignoreCasePrefix; const 
possibleCompoundPrefix = ignoreCasePrefix && compoundCharacter ? ignoreCasePrefix + compoundCharacter : ''; - const w = word.normalize(); + const nw = word.normalize(); + const w = [...nw]; function determineRoot(s: FindCompoundChain): FindCompoundChain { const prefix = s.compoundPrefix; @@ -228,7 +229,7 @@ export function findCompoundNode( if (!r.cr) { break; } - if (!i && !r.caseMatched && w !== w.toLowerCase()) { + if (!i && !r.caseMatched && nw !== nw.toLowerCase()) { // It is not going to be found. break; } diff --git a/packages/cspell-trie-lib/src/lib/TrieNode/trie.ts b/packages/cspell-trie-lib/src/lib/TrieNode/trie.ts index d1dd4ab27134..8b7a009f5f3b 100644 --- a/packages/cspell-trie-lib/src/lib/TrieNode/trie.ts +++ b/packages/cspell-trie-lib/src/lib/TrieNode/trie.ts @@ -58,6 +58,10 @@ class ImplITrieNode implements ITrieNode { return ImplITrieNode.toITrieNode(n); } + getNode(chars: string): ITrieNode | undefined { + return this.findNode(chars); + } + has(char: string): boolean { const c = this.node.c; return (c && char in c) || false; @@ -74,6 +78,25 @@ class ImplITrieNode implements ITrieNode { return !!this.node.c; } + #findTrieNode(word: string): TrieNode | undefined { + let node: TrieNode | undefined = this.node; + for (const char of word) { + if (!node) return undefined; + node = node.c?.[char]; + } + return node; + } + + findNode(word: string): ITrieNode | undefined { + const node = this.#findTrieNode(word); + return node && ImplITrieNode.toITrieNode(node); + } + + findExact(word: string): boolean { + const node = this.#findTrieNode(word); + return !!node && !!node.f; + } + static toITrieNode(node: TrieNode): ITrieNode { return new this(node); } @@ -82,10 +105,17 @@ class ImplITrieNode implements ITrieNode { class ImplITrieRoot extends ImplITrieNode implements ITrieNodeRoot { readonly info: Readonly; + readonly hasForbiddenWords: boolean; + readonly hasCompoundWords: boolean; + readonly hasNonStrictWords: boolean; + protected constructor(readonly 
root: TrieRoot) { super(root); const { stripCaseAndAccentsPrefix, compoundCharacter, forbiddenWordPrefix, isCaseAware } = root; this.info = { stripCaseAndAccentsPrefix, compoundCharacter, forbiddenWordPrefix, isCaseAware }; + this.hasForbiddenWords = !!root.c[forbiddenWordPrefix]; + this.hasCompoundWords = !!root.c[compoundCharacter]; + this.hasNonStrictWords = !!root.c[stripCaseAndAccentsPrefix]; } get eow(): boolean { diff --git a/packages/cspell-trie-lib/src/lib/buildITrie.ts b/packages/cspell-trie-lib/src/lib/buildITrie.ts index 561a883aa626..be90a68d19cb 100644 --- a/packages/cspell-trie-lib/src/lib/buildITrie.ts +++ b/packages/cspell-trie-lib/src/lib/buildITrie.ts @@ -7,5 +7,5 @@ export function buildITrieFromWords(words: Iterable, info: PartialTrieIn const builder = new FastTrieBlobBuilder(info); builder.insert(words); const ft = builder.build(); - return new ITrieImpl(ft.size > 100 ? ft.toTrieBlob() : ft); + return new ITrieImpl(ft.size > 1000 ? ft.toTrieBlob() : ft); } diff --git a/packages/cspell-trie-lib/src/lib/constants.ts b/packages/cspell-trie-lib/src/lib/constants.ts index 769a6f5d8aff..82099ed2a515 100644 --- a/packages/cspell-trie-lib/src/lib/constants.ts +++ b/packages/cspell-trie-lib/src/lib/constants.ts @@ -12,4 +12,7 @@ export const defaultTrieInfo: TrieInfo = Object.freeze({ forbiddenWordPrefix: FORBID_PREFIX, stripCaseAndAccentsPrefix: CASE_INSENSITIVE_PREFIX, isCaseAware: true, + hasForbiddenWords: false, + hasCompoundWords: false, + hasNonStrictWords: false, }); diff --git a/packages/cspell-trie-lib/src/lib/io/importV3.test.ts b/packages/cspell-trie-lib/src/lib/io/importV3.test.ts index 55caa1c08445..9dd76691dd7e 100644 --- a/packages/cspell-trie-lib/src/lib/io/importV3.test.ts +++ b/packages/cspell-trie-lib/src/lib/io/importV3.test.ts @@ -134,14 +134,10 @@ function toTree(root: ITrieNode): string { function* walk(n: ITrieNode, prefix: string): Generator { const nextPrefix = '.'.repeat(prefix.length); if (n.hasChildren()) { - const keys = 
n - .keys() - .map((k, i) => ({ k, i })) - .sort((a, b) => (a.k < b.k ? -1 : 1)); - for (const key of keys) { - const c = n.child(key.i); - if (!c) continue; - yield* walk(c, prefix + key.k); + const entries = [...n.entries()]; + entries.sort((a, b) => (a[0] < b[0] ? -1 : 1)); + for (const [key, c] of entries) { + yield* walk(c, prefix + key); prefix = nextPrefix; } } diff --git a/packages/cspell-trie-lib/src/lib/io/importV3FastBlob.test.ts b/packages/cspell-trie-lib/src/lib/io/importV3FastBlob.test.ts index 54e364b89fd8..f2af82d8d427 100644 --- a/packages/cspell-trie-lib/src/lib/io/importV3FastBlob.test.ts +++ b/packages/cspell-trie-lib/src/lib/io/importV3FastBlob.test.ts @@ -121,14 +121,10 @@ function toTree(root: ITrieNode): string { function* walk(n: ITrieNode, prefix: string): Generator { const nextPrefix = '.'.repeat(prefix.length); if (n.hasChildren()) { - const keys = n - .keys() - .map((k, i) => ({ k, i })) - .sort((a, b) => (a.k < b.k ? -1 : 1)); - for (const key of keys) { - const c = n.child(key.i); - if (!c) continue; - yield* walk(c, prefix + key.k); + const entries = [...n.entries()]; + entries.sort((a, b) => (a[0] < b[0] ? 
-1 : 1)); + for (const [key, c] of entries) { + yield* walk(c, prefix + key); prefix = nextPrefix; } } diff --git a/packages/cspell-trie-lib/src/lib/suggestions/suggestAStar.test.ts b/packages/cspell-trie-lib/src/lib/suggestions/suggestAStar.test.ts index 3efe17dbe89a..b3e695177ec0 100644 --- a/packages/cspell-trie-lib/src/lib/suggestions/suggestAStar.test.ts +++ b/packages/cspell-trie-lib/src/lib/suggestions/suggestAStar.test.ts @@ -1,5 +1,7 @@ +import { DictionaryDefinition } from '@cspell/cspell-types'; import { describe, expect, test } from 'vitest'; +import { readFastTrieBlobFromConfig, readTrieFromConfig } from '../../test/dictionaries.test.helper.js'; import type { WeightMap } from '../distance/index.js'; import { mapDictionaryInformationToWeightMap } from '../mappers/mapDictionaryInfoToWeightMap.js'; import { parseDictionaryLegacy } from '../SimpleDictionaryParser.js'; @@ -318,3 +320,128 @@ function calcWeightMap(): WeightMap { ], }); } + +describe('Validate Suggest A Star with English Dict', async () => { + const changeLimit = 3; + + const trie = new TrieNodeTrie((await _getTrie()).root); + const fastTrie = await _getFastTrieBlob(); + const trieBlob = fastTrie.toTrieBlob(); + + // cspell:ignore Orangges + test('Tests suggestions for Orangges Trie', () => { + const results = Sug.suggestAStar(trie, 'Orangges', { changeLimit: changeLimit, weightMap: dictWeightMap() }); + expect(results).toEqual([ + sr('oranges', 101), + sr('orangs', 191), + sr('Orange', 200), + sr('orange', 201), + sr('orangey', 201), + sr('orangier', 201), + sr('orangiest', 201), + sr('orangeries', 241), + ]); + }); + + test('Tests suggestions for Orangges FastTrie', () => { + const results = Sug.suggestAStar(fastTrie, 'Orangges', { + changeLimit: changeLimit, + weightMap: dictWeightMap(), + }); + expect(results).toEqual([ + sr('oranges', 101), + sr('orangs', 191), + sr('Orange', 200), + sr('orange', 201), + sr('orangey', 201), + sr('orangier', 201), + sr('orangiest', 201), + 
sr('orangeries', 241), + ]); + }); + + test('Tests suggestions for Orangges TrieBlob', () => { + const results = Sug.suggestAStar(trieBlob, 'Orangges', { + changeLimit: changeLimit, + weightMap: dictWeightMap(), + }); + expect(results).toEqual([ + sr('oranges', 101), + sr('orangs', 191), + sr('Orange', 200), + sr('orange', 201), + sr('orangey', 201), + sr('orangier', 201), + sr('orangiest', 201), + sr('orangeries', 241), + ]); + }); +}); + +function _getTrie() { + return readTrieFromConfig('@cspell/dict-en_us/cspell-ext.json'); +} + +function _getFastTrieBlob() { + return readFastTrieBlobFromConfig('@cspell/dict-en_us/cspell-ext.json'); +} + +function dictWeightMap() { + return mapDictionaryInformationToWeightMap(dictDef().dictionaryInformation); +} + +function dictDef() { + return { + name: 'en_us', + path: './en_US.trie.gz', + repMap: [["'|`|’", "'"]], + description: 'American English Dictionary', + dictionaryInformation: { + locale: 'en-US', + alphabet: 'a-zA-Z', + suggestionEditCosts: [ + { description: "Words like 'break' and 'brake'", map: '(ate)(eat)|(ake)(eak)', replace: 75 }, + { + description: 'Sounds alike', + map: 'f(ph)(gh)|(sion)(tion)(cion)|(ail)(ale)|(r)(ur)(er)(ure)(or)', + replace: 75, + }, + { + description: 'Double letter score', + map: 'l(ll)|s(ss)|t(tt)|e(ee)|b(bb)|d(dd)', + replace: 75, + }, + { + map: 'aeiou', + replace: 98, + swap: 75, + insDel: 90, + }, + { + description: 'Common vowel sounds.', + map: 'o(oh)(oo)|(oo)(ou)|(oa)(ou)|(ee)(ea)', + replace: 75, + }, + { + map: 'o(oo)|a(aa)|e(ee)|u(uu)|(eu)(uu)|(ou)(ui)(ow)|(ie)(ei)|i(ie)|e(en)|e(ie)', + replace: 50, + }, + { + description: "Do not rank `'s` high on the list.", + map: "($)('$)('s$)|(s$)(s'$)(s's$)", + replace: 10, + penalty: 180, + }, + { + description: "Plurals ending in 'y'", + map: '(ys)(ies)', + replace: 75, + }, + { + map: '(d$)(t$)(dt$)', + replace: 75, + }, + ], + }, + } as const satisfies DictionaryDefinition; +} diff --git 
a/packages/cspell-trie-lib/src/lib/suggestions/suggestAStar.ts b/packages/cspell-trie-lib/src/lib/suggestions/suggestAStar.ts index 3b73b06da963..202298862581 100644 --- a/packages/cspell-trie-lib/src/lib/suggestions/suggestAStar.ts +++ b/packages/cspell-trie-lib/src/lib/suggestions/suggestAStar.ts @@ -77,8 +77,10 @@ export function* getSuggestionsAStar( const compRootIgnoreCase = rootIgnoreCase && rootIgnoreCase.get(comp); const emitted: Record = Object.create(null); + const srcLetters = [...srcWord]; + /** Initial limit is based upon the length of the word. */ - let limit = BC * Math.min(srcWord.length * opCosts.wordLengthCostFactor, changeLimit); + let limit = BC * Math.min(srcLetters.length * opCosts.wordLengthCostFactor, changeLimit); pathHeap.add(rootPNode); if (rootIgnoreCase) { @@ -146,7 +148,7 @@ export function* getSuggestionsAStar( } function processPath(p: PNode) { - const len = srcWord.length; + const len = srcLetters.length; if (p.n.eow && p.i === len) { const word = pNodeToWord(p); @@ -159,8 +161,7 @@ export function* getSuggestionsAStar( function calcEdges(p: PNode): void { const { n, i, t } = p; - const keys = n.keys(); - const s = srcWord[i]; + const s = srcLetters[i]; const sg = visMap[s] || 0; const cost0 = p.c; const cost = cost0 + BC + (i ? 
0 : opCosts.firstLetterBias); @@ -169,9 +170,9 @@ export function* getSuggestionsAStar( const costCompound = cost0 + opCosts.compound; if (s) { // Match - const mIdx = keys.indexOf(s); - if (mIdx >= 0) { - storePath(t, n.child(mIdx), i + 1, cost0, s, p, '=', s); + const m = n.get(s); + if (m) { + storePath(t, m, i + 1, cost0, s, p, '=', s); } if (weightMap) { @@ -179,21 +180,20 @@ export function* getSuggestionsAStar( } // Double letter, delete 1 - const ns = srcWord[i + 1]; - if (s == ns && mIdx >= 0) { - storePath(t, n.child(mIdx), i + 2, cost0 + DL, s, p, 'dd', s); + const ns = srcLetters[i + 1]; + if (s == ns && m) { + storePath(t, m, i + 2, cost0 + DL, s, p, 'dd', s); } // Delete storePath(t, n, i + 1, cost, '', p, 'd', ''); // Replace - for (let j = 0; j < keys.length; ++j) { - const ss = keys[j]; - if (j === mIdx || ss in sc) continue; + for (const [ss, node] of n.entries()) { + if (node.id === m?.id || ss in sc) continue; const g = visMap[ss] || 0; // srcWord === 'WALK' && console.log(g.toString(2)); const c = sg & g ? costVis : cost; - storePath(t, n.child(j), i + 1, c, ss, p, 'r', ss); + storePath(t, node, i + 1, c, ss, p, 'r', ss); } if (n.eow && i && compoundMethod) { @@ -213,7 +213,7 @@ export function* getSuggestionsAStar( } // Natural Compound - if (compRoot && costCompound <= limit && keys.includes(comp)) { + if (compRoot && costCompound <= limit && n.get(comp)) { if (compRootIgnoreCase) { storePath(t, compRootIgnoreCase, i, costCompound, '', p, '~+', '~+'); } @@ -223,18 +223,17 @@ export function* getSuggestionsAStar( // Insert if (cost <= limit) { // At the end of the word, only append is possible. 
- for (let j = 0; j < keys.length; ++j) { - const char = keys[j]; + for (const [char, node] of n.entries()) { if (char in sc) continue; - storePath(t, n.child(j), i, cost, char, p, 'i', char); + storePath(t, node, i, cost, char, p, 'i', char); } } } function processWeightMapEdges(p: PNode, weightMap: WeightMap) { - delLetters(p, weightMap, srcWord, storePath); - insLetters(p, weightMap, srcWord, storePath); - repLetters(p, weightMap, srcWord, storePath); + delLetters(p, weightMap, srcLetters, storePath); + insLetters(p, weightMap, srcLetters, storePath); + repLetters(p, weightMap, srcLetters, storePath); return; } @@ -264,16 +263,16 @@ export function* getSuggestionsAStar( } } -function delLetters(pNode: PNode, weightMap: WeightMap, word: string, storePath: FnStorePath) { +function delLetters(pNode: PNode, weightMap: WeightMap, letters: string[], storePath: FnStorePath) { const { t, n } = pNode; const trie = weightMap.insDel; let ii = pNode.i; const cost0 = pNode.c - pNode.i; - const len = word.length; + const len = letters.length; for (let nn = trie.n; ii < len && nn; ) { - const tt = nn[word[ii]]; + const tt = nn[letters[ii]]; if (!tt) return; ++ii; if (tt.c !== undefined) { @@ -283,7 +282,7 @@ function delLetters(pNode: PNode, weightMap: WeightMap, word: string, storePath: } } -function insLetters(p: PNode, weightMap: WeightMap, _word: string, storePath: FnStorePath) { +function insLetters(p: PNode, weightMap: WeightMap, _letters: string[], storePath: FnStorePath) { const { t, i, c, n } = p; const cost0 = c; @@ -294,16 +293,16 @@ function insLetters(p: PNode, weightMap: WeightMap, _word: string, storePath: Fn }); } -function repLetters(pNode: PNode, weightMap: WeightMap, word: string, storePath: FnStorePath) { +function repLetters(pNode: PNode, weightMap: WeightMap, letters: string[], storePath: FnStorePath) { const node = pNode.n; const pt = pNode.t; const cost0 = pNode.c; - const len = word.length; + const len = letters.length; const trie = weightMap.replace; 
let i = pNode.i; for (let n = trie.n; i < len && n; ) { - const t = n[word[i]]; + const t = n[letters[i]]; if (!t) return; ++i; // yield { i, t }; @@ -381,12 +380,9 @@ function searchTrieCostNodesMatchingTrie2 }>( ): void { const n = trie.n; if (!n) return; - const keys = node.keys(); - for (let i = 0; i < keys.length; ++i) { - const key = keys[i]; + for (const [key, c] of node.entries()) { const t = n[key]; if (!t) continue; - const c = node.child(i); const pfx = s + key; emit(pfx, t, c); if (t.n) { diff --git a/packages/cspell-trie-lib/src/lib/suggestions/suggestTrieData.test.ts b/packages/cspell-trie-lib/src/lib/suggestions/suggestTrieData.test.ts index 954787812ff9..94e4e769ba84 100644 --- a/packages/cspell-trie-lib/src/lib/suggestions/suggestTrieData.test.ts +++ b/packages/cspell-trie-lib/src/lib/suggestions/suggestTrieData.test.ts @@ -1,6 +1,10 @@ +import { DictionaryDefinition } from '@cspell/cspell-types'; import { describe, expect, test } from 'vitest'; +import { readFastTrieBlobFromConfig, readTrieFromConfig } from '../../test/dictionaries.test.helper.js'; +import { createWeightMap } from '../distance/weightedMaps.js'; import { ITrieImpl } from '../ITrie.js'; +import { mapDictionaryInformation } from '../mappers/mapDictionaryInfo.js'; import { parseDictionaryLegacy } from '../SimpleDictionaryParser.js'; import { TrieNodeTrie } from '../TrieNode/TrieNodeTrie.js'; import { cleanCopy } from '../utils/util.js'; @@ -378,3 +382,134 @@ function sugOpts(opts: Partial): SuggestionCollector function sugOptsMaxNum(maxNumSuggestions: number): SuggestionCollectorOptions { return sugOpts({ numSuggestions: maxNumSuggestions }); } + +describe('Validate Suggest A Star with English Dict', async () => { + const changeLimit = 3; + + const trie = new TrieNodeTrie((await _getTrie()).root); + const fastTrie = await _getFastTrieBlob(); + const trieBlob = fastTrie.toTrieBlob(); + + // cspell:ignore Orangges + test('Tests suggestions for Orangges Trie', () => { + const results = 
suggest(trie, 'Orangges', { changeLimit: changeLimit, weightMap: dictWeightMap() }); + expect(results).toEqual([ + sr('oranges', 101), + sr('orangs', 191), + sr('Orange', 200), + sr('orange', 201), + sr('orangey', 201), + sr('orangier', 201), + sr('orangiest', 201), + sr('orangeries', 241), + ]); + }); + + test('Tests suggestions for Orangges FastTrie', () => { + const results = suggest(fastTrie, 'Orangges', { + changeLimit: changeLimit, + weightMap: dictWeightMap(), + }); + expect(results).toEqual([ + sr('oranges', 101), + sr('orangs', 191), + sr('Orange', 200), + sr('orange', 201), + sr('orangey', 201), + sr('orangier', 201), + sr('orangiest', 201), + sr('orangeries', 241), + ]); + }); + + test('Tests suggestions for Orangges TrieBlob', () => { + const results = suggest(trieBlob, 'Orangges', { + changeLimit: changeLimit, + weightMap: dictWeightMap(), + }); + expect(results).toEqual([ + sr('oranges', 101), + sr('orangs', 191), + sr('Orange', 200), + sr('orange', 201), + sr('orangey', 201), + sr('orangier', 201), + sr('orangiest', 201), + sr('orangeries', 241), + ]); + }); + + function sr(word: string, cost: number) { + return expect.objectContaining({ word, cost }); + } +}); + +function _getTrie() { + return readTrieFromConfig('@cspell/dict-en_us/cspell-ext.json'); +} + +function _getFastTrieBlob() { + return readFastTrieBlobFromConfig('@cspell/dict-en_us/cspell-ext.json'); +} + +function dictWeightMap() { + return createWeightMap(...mapDictionaryInformation(dictDef().dictionaryInformation)); +} + +function dictDef() { + return { + name: 'en_us', + path: './en_US.trie.gz', + repMap: [["'|`|’", "'"]], + description: 'American English Dictionary', + dictionaryInformation: { + locale: 'en-US', + alphabet: 'a-zA-Z', + suggestionEditCosts: [ + { description: "Words like 'break' and 'brake'", map: '(ate)(eat)|(ake)(eak)', replace: 75 }, + { + description: 'Sounds alike', + // cspell:disable-next-line + map: 'f(ph)(gh)|(sion)(tion)(cion)|(ail)(ale)|(r)(ur)(er)(ure)(or)', 
+ replace: 75, + }, + { + description: 'Double letter score', + map: 'l(ll)|s(ss)|t(tt)|e(ee)|b(bb)|d(dd)', + replace: 75, + }, + { + // cspell:disable-next-line + map: 'aeiou', + replace: 98, + swap: 75, + insDel: 90, + }, + { + description: 'Common vowel sounds.', + map: 'o(oh)(oo)|(oo)(ou)|(oa)(ou)|(ee)(ea)', + replace: 75, + }, + { + map: 'o(oo)|a(aa)|e(ee)|u(uu)|(eu)(uu)|(ou)(ui)(ow)|(ie)(ei)|i(ie)|e(en)|e(ie)', + replace: 50, + }, + { + description: "Do not rank `'s` high on the list.", + map: "($)('$)('s$)|(s$)(s'$)(s's$)", + replace: 10, + penalty: 180, + }, + { + description: "Plurals ending in 'y'", + map: '(ys)(ies)', + replace: 75, + }, + { + map: '(d$)(t$)(dt$)', + replace: 75, + }, + ], + }, + } as const satisfies DictionaryDefinition; +} diff --git a/packages/cspell-trie-lib/src/lib/walker/walker.ts b/packages/cspell-trie-lib/src/lib/walker/walker.ts index bf289f8d6df5..47f53f6c9510 100644 --- a/packages/cspell-trie-lib/src/lib/walker/walker.ts +++ b/packages/cspell-trie-lib/src/lib/walker/walker.ts @@ -158,7 +158,7 @@ function _walkerWords2(root: TrieNode): Iterable { * Walks the Trie and yields each word. 
*/ export function* walkerWordsITrie(root: ITrieNode): Iterable<string> { - type Children = readonly string[]; + type Children = readonly Readonly<[string, ITrieNode]>[]; interface Stack { t: string; n: ITrieNode; @@ -168,19 +168,18 @@ export function* walkerWordsITrie(root: ITrieNode): Iterable<string> { let depth = 0; const stack: Stack[] = []; - stack[depth] = { t: '', n: root, c: root.keys(), ci: 0 }; + stack[depth] = { t: '', n: root, c: [...root.entries()], ci: 0 }; while (depth >= 0) { let s = stack[depth]; let baseText = s.t; while (s.ci < s.c.length && s.n) { - const char = s.c[s.ci++]; - const node = s.n.get(char); + const [char, node] = s.c[s.ci++]; if (!node) continue; const text = baseText + char; if (node.eow) yield text; depth++; baseText = text; - const c = node.keys(); + const c = [...node.entries()]; if (stack[depth]) { s = stack[depth]; s.t = text; diff --git a/packages/cspell-trie-lib/src/perf/charIndex.perf.ts b/packages/cspell-trie-lib/src/perf/charIndex.perf.ts index f60cfded2ae5..262698817154 100644 --- a/packages/cspell-trie-lib/src/perf/charIndex.perf.ts +++ b/packages/cspell-trie-lib/src/perf/charIndex.perf.ts @@ -55,14 +55,33 @@ suite('encode to sequence', async (test) => { } }); - const buffer = new Uint8Array(1024); + const buffer = new ArrayBuffer(1024); + const u8buffer = new Uint8Array(buffer); test('TextEncoder.encodeInto to Uint8Array' + msgSuffix, () => { for (const word of words) { - encoder.encodeInto(word, buffer); + encoder.encodeInto(word, u8buffer); } }); + test('TextEncoder.encodeInto to Uint8Array slice' + msgSuffix, () => { + let s: Uint8Array | undefined; + for (const word of words) { + const n = encoder.encodeInto(word, u8buffer); + s = u8buffer.slice(0, n.written); + } + return s; + }); + + test('TextEncoder.encodeInto to Uint8Array from buffer' + msgSuffix, () => { + let s: Uint8Array | undefined; + for (const word of words) { + const n = encoder.encodeInto(word, u8buffer); + s = new Uint8Array(buffer, 0, n.written); + } + return s;
+ }); + test('Normalize("NFC")' + msgSuffix, () => { for (const word of words) { word.normalize('NFC'); diff --git a/packages/cspell-trie-lib/src/perf/misc.perf.ts b/packages/cspell-trie-lib/src/perf/misc.perf.ts new file mode 100644 index 000000000000..8c6c9fb4c368 --- /dev/null +++ b/packages/cspell-trie-lib/src/perf/misc.perf.ts @@ -0,0 +1,51 @@ +import { suite } from 'perf-insight'; + +interface SpreadObject { + i: number; + value: string; + node: string; + word: string; +} + +suite('Operators', async (test) => { + const iterations = 100_000; + const objects: SpreadObject[] = Array.from({ length: 1000 }, (_, i) => ({ + i, + value: i.toString(), + node: 'node', + word: `word ${i}`, + })); + + test('spread omit', () => { + let obj: Omit<SpreadObject, 'node'> | undefined; + for (let i = iterations; i > 0; --i) { + const { node: _, ...rest } = objects[i % objects.length]; + obj = rest; + } + return obj; + }); + + test('field wise assignment', () => { + let obj: Omit<SpreadObject, 'node'> | undefined; + for (let i = iterations; i > 0; --i) { + const v = objects[i % objects.length]; + obj = { + i: v.i, + value: v.value, + word: v.word, + }; + } + return obj; + }); + + test('spread explicity', () => { + let obj: Omit<SpreadObject, 'node'> | undefined; + for (let j = iterations; j > 0; --j) { + const { i, value, word } = objects[j % objects.length]; + obj = { i, value, word }; + } + return obj; + }); +}); + +// cspell:ignore tion aeiou diff --git a/packages/cspell/src/app/__snapshots__/app.test.ts.snap b/packages/cspell/src/app/__snapshots__/app.test.ts.snap index dce72e677db7..58a8ab94b910 100644 --- a/packages/cspell/src/app/__snapshots__/app.test.ts.snap +++ b/packages/cspell/src/app/__snapshots__/app.test.ts.snap @@ -2152,7 +2152,7 @@ exports[`Validate cli > app 'typos --no-show-suggestions' Expect Error: [Functio exports[`Validate cli > app 'typos --show-suggestions' Expect Error: [Function CheckFailed] 1`] = `[]`; exports[`Validate cli > app 'typos --show-suggestions' Expect Error: [Function CheckFailed] 2`] = ` -"log
code.ts:1:26 - Unknown word (Orangges) Suggestions: [Oranges, orange, Orange, Orangs, Orangey] +"log code.ts:1:26 - Unknown word (Orangges) Suggestions: [Oranges, orange, Orange, Orangs, Orange's] log log test.md:5:3 - Forbidden word (blacklist) Suggestions: [denylist*, backlist, backlit, blackest, blackish] log diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 17ecd6b28b3c..125b8d9abd31 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -790,9 +790,6 @@ importers: specifier: ^2.5.0 version: 2.5.0 devDependencies: - '@types/glob': - specifier: ^8.1.0 - version: 8.1.0 lorem-ipsum: specifier: ^2.0.8 version: 2.0.8