Skip to content

Commit

Permalink
fix: support case sensitive document checking.
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S committed Jun 11, 2021
1 parent 5a9aa27 commit 527de4a
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ describe('Verify building Dictionary', () => {
describe('Validate wordSearchForms', () => {
function testCase(word: string, isCaseSensitive: boolean, ignoreCase: boolean, expected: string[]) {
test(`${word} ${isCaseSensitive} ${ignoreCase} ${expected}`, () => {
const words = __testMethods.wordSearchForms(word, isCaseSensitive, ignoreCase);
const words = __testMethods.wordSearchFormsArray(word, isCaseSensitive, ignoreCase);
expect(words.sort()).toEqual(expected.sort());
});
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {
PREFIX_NO_CASE,
regexPrefix,
impersonateCollector,
wordSearchFormsArray,
} from './SpellingDictionaryMethods';
import { SpellingDictionary, HasOptions, SuggestOptions, SpellingDictionaryOptions } from './SpellingDictionary';
export class SpellingDictionaryFromTrie implements SpellingDictionary {
Expand Down Expand Up @@ -66,7 +67,10 @@ export class SpellingDictionaryFromTrie implements SpellingDictionary {
SpellingDictionaryFromTrie.cachedWordsLimit
);
private hasAnyForm(word: string, useCompounds: number | boolean | undefined, ignoreCase: boolean) {
const mWord = this.mapWord(word);
const mWord = this.mapWord(word.normalize('NFC'));
if (this.trie.hasWord(mWord, true)) {
return true;
}
const forms = wordSearchForms(mWord, this.isDictionaryCaseSensitive, ignoreCase);
for (const w of forms) {
if (this.trie.hasWord(w, !ignoreCase)) {
Expand Down Expand Up @@ -117,7 +121,7 @@ export class SpellingDictionaryFromTrie implements SpellingDictionary {
public genSuggestions(collector: SuggestionCollector, suggestOptions: SuggestOptions): void {
const { compoundMethod = CompoundWordsMethod.SEPARATE_WORDS, ignoreCase = true } = suggestOptions;
const _compoundMethod = this.options.useCompounds ? CompoundWordsMethod.JOIN_WORDS : compoundMethod;
wordSearchForms(collector.word, this.isDictionaryCaseSensitive, ignoreCase).forEach((w) =>
wordSearchFormsArray(collector.word, this.isDictionaryCaseSensitive, ignoreCase).forEach((w) =>
this.trie.genSuggestions(impersonateCollector(collector, w), _compoundMethod)
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,13 @@ describe('Validate createSpellingDictionary', () => {
const d = createSpellingDictionary(words, 'test create', __filename);
words.forEach((w) => expect(d.has(w)).toBe(true));
});

test('createSpellingDictionary fa', () => {
// cspell:disable-next-line
const words = ['آئینهٔ', 'آبادهٔ', 'کلاه'];
expect(words).toEqual(words.map((w) => w.normalize('NFC')));
const d = createSpellingDictionary(words, 'test create', __filename);
expect(d.has(words[0])).toBe(true);
words.forEach((w) => expect(d.has(w)).toBe(true));
});
});
38 changes: 25 additions & 13 deletions packages/cspell-lib/src/textValidator.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ import { createSpellingDictionary } from './SpellingDictionary/createSpellingDic
import { FreqCounter } from './util/FreqCounter';
import * as Text from './util/text';
import { genSequence } from 'gensequence';
import { settingsToValidateOptions } from './validator';

const sToV = settingsToValidateOptions;

// cspell:ignore whiteberry redmango lightbrown redberry

Expand All @@ -36,14 +39,14 @@ describe('Validate textValidator functions', () => {

test('tests textValidator no word compounds', async () => {
const dictCol = await getSpellingDictionaryCollection();
const result = validateText(sampleText, dictCol, {});
const result = validateText(sampleText, dictCol, sToV({}));
const errors = result.map((wo) => wo.text).toArray();
expect(errors).toEqual(['giraffe', 'lightbrown', 'whiteberry', 'redberry']);
});

test('tests textValidator with word compounds', async () => {
const dictCol = await getSpellingDictionaryCollection();
const result = validateText(sampleText, dictCol, { allowCompoundWords: true });
const result = validateText(sampleText, dictCol, sToV({ allowCompoundWords: true }));
const errors = result.map((wo) => wo.text).toArray();
expect(errors).toEqual(['giraffe', 'whiteberry']);
});
Expand All @@ -52,7 +55,7 @@ describe('Validate textValidator functions', () => {
test('tests ignoring words that consist of a single repeated letter', async () => {
const dictCol = await getSpellingDictionaryCollection();
const text = ' tttt gggg xxxxxxx jjjjj xxxkxxxx xxxbxxxx \n' + sampleText;
const result = validateText(text, dictCol, { allowCompoundWords: true });
const result = validateText(text, dictCol, sToV({ allowCompoundWords: true }));
const errors = result
.map((wo) => wo.text)
.toArray()
Expand All @@ -63,7 +66,7 @@ describe('Validate textValidator functions', () => {
test('tests trailing s, ed, ing, etc. are attached to the words', async () => {
const dictEmpty = await createSpellingDictionary([], 'empty', 'test');
const text = 'We have PUBLISHed multiple FIXesToThePROBLEMs';
const result = validateText(text, dictEmpty, {}).toArray();
const result = validateText(text, dictEmpty, sToV({})).toArray();
const errors = result.map((wo) => wo.text);
expect(errors).toEqual(['have', 'PUBLISHed', 'multiple', 'FIXes', 'PROBLEMs']);
});
Expand All @@ -72,7 +75,7 @@ describe('Validate textValidator functions', () => {
const dictEmpty = await createSpellingDictionary([], 'empty', 'test');
const text = 'We have PUBLISHed published multiple FIXesToThePROBLEMs';
const options: ValidationOptions = {
caseSensitive: true,
ignoreWordsAreCaseSensitive: true,
ignoreWords: ['PUBLISHed', 'FIXesToThePROBLEMs'],
ignoreCase: false,
};
Expand All @@ -95,7 +98,12 @@ describe('Validate textValidator functions', () => {
VeryBadProblem with the 4wheel of the Range8 in Amsterdam, Berlin, and paris.
#define _ERROR_CODE_42 = NETWORK_ERROR42
`;
const options = { allowCompoundWords: false, ignoreCase: false, flagWords };
const options: ValidationOptions = {
allowCompoundWords: false,
ignoreCase: false,
flagWords,
ignoreWordsAreCaseSensitive: true,
};
const result = validateText(text, dict, options).toArray();
const errors = result.map((wo) => wo.text);
expect(errors).toEqual(['have', 'published', 'VeryBadProblem', 'paris']);
Expand All @@ -104,7 +112,7 @@ describe('Validate textValidator functions', () => {
test('tests trailing s, ed, ing, etc.', async () => {
const dictWords = await getSpellingDictionaryCollection();
const text = 'We have PUBLISHed multiple FIXesToThePROBLEMs';
const result = validateText(text, dictWords, { allowCompoundWords: true });
const result = validateText(text, dictWords, sToV({ allowCompoundWords: true }));
const errors = result
.map((wo) => wo.text)
.toArray()
Expand All @@ -117,7 +125,7 @@ describe('Validate textValidator functions', () => {
// cspell:disable
const text = `We should’ve done a better job, but we couldn\\'t have known.`;
// cspell:enable
const result = validateText(text, dictWords, { allowCompoundWords: false });
const result = validateText(text, dictWords, sToV({ allowCompoundWords: false }));
const errors = result
.map((wo) => wo.text)
.toArray()
Expand All @@ -128,15 +136,19 @@ describe('Validate textValidator functions', () => {
test('tests maxDuplicateProblems', async () => {
const dict = await createSpellingDictionary([], 'empty', 'test');
const text = sampleText;
const result = validateText(text, dict, {
maxNumberOfProblems: 1000,
maxDuplicateProblems: 1,
});
const result = validateText(
text,
dict,
sToV({
maxNumberOfProblems: 1000,
maxDuplicateProblems: 1,
})
);
const freq = FreqCounter.create(result.map((t) => t.text));
expect(freq.total).toBe(freq.counters.size);
const words = freq.counters.keys();
const dict2 = await createSpellingDictionary(words, 'test', 'test');
const result2 = [...validateText(text, dict2, { maxNumberOfProblems: 1000, maxDuplicateProblems: 1 })];
const result2 = [...validateText(text, dict2, sToV({ maxNumberOfProblems: 1000, maxDuplicateProblems: 1 }))];
expect(result2.length).toBe(0);
});

Expand Down
8 changes: 4 additions & 4 deletions packages/cspell-lib/src/textValidator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@ export interface ValidationOptions extends IncludeExcludeOptions {
ignoreWords?: string[];
allowCompoundWords?: boolean;
/** ignore words are considered case sensitive */
caseSensitive?: boolean;
ignoreWordsAreCaseSensitive: boolean;
/** ignore case when checking words against dictionary or ignore words list */
ignoreCase?: boolean;
ignoreCase: boolean;
}

export interface CheckOptions extends ValidationOptions {
allowCompoundWords: boolean;
caseSensitive: boolean;
ignoreWordsAreCaseSensitive: boolean;
ignoreCase: boolean;
}

Expand Down Expand Up @@ -98,7 +98,7 @@ function lineValidator(dict: SpellingDictionary, options: ValidationOptions): Li
ignoreWords = [],
allowCompoundWords = false,
ignoreCase = true,
caseSensitive = false,
ignoreWordsAreCaseSensitive: caseSensitive = false,
} = options;
const hasWordOptions: HasWordOptions = {
ignoreCase,
Expand Down
11 changes: 10 additions & 1 deletion packages/cspell-lib/src/validator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ export async function validateText(
): Promise<ValidationIssue[]> {
const finalSettings = Settings.finalizeSettings(settings);
const dict = await Dictionary.getDictionary(finalSettings);
const issues = [...TV.validateText(text, dict, finalSettings)];
const issues = [...TV.validateText(text, dict, settingsToValidateOptions(finalSettings))];
if (!options.generateSuggestions) {
return issues;
}
Expand All @@ -40,6 +40,15 @@ export async function validateText(
return withSugs;
}

/**
 * Convert user settings into the options consumed by the text validator.
 *
 * Every field of `settings` is carried over, then two fields are derived
 * from `settings.caseSensitive`:
 * - `ignoreWordsAreCaseSensitive` mirrors `caseSensitive`, and is `true`
 *   when `caseSensitive` is not set.
 * - `ignoreCase` is the negation of `caseSensitive`, and is `true` when
 *   `caseSensitive` is not set.
 *
 * @param settings - the settings to convert.
 * @returns a new options object; `settings` itself is not modified.
 */
export function settingsToValidateOptions(settings: CSpellUserSettings): TV.ValidationOptions {
    const { caseSensitive } = settings;
    // The two derived flags intentionally fall back to different defaults.
    const ignoreWordsAreCaseSensitive = caseSensitive ?? true;
    const ignoreCase = !(caseSensitive ?? false);
    return { ...settings, ignoreWordsAreCaseSensitive, ignoreCase };
}

export interface CheckTextInfo {
// Full text
text: string;
Expand Down
5 changes: 1 addition & 4 deletions packages/cspell-trie-lib/src/lib/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,4 @@ export function trieNodeToRoot(node: TrieNode, options: PartialTrieOptions): Tri

/**
 * Return the Unicode-normalized form of `text`.
 *
 * `String.prototype.normalize()` is called without an argument, which
 * applies the default form (NFC, canonical composition).
 */
export const normalizeWord = (text: string): string => {
    const normalized = text.normalize();
    return normalized;
};
export const normalizeWordToLowercase = (text: string): string =>
text
.toLowerCase()
.normalize('NFD')
.replace(/[\u0300-\u036f]/g, '');
text.toLowerCase().normalize('NFD').replace(/\p{M}/gu, '');

0 comments on commit 527de4a

Please sign in to comment.