From 5db3a1fadea445dec370fde9382d99775b172f5f Mon Sep 17 00:00:00 2001 From: Wanasit Tanakitrungruang Date: Sun, 19 Nov 2023 13:36:08 +0900 Subject: [PATCH] Fix: Apply lazy loading for RU patterns --- .../parsers/AbstractParserWithWordBoundary.ts | 26 ++++++++---- .../AbstractParserWithWordBoundaryChecking.ts | 25 +++++++++++ src/locales/ru/parsers/RUCasualDateParser.ts | 18 ++------ src/locales/ru/parsers/RUCasualTimeParser.ts | 18 ++------ .../parsers/RUMonthNameLittleEndianParser.ts | 42 +++++++------------ src/locales/ru/parsers/RUMonthNameParser.ts | 33 ++++++--------- .../ru/parsers/RURelativeDateFormatParser.ts | 20 +++------ .../ru/parsers/RUTimeUnitAgoFormatParser.ts | 16 +++---- .../RUTimeUnitCasualRelativeFormatParser.ts | 17 ++------ .../parsers/RUTimeUnitWithinFormatParser.ts | 14 +++++-- src/locales/ru/parsers/RUWeekdayParser.ts | 32 ++++++-------- 11 files changed, 116 insertions(+), 145 deletions(-) create mode 100644 src/locales/ru/parsers/AbstractParserWithWordBoundaryChecking.ts diff --git a/src/common/parsers/AbstractParserWithWordBoundary.ts b/src/common/parsers/AbstractParserWithWordBoundary.ts index c5cbb028..e4d80863 100644 --- a/src/common/parsers/AbstractParserWithWordBoundary.ts +++ b/src/common/parsers/AbstractParserWithWordBoundary.ts @@ -3,7 +3,7 @@ import { ParsingComponents, ParsingResult } from "../../results"; import { Component } from "../../types"; /** - * + * A parser that checks for a word boundary and then applies the inner pattern and extraction. 
*/ export abstract class AbstractParserWithWordBoundaryChecking implements Parser { abstract innerPattern(context: ParsingContext): RegExp; @@ -12,21 +12,29 @@ export abstract class AbstractParserWithWordBoundaryChecking implements Parser { match: RegExpMatchArray ): ParsingComponents | ParsingResult | { [c in Component]?: number } | null; - private cachedInnerPattern?: RegExp = null; - private cachedPattern?: RegExp = null; + // Override this method if there is a more efficient way to check whether the inner pattern has changed. + innerPatternHasChange(context: ParsingContext, currentInnerPattern: RegExp): boolean { + return this.innerPattern(context) !== currentInnerPattern; + } patternLeftBoundary(): string { return `(\\W|^)`; } + private cachedInnerPattern?: RegExp = null; + private cachedPattern?: RegExp = null; + pattern(context: ParsingContext): RegExp { - const innerPattern = this.innerPattern(context); - if (innerPattern == this.cachedInnerPattern) { - return this.cachedPattern; + if (this.cachedInnerPattern) { + if (!this.innerPatternHasChange(context, this.cachedInnerPattern)) { + return this.cachedPattern; + } } - - this.cachedPattern = new RegExp(`${this.patternLeftBoundary()}${innerPattern.source}`, innerPattern.flags); - this.cachedInnerPattern = innerPattern; + this.cachedInnerPattern = this.innerPattern(context); + this.cachedPattern = new RegExp( + `${this.patternLeftBoundary()}${this.cachedInnerPattern.source}`, + this.cachedInnerPattern.flags + ); return this.cachedPattern; } diff --git a/src/locales/ru/parsers/AbstractParserWithWordBoundaryChecking.ts b/src/locales/ru/parsers/AbstractParserWithWordBoundaryChecking.ts new file mode 100644 index 00000000..c7e3264d --- /dev/null +++ b/src/locales/ru/parsers/AbstractParserWithWordBoundaryChecking.ts @@ -0,0 +1,25 @@ +import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary"; +import { REGEX_PARTS } from "../constants"; +import { ParsingContext } from 
"../../../chrono"; + +export abstract class AbstractParserWithLeftBoundaryChecking extends AbstractParserWithWordBoundaryChecking { + abstract innerPatternString(context: ParsingContext): string; + + patternLeftBoundary(): string { + return REGEX_PARTS.leftBoundary; + } + + innerPattern(context: ParsingContext): RegExp { + return new RegExp(this.innerPatternString(context), REGEX_PARTS.flags); + } + + innerPatternHasChange(context: ParsingContext, currentInnerPattern: RegExp): boolean { + return false; + } +} + +export abstract class AbstractParserWithLeftRightBoundaryChecking extends AbstractParserWithLeftBoundaryChecking { + innerPattern(context: ParsingContext): RegExp { + return new RegExp(`${this.innerPatternString(context)}${REGEX_PARTS.rightBoundary}`, REGEX_PARTS.flags); + } +} diff --git a/src/locales/ru/parsers/RUCasualDateParser.ts b/src/locales/ru/parsers/RUCasualDateParser.ts index 5990d519..f11ecbf6 100644 --- a/src/locales/ru/parsers/RUCasualDateParser.ts +++ b/src/locales/ru/parsers/RUCasualDateParser.ts @@ -1,21 +1,11 @@ import { ParsingContext } from "../../../chrono"; import { ParsingComponents, ParsingResult } from "../../../results"; -import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary"; import * as references from "../../../common/casualReferences"; -import { REGEX_PARTS } from "../constants"; +import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking"; -const PATTERN = new RegExp( - `(?:с|со)?\\s*(сегодня|вчера|завтра|послезавтра|послепослезавтра|позапозавчера|позавчера)${REGEX_PARTS.rightBoundary}`, - REGEX_PARTS.flags -); - -export default class RUCasualDateParser extends AbstractParserWithWordBoundaryChecking { - patternLeftBoundary(): string { - return REGEX_PARTS.leftBoundary; - } - - innerPattern(context: ParsingContext): RegExp { - return PATTERN; +export default class RUCasualDateParser extends 
AbstractParserWithLeftRightBoundaryChecking { + innerPatternString(context: ParsingContext): string { + return `(?:с|со)?\\s*(сегодня|вчера|завтра|послезавтра|послепослезавтра|позапозавчера|позавчера)`; } innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents | ParsingResult { diff --git a/src/locales/ru/parsers/RUCasualTimeParser.ts b/src/locales/ru/parsers/RUCasualTimeParser.ts index 6a9ac030..9911c7a5 100644 --- a/src/locales/ru/parsers/RUCasualTimeParser.ts +++ b/src/locales/ru/parsers/RUCasualTimeParser.ts @@ -1,22 +1,12 @@ import { ParsingContext } from "../../../chrono"; -import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary"; import * as references from "../../../common/casualReferences"; import { assignSimilarDate } from "../../../utils/dayjs"; import dayjs from "dayjs"; -import { REGEX_PARTS } from "../constants"; +import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking"; -const PATTERN = new RegExp( - `(сейчас|прошлым\\s*вечером|прошлой\\s*ночью|следующей\\s*ночью|сегодня\\s*ночью|этой\\s*ночью|ночью|этим утром|утром|утра|в\\s*полдень|вечером|вечера|в\\s*полночь)` + - `${REGEX_PARTS.rightBoundary}`, - REGEX_PARTS.flags -); -export default class RUCasualTimeParser extends AbstractParserWithWordBoundaryChecking { - patternLeftBoundary(): string { - return REGEX_PARTS.leftBoundary; - } - - innerPattern() { - return PATTERN; +export default class RUCasualTimeParser extends AbstractParserWithLeftRightBoundaryChecking { + innerPatternString(context: ParsingContext): string { + return `(сейчас|прошлым\\s*вечером|прошлой\\s*ночью|следующей\\s*ночью|сегодня\\s*ночью|этой\\s*ночью|ночью|этим утром|утром|утра|в\\s*полдень|вечером|вечера|в\\s*полночь)`; } innerExtract(context: ParsingContext, match: RegExpMatchArray) { diff --git a/src/locales/ru/parsers/RUMonthNameLittleEndianParser.ts 
b/src/locales/ru/parsers/RUMonthNameLittleEndianParser.ts index 9e00e1e7..9e58eff5 100644 --- a/src/locales/ru/parsers/RUMonthNameLittleEndianParser.ts +++ b/src/locales/ru/parsers/RUMonthNameLittleEndianParser.ts @@ -1,41 +1,31 @@ import { ParsingContext } from "../../../chrono"; import { ParsingResult } from "../../../results"; import { findYearClosestToRef } from "../../../calculation/years"; -import { MONTH_DICTIONARY, REGEX_PARTS } from "../constants"; +import { MONTH_DICTIONARY } from "../constants"; import { YEAR_PATTERN, parseYear } from "../constants"; import { ORDINAL_NUMBER_PATTERN, parseOrdinalNumberPattern } from "../constants"; import { matchAnyPattern } from "../../../utils/pattern"; -import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary"; - -// prettier-ignore -const PATTERN = new RegExp( - `(?:с)?\\s*(${ORDINAL_NUMBER_PATTERN})` + - `(?:` + - `\\s{0,3}(?:по|-|–|до)?\\s{0,3}` + - `(${ORDINAL_NUMBER_PATTERN})` + - `)?` + - `(?:-|\\/|\\s{0,3}(?:of)?\\s{0,3})` + - `(${matchAnyPattern(MONTH_DICTIONARY)})` + - `(?:` + - `(?:-|\\/|,?\\s{0,3})` + - `(${YEAR_PATTERN}(?![^\\s]\\d))` + - `)?` + - `${REGEX_PARTS.rightBoundary}`, - REGEX_PARTS.flags -); +import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking"; const DATE_GROUP = 1; const DATE_TO_GROUP = 2; const MONTH_NAME_GROUP = 3; const YEAR_GROUP = 4; -export default class RUMonthNameLittleEndianParser extends AbstractParserWithWordBoundaryChecking { - patternLeftBoundary(): string { - return REGEX_PARTS.leftBoundary; - } - - innerPattern(): RegExp { - return PATTERN; +export default class RUMonthNameLittleEndianParser extends AbstractParserWithLeftRightBoundaryChecking { + innerPatternString(context: ParsingContext): string { + // prettier-ignore + return `(?:с)?\\s*(${ORDINAL_NUMBER_PATTERN})` + + `(?:` + + `\\s{0,3}(?:по|-|–|до)?\\s{0,3}` + + `(${ORDINAL_NUMBER_PATTERN})` + + `)?` + + 
`(?:-|\\/|\\s{0,3}(?:of)?\\s{0,3})` + + `(${matchAnyPattern(MONTH_DICTIONARY)})` + + `(?:` + + `(?:-|\\/|,?\\s{0,3})` + + `(${YEAR_PATTERN}(?![^\\s]\\d))` + + `)?`; } innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingResult { diff --git a/src/locales/ru/parsers/RUMonthNameParser.ts b/src/locales/ru/parsers/RUMonthNameParser.ts index 7b05e291..ecb94ecb 100644 --- a/src/locales/ru/parsers/RUMonthNameParser.ts +++ b/src/locales/ru/parsers/RUMonthNameParser.ts @@ -1,20 +1,9 @@ -import { FULL_MONTH_NAME_DICTIONARY, MONTH_DICTIONARY, REGEX_PARTS } from "../constants"; +import { FULL_MONTH_NAME_DICTIONARY, MONTH_DICTIONARY } from "../constants"; import { ParsingContext } from "../../../chrono"; import { findYearClosestToRef } from "../../../calculation/years"; import { matchAnyPattern } from "../../../utils/pattern"; import { YEAR_PATTERN, parseYear } from "../constants"; -import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary"; - -const PATTERN = new RegExp( - `((?:в)\\s*)?` + - `(${matchAnyPattern(MONTH_DICTIONARY)})` + - `\\s*` + - `(?:` + - `[,-]?\\s*(${YEAR_PATTERN})?` + - `)?` + - `(?=[^\\s\\w]|\\s+[^0-9]|\\s+$|$)`, - REGEX_PARTS.flags -); +import { AbstractParserWithLeftBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking"; const MONTH_NAME_GROUP = 2; const YEAR_GROUP = 3; @@ -25,13 +14,17 @@ const YEAR_GROUP = 3; * - Январь 2012 * - Январь */ -export default class RUMonthNameParser extends AbstractParserWithWordBoundaryChecking { - patternLeftBoundary(): string { - return REGEX_PARTS.leftBoundary; - } - - innerPattern(): RegExp { - return PATTERN; +export default class RUMonthNameParser extends AbstractParserWithLeftBoundaryChecking { + innerPatternString(context: ParsingContext): string { + return ( + `((?:в)\\s*)?` + + `(${matchAnyPattern(MONTH_DICTIONARY)})` + + `\\s*` + + `(?:` + + `[,-]?\\s*(${YEAR_PATTERN})?` + + `)?` + + `(?=[^\\s\\w]|\\s+[^0-9]|\\s+$|$)` + ); } 
innerExtract(context: ParsingContext, match: RegExpMatchArray) { diff --git a/src/locales/ru/parsers/RURelativeDateFormatParser.ts b/src/locales/ru/parsers/RURelativeDateFormatParser.ts index 03b7cfe3..5f929a25 100644 --- a/src/locales/ru/parsers/RURelativeDateFormatParser.ts +++ b/src/locales/ru/parsers/RURelativeDateFormatParser.ts @@ -4,24 +4,16 @@ import { ParsingComponents } from "../../../results"; import dayjs from "dayjs"; import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary"; import { matchAnyPattern } from "../../../utils/pattern"; - -const PATTERN = new RegExp( - `(в прошлом|на прошлой|на следующей|в следующем|на этой|в этом)\\s*(${matchAnyPattern( - TIME_UNIT_DICTIONARY - )})(?=\\s*)${REGEX_PARTS.rightBoundary}`, - REGEX_PARTS.flags -); +import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking"; const MODIFIER_WORD_GROUP = 1; const RELATIVE_WORD_GROUP = 2; -export default class RURelativeDateFormatParser extends AbstractParserWithWordBoundaryChecking { - patternLeftBoundary(): string { - return REGEX_PARTS.leftBoundary; - } - - innerPattern(): RegExp { - return PATTERN; +export default class RURelativeDateFormatParser extends AbstractParserWithLeftRightBoundaryChecking { + innerPatternString(context: ParsingContext): string { + return `(в прошлом|на прошлой|на следующей|в следующем|на этой|в этом)\\s*(${matchAnyPattern( + TIME_UNIT_DICTIONARY + )})`; } innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents { diff --git a/src/locales/ru/parsers/RUTimeUnitAgoFormatParser.ts b/src/locales/ru/parsers/RUTimeUnitAgoFormatParser.ts index 8e8efc18..a08f3649 100644 --- a/src/locales/ru/parsers/RUTimeUnitAgoFormatParser.ts +++ b/src/locales/ru/parsers/RUTimeUnitAgoFormatParser.ts @@ -1,18 +1,12 @@ import { ParsingContext } from "../../../chrono"; -import { parseTimeUnits, REGEX_PARTS, TIME_UNITS_PATTERN } from "../constants"; +import { 
parseTimeUnits, TIME_UNITS_PATTERN } from "../constants"; import { ParsingComponents } from "../../../results"; -import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary"; import { reverseTimeUnits } from "../../../utils/timeunits"; +import { AbstractParserWithLeftBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking"; -const PATTERN = new RegExp(`(${TIME_UNITS_PATTERN})\\s{0,5}назад(?=(?:\\W|$))`, REGEX_PARTS.flags); - -export default class RUTimeUnitAgoFormatParser extends AbstractParserWithWordBoundaryChecking { - patternLeftBoundary(): string { - return REGEX_PARTS.leftBoundary; - } - - innerPattern(): RegExp { - return PATTERN; +export default class RUTimeUnitAgoFormatParser extends AbstractParserWithLeftBoundaryChecking { + innerPatternString(context: ParsingContext): string { + return `(${TIME_UNITS_PATTERN})\\s{0,5}назад(?=(?:\\W|$))`; } innerExtract(context: ParsingContext, match: RegExpMatchArray) { diff --git a/src/locales/ru/parsers/RUTimeUnitCasualRelativeFormatParser.ts b/src/locales/ru/parsers/RUTimeUnitCasualRelativeFormatParser.ts index 0b7f0ffd..0b9d8be6 100644 --- a/src/locales/ru/parsers/RUTimeUnitCasualRelativeFormatParser.ts +++ b/src/locales/ru/parsers/RUTimeUnitCasualRelativeFormatParser.ts @@ -1,21 +1,12 @@ import { TIME_UNITS_PATTERN, parseTimeUnits, REGEX_PARTS } from "../constants"; import { ParsingContext } from "../../../chrono"; import { ParsingComponents } from "../../../results"; -import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary"; import { reverseTimeUnits } from "../../../utils/timeunits"; +import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking"; -const PATTERN = new RegExp( - `(эти|последние|прошлые|следующие|после|спустя|через|\\+|-)\\s*(${TIME_UNITS_PATTERN})${REGEX_PARTS.rightBoundary}`, - REGEX_PARTS.flags -); - -export default class 
RUTimeUnitCasualRelativeFormatParser extends AbstractParserWithWordBoundaryChecking { - patternLeftBoundary(): string { - return REGEX_PARTS.leftBoundary; - } - - innerPattern(): RegExp { - return PATTERN; +export default class RUTimeUnitCasualRelativeFormatParser extends AbstractParserWithLeftRightBoundaryChecking { + innerPatternString(context: ParsingContext): string { + return `(эти|последние|прошлые|следующие|после|спустя|через|\\+|-)\\s*(${TIME_UNITS_PATTERN})`; } innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents { diff --git a/src/locales/ru/parsers/RUTimeUnitWithinFormatParser.ts b/src/locales/ru/parsers/RUTimeUnitWithinFormatParser.ts index 16e29b57..44b428f7 100644 --- a/src/locales/ru/parsers/RUTimeUnitWithinFormatParser.ts +++ b/src/locales/ru/parsers/RUTimeUnitWithinFormatParser.ts @@ -4,17 +4,23 @@ import { ParsingComponents } from "../../../results"; import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary"; const PATTERN = `(?:(?:около|примерно)\\s*(?:~\\s*)?)?(${TIME_UNITS_PATTERN})${REGEX_PARTS.rightBoundary}`; -const PATTERN_WITH_PREFIX = new RegExp(`(?:в течение|в течении)\\s*${PATTERN}`, REGEX_PARTS.flags); - -const PATTERN_WITHOUT_PREFIX = new RegExp(PATTERN, "i"); export default class RUTimeUnitWithinFormatParser extends AbstractParserWithWordBoundaryChecking { + private readonly patternWithPrefix: RegExp; + private readonly patternWithoutPrefix: RegExp; + + constructor() { + super(); + this.patternWithPrefix = new RegExp(`(?:в течение|в течении)\\s*${PATTERN}`, REGEX_PARTS.flags); + this.patternWithoutPrefix = new RegExp(PATTERN, REGEX_PARTS.flags); + } + patternLeftBoundary(): string { return REGEX_PARTS.leftBoundary; } innerPattern(context: ParsingContext): RegExp { - return context.option.forwardDate ? PATTERN_WITHOUT_PREFIX : PATTERN_WITH_PREFIX; + return context.option.forwardDate ? 
this.patternWithoutPrefix : this.patternWithPrefix; } innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents { diff --git a/src/locales/ru/parsers/RUWeekdayParser.ts b/src/locales/ru/parsers/RUWeekdayParser.ts index 9ca9bfc6..0560a0e2 100644 --- a/src/locales/ru/parsers/RUWeekdayParser.ts +++ b/src/locales/ru/parsers/RUWeekdayParser.ts @@ -1,32 +1,24 @@ import { ParsingContext } from "../../../chrono"; import { ParsingComponents } from "../../../results"; -import { REGEX_PARTS, WEEKDAY_DICTIONARY } from "../constants"; +import { WEEKDAY_DICTIONARY } from "../constants"; import { matchAnyPattern } from "../../../utils/pattern"; -import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary"; import { createParsingComponentsAtWeekday } from "../../../common/calculation/weekdays"; - -const PATTERN = new RegExp( - `(?:(?:,|\\(|()\\s*)?` + - `(?:в\\s*?)?` + - `(?:(эту|этот|прошлый|прошлую|следующий|следующую|следующего)\\s*)?` + - `(${matchAnyPattern(WEEKDAY_DICTIONARY)})` + - `(?:\\s*(?:,|\\)|)))?` + - `(?:\\s*на\\s*(этой|прошлой|следующей)\\s*неделе)?` + - `${REGEX_PARTS.rightBoundary}`, - REGEX_PARTS.flags -); +import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking"; const PREFIX_GROUP = 1; const WEEKDAY_GROUP = 2; const POSTFIX_GROUP = 3; -export default class RUWeekdayParser extends AbstractParserWithWordBoundaryChecking { - innerPattern(): RegExp { - return PATTERN; - } - - patternLeftBoundary(): string { - return REGEX_PARTS.leftBoundary; +export default class RUWeekdayParser extends AbstractParserWithLeftRightBoundaryChecking { + innerPatternString(context: ParsingContext): string { + return ( + `(?:(?:,|\\(|()\\s*)?` + + `(?:в\\s*?)?` + + `(?:(эту|этот|прошлый|прошлую|следующий|следующую|следующего)\\s*)?` + + `(${matchAnyPattern(WEEKDAY_DICTIONARY)})` + + `(?:\\s*(?:,|\\)|)))?` + + `(?:\\s*на\\s*(этой|прошлой|следующей)\\s*неделе)?` + ); } 
innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents {