Skip to content

Commit

Permalink
Fix: Apply lazy loading for RU patterns
Browse files Browse the repository at this point in the history
  • Loading branch information
Wanasit Tanakitrungruang committed Nov 19, 2023
1 parent 5ebfcc0 commit 5db3a1f
Show file tree
Hide file tree
Showing 11 changed files with 116 additions and 145 deletions.
26 changes: 17 additions & 9 deletions src/common/parsers/AbstractParserWithWordBoundary.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { ParsingComponents, ParsingResult } from "../../results";
import { Component } from "../../types";

/**
*
* A parser that checks for a word boundary and then applies the inner pattern and extraction.
*/
export abstract class AbstractParserWithWordBoundaryChecking implements Parser {
abstract innerPattern(context: ParsingContext): RegExp;
Expand All @@ -12,21 +12,29 @@ export abstract class AbstractParserWithWordBoundaryChecking implements Parser {
match: RegExpMatchArray
): ParsingComponents | ParsingResult | { [c in Component]?: number } | null;

private cachedInnerPattern?: RegExp = null;
private cachedPattern?: RegExp = null;
// Override this method if there is a more efficient way to check whether the inner pattern has changed.
innerPatternHasChange(context: ParsingContext, currentInnerPattern: RegExp): boolean {
return this.innerPattern(context) !== currentInnerPattern;
}

patternLeftBoundary(): string {
return `(\\W|^)`;
}

private cachedInnerPattern?: RegExp = null;
private cachedPattern?: RegExp = null;

pattern(context: ParsingContext): RegExp {
const innerPattern = this.innerPattern(context);
if (innerPattern == this.cachedInnerPattern) {
return this.cachedPattern;
if (this.cachedInnerPattern) {
if (!this.innerPatternHasChange(context, this.cachedInnerPattern)) {
return this.cachedPattern;
}
}

this.cachedPattern = new RegExp(`${this.patternLeftBoundary()}${innerPattern.source}`, innerPattern.flags);
this.cachedInnerPattern = innerPattern;
this.cachedInnerPattern = this.innerPattern(context);
this.cachedPattern = new RegExp(
`${this.patternLeftBoundary()}${this.cachedInnerPattern.source}`,
this.cachedInnerPattern.flags
);
return this.cachedPattern;
}

Expand Down
25 changes: 25 additions & 0 deletions src/locales/ru/parsers/AbstractParserWithWordBoundaryChecking.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
import { REGEX_PARTS } from "../constants";
import { ParsingContext } from "../../../chrono";

/**
 * Base class for parsers that describe their pattern as a regex *source string*
 * instead of a pre-built RegExp.
 *
 * The string from `innerPatternString()` is compiled with `REGEX_PARTS.flags`, and the
 * left boundary used by the parent class is taken from `REGEX_PARTS.leftBoundary`.
 */
export abstract class AbstractParserWithLeftBoundaryChecking extends AbstractParserWithWordBoundaryChecking {
    /**
     * Returns the regex source (without flags) for this parser.
     * It is only compiled when `innerPattern()` is called, not at module load time.
     */
    abstract innerPatternString(context: ParsingContext): string;

    /** Left-boundary regex fragment, taken from `REGEX_PARTS`. */
    patternLeftBoundary(): string {
        const { leftBoundary } = REGEX_PARTS;
        return leftBoundary;
    }

    /** Compiles the source string from `innerPatternString()` using `REGEX_PARTS.flags`. */
    innerPattern(context: ParsingContext): RegExp {
        const source = this.innerPatternString(context);
        return new RegExp(source, REGEX_PARTS.flags);
    }

    /**
     * Always reports "no change".
     *
     * `innerPattern()` builds a fresh RegExp object on every call, so an identity
     * comparison against a previously built pattern would never match. Returning
     * `false` unconditionally tells the caller the existing pattern is still valid.
     * NOTE(review): this means a subclass whose pattern string varies with `context`
     * must override this method — confirm no such subclass relies on the default.
     */
    innerPatternHasChange(context: ParsingContext, currentInnerPattern: RegExp): boolean {
        return false;
    }
}

/**
 * Variant of `AbstractParserWithLeftBoundaryChecking` that also appends
 * `REGEX_PARTS.rightBoundary` to the end of the pattern source, so subclasses
 * only supply the core pattern string.
 */
export abstract class AbstractParserWithLeftRightBoundaryChecking extends AbstractParserWithLeftBoundaryChecking {
    /** Compiles `innerPatternString()` followed by the right-boundary fragment, using `REGEX_PARTS.flags`. */
    innerPattern(context: ParsingContext): RegExp {
        const sourceWithRightBoundary = `${this.innerPatternString(context)}${REGEX_PARTS.rightBoundary}`;
        return new RegExp(sourceWithRightBoundary, REGEX_PARTS.flags);
    }
}
18 changes: 4 additions & 14 deletions src/locales/ru/parsers/RUCasualDateParser.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,11 @@
import { ParsingContext } from "../../../chrono";
import { ParsingComponents, ParsingResult } from "../../../results";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
import * as references from "../../../common/casualReferences";
import { REGEX_PARTS } from "../constants";
import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";

const PATTERN = new RegExp(
`(?:с|со)?\\s*(сегодня|вчера|завтра|послезавтра|послепослезавтра|позапозавчера|позавчера)${REGEX_PARTS.rightBoundary}`,
REGEX_PARTS.flags
);

export default class RUCasualDateParser extends AbstractParserWithWordBoundaryChecking {
patternLeftBoundary(): string {
return REGEX_PARTS.leftBoundary;
}

innerPattern(context: ParsingContext): RegExp {
return PATTERN;
export default class RUCasualDateParser extends AbstractParserWithLeftRightBoundaryChecking {
innerPatternString(context: ParsingContext): string {
return `(?:с|со)?\\s*(сегодня|вчера|завтра|послезавтра|послепослезавтра|позапозавчера|позавчера)`;
}

innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents | ParsingResult {
Expand Down
18 changes: 4 additions & 14 deletions src/locales/ru/parsers/RUCasualTimeParser.ts
Original file line number Diff line number Diff line change
@@ -1,22 +1,12 @@
import { ParsingContext } from "../../../chrono";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
import * as references from "../../../common/casualReferences";
import { assignSimilarDate } from "../../../utils/dayjs";
import dayjs from "dayjs";
import { REGEX_PARTS } from "../constants";
import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";

const PATTERN = new RegExp(
`(сейчас|прошлым\\s*вечером|прошлой\\s*ночью|следующей\\s*ночью|сегодня\\s*ночью|этой\\s*ночью|ночью|этим утром|утром|утра|в\\s*полдень|вечером|вечера|в\\s*полночь)` +
`${REGEX_PARTS.rightBoundary}`,
REGEX_PARTS.flags
);
export default class RUCasualTimeParser extends AbstractParserWithWordBoundaryChecking {
patternLeftBoundary(): string {
return REGEX_PARTS.leftBoundary;
}

innerPattern() {
return PATTERN;
export default class RUCasualTimeParser extends AbstractParserWithLeftRightBoundaryChecking {
innerPatternString(context: ParsingContext): string {
return `(сейчас|прошлым\\s*вечером|прошлой\\s*ночью|следующей\\s*ночью|сегодня\\s*ночью|этой\\s*ночью|ночью|этим утром|утром|утра|в\\s*полдень|вечером|вечера|в\\s*полночь)`;
}

innerExtract(context: ParsingContext, match: RegExpMatchArray) {
Expand Down
42 changes: 16 additions & 26 deletions src/locales/ru/parsers/RUMonthNameLittleEndianParser.ts
Original file line number Diff line number Diff line change
@@ -1,41 +1,31 @@
import { ParsingContext } from "../../../chrono";
import { ParsingResult } from "../../../results";
import { findYearClosestToRef } from "../../../calculation/years";
import { MONTH_DICTIONARY, REGEX_PARTS } from "../constants";
import { MONTH_DICTIONARY } from "../constants";
import { YEAR_PATTERN, parseYear } from "../constants";
import { ORDINAL_NUMBER_PATTERN, parseOrdinalNumberPattern } from "../constants";
import { matchAnyPattern } from "../../../utils/pattern";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";

// prettier-ignore
const PATTERN = new RegExp(
`(?:с)?\\s*(${ORDINAL_NUMBER_PATTERN})` +
`(?:` +
`\\s{0,3}(?:по|-|–|до)?\\s{0,3}` +
`(${ORDINAL_NUMBER_PATTERN})` +
`)?` +
`(?:-|\\/|\\s{0,3}(?:of)?\\s{0,3})` +
`(${matchAnyPattern(MONTH_DICTIONARY)})` +
`(?:` +
`(?:-|\\/|,?\\s{0,3})` +
`(${YEAR_PATTERN}(?![^\\s]\\d))` +
`)?` +
`${REGEX_PARTS.rightBoundary}`,
REGEX_PARTS.flags
);
import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";

const DATE_GROUP = 1;
const DATE_TO_GROUP = 2;
const MONTH_NAME_GROUP = 3;
const YEAR_GROUP = 4;

export default class RUMonthNameLittleEndianParser extends AbstractParserWithWordBoundaryChecking {
patternLeftBoundary(): string {
return REGEX_PARTS.leftBoundary;
}

innerPattern(): RegExp {
return PATTERN;
export default class RUMonthNameLittleEndianParser extends AbstractParserWithLeftRightBoundaryChecking {
innerPatternString(context: ParsingContext): string {
// prettier-ignore
return `(?:с)?\\s*(${ORDINAL_NUMBER_PATTERN})` +
`(?:` +
`\\s{0,3}(?:по|-|–|до)?\\s{0,3}` +
`(${ORDINAL_NUMBER_PATTERN})` +
`)?` +
`(?:-|\\/|\\s{0,3}(?:of)?\\s{0,3})` +
`(${matchAnyPattern(MONTH_DICTIONARY)})` +
`(?:` +
`(?:-|\\/|,?\\s{0,3})` +
`(${YEAR_PATTERN}(?![^\\s]\\d))` +
`)?`;
}

innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingResult {
Expand Down
33 changes: 13 additions & 20 deletions src/locales/ru/parsers/RUMonthNameParser.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,9 @@
import { FULL_MONTH_NAME_DICTIONARY, MONTH_DICTIONARY, REGEX_PARTS } from "../constants";
import { FULL_MONTH_NAME_DICTIONARY, MONTH_DICTIONARY } from "../constants";
import { ParsingContext } from "../../../chrono";
import { findYearClosestToRef } from "../../../calculation/years";
import { matchAnyPattern } from "../../../utils/pattern";
import { YEAR_PATTERN, parseYear } from "../constants";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";

const PATTERN = new RegExp(
`((?:в)\\s*)?` +
`(${matchAnyPattern(MONTH_DICTIONARY)})` +
`\\s*` +
`(?:` +
`[,-]?\\s*(${YEAR_PATTERN})?` +
`)?` +
`(?=[^\\s\\w]|\\s+[^0-9]|\\s+$|$)`,
REGEX_PARTS.flags
);
import { AbstractParserWithLeftBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";

const MONTH_NAME_GROUP = 2;
const YEAR_GROUP = 3;
Expand All @@ -25,13 +14,17 @@ const YEAR_GROUP = 3;
* - Январь 2012
* - Январь
*/
export default class RUMonthNameParser extends AbstractParserWithWordBoundaryChecking {
patternLeftBoundary(): string {
return REGEX_PARTS.leftBoundary;
}

innerPattern(): RegExp {
return PATTERN;
export default class RUMonthNameParser extends AbstractParserWithLeftBoundaryChecking {
innerPatternString(context: ParsingContext): string {
return (
`((?:в)\\s*)?` +
`(${matchAnyPattern(MONTH_DICTIONARY)})` +
`\\s*` +
`(?:` +
`[,-]?\\s*(${YEAR_PATTERN})?` +
`)?` +
`(?=[^\\s\\w]|\\s+[^0-9]|\\s+$|$)`
);
}

innerExtract(context: ParsingContext, match: RegExpMatchArray) {
Expand Down
20 changes: 6 additions & 14 deletions src/locales/ru/parsers/RURelativeDateFormatParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,16 @@ import { ParsingComponents } from "../../../results";
import dayjs from "dayjs";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
import { matchAnyPattern } from "../../../utils/pattern";

const PATTERN = new RegExp(
`(в прошлом|на прошлой|на следующей|в следующем|на этой|в этом)\\s*(${matchAnyPattern(
TIME_UNIT_DICTIONARY
)})(?=\\s*)${REGEX_PARTS.rightBoundary}`,
REGEX_PARTS.flags
);
import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";

const MODIFIER_WORD_GROUP = 1;
const RELATIVE_WORD_GROUP = 2;

export default class RURelativeDateFormatParser extends AbstractParserWithWordBoundaryChecking {
patternLeftBoundary(): string {
return REGEX_PARTS.leftBoundary;
}

innerPattern(): RegExp {
return PATTERN;
export default class RURelativeDateFormatParser extends AbstractParserWithLeftRightBoundaryChecking {
innerPatternString(context: ParsingContext): string {
return `(в прошлом|на прошлой|на следующей|в следующем|на этой|в этом)\\s*(${matchAnyPattern(
TIME_UNIT_DICTIONARY
)})`;
}

innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents {
Expand Down
16 changes: 5 additions & 11 deletions src/locales/ru/parsers/RUTimeUnitAgoFormatParser.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,12 @@
import { ParsingContext } from "../../../chrono";
import { parseTimeUnits, REGEX_PARTS, TIME_UNITS_PATTERN } from "../constants";
import { parseTimeUnits, TIME_UNITS_PATTERN } from "../constants";
import { ParsingComponents } from "../../../results";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
import { reverseTimeUnits } from "../../../utils/timeunits";
import { AbstractParserWithLeftBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";

const PATTERN = new RegExp(`(${TIME_UNITS_PATTERN})\\s{0,5}назад(?=(?:\\W|$))`, REGEX_PARTS.flags);

export default class RUTimeUnitAgoFormatParser extends AbstractParserWithWordBoundaryChecking {
patternLeftBoundary(): string {
return REGEX_PARTS.leftBoundary;
}

innerPattern(): RegExp {
return PATTERN;
export default class RUTimeUnitAgoFormatParser extends AbstractParserWithLeftBoundaryChecking {
innerPatternString(context: ParsingContext): string {
return `(${TIME_UNITS_PATTERN})\\s{0,5}назад(?=(?:\\W|$))`;
}

innerExtract(context: ParsingContext, match: RegExpMatchArray) {
Expand Down
17 changes: 4 additions & 13 deletions src/locales/ru/parsers/RUTimeUnitCasualRelativeFormatParser.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,12 @@
import { TIME_UNITS_PATTERN, parseTimeUnits, REGEX_PARTS } from "../constants";
import { ParsingContext } from "../../../chrono";
import { ParsingComponents } from "../../../results";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
import { reverseTimeUnits } from "../../../utils/timeunits";
import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";

const PATTERN = new RegExp(
`(эти|последние|прошлые|следующие|после|спустя|через|\\+|-)\\s*(${TIME_UNITS_PATTERN})${REGEX_PARTS.rightBoundary}`,
REGEX_PARTS.flags
);

export default class RUTimeUnitCasualRelativeFormatParser extends AbstractParserWithWordBoundaryChecking {
patternLeftBoundary(): string {
return REGEX_PARTS.leftBoundary;
}

innerPattern(): RegExp {
return PATTERN;
export default class RUTimeUnitCasualRelativeFormatParser extends AbstractParserWithLeftRightBoundaryChecking {
innerPatternString(context: ParsingContext): string {
return `(эти|последние|прошлые|следующие|после|спустя|через|\\+|-)\\s*(${TIME_UNITS_PATTERN})`;
}

innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents {
Expand Down
14 changes: 10 additions & 4 deletions src/locales/ru/parsers/RUTimeUnitWithinFormatParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,23 @@ import { ParsingComponents } from "../../../results";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";

const PATTERN = `(?:(?:около|примерно)\\s*(?:~\\s*)?)?(${TIME_UNITS_PATTERN})${REGEX_PARTS.rightBoundary}`;
const PATTERN_WITH_PREFIX = new RegExp(`(?:в течение|в течении)\\s*${PATTERN}`, REGEX_PARTS.flags);

const PATTERN_WITHOUT_PREFIX = new RegExp(PATTERN, "i");

export default class RUTimeUnitWithinFormatParser extends AbstractParserWithWordBoundaryChecking {
private readonly patternWithPrefix: RegExp;
private readonly patternWithoutPrefix: RegExp;

constructor() {
super();
this.patternWithPrefix = new RegExp(`(?:в течение|в течении)\\s*${PATTERN}`, REGEX_PARTS.flags);
this.patternWithoutPrefix = new RegExp(PATTERN, REGEX_PARTS.flags);
}

patternLeftBoundary(): string {
return REGEX_PARTS.leftBoundary;
}

innerPattern(context: ParsingContext): RegExp {
return context.option.forwardDate ? PATTERN_WITHOUT_PREFIX : PATTERN_WITH_PREFIX;
return context.option.forwardDate ? this.patternWithoutPrefix : this.patternWithPrefix;
}

innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents {
Expand Down
32 changes: 12 additions & 20 deletions src/locales/ru/parsers/RUWeekdayParser.ts
Original file line number Diff line number Diff line change
@@ -1,32 +1,24 @@
import { ParsingContext } from "../../../chrono";
import { ParsingComponents } from "../../../results";
import { REGEX_PARTS, WEEKDAY_DICTIONARY } from "../constants";
import { WEEKDAY_DICTIONARY } from "../constants";
import { matchAnyPattern } from "../../../utils/pattern";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
import { createParsingComponentsAtWeekday } from "../../../common/calculation/weekdays";

const PATTERN = new RegExp(
`(?:(?:,|\\(|()\\s*)?` +
`(?:в\\s*?)?` +
`(?:(эту|этот|прошлый|прошлую|следующий|следующую|следующего)\\s*)?` +
`(${matchAnyPattern(WEEKDAY_DICTIONARY)})` +
`(?:\\s*(?:,|\\)|)))?` +
`(?:\\s*на\\s*(этой|прошлой|следующей)\\s*неделе)?` +
`${REGEX_PARTS.rightBoundary}`,
REGEX_PARTS.flags
);
import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";

const PREFIX_GROUP = 1;
const WEEKDAY_GROUP = 2;
const POSTFIX_GROUP = 3;

export default class RUWeekdayParser extends AbstractParserWithWordBoundaryChecking {
innerPattern(): RegExp {
return PATTERN;
}

patternLeftBoundary(): string {
return REGEX_PARTS.leftBoundary;
export default class RUWeekdayParser extends AbstractParserWithLeftRightBoundaryChecking {
innerPatternString(context: ParsingContext): string {
return (
`(?:(?:,|\\(|()\\s*)?` +
`(?:в\\s*?)?` +
`(?:(эту|этот|прошлый|прошлую|следующий|следующую|следующего)\\s*)?` +
`(${matchAnyPattern(WEEKDAY_DICTIONARY)})` +
`(?:\\s*(?:,|\\)|)))?` +
`(?:\\s*на\\s*(этой|прошлой|следующей)\\s*неделе)?`
);
}

innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents {
Expand Down

0 comments on commit 5db3a1f

Please sign in to comment.