Skip to content

Commit

Permalink
Fix: Apply lazy loading for UK patterns
Browse files Browse the repository at this point in the history
  • Loading branch information
Wanasit Tanakitrungruang committed Nov 19, 2023
1 parent 30128bc commit 4c4d6d1
Show file tree
Hide file tree
Showing 10 changed files with 98 additions and 142 deletions.
27 changes: 27 additions & 0 deletions src/locales/uk/parsers/AbstractParserWithWordBoundaryChecking.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// noinspection DuplicatedCode

import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
import { REGEX_PARTS } from "../constants";
import { ParsingContext } from "../../../chrono";

/**
 * Base class for parsers whose left boundary comes from `REGEX_PARTS.leftBoundary`
 * and whose inner pattern is supplied as a string by the subclass.
 *
 * The RegExp is compiled on demand, each time `innerPattern` is called, from the
 * string returned by `innerPatternString`.
 */
export abstract class AbstractParserWithLeftBoundaryChecking extends AbstractParserWithWordBoundaryChecking {
    /**
     * Source text of the inner regular expression.
     *
     * @param context parsing context handed through from `innerPattern`
     * @returns the pattern source, without the left boundary
     */
    abstract innerPatternString(context: ParsingContext): string;

    /** @returns the left-boundary fragment taken from `REGEX_PARTS`. */
    patternLeftBoundary(): string {
        const { leftBoundary } = REGEX_PARTS;
        return leftBoundary;
    }

    /**
     * Compiles the subclass-provided pattern string with the flags from `REGEX_PARTS`.
     *
     * @param context parsing context forwarded to `innerPatternString`
     * @returns a newly constructed RegExp on every call
     */
    innerPattern(context: ParsingContext): RegExp {
        const source = this.innerPatternString(context);
        const flags = REGEX_PARTS.flags;
        return new RegExp(source, flags);
    }

    /**
     * Always reports "no change".
     *
     * NOTE(review): presumably this lets the base class keep reusing an already
     * built pattern instead of comparing against the fresh RegExp that
     * `innerPattern` returns each time — confirm against the base class.
     *
     * @param context unused
     * @param currentInnerPattern unused
     * @returns `false` unconditionally
     */
    innerPatternHasChange(context: ParsingContext, currentInnerPattern: RegExp): boolean {
        return false;
    }
}

/**
 * Variant of `AbstractParserWithLeftBoundaryChecking` that also appends
 * `REGEX_PARTS.rightBoundary` to the end of the subclass-provided pattern string.
 */
export abstract class AbstractParserWithLeftRightBoundaryChecking extends AbstractParserWithLeftBoundaryChecking {
    /**
     * Compiles the inner pattern string followed by the right-boundary fragment.
     *
     * @param context parsing context forwarded to `innerPatternString`
     * @returns a newly constructed RegExp using the flags from `REGEX_PARTS`
     */
    innerPattern(context: ParsingContext): RegExp {
        const body = this.innerPatternString(context);
        const bounded = `${body}${REGEX_PARTS.rightBoundary}`;
        return new RegExp(bounded, REGEX_PARTS.flags);
    }
}
18 changes: 4 additions & 14 deletions src/locales/uk/parsers/UKCasualDateParser.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,11 @@
import { ParsingContext } from "../../../chrono";
import { ParsingComponents, ParsingResult } from "../../../results";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
import * as references from "../../../common/casualReferences";
import { REGEX_PARTS } from "../constants";
import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";

const PATTERN = new RegExp(
`(?:з|із|від)?\\s*(сьогодні|вчора|завтра|післязавтра|післяпіслязавтра|позапозавчора|позавчора)${REGEX_PARTS.rightBoundary}`,
REGEX_PARTS.flags
);

export default class UKCasualDateParser extends AbstractParserWithWordBoundaryChecking {
patternLeftBoundary(): string {
return REGEX_PARTS.leftBoundary;
}

innerPattern(context: ParsingContext): RegExp {
return PATTERN;
export default class UKCasualDateParser extends AbstractParserWithLeftRightBoundaryChecking {
innerPatternString(context: ParsingContext): string {
return `(?:з|із|від)?\\s*(сьогодні|вчора|завтра|післязавтра|післяпіслязавтра|позапозавчора|позавчора)`;
}

innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents | ParsingResult {
Expand Down
18 changes: 4 additions & 14 deletions src/locales/uk/parsers/UKCasualTimeParser.ts
Original file line number Diff line number Diff line change
@@ -1,22 +1,12 @@
import { ParsingContext } from "../../../chrono";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
import * as references from "../../../common/casualReferences";
import { assignSimilarDate } from "../../../utils/dayjs";
import dayjs from "dayjs";
import { REGEX_PARTS } from "../constants";
import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";

const PATTERN = new RegExp(
`(зараз|минулого\\s*вечора|минулої\\s*ночі|наступної\\s*ночі|сьогодні\\s*вночі|цієї\\s*ночі|цього ранку|вранці|ранку|зранку|опівдні|ввечері|вечора|опівночі|вночі)` +
`${REGEX_PARTS.rightBoundary}`,
REGEX_PARTS.flags
);
export default class UKCasualTimeParser extends AbstractParserWithWordBoundaryChecking {
patternLeftBoundary(): string {
return REGEX_PARTS.leftBoundary;
}

innerPattern() {
return PATTERN;
export default class UKCasualTimeParser extends AbstractParserWithLeftRightBoundaryChecking {
innerPatternString(context: ParsingContext): string {
return `(зараз|минулого\\s*вечора|минулої\\s*ночі|наступної\\s*ночі|сьогодні\\s*вночі|цієї\\s*ночі|цього ранку|вранці|ранку|зранку|опівдні|ввечері|вечора|опівночі|вночі)`;
}

innerExtract(context: ParsingContext, match: RegExpMatchArray) {
Expand Down
43 changes: 17 additions & 26 deletions src/locales/uk/parsers/UKMonthNameLittleEndianParser.ts
Original file line number Diff line number Diff line change
@@ -1,41 +1,32 @@
import { ParsingContext } from "../../../chrono";
import { ParsingResult } from "../../../results";
import { findYearClosestToRef } from "../../../calculation/years";
import { MONTH_DICTIONARY, REGEX_PARTS } from "../constants";
import { MONTH_DICTIONARY } from "../constants";
import { YEAR_PATTERN, parseYearPattern } from "../constants";
import { ORDINAL_NUMBER_PATTERN, parseOrdinalNumberPattern } from "../constants";
import { matchAnyPattern } from "../../../utils/pattern";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";

// prettier-ignore
const PATTERN = new RegExp(
`(?:з|із)?\\s*(${ORDINAL_NUMBER_PATTERN})` +
`(?:` +
`\\s{0,3}(?:по|-|–|до)?\\s{0,3}` +
`(${ORDINAL_NUMBER_PATTERN})` +
`)?` +
`(?:-|\\/|\\s{0,3}(?:of)?\\s{0,3})` +
`(${matchAnyPattern(MONTH_DICTIONARY)})` +
`(?:` +
`(?:-|\\/|,?\\s{0,3})` +
`(${YEAR_PATTERN}(?![^\\s]\\d))` +
`)?` +
`${REGEX_PARTS.rightBoundary}`,
REGEX_PARTS.flags
);
import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";

const DATE_GROUP = 1;
const DATE_TO_GROUP = 2;
const MONTH_NAME_GROUP = 3;
const YEAR_GROUP = 4;

export default class UKMonthNameLittleEndianParser extends AbstractParserWithWordBoundaryChecking {
patternLeftBoundary(): string {
return REGEX_PARTS.leftBoundary;
}

innerPattern(): RegExp {
return PATTERN;
export default class UKMonthNameLittleEndianParser extends AbstractParserWithLeftRightBoundaryChecking {
innerPatternString(context: ParsingContext): string {
return (
`(?:з|із)?\\s*(${ORDINAL_NUMBER_PATTERN})` +
`(?:` +
`\\s{0,3}(?:по|-|–|до)?\\s{0,3}` +
`(${ORDINAL_NUMBER_PATTERN})` +
`)?` +
`(?:-|\\/|\\s{0,3}(?:of)?\\s{0,3})` +
`(${matchAnyPattern(MONTH_DICTIONARY)})` +
`(?:` +
`(?:-|\\/|,?\\s{0,3})` +
`(${YEAR_PATTERN}(?![^\\s]\\d))` +
`)?`
);
}

innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingResult {
Expand Down
33 changes: 13 additions & 20 deletions src/locales/uk/parsers/UKMonthNameParser.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,9 @@
import { FULL_MONTH_NAME_DICTIONARY, MONTH_DICTIONARY, REGEX_PARTS } from "../constants";
import { FULL_MONTH_NAME_DICTIONARY, MONTH_DICTIONARY } from "../constants";
import { ParsingContext } from "../../../chrono";
import { findYearClosestToRef } from "../../../calculation/years";
import { matchAnyPattern } from "../../../utils/pattern";
import { YEAR_PATTERN, parseYearPattern } from "../constants";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";

const PATTERN = new RegExp(
`((?:в|у)\\s*)?` +
`(${matchAnyPattern(MONTH_DICTIONARY)})` +
`\\s*` +
`(?:` +
`[,-]?\\s*(${YEAR_PATTERN})?` +
`)?` +
`(?=[^\\s\\w]|\\s+[^0-9]|\\s+$|$)`,
REGEX_PARTS.flags
);
import { AbstractParserWithLeftBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";

const MONTH_NAME_GROUP = 2;
const YEAR_GROUP = 3;
Expand All @@ -25,13 +14,17 @@ const YEAR_GROUP = 3;
* - Січень 2012
* - Січень
*/
export default class UkMonthNameParser extends AbstractParserWithWordBoundaryChecking {
patternLeftBoundary(): string {
return REGEX_PARTS.leftBoundary;
}

innerPattern(): RegExp {
return PATTERN;
export default class UkMonthNameParser extends AbstractParserWithLeftBoundaryChecking {
innerPatternString(context: ParsingContext): string {
return (
`((?:в|у)\\s*)?` +
`(${matchAnyPattern(MONTH_DICTIONARY)})` +
`\\s*` +
`(?:` +
`[,-]?\\s*(${YEAR_PATTERN})?` +
`)?` +
`(?=[^\\s\\w]|\\s+[^0-9]|\\s+$|$)`
);
}

innerExtract(context: ParsingContext, match: RegExpMatchArray) {
Expand Down
24 changes: 8 additions & 16 deletions src/locales/uk/parsers/UKRelativeDateFormatParser.ts
Original file line number Diff line number Diff line change
@@ -1,27 +1,19 @@
import { REGEX_PARTS, TIME_UNIT_DICTIONARY } from "../constants";
import { TIME_UNIT_DICTIONARY } from "../constants";
import { ParsingContext } from "../../../chrono";
import { ParsingComponents } from "../../../results";
import dayjs from "dayjs";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
import { matchAnyPattern } from "../../../utils/pattern";

const PATTERN = new RegExp(
`(в минулому|у минулому|на минулому|минулого|на наступному|в наступному|у наступному|наступного|на цьому|в цьому|у цьому|цього)\\s*(${matchAnyPattern(
TIME_UNIT_DICTIONARY
)})(?=\\s*)${REGEX_PARTS.rightBoundary}`,
REGEX_PARTS.flags
);
import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";

const MODIFIER_WORD_GROUP = 1;
const RELATIVE_WORD_GROUP = 2;

export default class UKRelativeDateFormatParser extends AbstractParserWithWordBoundaryChecking {
patternLeftBoundary(): string {
return REGEX_PARTS.leftBoundary;
}

innerPattern(): RegExp {
return PATTERN;
export default class UKRelativeDateFormatParser extends AbstractParserWithLeftRightBoundaryChecking {
innerPatternString(context: ParsingContext): string {
return (
`(в минулому|у минулому|на минулому|минулого|на наступному|в наступному|у наступному|наступного|на цьому|в цьому|у цьому|цього)\\s*` +
`(${matchAnyPattern(TIME_UNIT_DICTIONARY)})(?=\\s*)`
);
}

innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents {
Expand Down
16 changes: 5 additions & 11 deletions src/locales/uk/parsers/UKTimeUnitAgoFormatParser.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,12 @@
import { ParsingContext } from "../../../chrono";
import { parseTimeUnits, REGEX_PARTS, TIME_UNITS_PATTERN } from "../constants";
import { parseTimeUnits, TIME_UNITS_PATTERN } from "../constants";
import { ParsingComponents } from "../../../results";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
import { reverseTimeUnits } from "../../../utils/timeunits";
import { AbstractParserWithLeftBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";

const PATTERN = new RegExp(`(${TIME_UNITS_PATTERN})\\s{0,5}тому(?=(?:\\W|$))`, REGEX_PARTS.flags);

export default class UKTimeUnitAgoFormatParser extends AbstractParserWithWordBoundaryChecking {
patternLeftBoundary(): string {
return REGEX_PARTS.leftBoundary;
}

innerPattern(): RegExp {
return PATTERN;
export default class UKTimeUnitAgoFormatParser extends AbstractParserWithLeftBoundaryChecking {
innerPatternString(context: ParsingContext): string {
return `(${TIME_UNITS_PATTERN})\\s{0,5}тому(?=(?:\\W|$))`;
}

innerExtract(context: ParsingContext, match: RegExpMatchArray) {
Expand Down
17 changes: 4 additions & 13 deletions src/locales/uk/parsers/UKTimeUnitCasualRelativeFormatParser.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,12 @@
import { TIME_UNITS_PATTERN, parseTimeUnits, REGEX_PARTS } from "../constants";
import { ParsingContext } from "../../../chrono";
import { ParsingComponents } from "../../../results";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
import { reverseTimeUnits } from "../../../utils/timeunits";
import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";

const PATTERN = new RegExp(
`(ці|останні|минулі|майбутні|наступні|після|через|\\+|-)\\s*(${TIME_UNITS_PATTERN})${REGEX_PARTS.rightBoundary}`,
REGEX_PARTS.flags
);

export default class UKTimeUnitCasualRelativeFormatParser extends AbstractParserWithWordBoundaryChecking {
patternLeftBoundary(): string {
return REGEX_PARTS.leftBoundary;
}

innerPattern(): RegExp {
return PATTERN;
export default class UKTimeUnitCasualRelativeFormatParser extends AbstractParserWithLeftRightBoundaryChecking {
innerPatternString(context: ParsingContext): string {
return `(ці|останні|минулі|майбутні|наступні|після|через|\\+|-)\\s*(${TIME_UNITS_PATTERN})`;
}

innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents {
Expand Down
10 changes: 3 additions & 7 deletions src/locales/uk/parsers/UKTimeUnitWithinFormatParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,16 @@ import { ParsingComponents } from "../../../results";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";

const PATTERN = `(?:(?:приблизно|орієнтовно)\\s*(?:~\\s*)?)?(${TIME_UNITS_PATTERN})${REGEX_PARTS.rightBoundary}`;
const PATTERN_WITH_PREFIX = new RegExp(
`(?:протягом|на протязі|протягом|упродовж|впродовж)\\s*${PATTERN}`,
REGEX_PARTS.flags
);

const PATTERN_WITHOUT_PREFIX = new RegExp(PATTERN, "i");

export default class UKTimeUnitWithinFormatParser extends AbstractParserWithWordBoundaryChecking {
patternLeftBoundary(): string {
return REGEX_PARTS.leftBoundary;
}

innerPattern(context: ParsingContext): RegExp {
return context.option.forwardDate ? PATTERN_WITHOUT_PREFIX : PATTERN_WITH_PREFIX;
return context.option.forwardDate
? new RegExp(PATTERN, "i")
: new RegExp(`(?:протягом|на протязі|протягом|упродовж|впродовж)\\s*${PATTERN}`, REGEX_PARTS.flags);
}

innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents {
Expand Down
34 changes: 13 additions & 21 deletions src/locales/uk/parsers/UKWeekdayParser.ts
Original file line number Diff line number Diff line change
@@ -1,34 +1,26 @@
import { ParsingContext } from "../../../chrono";
import { ParsingComponents } from "../../../results";
// TODO: ADD REGEX_PARTS below
import { REGEX_PARTS, WEEKDAY_DICTIONARY } from "../constants";
import { WEEKDAY_DICTIONARY } from "../constants";
import { matchAnyPattern } from "../../../utils/pattern";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
import { createParsingComponentsAtWeekday } from "../../../common/calculation/weekdays";

const PATTERN = new RegExp(
`(?:(?:,|\\(|()\\s*)?` +
`(?:в\\s*?)?` +
`(?:у\\s*?)?` +
`(?:(цей|минулого|минулий|попередній|попереднього|наступного|наступний|наступному)\\s*)?` +
`(${matchAnyPattern(WEEKDAY_DICTIONARY)})` +
`(?:\\s*(?:,|\\)|)))?` +
`(?:\\s*(на|у|в)\\s*(цьому|минулому|наступному)\\s*тижні)?` +
`${REGEX_PARTS.rightBoundary}`,
REGEX_PARTS.flags
);
import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";

const PREFIX_GROUP = 1;
const WEEKDAY_GROUP = 2;
const POSTFIX_GROUP = 3;

export default class UKWeekdayParser extends AbstractParserWithWordBoundaryChecking {
innerPattern(): RegExp {
return PATTERN;
}

patternLeftBoundary(): string {
return REGEX_PARTS.leftBoundary;
export default class UKWeekdayParser extends AbstractParserWithLeftRightBoundaryChecking {
innerPatternString(context: ParsingContext): string {
return (
`(?:(?:,|\\(|()\\s*)?` +
`(?:в\\s*?)?` +
`(?:у\\s*?)?` +
`(?:(цей|минулого|минулий|попередній|попереднього|наступного|наступний|наступному)\\s*)?` +
`(${matchAnyPattern(WEEKDAY_DICTIONARY)})` +
`(?:\\s*(?:,|\\)|)))?` +
`(?:\\s*(на|у|в)\\s*(цьому|минулому|наступному)\\s*тижні)?`
);
}

innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents {
Expand Down

0 comments on commit 4c4d6d1

Please sign in to comment.