Skip to content

Commit

Permalink
[New] FR month name little endian parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
wanasit committed Jul 23, 2020
1 parent 2a6e043 commit 072c24c
Show file tree
Hide file tree
Showing 13 changed files with 522 additions and 125 deletions.
10 changes: 5 additions & 5 deletions src/common/parsers/SlashDateFormatParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,7 @@ import { findMostLikelyADYear, findYearClosestToRef } from "../../calculation/ye
const PATTERN = new RegExp(
"([^\\d]|^)" +
"([0-3]{0,1}[0-9]{1})[\\/\\.\\-]([0-3]{0,1}[0-9]{1})" +
"(?:" +
"[\\/\\.\\-]" +
"([0-9]{4}\\s*\\,?\\s*|[0-9]{2}\\s*\\,?\\s*)" +
")?" +
"(?:[\\/\\.\\-]([0-9]{4}|[0-9]{2}))?" +
"(\\W|$)",
"i"
);
Expand Down Expand Up @@ -52,7 +49,10 @@ export default class SlashDateFormatParser implements Parser {
}

const index = match.index + match[OPENING_GROUP].length;
const text = match[0].substr(match[OPENING_GROUP].length, match[0].length - match[ENDING_GROUP].length);
const text = match[0].substr(
match[OPENING_GROUP].length,
match[0].length - match[OPENING_GROUP].length - match[ENDING_GROUP].length
);

// '1.12' and '1.12.12' look more like version numbers than dates
if (text.match(/^\d\.\d$/) || text.match(/^\d\.\d{1,2}\.\d{1,2}\s*$/)) {
Expand Down
23 changes: 9 additions & 14 deletions src/locales/fr/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ export const MONTH_DICTIONARY: { [word: string]: number } = {
"juin": 6,
"jun": 6,
"juillet": 7,
"juil": 7,
"jul": 7,
"jul.": 7,
"août": 8,
Expand Down Expand Up @@ -120,31 +121,25 @@ export function parseNumberPattern(match: string): number {

//-----------------------------

export const ORDINAL_NUMBER_PATTERN = `(?:[0-9]{1,2}(?:st|nd|rd|th)?)`;
export const ORDINAL_NUMBER_PATTERN = `(?:[0-9]{1,2}(?:er)?)`;
export function parseOrdinalNumberPattern(match: string): number {
let num = match.toLowerCase();
num = num.replace(/(?:st|nd|rd|th)$/i, "");
num = num.replace(/(?:er)$/i, "");
return parseInt(num);
}

//-----------------------------

export const YEAR_PATTERN = `(?:[1-9][0-9]{0,3}\\s*(?:BE|AD|BC)|[1-2][0-9]{3}|[5-9][0-9])`;
// 88 p. Chr. n.
// 234 AC
export const YEAR_PATTERN = `(?:[1-9][0-9]{0,3}\\s*(?:AC|AD|p\\.\\s*C(?:hr?)?\\.\\s*n\\.)|[1-2][0-9]{3}|[5-9][0-9])`;
export function parseYear(match: string): number {
if (/BE/i.test(match)) {
// Buddhist Era
match = match.replace(/BE/i, "");
return parseInt(match) - 543;
}

if (/BC/i.test(match)) {
// Before Christ
if (/AC/i.test(match)) {
match = match.replace(/BC/i, "");
return -parseInt(match);
}

if (/AD/i.test(match)) {
match = match.replace(/AD/i, "");
if (/AD/i.test(match) || /C/i.test(match)) {
match = match.replace(/[^\d]+/i, "");
return parseInt(match);
}

Expand Down
2 changes: 2 additions & 0 deletions src/locales/fr/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import FRMergeDateTimeRefiner from "./refiners/FRMergeDateTimeRefiner";
import FRMergeDateRangeRefiner from "./refiners/FRMergeDateRangeRefiner";
import FRWeekdayParser from "./parsers/FRWeekdayParser";
import FRSpecificTimeExpressionParser from "./parsers/FRSpecificTimeExpressionParser";
import FRMonthNameLittleEndianParser from "./parsers/FRMonthNameLittleEndianParser";

// Shortcuts
export const casual = new Chrono(createCasualConfiguration());
Expand All @@ -34,6 +35,7 @@ export function createConfiguration(strictMode = true, littleEndian = true): Con
{
parsers: [
new SlashDateFormatParser(littleEndian),
new FRMonthNameLittleEndianParser(),
new FRTimeExpressionParser(),
new FRSpecificTimeExpressionParser(),
new FRWeekdayParser(),
Expand Down
64 changes: 64 additions & 0 deletions src/locales/fr/parsers/FRMonthNameLittleEndianParser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import { ParsingContext } from "../../../chrono";
import { ParsingResult } from "../../../results";
import { findYearClosestToRef } from "../../../calculation/years";
import { MONTH_DICTIONARY } from "../constants";
import { YEAR_PATTERN, parseYear } from "../constants";
import { ORDINAL_NUMBER_PATTERN, parseOrdinalNumberPattern } from "../constants";
import { matchAnyPattern } from "../../../utils/pattern";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";

// Matches a day-first date written with a month name, optionally with a second
// day number (a range) and a year. Capture groups are indexed by the *_GROUP
// constants declared right after the pattern.
const PATTERN = new RegExp(
// NOTE(review): optional English "on" prefix inside a French pattern — possibly carried over from another locale; confirm it is intended.
"(?:on\\s*?)?" +
// Group 1: the (start) day number.
`(${ORDINAL_NUMBER_PATTERN})` +
// Group 2 (optional): a second day number introduced by "au", "-", "–", "jusqu'a"/"jusqu'au" or plain whitespace.
`(?:\\s*(?:au|\\-|\\–|jusqu\'au?|\\s)\\s*(${ORDINAL_NUMBER_PATTERN}))?` +
// Separator before the month: "-", "/", or an optional "de" with optional surrounding whitespace.
`(?:-|/|\\s*(?:de)?\\s*)` +
// Group 3: the month word; matchAnyPattern presumably builds an alternation of MONTH_DICTIONARY's keys — confirm in utils/pattern.
`(${matchAnyPattern(MONTH_DICTIONARY)})` +
// Group 4 (optional): a year after "-", "/", or an optional comma and whitespace.
// The negative lookahead rejects a year that is directly followed by a non-space character and then a digit.
`(?:(?:-|/|,?\\s*)(${YEAR_PATTERN}(?![^\\s]\\d)))?` +
// The whole match must be followed by a non-word character or the end of input.
`(?=\\W|$)`,
// Case-insensitive matching.
"i"
);

// Capture-group indices into PATTERN.
const DATE_GROUP = 1;
const DATE_TO_GROUP = 2;
const MONTH_NAME_GROUP = 3;
const YEAR_GROUP = 4;

/**
 * Parser for day-first dates written with a month name, with an optional
 * second day number (range end) and an optional year, as described by the
 * module-level PATTERN.
 */
export default class FRMonthNameLittleEndianParser extends AbstractParserWithWordBoundaryChecking {
    /**
     * @returns the module-level PATTERN used to find candidate matches.
     */
    innerPattern(): RegExp {
        return PATTERN;
    }

    /**
     * Builds a parsing result from a PATTERN match.
     *
     * @param context parsing context; supplies the result factory and the reference date.
     * @param match a match produced by PATTERN (groups indexed by the *_GROUP constants).
     * @returns the parsing result, or `null` when the captured day number is greater than 31.
     */
    innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingResult | null {
        const result = context.createParsingResult(match.index, match[0]);

        const month = MONTH_DICTIONARY[match[MONTH_NAME_GROUP].toLowerCase()];
        const day = parseOrdinalNumberPattern(match[DATE_GROUP]);
        if (day > 31) {
            // e.g. "[96 Aug]" => "9[6 Aug]": move match.index past the captured day
            // number before rejecting, presumably so the caller resumes scanning
            // after it instead of retrying the same position.
            match.index = match.index + match[DATE_GROUP].length;
            return null;
        }

        result.start.assign("month", month);
        result.start.assign("day", day);

        if (match[YEAR_GROUP]) {
            // An explicit year in the text is recorded with assign().
            const yearNumber = parseYear(match[YEAR_GROUP]);
            result.start.assign("year", yearNumber);
        } else {
            // No year in the text: record the one computed by findYearClosestToRef
            // from the reference date with imply() rather than assign().
            const year = findYearClosestToRef(context.refDate, day, month);
            result.start.imply("year", year);
        }

        if (match[DATE_TO_GROUP]) {
            // Range such as "<day> au <day> <month>": the end is a copy of the
            // start with only the day replaced.
            // NOTE(review): unlike the start day, the end day is not checked against 31 — confirm whether that is intended.
            const endDate = parseOrdinalNumberPattern(match[DATE_TO_GROUP]);

            result.end = result.start.clone();
            result.end.assign("day", endDate);
        }

        return result;
    }
}
11 changes: 6 additions & 5 deletions src/locales/fr/parsers/FRSpecificTimeExpressionParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,14 @@ export default class FRSpecificTimeExpressionParser implements Parser {
}

extract(context: ParsingContext, match: RegExpMatchArray): ParsingResult | null {
const refDate = dayjs(context.refDate);
let result = context.createParsingResult(match.index + match[1].length, match[0].substring(match[1].length));

// This looks more like a year e.g. 2020
if (result.text.match(/^\d{4}$/)) {
match.index += match[0].length;
return null;
}

result.start = FRSpecificTimeExpressionParser.extractTimeComponent(result.start.clone(), match);
if (!result.start) {
match.index += match[0].length;
Expand All @@ -59,10 +64,6 @@ export default class FRSpecificTimeExpressionParser implements Parser {
}
}

if (result.text.match(/^\d+$/)) {
return null;
}

return result;
}

Expand Down
11 changes: 11 additions & 0 deletions src/locales/fr/parsers/FRTimeExpressionParser.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import { AbstractTimeExpressionParser } from "../../../common/parsers/AbstractTimeExpressionParser";
import { ParsingComponents, ParsingResult } from "../../../results";
import { ParsingContext } from "../../../chrono";

export default class FRTimeExpressionParser extends AbstractTimeExpressionParser {
primaryPrefix(): string {
Expand All @@ -8,4 +10,13 @@ export default class FRTimeExpressionParser extends AbstractTimeExpressionParser
/**
 * Returns a regex source fragment matching one connector token — "-", "–",
 * "~", "〜", "à"/"a" or "?" — with optional whitespace on both sides.
 * NOTE(review): presumably consumed by AbstractTimeExpressionParser as the
 * separator in front of a following (end) time — confirm in the base class.
 */
followingPhase(): string {
return "\\s*(?:\\-|\\–|\\~|\\〜|[àa]|\\?)\\s*";
}

/**
 * Rejects matches whose whole text is a bare four-digit number (optionally
 * padded with whitespace), since that reads more like a year such as "2020"
 * than a time; every other match is delegated to the base implementation.
 *
 * @param context parsing context, forwarded unchanged to the base class.
 * @param match the regex match under consideration.
 * @returns `null` for a year-like match, otherwise the base class result.
 */
extractPrimaryTimeComponents(context: ParsingContext, match: RegExpMatchArray): ParsingComponents | null {
    const looksLikeBareYear = /^\s*\d{4}\s*$/.test(match[0]);
    return looksLikeBareYear ? null : super.extractPrimaryTimeComponents(context, match);
}
}
7 changes: 7 additions & 0 deletions test/en/en_slash.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
import * as chrono from "../../src";
import { testSingleCase, testUnexpectedResult, testWithExpectedDate } from "../test_util";

// Checks that the reported index and text of a slash-format date exclude the
// whitespace surrounding it in the input.
// NOTE(review): the expected index of 4 implies four characters precede the
// date; verify the leading whitespace of the input literal below.
test("Test - Parsing Offset Expression", function () {
testSingleCase(chrono, " 04/2016 ", new Date(2012, 7, 10), (result) => {
expect(result.index).toBe(4);
expect(result.text).toBe("04/2016");
});
});

test("Test - Single Expression", function () {
testSingleCase(chrono, "The event is going ahead (04/2016)", new Date(2012, 7, 10), (result) => {
expect(result.start).not.toBeNull();
Expand Down
5 changes: 5 additions & 0 deletions test/en/en_time_exp.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ test("Test - Parsing text offset", function () {
expect(result.index).toBe(2);
expect(result.text).toBe("11 AM");
});

testSingleCase(chrono, "2020 at 11 AM ", new Date(2016, 10 - 1, 1, 8), (result, text) => {
expect(result.index).toBe(5);
expect(result.text).toBe("at 11 AM");
});
});

test("Test - Time expression", function () {
Expand Down
24 changes: 12 additions & 12 deletions test/fr/fr_casual.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -162,18 +162,18 @@ test("Test - Combined Expression", function () {
expect(result.start).toBeDate(new Date(2012, 7, 11, 17));
});

// testSingleCase(chrono.fr, "La deadline est demain matin 11h", new Date(2012, 7, 10, 12), (result) => {
// expect(result.index).toBe(16);
// expect(result.text).toBe("demain matin 11h");
//
// expect(result.start).not.toBeNull();
// expect(result.start.get("year")).toBe(2012);
// expect(result.start.get("month")).toBe(8);
// expect(result.start.get("day")).toBe(11);
// expect(result.start.get("hour")).toBe(11);
//
// expect(result.start).toBeDate(new Date(2012, 7, 11, 11));
// });
testSingleCase(chrono.fr, "La deadline est demain matin 11h", new Date(2012, 7, 10, 12), (result) => {
expect(result.index).toBe(16);
expect(result.text).toBe("demain matin 11h");

expect(result.start).not.toBeNull();
expect(result.start.get("year")).toBe(2012);
expect(result.start.get("month")).toBe(8);
expect(result.start.get("day")).toBe(11);
expect(result.start.get("hour")).toBe(11);

expect(result.start).toBeDate(new Date(2012, 7, 11, 11));
});
});

//
Expand Down
Loading

0 comments on commit 072c24c

Please sign in to comment.