Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[EN DateTime V2] Added support for cases like "April ninth through 15th" (#2905) #2994

Merged
merged 3 commits on
Jul 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ public static class DateTimeDefinitions
// Written cardinals "eleven" through "nineteen".
public const string WrittenElevenToNineteenRegex = @"(?:eleven|twelve|(?:thir|four|fif|six|seven|eigh|nine)teen)";
// Written round tens "ten" through "ninety"; `fou?rty` accepts both "forty" and the spelling "fourty".
public const string WrittenTensRegex = @"(?:ten|twenty|thirty|fou?rty|fifty|sixty|seventy|eighty|ninety)";
// Any written cardinal built from the pieces above: a WrittenOneToNineRegex word (defined outside this excerpt),
// an eleven-to-nineteen word, or a tens word optionally followed by whitespace and a WrittenOneToNineRegex word.
public static readonly string WrittenNumRegex = $@"(?:{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}(\s+{WrittenOneToNineRegex})?)";
// Written ordinals "first" through "ninth"; `nine?th` accepts both "ninth" and the spelling "nineth".
public const string WrittenOneToNineOrdinalRegex = @"(?:first|second|third|fourth|fifth|sixth|seventh|eighth|nine?th)";
// Despite the name, this covers "tenth" through "nineteenth" as well as the round-ten ordinals "twentieth" through "ninetieth".
public const string WrittenTensOrdinalRegex = @"(?:tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth)";
// Any written ordinal: a single word from the two sets above, or a WrittenTensRegex cardinal followed by
// whitespace and a first-to-ninth ordinal (e.g. "twenty first").
// NOTE(review): WrittenTensRegex includes "ten", so "ten first" also matches here - confirm this is intended.
public static readonly string WrittenOrdinalRegex = $@"(?:{WrittenOneToNineOrdinalRegex}|{WrittenTensOrdinalRegex}|{WrittenTensRegex}\s+{WrittenOneToNineOrdinalRegex})";
// Written ordinal restricted to day-of-month wording, with an optional leading "the".
// The ordinal text is captured by two named groups, `ordinal` nested directly inside `day`, so both groups
// hold the same text for this pattern.
// Alternatives: first..ninth | tenth..twentieth and thirtieth | (ten|twenty) + first..ninth | "thirty first".
// NOTE(review): the "ten" prefix admits phrases such as "ten first" - confirm whether only "twenty" was intended.
public static readonly string WrittenOrdinalDayRegex = $@"\b(the\s+)?(?<day>(?<ordinal>{WrittenOneToNineOrdinalRegex}|(?:tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth)|(?:ten|twenty)\s+{WrittenOneToNineOrdinalRegex}|thirty\s+first))\b";
// "one thousand" or "two thousand", optionally followed by "(and) <one-to-nine word> hundred".
public static readonly string WrittenCenturyFullYearRegex = $@"(?:(one|two)\s+thousand((\s+and)?\s+{WrittenOneToNineRegex}\s+hundred)?)";
// Cardinal words "one" through "twenty two" (the alternatives are cardinals even though the name says "Ordinal");
// used by CenturyRegex below, where an optional "hundred" may follow.
public const string WrittenCenturyOrdinalYearRegex = @"(?:twenty(\s+(one|two))?|ten|eleven|twelve|thirteen|fifteen|eighteen|(?:four|six|seven|nine)(teen)?|one|two|three|five|eight)";
// Captures a written century into the `century` group: either the full-year form above, or one of the
// cardinal words above optionally followed by "hundred".
public static readonly string CenturyRegex = $@"\b(?<century>{WrittenCenturyFullYearRegex}|{WrittenCenturyOrdinalYearRegex}(\s+hundred)?)\b";
Expand Down Expand Up @@ -78,10 +82,10 @@ public static class DateTimeDefinitions
public const string ToTokenRegex = @"\b(to)$";
public const string FromRegex = @"\b(from(\s+the)?)$";
public const string BetweenTokenRegex = @"\b(between(\s+the)?)$";
public static readonly string SimpleCasesRegex = $@"\b({RangePrefixRegex}\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex}\s+{MonthSuffixRegex}|{MonthSuffixRegex}\s+{DayRegex})((\s+|\s*,\s*){YearRegex})?\b";
public static readonly string MonthFrontSimpleCasesRegex = $@"\b({RangePrefixRegex}\s+)?{MonthSuffixRegex}\s+((from)\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b";
public static readonly string MonthFrontBetweenRegex = $@"\b{MonthSuffixRegex}\s+(between\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b";
public static readonly string BetweenRegex = $@"\b(between\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b";
public static readonly string SimpleCasesRegex = $@"\b({RangePrefixRegex}\s+)?({DayRegex}|{WrittenOrdinalDayRegex})\s*{TillRegex}\s*(({DayRegex}|{WrittenOrdinalDayRegex})\s+{MonthSuffixRegex}|{MonthSuffixRegex}\s+({DayRegex}|{WrittenOrdinalDayRegex}))((\s+|\s*,\s*){YearRegex})?\b";
public static readonly string MonthFrontSimpleCasesRegex = $@"\b({RangePrefixRegex}\s+)?{MonthSuffixRegex}\s+((from)\s+)?({DayRegex}|{WrittenOrdinalDayRegex})\s*{TillRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})((\s+|\s*,\s*){YearRegex})?\b";
public static readonly string MonthFrontBetweenRegex = $@"\b{MonthSuffixRegex}\s+(between\s+)({DayRegex}|{WrittenOrdinalDayRegex})\s*{RangeConnectorRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})((\s+|\s*,\s*){YearRegex})?\b";
public static readonly string BetweenRegex = $@"\b(between\s+)({DayRegex}|{WrittenOrdinalDayRegex})\s*{RangeConnectorRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b";
public static readonly string MonthWithYear = $@"\b((({WrittenMonthRegex}[\.]?|((the\s+)?(?<cardinal>first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|sixth|6th|seventh|7th|eighth|8th|ninth|9th|tenth|10th|eleventh|11th|twelfth|12th|last)\s+month(?=\s+(of|in))))((\s*)[/\\\-\.,]?(\s+(of|in))?(\s*)({YearRegex}|(?<order>following|next|last|this)\s+year)|\s+(of|in)\s+{TwoDigitYearRegex}))|(({YearRegex}|(?<order>following|next|last|this)\s+year)(\s*),?(\s*){WrittenMonthRegex}))\b";
public const string SpecialYearPrefixes = @"(calendar|(?<special>fiscal|school))";
public static readonly string OneWordPeriodRegex = $@"\b((((the\s+)?month of\s+)?({StrictRelativeRegex}\s+)?{MonthRegex})|(month|year) to date|(?<toDate>((un)?till?|to)\s+date)|({RelativeRegex}\s+)?(my\s+)?((?<business>working\s+week|workweek)|week(end)?|month|fortnight|(({SpecialYearPrefixes}\s+)?year))(?!((\s+of)?\s+\d+(?!({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex}))|\s+to\s+date))(\s+{AfterNextSuffixRegex})?)\b";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public static class NumbersDefinitions
public const string DecimalUnitsRegex = @"(?:डेढ़|डेढ़|डेढ|ढाई|सवा|सावा)";
public static readonly string DecimalUnitsWithRoundNumberRegex = $@"({DecimalUnitsRegex}\s+({{AllNumericalIntRegex}}\s+)?{RoundNumberIntegerRegex}|{DecimalUnitsRegex})";
public const string RoundNumberOrdinalRegex = @"(?:(सौ|हजार|हज़ार|लाख|करोड़|अरब|खरब)(वां|वीं|वें|वाँ))";
public const string OneToNineOrdinalRegex = @"(?:पहला|पहले|पहली|तीसरे|प्रथम|दूसरा|दूसरी|दूसरे|तिहाई|चौथाई|((पांच|पाँच|छठ|सात|आठ|नौ)(वां|वीं|वें|वाँ|वा)))";
public const string OneToNineOrdinalRegex = @"(?:पहला|(?<!से\s*)पहले|पहली|तीसरे|प्रथम|दूसरा|दूसरी|दूसरे|तिहाई|चौथाई|((पांच|पाँच|छठ|सात|आठ|नौ)(वां|वीं|वें|वाँ|वा)))";
public const string TenToNineteenOrdinalRegex = @"(?:(दस|ग्यारह|बारह|तेरह|चौदह|पंद्रह|सोलह|सत्रह|अठारह|उन्नीस)(वां|वीं|वें|वाँ))";
public const string TwentyToTwentyNineOrdinalRegex = @"(?:(बीस|इक्कीस|बाईस|बाइस|तेईस|तेइस|चौबीस|पच्चीस|छब्बीस|सत्ताईस|सत्ताइस|अट्ठाईस|अट्ठाइस|उनतीस)(वां|वीं|वें|वाँ))";
public const string ThirtyToThirtyNineOrdinalRegex = @"(?:(तीस|इकतीस|इकत्तीस|बत्तीस|तैंतीस|चौंतीस|पैंतीस|छ्त्तीस|सैंतीस|अड़तीस|उनतालीस)(वां|वीं|वें|वाँ))";
Expand Down
5 changes: 5 additions & 0 deletions .NET/Microsoft.Recognizers.Text.DateTime/Constants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ public static class Constants
// SourceEntity Types
public const string SYS_DATETIME_DATETIMEPOINT = "datetimepoint";

// Number Types
public const string SYS_NUMBER_ORDINAL = "builtin.num.ordinal";

// Model Name
public const string MODEL_DATETIME = "datetime";

Expand Down Expand Up @@ -113,6 +116,7 @@ public static class Constants
public const int MaxWeekOfMonth = 5;
public const int MaxMonth = 12;
public const int MinMonth = 1;
public const int MaxDayMonth = 31;

// Day start hour
public const int DayHourStart = 0;
Expand Down Expand Up @@ -237,6 +241,7 @@ public static class Constants
public const string EndGroupName = "end";
public const string WithinGroupName = "within";
public const string NumberGroupName = "number";
public const string OrdinalGroupName = "ordinal";
public const string OrderGroupName = "order";
public const string AgoGroupName = "ago";
public const string YesterdayGroupName = "yesterday";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -348,10 +348,10 @@ private List<ExtractResult> ExtractImpl(string text, DateObject reference)

tokens.AddRange(MergeTwoTimePoints(text, reference));
tokens.AddRange(MatchDuration(text, reference));
tokens.AddRange(SingleTimePointWithPatterns(text, new List<ExtractResult>(ordinalExtractions), reference));
tokens.AddRange(SingleTimePointWithPatterns(text, ordinalExtractions, reference));
tokens.AddRange(MatchComplexCases(text, simpleCasesResults, reference));
tokens.AddRange(MatchYearPeriod(text, reference));
tokens.AddRange(MatchOrdinalNumberWithCenturySuffix(text, new List<ExtractResult>(ordinalExtractions)));
tokens.AddRange(MatchOrdinalNumberWithCenturySuffix(text, ordinalExtractions));

return Token.MergeAllTokens(tokens, text, ExtractorName);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -699,9 +699,47 @@ private DateTimeResolutionResult ParseSimpleCases(string text, DateObject refere

if (match.Success)
{
var days = match.Groups["day"];
beginDay = this.config.DayOfMonth[days.Captures[0].Value];
endDay = this.config.DayOfMonth[days.Captures[1].Value];
// Resolve beginDay and endDay from the two day captures of the matched range.
// Each day is either looked up in config.DayOfMonth or, when it was also captured by the
// "ordinal" group (Constants.OrdinalGroupName), parsed with config.NumberParser as an ordinal number.
// The code indexes days.Captures[0] and days.Captures[1], so it relies on the matching regex
// (not visible in this fragment) having produced two day captures.
var days = match.Groups[Constants.DayGroupName];
var writtenDay = match.Groups[Constants.OrdinalGroupName];
// The begin day is treated as written when the first ordinal capture has the same text as the first day capture.
if (writtenDay.Captures.Count > 0 && days.Captures[0].Value == writtenDay.Captures[0].Value)
{
// Parse beginDay in written form
var dayMatch = writtenDay.Captures[0];
// Wrap the capture in an ExtractResult typed as a non-relative ordinal so the number parser can consume it.
var dayEr = new ExtractResult
{
Start = dayMatch.Index,
Length = dayMatch.Length,
Text = dayMatch.Value,
Type = Constants.SYS_NUMBER_ORDINAL,
Metadata = new Metadata { IsOrdinalRelative = false, },
};
var dayPr = this.config.NumberParser.Parse(dayEr);
// Value is cast to double first and then to int - presumably a boxed double; confirm against the number parser.
beginDay = (int)(double)dayPr.Value;
}
else
{
// Begin day was not captured as a written ordinal: use the DayOfMonth lookup table.
beginDay = this.config.DayOfMonth[days.Captures[0].Value];
}

// The end day is compared against the LAST ordinal capture: that is index 0 when only one day was
// written and index 1 when both days were written.
if (writtenDay.Captures.Count > 0 && days.Captures[1].Value == writtenDay.Captures[writtenDay.Captures.Count - 1].Value)
{
// Parse endDay in written form
var dayMatch = writtenDay.Captures[writtenDay.Captures.Count - 1];
// Same ExtractResult wrapping as for the begin day.
var dayEr = new ExtractResult
{
Start = dayMatch.Index,
Length = dayMatch.Length,
Text = dayMatch.Value,
Type = Constants.SYS_NUMBER_ORDINAL,
Metadata = new Metadata { IsOrdinalRelative = false, },
};
var dayPr = this.config.NumberParser.Parse(dayEr);
endDay = (int)(double)dayPr.Value;
}
else
{
// End day was not captured as a written ordinal: use the DayOfMonth lookup table.
endDay = this.config.DayOfMonth[days.Captures[1].Value];
}

// parse year
year = config.DateExtractor.GetYearFromText(match.Match);
Expand Down
26 changes: 18 additions & 8 deletions Patterns/English/English-DateTime.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,16 @@ WrittenTensRegex: !simpleRegex
WrittenNumRegex: !nestedRegex
def: (?:{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}(\s+{WrittenOneToNineRegex})?)
references: [ WrittenOneToNineRegex, WrittenElevenToNineteenRegex, WrittenTensRegex ]
WrittenOneToNineOrdinalRegex: !simpleRegex
def: (?:first|second|third|fourth|fifth|sixth|seventh|eighth|nine?th)
WrittenTensOrdinalRegex: !simpleRegex
def: (?:tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth)
WrittenOrdinalRegex: !nestedRegex
def: (?:{WrittenOneToNineOrdinalRegex}|{WrittenTensOrdinalRegex}|{WrittenTensRegex}\s+{WrittenOneToNineOrdinalRegex})
references: [ WrittenOneToNineOrdinalRegex, WrittenTensOrdinalRegex, WrittenTensRegex ]
WrittenOrdinalDayRegex: !nestedRegex
def: \b(the\s+)?(?<day>(?<ordinal>{WrittenOneToNineOrdinalRegex}|(?:tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth)|(?:ten|twenty)\s+{WrittenOneToNineOrdinalRegex}|thirty\s+first))\b
references: [ WrittenOneToNineOrdinalRegex ]
WrittenCenturyFullYearRegex: !nestedRegex
def: (?:(one|two)\s+thousand((\s+and)?\s+{WrittenOneToNineRegex}\s+hundred)?)
references: [ WrittenOneToNineRegex]
Expand Down Expand Up @@ -137,17 +147,17 @@ FromRegex: !simpleRegex
BetweenTokenRegex: !simpleRegex
def: \b(between(\s+the)?)$
SimpleCasesRegex: !nestedRegex
def: \b({RangePrefixRegex}\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex}\s+{MonthSuffixRegex}|{MonthSuffixRegex}\s+{DayRegex})((\s+|\s*,\s*){YearRegex})?\b
references: [ DayRegex, TillRegex, MonthSuffixRegex, YearRegex, RangePrefixRegex ]
def: \b({RangePrefixRegex}\s+)?({DayRegex}|{WrittenOrdinalDayRegex})\s*{TillRegex}\s*(({DayRegex}|{WrittenOrdinalDayRegex})\s+{MonthSuffixRegex}|{MonthSuffixRegex}\s+({DayRegex}|{WrittenOrdinalDayRegex}))((\s+|\s*,\s*){YearRegex})?\b
references: [ DayRegex, TillRegex, MonthSuffixRegex, YearRegex, RangePrefixRegex, WrittenOrdinalDayRegex ]
MonthFrontSimpleCasesRegex: !nestedRegex
def: \b({RangePrefixRegex}\s+)?{MonthSuffixRegex}\s+((from)\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b
references: [ MonthSuffixRegex, DayRegex, TillRegex, YearRegex, RangePrefixRegex ]
def: \b({RangePrefixRegex}\s+)?{MonthSuffixRegex}\s+((from)\s+)?({DayRegex}|{WrittenOrdinalDayRegex})\s*{TillRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})((\s+|\s*,\s*){YearRegex})?\b
references: [ MonthSuffixRegex, DayRegex, TillRegex, YearRegex, RangePrefixRegex, WrittenOrdinalDayRegex ]
MonthFrontBetweenRegex: !nestedRegex
def: \b{MonthSuffixRegex}\s+(between\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b
references: [ MonthSuffixRegex, DayRegex, RangeConnectorRegex , YearRegex ]
def: \b{MonthSuffixRegex}\s+(between\s+)({DayRegex}|{WrittenOrdinalDayRegex})\s*{RangeConnectorRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})((\s+|\s*,\s*){YearRegex})?\b
references: [ MonthSuffixRegex, DayRegex, RangeConnectorRegex , YearRegex, WrittenOrdinalDayRegex ]
BetweenRegex: !nestedRegex
def: \b(between\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b
references: [ DayRegex, RangeConnectorRegex , MonthSuffixRegex, YearRegex ]
def: \b(between\s+)({DayRegex}|{WrittenOrdinalDayRegex})\s*{RangeConnectorRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b
references: [ DayRegex, RangeConnectorRegex , MonthSuffixRegex, YearRegex, WrittenOrdinalDayRegex ]
MonthWithYear: !nestedRegex
def: \b((({WrittenMonthRegex}[\.]?|((the\s+)?(?<cardinal>first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|sixth|6th|seventh|7th|eighth|8th|ninth|9th|tenth|10th|eleventh|11th|twelfth|12th|last)\s+month(?=\s+(of|in))))((\s*)[/\\\-\.,]?(\s+(of|in))?(\s*)({YearRegex}|(?<order>following|next|last|this)\s+year)|\s+(of|in)\s+{TwoDigitYearRegex}))|(({YearRegex}|(?<order>following|next|last|this)\s+year)(\s*),?(\s*){WrittenMonthRegex}))\b
references: [ WrittenMonthRegex, YearRegex, TwoDigitYearRegex ]
Expand Down
2 changes: 1 addition & 1 deletion Patterns/Hindi/Hindi-Numbers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ DecimalUnitsWithRoundNumberRegex: !nestedRegex
RoundNumberOrdinalRegex: !simpleRegex
def: (?:(सौ|हजार|हज़ार|लाख|करोड़|अरब|खरब)(वां|वीं|वें|वाँ))
OneToNineOrdinalRegex: !simpleRegex
def: (?:पहला|पहले|पहली|तीसरे|प्रथम|दूसरा|दूसरी|दूसरे|तिहाई|चौथाई|((पांच|पाँच|छठ|सात|आठ|नौ)(वां|वीं|वें|वाँ|वा)))
def: (?:पहला|(?<!से\s*)पहले|पहली|तीसरे|प्रथम|दूसरा|दूसरी|दूसरे|तिहाई|चौथाई|((पांच|पाँच|छठ|सात|आठ|नौ)(वां|वीं|वें|वाँ|वा)))
TenToNineteenOrdinalRegex: !simpleRegex
def: (?:(दस|ग्यारह|बारह|तेरह|चौदह|पंद्रह|सोलह|सत्रह|अठारह|उन्नीस)(वां|वीं|वें|वाँ))
TwentyToTwentyNineOrdinalRegex: !simpleRegex
Expand Down
Loading