Skip to content

Commit

Permalink
timezone allows leading MINUS SIGN (U+2212)
Browse files Browse the repository at this point in the history
The timezone offset sign may also be MINUS SIGN (U+2212), as
specified by ISO 8601 and RFC 3339.

This does not apply to the RFC 2822 format or to RFC 8536 transition strings.

Issue chronotope#835
  • Loading branch information
jtmoon79 committed Mar 28, 2023
1 parent 2fcdd9e commit 3d47112
Show file tree
Hide file tree
Showing 3 changed files with 190 additions and 38 deletions.
69 changes: 64 additions & 5 deletions src/format/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,7 @@ fn test_parse() {
check!("+12:3", [fix!(TimezoneOffset)]; TOO_SHORT);
check!("+12:34", [fix!(TimezoneOffset)]; offset: 45_240);
check!("-12:34", [fix!(TimezoneOffset)]; offset: -45_240);
check!("−12:34", [fix!(TimezoneOffset)]; offset: -45_240);
check!("+12:34:", [fix!(TimezoneOffset)]; TOO_LONG);
check!("+12:34:5", [fix!(TimezoneOffset)]; TOO_LONG);
check!("+12:34:56", [fix!(TimezoneOffset)]; TOO_LONG);
Expand All @@ -944,12 +945,14 @@ fn test_parse() {
check!("+1234:567", [fix!(TimezoneOffset)]; TOO_LONG);
check!("+00:00", [fix!(TimezoneOffset)]; offset: 0);
check!("-00:00", [fix!(TimezoneOffset)]; offset: 0);
check!("−00:00", [fix!(TimezoneOffset)]; offset: 0);
check!("+00:01", [fix!(TimezoneOffset)]; offset: 60);
check!("-00:01", [fix!(TimezoneOffset)]; offset: -60);
check!("+00:30", [fix!(TimezoneOffset)]; offset: 1_800);
check!("-00:30", [fix!(TimezoneOffset)]; offset: -1_800);
check!("+24:00", [fix!(TimezoneOffset)]; offset: 86_400);
check!("-24:00", [fix!(TimezoneOffset)]; offset: -86_400);
check!("−24:00", [fix!(TimezoneOffset)]; offset: -86_400);
check!("+99:59", [fix!(TimezoneOffset)]; offset: 359_940);
check!("-99:59", [fix!(TimezoneOffset)]; offset: -359_940);
check!("+00:60", [fix!(TimezoneOffset)]; OUT_OF_RANGE);
Expand All @@ -959,6 +962,7 @@ fn test_parse() {
check!("+12 34 ", [fix!(TimezoneOffset)]; TOO_LONG);
check!(" +12:34", [fix!(TimezoneOffset)]; offset: 45_240);
check!(" -12:34", [fix!(TimezoneOffset)]; offset: -45_240);
check!(" −12:34", [fix!(TimezoneOffset)]; offset: -45_240);
check!(" +12:34", [fix!(TimezoneOffset)]; INVALID);
check!(" -12:34", [fix!(TimezoneOffset)]; INVALID);
check!("\t -12:34", [fix!(TimezoneOffset)]; INVALID);
Expand All @@ -981,10 +985,16 @@ fn test_parse() {
check!("X12:34", [fix!(TimezoneOffset)]; INVALID);
check!("Z+12:34", [fix!(TimezoneOffset)]; INVALID);
check!("X+12:34", [fix!(TimezoneOffset)]; INVALID);
check!("X−12:34", [fix!(TimezoneOffset)]; INVALID);
check!("🤠+12:34", [fix!(TimezoneOffset)]; INVALID);
check!("+12:34🤠", [fix!(TimezoneOffset)]; TOO_LONG);
check!("+12:🤠34", [fix!(TimezoneOffset)]; INVALID);
check!("+1234🤠", [fix!(TimezoneOffset), lit!("🤠")]; offset: 45_240);
check!("-1234🤠", [fix!(TimezoneOffset), lit!("🤠")]; offset: -45_240);
check!("−1234🤠", [fix!(TimezoneOffset), lit!("🤠")]; offset: -45_240);
check!("+12:34🤠", [fix!(TimezoneOffset), lit!("🤠")]; offset: 45_240);
check!("-12:34🤠", [fix!(TimezoneOffset), lit!("🤠")]; offset: -45_240);
check!("−12:34🤠", [fix!(TimezoneOffset), lit!("🤠")]; offset: -45_240);
check!("🤠+12:34", [lit!("🤠"), fix!(TimezoneOffset)]; offset: 45_240);
check!("Z", [fix!(TimezoneOffset)]; INVALID);
check!("A", [fix!(TimezoneOffset)]; INVALID);
Expand Down Expand Up @@ -1013,6 +1023,7 @@ fn test_parse() {
check!("+123", [fix!(TimezoneOffsetColon)]; TOO_SHORT);
check!("+1234", [fix!(TimezoneOffsetColon)]; offset: 45_240);
check!("-1234", [fix!(TimezoneOffsetColon)]; offset: -45_240);
check!("−1234", [fix!(TimezoneOffsetColon)]; offset: -45_240);
check!("+12345", [fix!(TimezoneOffsetColon)]; TOO_LONG);
check!("+123456", [fix!(TimezoneOffsetColon)]; TOO_LONG);
check!("+1234567", [fix!(TimezoneOffsetColon)]; TOO_LONG);
Expand All @@ -1029,6 +1040,7 @@ fn test_parse() {
check!("+12:3", [fix!(TimezoneOffsetColon)]; TOO_SHORT);
check!("+12:34", [fix!(TimezoneOffsetColon)]; offset: 45_240);
check!("-12:34", [fix!(TimezoneOffsetColon)]; offset: -45_240);
check!("−12:34", [fix!(TimezoneOffsetColon)]; offset: -45_240);
check!("+12:34:", [fix!(TimezoneOffsetColon)]; TOO_LONG);
check!("+12:34:5", [fix!(TimezoneOffsetColon)]; TOO_LONG);
check!("+12:34:56", [fix!(TimezoneOffsetColon)]; TOO_LONG);
Expand All @@ -1041,6 +1053,8 @@ fn test_parse() {
check!("+12: 34", [fix!(TimezoneOffsetColon)]; offset: 45_240);
check!("+12 :34", [fix!(TimezoneOffsetColon)]; offset: 45_240);
check!("+12 : 34", [fix!(TimezoneOffsetColon)]; offset: 45_240);
check!("-12 : 34", [fix!(TimezoneOffsetColon)]; offset: -45_240);
check!("−12 : 34", [fix!(TimezoneOffsetColon)]; offset: -45_240);
check!("+12 : 34", [fix!(TimezoneOffsetColon)]; INVALID);
check!("+12 : 34", [fix!(TimezoneOffsetColon)]; INVALID);
check!("+12 : 34", [fix!(TimezoneOffsetColon)]; INVALID);
Expand Down Expand Up @@ -1094,6 +1108,7 @@ fn test_parse() {
check!("+123", [fix!(TimezoneOffsetZ)]; TOO_SHORT);
check!("+1234", [fix!(TimezoneOffsetZ)]; offset: 45_240);
check!("-1234", [fix!(TimezoneOffsetZ)]; offset: -45_240);
check!("−1234", [fix!(TimezoneOffsetZ)]; offset: -45_240);
check!("+12345", [fix!(TimezoneOffsetZ)]; TOO_LONG);
check!("+123456", [fix!(TimezoneOffsetZ)]; TOO_LONG);
check!("+1234567", [fix!(TimezoneOffsetZ)]; TOO_LONG);
Expand All @@ -1110,6 +1125,7 @@ fn test_parse() {
check!("+12:3", [fix!(TimezoneOffsetZ)]; TOO_SHORT);
check!("+12:34", [fix!(TimezoneOffsetZ)]; offset: 45_240);
check!("-12:34", [fix!(TimezoneOffsetZ)]; offset: -45_240);
check!("−12:34", [fix!(TimezoneOffsetZ)]; offset: -45_240);
check!("+12:34:", [fix!(TimezoneOffsetZ)]; TOO_LONG);
check!("+12:34:5", [fix!(TimezoneOffsetZ)]; TOO_LONG);
check!("+12:34:56", [fix!(TimezoneOffsetZ)]; TOO_LONG);
Expand Down Expand Up @@ -1180,6 +1196,7 @@ fn test_parse() {
check!("+123", [internal_fix!(TimezoneOffsetPermissive)]; TOO_SHORT);
check!("+1234", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240);
check!("-1234", [internal_fix!(TimezoneOffsetPermissive)]; offset: -45_240);
check!("−1234", [internal_fix!(TimezoneOffsetPermissive)]; offset: -45_240);
check!("+12345", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
check!("+123456", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
check!("+1234567", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
Expand All @@ -1196,6 +1213,7 @@ fn test_parse() {
check!("+12:3", [internal_fix!(TimezoneOffsetPermissive)]; TOO_SHORT);
check!("+12:34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240);
check!("-12:34", [internal_fix!(TimezoneOffsetPermissive)]; offset: -45_240);
check!("−12:34", [internal_fix!(TimezoneOffsetPermissive)]; offset: -45_240);
check!("+12:34:", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
check!("+12:34:5", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
check!("+12:34:56", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
Expand Down Expand Up @@ -1223,6 +1241,8 @@ fn test_parse() {
check!(" 12:34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID);
check!("+12:34 ", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
check!(" +12:34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240);
check!(" -12:34", [internal_fix!(TimezoneOffsetPermissive)]; offset: -45_240);
check!(" −12:34", [internal_fix!(TimezoneOffsetPermissive)]; offset: -45_240);
check!("+12345", [internal_fix!(TimezoneOffsetPermissive), num!(Day)]; offset: 45_240, day: 5);
check!("+12:345", [internal_fix!(TimezoneOffsetPermissive), num!(Day)]; offset: 45_240, day: 5);
check!("+12:34:", [internal_fix!(TimezoneOffsetPermissive), lit!(":")]; offset: 45_240);
Expand Down Expand Up @@ -1269,6 +1289,16 @@ fn test_parse() {
num!(Hour), lit!(":"), num!(Minute), lit!(":"), num!(Second), fix!(TimezoneOffset)];
year: 2015, month: 2, day: 4, hour_div_12: 1, hour_mod_12: 2,
minute: 37, second: 5, offset: 32400);
check!("2015-02-04T14:37:05-09:00",
[num!(Year), lit!("-"), num!(Month), lit!("-"), num!(Day), lit!("T"),
num!(Hour), lit!(":"), num!(Minute), lit!(":"), num!(Second), fix!(TimezoneOffset)];
year: 2015, month: 2, day: 4, hour_div_12: 1, hour_mod_12: 2,
minute: 37, second: 5, offset: -32400);
check!("2015-02-04T14:37:05−09:00",
[num!(Year), lit!("-"), num!(Month), lit!("-"), num!(Day), lit!("T"),
num!(Hour), lit!(":"), num!(Minute), lit!(":"), num!(Second), fix!(TimezoneOffset)];
year: 2015, month: 2, day: 4, hour_div_12: 1, hour_mod_12: 2,
minute: 37, second: 5, offset: -32400);
check!("20150204143705567",
[num!(Year), num!(Month), num!(Day),
num!(Hour), num!(Minute), num!(Second), internal_fix!(Nanosecond3NoDot)];
Expand Down Expand Up @@ -1374,9 +1404,14 @@ fn test_rfc2822() {
("Tue, 20 Jan 2015 7:35:20 -0800", Err(INVALID)), // bad # of digits in hour
("Tue, 20 Jan 2015 17:65:20 -0800", Err(OUT_OF_RANGE)), // bad minute
("Tue, 20 Jan 2015 17:35:90 -0800", Err(OUT_OF_RANGE)), // bad second
("Tue, 20 Jan 2015 17:35:20 -0890", Err(OUT_OF_RANGE)), // bad offset
("6 Jun 1944 04:00:00Z", Err(INVALID)), // bad offset (zulu not allowed)
("Tue, 20 Jan 2015 17:35:20 HAS", Err(NOT_ENOUGH)), // bad named time zone
("Tue, 20 Jan 2015 17:35:20 -0890", Err(OUT_OF_RANGE)), // bad offset minutes
("Tue, 20 Jan 2015 17:35:20Z", Err(INVALID)), // bad offset: zulu not allowed
("Tue, 20 Jan 2015 17:35:20 Z", Err(NOT_ENOUGH)), // bad offset: zulu not allowed
("Tue, 20 Jan 2015 17:35:20 Zulu", Err(NOT_ENOUGH)), // bad offset: zulu not allowed
("Tue, 20 Jan 2015 17:35:20 ZULU", Err(NOT_ENOUGH)), // bad offset: zulu not allowed
("Tue, 20 Jan 2015 17:35:20 −0800", Err(INVALID)), // bad offset: MINUS SIGN (U+2212) not specified in RFC 2822
("Tue, 20 Jan 2015 17:35:20 0800", Err(INVALID)), // missing offset sign
("Tue, 20 Jan 2015 17:35:20 HAS", Err(NOT_ENOUGH)), // bad named timezone
("Tue, 20 Jan 2015😈17:35:20 -0800", Err(INVALID)), // bad character!
];

Expand All @@ -1392,6 +1427,8 @@ fn test_rfc2822() {

// Test against test data above
for &(date, checkdate) in testdates.iter() {
eprintln!("Test input: {:?}", date);
eprintln!(" Expect: {:?}", checkdate);
let d = rfc2822_to_datetime(date); // parse a date
let dt = match d {
// did we get a value?
Expand Down Expand Up @@ -1459,12 +1496,16 @@ fn test_rfc3339() {
// Test data - (input, Ok(expected result after parse and format) or Err(error code))
let testdates = [
("2015-01-20T17:35:20-08:00", Ok("2015-01-20T17:35:20-08:00")), // normal case
("2015-01-20T17:35:20−08:00", Ok("2015-01-20T17:35:20-08:00")), // normal case with MINUS SIGN (U+2212)
("1944-06-06T04:04:00Z", Ok("1944-06-06T04:04:00+00:00")), // D-day
("2001-09-11T09:45:00-08:00", Ok("2001-09-11T09:45:00-08:00")),
("2015-01-20T17:35:20.001-08:00", Ok("2015-01-20T17:35:20.001-08:00")),
("2015-01-20T17:35:20.001−08:00", Ok("2015-01-20T17:35:20.001-08:00")), // with MINUS SIGN (U+2212)
("2015-01-20T17:35:20.000031-08:00", Ok("2015-01-20T17:35:20.000031-08:00")),
("2015-01-20T17:35:20.000000004-08:00", Ok("2015-01-20T17:35:20.000000004-08:00")),
("2015-01-20T17:35:20.000000004−08:00", Ok("2015-01-20T17:35:20.000000004-08:00")), // with MINUS SIGN (U+2212)
("2015-01-20T17:35:20.000000000452-08:00", Ok("2015-01-20T17:35:20-08:00")), // too small
("2015-01-20T17:35:20.000000000452−08:00", Ok("2015-01-20T17:35:20-08:00")), // too small with MINUS SIGN (U+2212)
("2015-01-20 17:35:20.001-08:00", Err(INVALID)), // missing separator 'T'
("2015/01/20T17:35:20.001-08:00", Err(INVALID)), // wrong separator char YMD
("2015-01-20T17-35-20.001-08:00", Err(INVALID)), // wrong separator char HMS
Expand All @@ -1477,10 +1518,28 @@ fn test_rfc3339() {
("2015-01-20T17:35:20-24:00", Err(OUT_OF_RANGE)), // bad offset value
("15-01-20T17:35:20-08:00", Err(INVALID)), // bad year format
("15-01-20T17:35:20-08:00:00", Err(INVALID)), // bad year format, bad offset format
("2015-01-20T17:35:20-0800", Err(INVALID)), // bad offset format
("2015-01-20T17:35:20.001-08 : 00", Err(INVALID)), // bad offset format
("2015-01-20T17:35:2008:00", Err(INVALID)), // missing offset sign
("2015-01-20T17:35:20 08:00", Err(INVALID)), // missing offset sign
("2015-01-20T17:35:20Zulu", Err(TOO_LONG)), // bad offset format
("2015-01-20T17:35:20 Zulu", Err(INVALID)), // bad offset format
("2015-01-20T17:35:20GMT", Err(INVALID)), // bad offset format
("2015-01-20T17:35:20 GMT", Err(INVALID)), // bad offset format
("2015-01-20T17:35:20+GMT", Err(INVALID)), // bad offset format
("2015-01-20T17:35:20++08:00", Err(INVALID)), // bad offset format
("2015-01-20T17:35:20--08:00", Err(INVALID)), // bad offset format
("2015-01-20T17:35:20−−08:00", Err(INVALID)), // bad offset format
("2015-01-20T17:35:20±08:00", Err(INVALID)), // bad offset sign
("2015-01-20T17:35:20-08-00", Err(INVALID)), // bad offset separator
("2015-01-20T17:35:20-08;00", Err(INVALID)), // bad offset separator
("2015-01-20T17:35:20-0800", Err(INVALID)), // bad offset separator
("2015-01-20T17:35:20-08:0", Err(TOO_SHORT)), // bad offset minutes
("2015-01-20T17:35:20-08:AA", Err(INVALID)), // bad offset minutes
("2015-01-20T17:35:20-08:ZZ", Err(INVALID)), // bad offset minutes
("2015-01-20T17:35:20.001-08 : 00", Err(INVALID)), // bad offset separator
("2015-01-20T17:35:20-08:00:00", Err(TOO_LONG)), // bad offset format
("2015-01-20T17:35:20+08:", Err(TOO_SHORT)), // bad offset format
("2015-01-20T17:35:20-08:", Err(TOO_SHORT)), // bad offset format
("2015-01-20T17:35:20−08:", Err(TOO_SHORT)), // bad offset format
("2015-01-20T17:35:20-08", Err(TOO_SHORT)), // bad offset format
("2015-01-20T", Err(TOO_SHORT)), // missing HMS
("2015-01-20T00:00:1", Err(TOO_SHORT)), // missing complete S
Expand Down
45 changes: 38 additions & 7 deletions src/format/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,13 +269,23 @@ pub(super) fn timezone_offset<F>(s: &str, consume_colon: F) -> ParseResult<(&str
where
F: FnMut(&str) -> ParseResult<&str>,
{
timezone_offset_internal(s, consume_colon, false)
timezone_offset_internal(s, consume_colon, false, true)
}

/// The `consume_colon` function is used to parse a mandatory or optional `:`
/// separator between hours offset and minutes offset.
///
/// The `allow_missing_minutes` flag allows the timezone minutes offset to be
/// missing from `s`.
///
/// The `allow_tz_minus_sign` flag allows the timezone offset negative character
/// to also be MINUS SIGN (U+2212) (part of RFC 3339 & ISO 8601)
/// in addition to the typical HYPHEN-MINUS (U+002D).
fn timezone_offset_internal<F>(
mut s: &str,
mut consume_colon: F,
allow_missing_minutes: bool,
allow_tz_minus_sign: bool,
) -> ParseResult<(&str, i32)>
where
F: FnMut(&str) -> ParseResult<&str>,
Expand All @@ -289,12 +299,33 @@ where
}
}
let negative = match s.as_bytes().first() {
Some(&b'+') => false,
Some(&b'-') => true,
Some(_) => return Err(INVALID),
Some(&b'+') => {
s = &s[1..];

false
}
Some(&b'-') => {
s = &s[1..];

true
}
Some(_) => {
if !allow_tz_minus_sign {
return Err(INVALID);
}
// RFC 3339 / ISO 8601 allows the use of `−` MINUS SIGN (U+2212)
match s.chars().next() {
Some('−') => {
let skip = '−'.len_utf8();
s = &s[skip..];

true
}
_ => return Err(INVALID),
}
}
None => return Err(TOO_SHORT),
};
s = &s[1..];

// hours (00--99)
let hours = match digits(s)? {
Expand Down Expand Up @@ -360,7 +391,7 @@ where
{
match s.as_bytes().first() {
Some(&b'z') | Some(&b'Z') => Ok((&s[1..], 0)),
_ => timezone_offset_internal(s, colon, true),
_ => timezone_offset_internal(s, colon, true, true),
}
}

Expand Down Expand Up @@ -388,7 +419,7 @@ pub(super) fn timezone_offset_2822(s: &str) -> ParseResult<(&str, Option<i32>)>
Ok((s, None)) // recommended by RFC 2822: consume but treat it as -0000
}
} else {
let (s_, offset) = timezone_offset(s, |s| Ok(s))?;
let (s_, offset) = timezone_offset_internal(s, |s| Ok(s), false, false)?;
Ok((s_, Some(offset)))
}
}
Expand Down
Loading

0 comments on commit 3d47112

Please sign in to comment.