From bc06024c7f3759e61ad75915a0c1ea49af0b4fe8 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Thu, 29 Dec 2016 21:54:34 -0500 Subject: [PATCH] Make ASCII classes consistent with other engines. For example, the regex `[:upper:]` used to correspond to the `upper` ASCII character class, but it now corresponds to the character class containing the characters `:upper:`. Forms like `[[:upper:][:blank:]]` are still accepted. Fixes #175 --- regex-syntax/src/parser.rs | 13 ++++--------- src/lib.rs | 4 ++-- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/regex-syntax/src/parser.rs b/regex-syntax/src/parser.rs index a9370c24c9..e883013a9b 100644 --- a/regex-syntax/src/parser.rs +++ b/regex-syntax/src/parser.rs @@ -112,10 +112,7 @@ impl Parser { '*' => try!(self.parse_simple_repeat(Repeater::ZeroOrMore)), '+' => try!(self.parse_simple_repeat(Repeater::OneOrMore)), '{' => try!(self.parse_counted_repeat()), - '[' => match self.maybe_parse_ascii() { - None => try!(self.parse_class()), - Some(cls) => Build::Expr(Expr::Class(cls)), - }, + '[' => try!(self.parse_class()), '^' => { if self.flags.multi { self.parse_one(Expr::StartLine) @@ -2224,10 +2221,11 @@ mod tests { #[test] fn ascii_classes() { - assert_eq!(p("[:upper:]"), Expr::Class(class(UPPER))); + assert_eq!(p("[:blank:]"), Expr::Class(class(&[ + (':', ':'), ('a', 'b'), ('k', 'l'), ('n', 'n'), + ]))); assert_eq!(p("[[:upper:]]"), Expr::Class(class(UPPER))); - assert_eq!(pb("(?-u)[:upper:]"), Expr::Class(class(UPPER))); assert_eq!(pb("(?-u)[[:upper:]]"), Expr::ClassBytes(class(UPPER).to_byte_class())); } @@ -2270,12 +2268,9 @@ mod tests { #[test] fn ascii_classes_case_fold() { - assert_eq!(p("(?i)[:upper:]"), Expr::Class(class(UPPER).case_fold())); assert_eq!(p("(?i)[[:upper:]]"), Expr::Class(class(UPPER).case_fold())); - assert_eq!(pb("(?i-u)[:upper:]"), - Expr::Class(class(UPPER).case_fold())); assert_eq!(pb("(?i-u)[[:upper:]]"), Expr::ClassBytes(class(UPPER).to_byte_class().case_fold())); } diff --git a/src/lib.rs b/src/lib.rs index efdb008369..4f14115cd9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -276,8 +276,8 @@ //! //! //! Any named character class may appear inside a bracketed `[...]` character -//! class. For example, `[\p{Greek}\pN]` matches any Greek or numeral -//! character. +//! class. For example, `[\p{Greek}[:digit:]]` matches any Greek or ASCII +//! digit. //! //! ## Composites //!