From bbf4e159dc745fed177f6163537490cf384dc317 Mon Sep 17 00:00:00 2001
From: Robin Stocker
Date: Mon, 20 Mar 2017 12:31:45 +1100
Subject: [PATCH] Fix panics parsing regex with whitespace in extended mode

The added tests fail without the fix like this:

    ---- parser::tests::ignore_space_escape_hex2 stdout ----
    thread 'parser::tests::ignore_space_escape_hex2' panicked at 'called `Result::unwrap()` on an `Err` value: Error { pos: 10, surround: "x 5 3", kind: InvalidBase16(" 5 3") }', src/libcore/result.rs:860

    ---- parser::tests::ignore_space_escape_hex stdout ----
    thread 'parser::tests::ignore_space_escape_hex' panicked at 'called `Result::unwrap()` on an `Err` value: Error { pos: 12, surround: "{ 5 3 }", kind: InvalidBase16(" 5 3") }', src/libcore/result.rs:860

    ---- parser::tests::ignore_space_ascii_classes stdout ----
    thread 'parser::tests::ignore_space_ascii_classes' panicked at 'called `Result::unwrap()` on an `Err` value: Error { pos: 5, surround: "(?x)[ [ : ", kind: UnsupportedClassChar('[') }', src/libcore/result.rs:860
    note: Run with `RUST_BACKTRACE=1` for a backtrace.

    ---- parser::tests::ignore_space_escape_octal stdout ----
    thread 'parser::tests::ignore_space_escape_octal' panicked at 'valid octal number', src/libcore/option.rs:785

    ---- parser::tests::ignore_space_escape_unicode_name stdout ----
    thread 'parser::tests::ignore_space_escape_unicode_name' panicked at 'called `Result::unwrap()` on an `Err` value: Error { pos: 15, surround: "Y i }", kind: UnrecognizedUnicodeClass(" Y i") }', src/libcore/result.rs:860

    ---- parser::tests::ignore_space_repeat_counted stdout ----
    thread 'parser::tests::ignore_space_repeat_counted' panicked at 'called `Result::unwrap()` on an `Err` value: Error { pos: 15, surround: ", 1 0 }", kind: InvalidBase10("1 0") }', src/libcore/result.rs:860

The reason for the panics is that `bump_get` would ignore space when
walking the characters, but then keep the spaces in the returned String.

Found using cargo-fuzz.
--- regex-syntax/src/parser.rs | 59 ++++++++++++++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 5 deletions(-) diff --git a/regex-syntax/src/parser.rs b/regex-syntax/src/parser.rs index c2aca269bc..e71712f112 100644 --- a/regex-syntax/src/parser.rs +++ b/regex-syntax/src/parser.rs @@ -423,7 +423,7 @@ impl Parser { // // Start: `1` // End: `,` (where `until == ','`) - fn parse_decimal(&mut self, until: B) -> Result { + fn parse_decimal bool>(&mut self, until: F) -> Result { match self.bump_get(until) { // e.g., a{} None => Err(self.err(ErrorKind::MissingBase10)), @@ -809,14 +809,23 @@ impl Parser { fn eof(&self) -> bool { self.chars().next().is_none() } - fn bump_get(&mut self, s: B) -> Option { - let n = s.match_end(self); + fn bump_get bool>(&mut self, mut f: F) -> Option { + let mut s = String::new(); + let n = { + let bumpable = |c| { + if f(c) { + s.push(c); + true + } else { + false + } + }; + bumpable.match_end(self) + }; if n == 0 { None } else { let end = checkadd(self.chari, n); - let s = self.chars[self.chari..end] - .iter().cloned().collect::(); self.chari = end; Some(s) } @@ -2374,6 +2383,46 @@ mod tests { D"), Expr::Class(class(PERLD).negate())); } + #[test] + fn ignore_space_escape_unicode_name() { + assert_eq!(p(r"(?x)\ p { Y i }"), Expr::Class(class(YI))); + } + + #[test] + fn ignore_space_escape_octal() { + assert_eq!(p(r"(?x)\ 1 2 3"), lit('S')); + assert_eq!(p(r"(?x)\ + 1 2 3"), lit('S')); + } + + #[test] + fn ignore_space_escape_hex() { + assert_eq!(p(r"(?x)\x { 5 3 }"), lit('S')); + assert_eq!(p(r"(?x)\x + { 5 3 }"), lit('S')); + } + + #[test] + fn ignore_space_escape_hex2() { + assert_eq!(p(r"(?x)\x 5 3"), lit('S')); + assert_eq!(p(r"(?x)\x + 5 3"), lit('S')); + } + + #[test] + fn ignore_space_repeat_counted() { + assert_eq!(p("(?x)a { 5 , 1 0 }"), Expr::Repeat { + e: b(lit('a')), + r: Repeater::Range { min: 5, max: Some(10) }, + greedy: true, + }); + } + + #[test] + fn ignore_space_ascii_classes() { + assert_eq!(p("(?x)[ [ : u p 
p e r : ] ]"), Expr::Class(class(UPPER))); + } + #[test] fn ignore_space_comments() { assert_eq!(p(r"(?x)(?P