From 0471c74929ae7291749e88cf9197e69505a1ff65 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Thu, 27 Oct 2016 17:36:12 -0400 Subject: [PATCH] Fixes a bug introduced by the fix in 630049. This is another variant of incorrectly producing an unambiguous set of literals. Fixes #291. --- .gitignore | 1 + regex-syntax/Cargo.toml | 3 +++ regex-syntax/src/literals.rs | 12 +++++++++--- tests/regression.rs | 6 +++++- 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index dc01bfda71..7c25d48667 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ Cargo.lock bench-log .*.swp wiki +tags diff --git a/regex-syntax/Cargo.toml b/regex-syntax/Cargo.toml index ee00ec07cf..1f935cff0d 100644 --- a/regex-syntax/Cargo.toml +++ b/regex-syntax/Cargo.toml @@ -11,3 +11,6 @@ description = "A regular expression parser." [dev-dependencies] quickcheck = "0.2" rand = "0.3" + +[profile.test] +codegen-units = 8 diff --git a/regex-syntax/src/literals.rs b/regex-syntax/src/literals.rs index 49de379be4..0b89ccb250 100644 --- a/regex-syntax/src/literals.rs +++ b/regex-syntax/src/literals.rs @@ -250,9 +250,12 @@ impl Literals { } } else { if let Some(i) = position(&lit2, &candidate) { - candidate.truncate(i); - candidate.cut(); lit2.cut(); + let mut new_candidate = candidate.clone(); + new_candidate.truncate(i); + new_candidate.cut(); + old.push(new_candidate); + candidate.clear(); } } // Oops, the candidate is already represented in the set. 
@@ -1385,7 +1388,7 @@ mod tests { vec![M("Mo'"), M("Mu'"), M("Mo"), M("Mu")], vec![C("Mo"), C("Mu")]); test_unamb!(unambiguous11, - vec![M("zazb"), M("azb")], vec![C("azb"), C("z")]); + vec![M("zazb"), M("azb")], vec![C("a"), C("z")]); test_unamb!(unambiguous12, vec![M("foo"), C("foo")], vec![C("foo")]); test_unamb!(unambiguous13, vec![M("ABCX"), M("CDAX"), M("BCX")], @@ -1393,6 +1396,9 @@ mod tests { test_unamb!(unambiguous14, vec![M("IMGX"), M("MVIX"), M("MGX"), M("DSX")], vec![M("DSX"), C("I"), C("MGX"), C("MV")]); + test_unamb!(unambiguous15, + vec![M("IMG_"), M("MG_"), M("CIMG")], + vec![C("C"), C("I"), C("MG_")]); // ************************************************************************ diff --git a/tests/regression.rs b/tests/regression.rs index fb85507561..d2732467cf 100644 --- a/tests/regression.rs +++ b/tests/regression.rs @@ -70,4 +70,8 @@ ismatch!(partial_anchor_alternate_begin, u!(r"^a|z"), "yyyyya", false); ismatch!(partial_anchor_alternate_end, u!(r"a$|z"), "ayyyyy", false); // See: https://github.com/rust-lang-nursery/regex/issues/289 -mat!(lits_unambiguous, u!(r"(ABC|CDA|BC)X"), "CDAX", Some((0, 4))); +mat!(lits_unambiguous1, u!(r"(ABC|CDA|BC)X"), "CDAX", Some((0, 4))); + +// See: https://github.com/rust-lang-nursery/regex/issues/291 +mat!(lits_unambiguous2, u!(r"((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$"), + "CIMG2341", Some((0, 8)), Some((0, 4)), None, Some((0, 4)), Some((4, 8)));