-
Notifications
You must be signed in to change notification settings - Fork 452
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added participating_captures_len
#908
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -243,6 +243,7 @@ impl Hir { | |
info.set_match_empty(true); | ||
info.set_literal(false); | ||
info.set_alternation_literal(false); | ||
info.static_capture_count = Some(0); | ||
Hir { kind: HirKind::Empty, info } | ||
} | ||
|
||
|
@@ -268,6 +269,7 @@ impl Hir { | |
info.set_match_empty(false); | ||
info.set_literal(true); | ||
info.set_alternation_literal(true); | ||
info.static_capture_count = Some(0); | ||
Hir { kind: HirKind::Literal(lit), info } | ||
} | ||
|
||
|
@@ -285,6 +287,7 @@ impl Hir { | |
info.set_match_empty(false); | ||
info.set_literal(false); | ||
info.set_alternation_literal(false); | ||
info.static_capture_count = Some(0); | ||
Hir { kind: HirKind::Class(class), info } | ||
} | ||
|
||
|
@@ -318,6 +321,7 @@ impl Hir { | |
if let Anchor::EndLine = anchor { | ||
info.set_line_anchored_end(true); | ||
} | ||
info.static_capture_count = Some(0); | ||
Hir { kind: HirKind::Anchor(anchor), info } | ||
} | ||
|
||
|
@@ -345,6 +349,7 @@ impl Hir { | |
if let WordBoundary::AsciiNegate = word_boundary { | ||
info.set_always_utf8(false); | ||
} | ||
info.static_capture_count = Some(0); | ||
Hir { kind: HirKind::WordBoundary(word_boundary), info } | ||
} | ||
|
||
|
@@ -372,6 +377,28 @@ impl Hir { | |
info.set_match_empty(rep.is_match_empty() || rep.hir.is_match_empty()); | ||
info.set_literal(false); | ||
info.set_alternation_literal(false); | ||
info.static_capture_count = { | ||
use RepetitionRange::{AtLeast, Bounded, Exactly}; | ||
match (&rep.kind, rep.hir.info.static_capture_count) { | ||
// Always zero. | ||
(_, Some(0)) => Some(0), | ||
(RepetitionKind::Range(Exactly(0)), _) => Some(0), | ||
(RepetitionKind::Range(Bounded(_, 0)), _) => Some(0), | ||
|
||
// Impossible to know statically. | ||
(_, None) => None, | ||
(RepetitionKind::ZeroOrOne, _) => None, | ||
(RepetitionKind::ZeroOrMore, _) => None, | ||
(RepetitionKind::Range(AtLeast(0)), _) => None, | ||
(RepetitionKind::Range(Bounded(0, _)), _) => None, | ||
|
||
// Guaranteed to be static. | ||
(RepetitionKind::OneOrMore, Some(n)) => Some(n), | ||
(RepetitionKind::Range(Exactly(_)), Some(n)) => Some(n), | ||
(RepetitionKind::Range(AtLeast(_)), Some(n)) => Some(n), | ||
(RepetitionKind::Range(Bounded(_, _)), Some(n)) => Some(n), | ||
} | ||
}; | ||
Hir { kind: HirKind::Repetition(rep), info } | ||
} | ||
|
||
|
@@ -389,6 +416,12 @@ impl Hir { | |
info.set_match_empty(group.hir.is_match_empty()); | ||
info.set_literal(false); | ||
info.set_alternation_literal(false); | ||
info.static_capture_count = match group.kind { | ||
GroupKind::NonCapturing { .. } => { | ||
group.hir.info.static_capture_count | ||
} | ||
_ => group.hir.info.static_capture_count.map(|n| n + 1), | ||
}; | ||
Hir { kind: HirKind::Group(group), info } | ||
} | ||
|
||
|
@@ -480,6 +513,10 @@ impl Hir { | |
}) | ||
.any(|e| e.is_line_anchored_end()), | ||
); | ||
info.static_capture_count = | ||
exprs.iter().fold(Some(0), |cnt, e| { | ||
Some(cnt? + e.info.static_capture_count?) | ||
}); | ||
Hir { kind: HirKind::Concat(exprs), info } | ||
} | ||
} | ||
|
@@ -542,6 +579,11 @@ impl Hir { | |
let x = info.is_alternation_literal() && e.is_literal(); | ||
info.set_alternation_literal(x); | ||
} | ||
let mut capture_counts = | ||
exprs.iter().map(|e| e.info.static_capture_count); | ||
let first = capture_counts.next().unwrap_or(Some(0)); | ||
info.static_capture_count = capture_counts | ||
.fold(first, |a, b| if a == b { a } else { None }); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like all of this logic can be pushed up into the loop as well. |
||
Hir { kind: HirKind::Alternation(exprs), info } | ||
} | ||
} | ||
|
@@ -692,6 +734,13 @@ impl Hir { | |
pub fn is_alternation_literal(&self) -> bool { | ||
self.info.is_alternation_literal() | ||
} | ||
|
||
/// Returns the number of captures groups that would participate in a | ||
/// successful match of this expression. If this number can not be | ||
/// statically determined from the regex this function returns `None`. | ||
pub fn participating_captures_len(&self) -> Option<usize> { | ||
self.info.static_capture_count.map(|c| c as usize) | ||
} | ||
} | ||
|
||
impl HirKind { | ||
|
@@ -1484,6 +1533,11 @@ struct HirInfo { | |
/// If more attributes need to be added, it is OK to increase the size of | ||
/// this as appropriate. | ||
bools: u16, | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This blank line can be dropped. |
||
/// How many capture groups this HIR expression deterministically fills. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I still think the word here should be "static" and not "deterministic." |
||
/// If this number could depend on the input (e.g. an Alternation where the | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Replace "input" with "haystack" here please. (I'm trying to be better about always using the word "haystack" when referring to the text being searched.) |
||
/// two branches have a different number of capture groups), this is None. | ||
static_capture_count: Option<u32>, | ||
} | ||
|
||
// A simple macro for defining bitfield accessors/mutators. | ||
|
@@ -1505,7 +1559,7 @@ macro_rules! define_bool { | |
|
||
impl HirInfo { | ||
fn new() -> HirInfo { | ||
HirInfo { bools: 0 } | ||
HirInfo { bools: 0, static_capture_count: None } | ||
} | ||
|
||
define_bool!(0, is_always_utf8, set_always_utf8); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -667,6 +667,13 @@ impl Regex { | |
self.0.capture_names().len() | ||
} | ||
|
||
/// Returns the number of participating captures that this regex will | ||
/// return on a successful match. If this number can not be statically | ||
/// determined from the regex this function returns `None`. | ||
pub fn participating_captures_len(&self) -> Option<usize> { | ||
self.0.participating_captures_len() | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you back out the public API changes to |
||
/// Returns an empty set of capture locations that can be reused in | ||
/// multiple calls to `captures_read` or `captures_read_at`. | ||
pub fn capture_locations(&self) -> CaptureLocations { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -128,6 +128,47 @@ fn capture_index_lifetime() { | |
assert_eq!(3, inner("123")); | ||
} | ||
|
||
#[test] | ||
fn participating_captures_len() { | ||
let tests = [ | ||
("", Some(0)), | ||
("foo|bar", Some(0)), | ||
("(foo)|bar", None), | ||
("foo|(bar)", None), | ||
("(foo|bar)", Some(1)), | ||
("(a|b|c|d|e|f)", Some(1)), | ||
("(a)|(b)|(c)|(d)|(e)|(f)", Some(1)), | ||
("(a)(b)|(c)(d)|(e)(f)", Some(2)), | ||
("(a)(b)(c)|(d)(e)(f)", Some(3)), | ||
("(a)(b)(c)(d)(e)(f)", Some(6)), | ||
("(a)(b)(extra)|(a)(b)()", Some(3)), | ||
("(a)(b)((?:extra)?)", Some(3)), | ||
("(a)(b)(extra)?", None), | ||
("(foo)|(bar)", Some(1)), | ||
("(foo)(bar)", Some(2)), | ||
("(foo)+(bar)", Some(2)), | ||
("(foo)*(bar)", None), | ||
("(foo)?{0}", Some(0)), | ||
("(foo)?{1}", None), | ||
("(foo){1}", Some(1)), | ||
("(foo){1,}", Some(1)), | ||
("(foo){1,}?", Some(1)), | ||
("(foo){0,}", None), | ||
("(foo)(?:bar)", Some(1)), | ||
("(foo(?:bar)+)(?:baz(boo))", Some(2)), | ||
("(?P<bar>foo)(?:bar)(bal|loon)", Some(2)), | ||
(r#"<(a)[^>]+href="([^"]+)"|<(img)[^>]+src="([^"]+)""#, Some(2)), | ||
]; | ||
for (test_regex, expected) in tests { | ||
let re = regex!(test_regex); | ||
assert_eq!( | ||
re.participating_captures_len(), | ||
expected, | ||
"for regex {test_regex}" | ||
); | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you move these tests to |
||
|
||
#[test] | ||
fn capture_misc() { | ||
let re = regex!(r"(.)(?P<a>a)?(.)(?P<b>.)"); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be in the loop above. Otherwise this iterates over the entire concatenation a second time for no good reason as far as I can tell.