Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow multiple capture groups for debounce regex action #14687

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 26 additions & 5 deletions components/debounce/browser/debounce_rule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -187,17 +187,36 @@ bool DebounceRule::ValidateAndParsePatternRegex(
<< " which is an invalid regex pattern";
return false;
}
if (pattern_regex.NumberOfCapturingGroups() != 1) {
if (pattern_regex.NumberOfCapturingGroups() < 1) {
VLOG(1) << "Debounce rule has param: " << pattern
<< " which captures != 1 groups";
<< " which captures < 1 groups";
return false;
}

if (!RE2::PartialMatch(path, pattern_regex, parsed_value)) {
// Get matching capture groups by applying regex to the path
size_t number_of_capturing_groups =
pattern_regex.NumberOfCapturingGroups() + 1;
std::vector<re2::StringPiece> match_results(number_of_capturing_groups);

if (!pattern_regex.Match(path, 0, path.size(), RE2::UNANCHORED,
match_results.data(), match_results.size())) {
VLOG(1) << "Debounce rule with param: " << param_
<< " was unable to capture string";
return false;
}

// This will always be at least 2: the first one is the full match
DCHECK_GT(match_results.size(), 1u);

// Build parsed_value string by appending matches, ignoring the first match
// which will be the whole match
std::for_each(std::begin(match_results) + 1, std::end(match_results),
ShivanKaul marked this conversation as resolved.
Show resolved Hide resolved
[parsed_value](re2::StringPiece matched_string) {
if (!matched_string.empty()) {
matched_string.AppendToString(parsed_value);
}
});

return true;
}

Expand All @@ -212,12 +231,14 @@ bool DebounceRule::Apply(const GURL& original_url,
action_ != kDebounceRegexPath)
return false;
// If URL matches an explicitly excluded pattern, this rule does not apply.
if (exclude_pattern_set_.MatchesURL(original_url))
if (exclude_pattern_set_.MatchesURL(original_url)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this diff is not needed, existing formatting is acceptable

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know but I just wanted to match everything up. It helps me a tiny bit when I want to add local logging to have these, and given that they're both equivalent I thought might as well. Personally I think always specifying {} is easier to read, but no strong feelings

return false;
}
// If URL does not match an explicitly included pattern, this rule does not
// apply.
if (!include_pattern_set_.MatchesURL(original_url))
if (!include_pattern_set_.MatchesURL(original_url)) {
return false;
}

if (!DebounceRule::CheckPrefForRule(prefs)) {
return false;
Expand Down
68 changes: 68 additions & 0 deletions components/debounce/browser/test/debounce_rule_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,74 @@ TEST(DebounceRuleUnitTest, ParamCapturesNonURLWithPrependScheme) {
}
}

TEST(DebounceRuleUnitTest, TwoCaptureGroups) {
const std::string contents = R"json(

[{
"include": [
"*://test.com/*"
],
"exclude": [
],
"action": "regex-path",
"prepend_scheme": "https",
"param": "^/([^/]+)/xyz(/.*)$"
}]

)json";
std::vector<std::unique_ptr<DebounceRule>> rules = StringToRules(contents);

for (const std::unique_ptr<DebounceRule>& rule : rules) {
CheckApplyResult(rule.get(), GURL("https://test.com/brave.com/xyz/abc.jpg"),
"https://brave.com/abc.jpg", false);
}
}

TEST(DebounceRuleUnitTest, CurlyBracesInRegexGetParseError) {
const std::string contents = R"json(

[{
"include": [
"*://test.com/*"
],
"exclude": [
],
"action": "regex-path",
"prepend_scheme": "https",
"param": "^/turbo/([^/]+)/xyz(/\d{4})/xyzzy(/.*)$"
}]

)json";

auto parsed = DebounceRule::ParseRules(contents);
EXPECT_FALSE(parsed.has_value());
}

TEST(DebounceRuleUnitTest, ThreeCaptureGroups) {
const std::string contents = R"json(

[{
"include": [
"*://test.com/*"
],
"exclude": [
],
"action": "regex-path",
"prepend_scheme": "https",
"param": "^/turbo/([^/]+)/xyz(/[0-9]+)/xyzzy(/.*)$"
}]

)json";
std::vector<std::unique_ptr<DebounceRule>> rules = StringToRules(contents);

for (const std::unique_ptr<DebounceRule>& rule : rules) {
CheckApplyResult(
rule.get(),
GURL("https://test.com/turbo/brave.com/xyz/2022/xyzzy/abc.jpg"),
"https://brave.com/2022/abc.jpg", false);
}
}

TEST(DebounceRuleUnitTest, ParamCapturesURLWithPrependScheme) {
const std::string contents = R"json(

Expand Down