Skip to content

Commit

Permalink
#332 Regexp rules shortcut
Browse files Browse the repository at this point in the history
  • Loading branch information
Mizzick committed Nov 18, 2016
1 parent a0656b1 commit 07735b8
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 4 deletions.
59 changes: 55 additions & 4 deletions Extension/lib/filter/rules/url-filter-rule.js
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ var UrlFilterRule = exports.UrlFilterRule = function (rule, filterId) {
if (!regexp) {
throw 'Illegal regexp rule';
}

// Extract shortcut from regexp rule
this.shortcut = extractRegexpShortcut(urlRuleText);
} else {
// Searching for shortcut
this.shortcut = findShortcut(urlRuleText);
Expand Down Expand Up @@ -403,10 +406,15 @@ function parseRuleDomain(ruleText, parseOptions) {
}
}

// Searches for the shortcut of this url mask.
// Shortcut is the longest part of the mask without special characters:
// *,^,|. If not found anything with the length greater or equal to 8 characters -
// shortcut is not used.
/**
* Searches for the shortcut of this url mask.
* The shortcut is the longest part of the mask that contains none of the
* special characters *, ^ and |. If no part is at least 8 characters long,
* the shortcut is not used.
*
* @param urlmask
* @returns {string}
*/
function findShortcut(urlmask) {
var longest = "";
var parts = urlmask.split(/[*^|]/);
Expand All @@ -419,6 +427,49 @@ function findShortcut(urlmask) {
return longest.toLowerCase();
}

/**
 * Extracts a shortcut from a regexp rule.
 *
 * The shortcut is the longest run of literal characters that is left in the
 * regexp source once groups, character classes, quantifiers and escape
 * sequences have been stripped out. When several runs share the greatest
 * length, the leftmost one is returned.
 * NOTE(review): the stripping below assumes the shortcut must be a literal
 * substring of every string the regexp matches - confirm against the code
 * that consumes `shortcut`.
 *
 * @param ruleText rule text of the form "/regexp/" or "/regexp/$options"
 * @returns {string|null} the shortcut ("" when no literal run is left), or
 * null when the text contains no "/.../" part or the expression is too
 * complex to analyse safely (lookaheads, top-level alternation)
 */
function extractRegexpShortcut(ruleText) {

    // Get the regexp text: everything between the first and the last slash
    var ruleMatch = ruleText.match(/\/(.*)\/(\$.*)?/);
    if (!ruleMatch) {
        // Not a "/regexp/" rule at all - nothing to extract
        return null;
    }
    var reText = ruleMatch[1];

    var specialCharacter = "...";

    if (reText.indexOf('(?') >= 0) {
        // Do not mess with complex expressions which use lookahead
        // ("(?=", "(?!" and the other "(?" groups)
        return null;
    }

    // (Dirty) prepend specialCharacter so that every "[^\\]" check below
    // always has a preceding character to look at
    reText = specialCharacter + reText;

    // Walk the escape sequences pairwise, left to right. Escapes that do not
    // stand for the escaped character itself (\d, \w, \b, \1, \x41, \u0041,
    // \cX ...) and escaped backslashes become separators; escaped punctuation
    // such as "\." is left alone and is cut at the backslash by the split.
    reText = reText.replace(/\\(?:x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|c[a-zA-Z]|[\s\S])/g, function (seq) {
        return /^\\[a-zA-Z0-9\\]/.test(seq) ? specialCharacter : seq;
    });

    // Strip all types of brackets. The match is greedy on purpose: everything
    // from the first opening to the last closing bracket is dropped, which
    // also covers nested groups. The character in front of the bracket is
    // dropped together with it.
    reText = reText.replace(/[^\\]\(.*[^\\]\)/, specialCharacter);
    reText = reText.replace(/[^\\]\[.*[^\\]\]/, specialCharacter);
    reText = reText.replace(/[^\\]\{.*[^\\]\}/, specialCharacter);

    // A character followed by "?" or "*" may be absent from the matched
    // string, so it must not be part of the shortcut
    reText = reText.replace(/[^\\][*?]/g, specialCharacter);

    if (/[^\\]\|/.test(reText)) {
        // Alternation outside of any group: no literal is shared by all
        // branches, so there is no safe shortcut
        return null;
    }

    // Split by special characters
    var parts = reText.split(/[\\^$*+?.()|[\]{}]/);
    var token = "";
    // Iterate from the end with a strict comparison so that the leftmost
    // of equally long parts wins
    var iParts = parts.length;
    while (iParts--) {
        var part = parts[iParts];
        if (part.length > token.length) {
            token = part;
        }
    }

    return token;
}

/**
* Parse rule text
* @param ruleText
Expand Down
12 changes: 12 additions & 0 deletions Extension/tests/url-filter/test-url-filter.js
Original file line number Diff line number Diff line change
Expand Up @@ -253,4 +253,16 @@ QUnit.test("Complex regexp rule", function(assert) {
assert.notOk(rule.isFiltered("http://connect.facebook.net/blahblah.js", true, RequestTypes.SCRIPT));
assert.notOk(rule.isFiltered("https://ajax.cloudflare.com/blahblah.js", true, RequestTypes.SCRIPT));
assert.notOk(rule.isFiltered("https://www.google-analytics.com/blahblah.js", true, RequestTypes.SCRIPT));
});

QUnit.test("Regexp rules shortcuts", function (assert) {
    // Pairs of [regexp rule text, expected shortcut], checked in order.
    // NOTE: inside these single-quoted literals "\." and "\/" are identity
    // escapes, so the rule text handed to UrlFilterRule contains a plain "."
    // or "/" without the backslash.
    var expectations = [
        ['/quang%20cao/', 'quang%20cao'],
        ['/YanAds/', 'YanAds'],
        ['/^http://m\.autohome\.com\.cn\/[a-z0-9]{32}\//$domain=m.autohome.com.cn', 'autohome'],
        ['/cdsbData_gal/bannerFile/$image,domain=mybogo.net|zipbogo.net ', 'cdsbData_gal/bannerFile'],
        ['/http:\/\/rustorka.com\/[a-z]+\.js/$domain=rustorka.com', 'http://rustorka'],
        ['/^http://www\.iqiyi\.com\/common\/flashplayer\/[0-9]{8}/[0-9a-z]{32}.swf/$domain=iqiyi.com', 'com/common/flashplayer'],
        ['/ulightbox/$domain=hdkinomax.com|tvfru.net', 'ulightbox'],
        ['/\.sharesix\.com/.*[a-zA-Z0-9]{4}/$script', 'sharesix'],
        ['/serial_adv_files/$image,domain=xn--80aacbuczbw9a6a.xn--p1ai|куражбамбей.рф', 'serial_adv_files']
    ];

    for (var i = 0; i < expectations.length; i++) {
        var ruleText = expectations[i][0];
        var expectedShortcut = expectations[i][1];
        assert.equal(new UrlFilterRule(ruleText).shortcut, expectedShortcut);
    }
});

0 comments on commit 07735b8

Please sign in to comment.