Skip to content

Commit

Permalink
Do not store impossible to match filters in HNTrie
Browse files Browse the repository at this point in the history
Consider the two following filters:

    example.com
    www.example.com

This commit makes it so that if the first filter is
already present in a given HNTrie, the second filter
will not be stored, since HNTrie will _always_
return the first filter as a match whenever the
hostname to match is example.com or any subdomain
of example.com.

The detection of such pointless filters is
virtually free when adding a hostname to an HNTrie
instance (given how data is stored in the trie), so
in practice no overhead is incurred to detect such
pointless filters.

The ability to ignore impossible-to-match filters
in HNTrie instances will _especially_ benefit those
using large hosts files.

Examples of how this helps using real configurations:

- Default lists:
  444 filters out of 100,382 were ignored as a result
  of this commit.

- Default lists + "Energized Ultimate Protection":
  283,669 filters out of 903,235 were ignored as a
  result of this commit.

Side note: There was no measurable difference between
the two configurations above in the performance of
the matching algorithm as reported by the built-in
benchmark tool.
  • Loading branch information
gorhill committed Apr 29, 2019
1 parent c4f9ae7 commit adabb56
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 38 deletions.
2 changes: 1 addition & 1 deletion src/js/background.js
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ const µBlock = (function() { // jshint ignore:line
// Read-only
systemSettings: {
compiledMagic: 13, // Increase when compiled format changes
selfieMagic: 13 // Increase when selfie format changes
selfieMagic: 14 // Increase when selfie format changes
},

restoreBackupSettings: {
Expand Down
27 changes: 17 additions & 10 deletions src/js/hntrie.js
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,8 @@ const HNTrieContainer = class {
matchesJS(iroot) {
const char0 = this.buf32[HNTRIE_CHAR0_SLOT];
let ineedle = this.buf[255];
let icell = iroot;
let icell = this.buf32[iroot+0];
if ( icell === 0 ) { return -1; }
for (;;) {
if ( ineedle === 0 ) { return -1; }
ineedle -= 1;
Expand Down Expand Up @@ -238,19 +239,19 @@ const HNTrieContainer = class {
addJS(iroot) {
let lhnchar = this.buf[255];
if ( lhnchar === 0 ) { return 0; }
let icell = iroot;
// special case: first node in trie
if ( this.buf32[icell+2] === 0 ) {
this.buf32[icell+2] = this.addSegment(lhnchar);
return 1;
}
// grow buffer if needed
if (
(this.buf32[HNTRIE_CHAR0_SLOT] - this.buf32[HNTRIE_TRIE1_SLOT]) < 24 ||
(this.buf.length - this.buf32[HNTRIE_CHAR1_SLOT]) < 256
) {
this.growBuf(24, 256);
}
let icell = this.buf32[iroot+0];
// special case: first node in trie
if ( icell === 0 ) {
this.buf32[iroot+0] = this.addCell(0, 0, this.addSegment(lhnchar));
return 1;
}
//
const char0 = this.buf32[HNTRIE_CHAR0_SLOT];
let inext;
Expand All @@ -259,6 +260,9 @@ const HNTrieContainer = class {
const vseg = this.buf32[icell+2];
// skip boundary cells
if ( vseg === 0 ) {
// remainder is at label boundary? if yes, no need to add
// the rest since the shortest match is always reported
if ( this.buf[lhnchar-1] === 0x2E /* '.' */ ) { return -1; }
icell = this.buf32[icell+1];
continue;
}
Expand Down Expand Up @@ -303,6 +307,9 @@ const HNTrieContainer = class {
icell = inext;
continue;
}
// remainder is at label boundary? if yes, no need to add
// the rest since the shortest match is always reported
if ( this.buf[lhnchar-1] === 0x2E /* '.' */ ) { return -1; }
// boundary cell + needle remainder
inext = this.addCell(0, 0, 0);
this.buf32[icell+1] = inext;
Expand Down Expand Up @@ -550,7 +557,7 @@ HNTrieContainer.prototype.HNTrieRef = class {
}

add(hn) {
if ( this.container.setNeedle(hn).add(this.iroot) === 1 ) {
if ( this.container.setNeedle(hn).add(this.iroot) > 0 ) {
this.last = -1;
this.needle = '';
this.size += 1;
Expand All @@ -560,7 +567,7 @@ HNTrieContainer.prototype.HNTrieRef = class {
}

addJS(hn) {
if ( this.container.setNeedle(hn).addJS(this.iroot) === 1 ) {
if ( this.container.setNeedle(hn).addJS(this.iroot) > 0 ) {
this.last = -1;
this.needle = '';
this.size += 1;
Expand All @@ -570,7 +577,7 @@ HNTrieContainer.prototype.HNTrieRef = class {
}

addWASM(hn) {
if ( this.container.setNeedle(hn).addWASM(this.iroot) === 1 ) {
if ( this.container.setNeedle(hn).addWASM(this.iroot) > 0 ) {
this.last = -1;
this.needle = '';
this.size += 1;
Expand Down
Binary file modified src/js/wasm/hntrie.wasm
Binary file not shown.
94 changes: 67 additions & 27 deletions src/js/wasm/hntrie.wat
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,9 @@
;; offset.
;;
(func (export "matches")
(param $icell i32) ;; offset to root cell of the trie
(param $iroot i32) ;; offset to root cell of the trie
(result i32) ;; result = match index, -1 = miss
(local $icell i32) ;; offset to the current cell
(local $char0 i32) ;; offset to first character data
(local $ineedle i32) ;; current needle offset
(local $c i32)
Expand All @@ -64,15 +65,24 @@
i32.const 264 ;; start of char section is stored at addr 264
i32.load
set_local $char0
;; $icell is an index into an array of 32-bit values
get_local $icell
i32.const 2
i32.shl
set_local $icell
;; let ineedle = this.buf[255];
i32.const 255 ;; addr of needle is stored at addr 255
i32.load8_u
set_local $ineedle
;; let icell = this.buf32[iroot+0];
get_local $iroot
i32.const 2
i32.shl
i32.load
i32.const 2
i32.shl
tee_local $icell
;; if ( icell === 0 ) { return -1; }
i32.eqz
if
i32.const -1
return
end
;; for (;;) {
block $noSegment loop $nextSegment
;; if ( ineedle === 0 ) { return -1; }
Expand Down Expand Up @@ -244,8 +254,9 @@
;; Add a new hostname to a trie which root cell is passed as argument.
;;
(func (export "add")
(param $icell i32) ;; index of root cell of the trie
(param $iroot i32) ;; index of root cell of the trie
(result i32) ;; result: 1 = added, 0 = not added, -1 = not added (remainder at label boundary, a shorter hostname already matches)
(local $icell i32) ;; index of current cell in the trie
(local $lhnchar i32) ;; number of characters left to process in hostname
(local $char0 i32) ;; offset to start of character data section
(local $vseg i32) ;; integer value describing a segment
Expand All @@ -264,24 +275,6 @@
i32.const 0
return
end
;; let icell = iroot;
get_local $icell
i32.const 2
i32.shl
tee_local $icell
;; if ( this.buf32[icell+2] === 0 ) {
i32.load offset=8
i32.eqz
if
;;this.buf32[icell+2] = this.addSegment(lhnchar);
;; return 1;
get_local $icell
get_local $lhnchar
call $addSegment
i32.store offset=8
i32.const 1
return
end
;; if (
;; (this.buf32[HNBIGTRIE_CHAR0_SLOT] - this.buf32[HNBIGTRIE_TRIE1_SLOT]) < 24 ||
;; (this.buf.length - this.buf32[HNBIGTRIE_CHAR1_SLOT]) < 256
Expand Down Expand Up @@ -310,6 +303,30 @@
call $growBuf
end
end
;; let icell = this.buf32[iroot+0];
get_local $iroot
i32.const 2
i32.shl
tee_local $iroot
i32.load
i32.const 2
i32.shl
tee_local $icell
;; if ( icell === 0 ) {
i32.eqz
if
;; this.buf32[iroot+0] = this.addCell(0, 0, this.addSegment(lhnchar));
;; return 1;
get_local $iroot
i32.const 0
i32.const 0
get_local $lhnchar
call $addSegment
call $addCell
i32.store
i32.const 1
return
end
;; const char0 = this.buf32[HNBIGTRIE_CHAR0_SLOT];
i32.const 264
i32.load
Expand All @@ -323,6 +340,19 @@
;; if ( vseg === 0 ) {
i32.eqz
if
;; if ( this.buf[lhnchar-1] === 0x2E /* '.' */ ) { return -1; }
get_local $lhnchar
i32.const -1
i32.add
i32.load8_u
i32.const 0x2E
i32.eq
if
i32.const -1
return
end
;; icell = this.buf32[icell+1];
;; continue;
get_local $icell
i32.load offset=4
i32.const 2
Expand Down Expand Up @@ -463,13 +493,23 @@
else
;; if ( inext !== 0 ) {
get_local $inext
i32.eqz
if else
if
;; icell = inext;
get_local $inext
set_local $icell
br $nextSegment
end
;; if ( this.buf[lhnchar-1] === 0x2E /* '.' */ ) { return -1; }
get_local $lhnchar
i32.const -1
i32.add
i32.load8_u
i32.const 0x2E
i32.eq
if
i32.const -1
return
end
;; inext = this.addCell(0, 0, 0);
;; this.buf32[icell+1] = inext;
get_local $icell
Expand Down

0 comments on commit adabb56

Please sign in to comment.