Skip to content

Commit 1cc7fa0

Browse files
committed
Pull request 799: Buffer-based tsurlfilter Engine
The idea is to replace String-based StringRuleList with a Uint8Array-based version. Benefits of this approach: * Immediately saves ~10-20MB of RAM due to the fact that the strings are now encoded in UTF-8 (JavaScript String is UTF-16 so it takes 2 bytes for every character, UTF-8 will use 1 byte for 99.9% of characters in filter lists). * Fully solves the "leak" issues as there are no links to original filter list strings anymore. Testing shows that the buffer-based approach does not affect performance and actually improves memory usage of the extension. Squashed commit of the following: commit b50b15c Merge: 210ee8e 3f2e0d7 Author: Andrey Meshkov <[email protected]> Date: Tue Feb 13 14:36:32 2024 +0300 Merge branch 'master' into buffer-rule-list commit 210ee8e Author: Andrey Meshkov <[email protected]> Date: Tue Feb 13 13:24:02 2024 +0300 fix build, resolve review comments commit ced9492 Author: Andrey Meshkov <[email protected]> Date: Tue Feb 13 13:13:30 2024 +0300 fix review comment commit a3ebdb2 Author: Andrey Meshkov <[email protected]> Date: Tue Feb 13 12:54:26 2024 +0300 Fix comment, fix linter run commit b27721b Author: Andrey Meshkov <[email protected]> Date: Tue Feb 13 12:42:48 2024 +0300 Resolve review comments commit 85f095c Author: Andrey Meshkov <[email protected]> Date: Mon Feb 12 15:12:23 2024 +0300 Added comments commit 9226ed8 Author: Andrey Meshkov <[email protected]> Date: Mon Feb 12 15:05:36 2024 +0300 fix imports commit f34428f Merge: a2927eb aeb4336 Author: Andrey Meshkov <[email protected]> Date: Mon Feb 12 15:04:05 2024 +0300 merge with master commit a2927eb Author: Andrey Meshkov <[email protected]> Date: Mon Feb 12 14:52:44 2024 +0300 Added info about VS Code workspace commit a990bed Author: Andrey Meshkov <[email protected]> Date: Mon Feb 12 14:40:24 2024 +0300 initial implementation of BufferRuleList
1 parent 3f2e0d7 commit 1cc7fa0

29 files changed

+975
-407
lines changed

.gitignore

+8-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
1+
.DS_Store
12
node_modules
23
.nyc_output
3-
.DS_Store
44
*.log
5-
.vscode
6-
.idea
75
.awcache
86
.rpt2_cache
97
.nx
8+
9+
# IDEA files
10+
.idea
11+
12+
# VS code settings and workspace project
13+
.vscode
14+
tsurlfilter.code-workspace

README.md

+46
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,49 @@ Test pages:
9797

9898
[testcasessimplerules]: https://testcases.agrd.dev/Filters/simple-rules/test-simple-rules.html
9999
[testcasesscriptrules]: https://testcases.agrd.dev/Filters/script-rules/test-script-rules.html
100+
101+
### Visual Studio Code Workspace
102+
103+
If you're using Visual Studio Code for development, it may be easier to work
104+
with the monorepo if you use the workspace functionality. To do this, create a
105+
`tsurlfilter.code-workspace` file in the monorepo root directory.
106+
107+
The `jest.runMode` and `jest.enable` settings are useful for those who use the
108+
[Jest][jestplugin] plugin.
109+
110+
```json
111+
{
112+
"folders": [
113+
{
114+
"path": "packages/tsurlfilter",
115+
},
116+
{
117+
"path": "packages/tswebextension",
118+
},
119+
{
120+
"path": "packages/agtree",
121+
},
122+
{
123+
"path": "packages/css-tokenizer",
124+
},
125+
{
126+
"path": "packages/adguard-api",
127+
},
128+
{
129+
"path": "packages/examples/adguard-api",
130+
},
131+
{
132+
"path": "packages/examples/tswebextension-mv2",
133+
},
134+
{
135+
"path": "packages/examples/tswebextension-mv3",
136+
}
137+
],
138+
"settings": {
139+
"jest.runMode": "on-demand",
140+
"jest.enable": true,
141+
}
142+
}
143+
```
144+
145+
[jestplugin]: https://marketplace.visualstudio.com/items?itemName=Orta.vscode-jest
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
import { BufferLineReader } from './reader/buffer-line-reader';
2+
import { type IRuleList, LIST_ID_MAX_VALUE } from './rule-list';
3+
import { RuleScanner } from './scanner/rule-scanner';
4+
import { ScannerType } from './scanner/scanner-type';
5+
6+
/**
7+
* BufferRuleList represents a string-based rule list. It keeps the original
8+
* rule list as a byte array with UTF-8 encoded characters. This approach
9+
* allows saving on the memory used by tsurlfilter compared to StringRuleList.
10+
*/
11+
export class BufferRuleList implements IRuleList {
12+
/**
13+
* Rule list ID.
14+
*/
15+
private readonly id: number;
16+
17+
/**
18+
* String with filtering rules (one per line) encoded as a
19+
* UTF-8 array.
20+
*/
21+
private readonly rulesBuffer: Uint8Array;
22+
23+
/**
24+
* Whether to ignore cosmetic rules or not.
25+
*/
26+
private readonly ignoreCosmetic: boolean;
27+
28+
/**
29+
* Whether to ignore javascript cosmetic rules or not.
30+
*/
31+
private readonly ignoreJS: boolean;
32+
33+
/**
34+
* Whether to ignore unsafe rules or not.
35+
*/
36+
private readonly ignoreUnsafe: boolean;
37+
38+
/**
39+
* Text decoder that is used to read strings from the internal buffer of
40+
* UTF-8 encoded characters.
41+
*/
42+
private static readonly decoder = new TextDecoder('utf-8');
43+
44+
/**
45+
* Constructor of BufferRuleList.
46+
*
47+
* @param listId - List identifier.
48+
* @param rulesText - String with filtering rules (one per line).
49+
* @param ignoreCosmetic - (Optional) True to ignore cosmetic rules.
50+
* @param ignoreJS - (Optional) True to ignore JS rules.
51+
* @param ignoreUnsafe - (Optional) True to ignore unsafe rules.
52+
*/
53+
constructor(
54+
listId: number,
55+
rulesText: string,
56+
ignoreCosmetic?: boolean,
57+
ignoreJS?: boolean,
58+
ignoreUnsafe?: boolean,
59+
) {
60+
if (listId >= LIST_ID_MAX_VALUE) {
61+
throw new Error(`Invalid list identifier, it must be less than ${LIST_ID_MAX_VALUE}`);
62+
}
63+
64+
this.id = listId;
65+
const encoder = new TextEncoder();
66+
this.rulesBuffer = encoder.encode(rulesText);
67+
this.ignoreCosmetic = !!ignoreCosmetic;
68+
this.ignoreJS = !!ignoreJS;
69+
this.ignoreUnsafe = !!ignoreUnsafe;
70+
}
71+
72+
/**
73+
* Close does nothing as here's nothing to close in the BufferRuleList.
74+
*/
75+
// eslint-disable-next-line class-methods-use-this
76+
public close(): void {
77+
// Empty
78+
}
79+
80+
/**
81+
* @return - The rule list identifier
82+
*/
83+
getId(): number {
84+
return this.id;
85+
}
86+
87+
/**
88+
* Creates a new rules scanner that reads the list contents.
89+
*
90+
* @return - Scanner object.
91+
*/
92+
newScanner(scannerType: ScannerType): RuleScanner {
93+
const reader = new BufferLineReader(this.rulesBuffer);
94+
return new RuleScanner(reader, this.id, {
95+
scannerType,
96+
ignoreCosmetic: this.ignoreCosmetic,
97+
ignoreJS: this.ignoreJS,
98+
ignoreUnsafe: this.ignoreUnsafe,
99+
});
100+
}
101+
102+
/**
103+
* Finds rule text by its index.
104+
*
105+
* If there's no rule by that index or the rule is invalid, it will return
106+
* null.
107+
*
108+
* @param ruleIdx - rule index.
109+
* @return - rule text or null.
110+
*/
111+
retrieveRuleText(ruleIdx: number): string | null {
112+
if (ruleIdx < 0 || ruleIdx >= this.rulesBuffer.length) {
113+
return null;
114+
}
115+
116+
let endOfLine = this.rulesBuffer.indexOf(BufferLineReader.EOL, ruleIdx);
117+
if (endOfLine === -1) {
118+
endOfLine = this.rulesBuffer.length;
119+
}
120+
121+
const lineBuffer = this.rulesBuffer.subarray(ruleIdx, endOfLine);
122+
const line = BufferRuleList.decoder.decode(lineBuffer).trim();
123+
124+
if (!line) {
125+
return null;
126+
}
127+
128+
return line;
129+
}
130+
}
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,43 @@
11
import { IRule } from '../rules/rule';
22

33
/**
4-
* Rule list's cache
4+
* Cache of an individual filter list.
55
*/
66
export class ListCache {
77
/**
8-
* Cache with the rules which were retrieved.
8+
* Cache with the rules which are stored inside this cache instance.
99
*/
1010
private readonly cache: Map<number, IRule>;
1111

1212
/**
13-
* Constructor
13+
* ListCache constructor.
1414
*/
1515
constructor() {
1616
this.cache = new Map();
1717
}
1818

1919
/**
20-
* @param key
21-
* @return rule for specified key
20+
* @param key - Cache key.
21+
* @return - Rule found for specified key or undefined if nothing found.
2222
*/
2323
public get(key: number): IRule | undefined {
2424
return this.cache.get(key);
2525
}
2626

2727
/**
28-
* Sets rule for specified key
28+
* Stores the rule for the specified key in the cache.
2929
*
30-
* @param key
31-
* @param rule
30+
* @param key - Cache key.
31+
* @param rule - Cached value.
3232
*/
3333
public set(key: number, rule: IRule): void {
3434
this.cache.set(key, rule);
3535
}
36+
37+
/**
38+
* @returns - The list cache size.
39+
*/
40+
public getSize() {
41+
return this.cache.size;
42+
}
3643
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import { type ILineReader } from './line-reader';
2+
3+
/**
 * BufferLineReader reads text line by line from a byte buffer that holds a
 * UTF-8 encoded string.
 */
export class BufferLineReader implements ILineReader {
    /**
     * Code of the new line character that marks the end of a line. Only \n is
     * taken into account, \r is not, so the returned lines need to be trimmed
     * by the caller.
     */
    public static readonly EOL = '\n'.charCodeAt(0);

    /**
     * Decoder used to convert the UTF-8 bytes of a line to a string.
     */
    private static readonly decoder = new TextDecoder('utf-8');

    /**
     * Bytes of the UTF-8 encoded string that is being read.
     */
    private readonly buffer: Uint8Array;

    /**
     * Offset in the buffer where the next line starts, or -1 when the whole
     * buffer has been read.
     */
    private currentIndex = 0;

    /**
     * Creates a reader over the specified buffer.
     *
     * @param buffer - Uint8Array that contains a UTF-8 encoded string.
     */
    constructor(buffer: Uint8Array) {
        this.buffer = buffer;
    }

    /**
     * Reads the next line from the buffer. The EOL character is not included
     * in the returned line.
     *
     * @returns Line text or null if the end of the buffer was already reached.
     */
    public readLine(): string | null {
        const lineStart = this.currentIndex;
        if (lineStart === -1) {
            return null;
        }

        const { buffer } = this;
        const eolPos = buffer.indexOf(BufferLineReader.EOL, lineStart);

        if (eolPos === -1) {
            // No more EOL characters: the rest of the buffer is the last line.
            this.currentIndex = -1;
            return BufferLineReader.decoder.decode(buffer.subarray(lineStart));
        }

        // The next line starts right after the EOL character.
        this.currentIndex = eolPos + 1;

        return BufferLineReader.decoder.decode(buffer.subarray(lineStart, eolPos));
    }

    /**
     * Returns the offset in the buffer from which the next line will be read.
     *
     * @returns - The current position or -1 if there's nothing to read.
     */
    public getCurrentPos(): number {
        return this.currentIndex;
    }
}
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,39 @@
11
import fs from 'fs';
2-
import { ILineReader } from './line-reader';
3-
import { StringLineReader } from './string-line-reader';
2+
import { type ILineReader } from './line-reader';
3+
import { BufferLineReader } from './buffer-line-reader';
44

55
/**
6-
* Reads file line by line
6+
* FileLineReader is a class responsible for reading file contents line by line.
77
*/
88
export class FileLineReader implements ILineReader {
99
/**
10-
* Temp implementation inner reader
10+
* FileLineReader relies on an internal BufferLineReader to provide the line
11+
* reading functionality.
1112
*/
12-
private readonly innerReader: StringLineReader;
13+
private readonly innerReader: BufferLineReader;
1314

1415
/**
15-
* Constructor
16-
* @param path
16+
* Constructor of the FileLineReader.
1717
*
18-
* @throws
18+
* @param path - Path to the file to read.
19+
* @throws Error if the file cannot be read.
1920
*/
2021
constructor(path: string) {
21-
const text = fs.readFileSync(path, 'utf8');
22-
this.innerReader = new StringLineReader(text);
22+
const buffer = fs.readFileSync(path);
23+
this.innerReader = new BufferLineReader(buffer);
2324
}
2425

2526
/**
26-
* Reads next line
27+
* Reads next line in the reader.
2728
*/
2829
public readLine(): string | null {
2930
return this.innerReader.readLine();
3031
}
32+
33+
/**
34+
* Returns the current position of this line reader.
35+
*/
36+
getCurrentPos(): number {
37+
return this.innerReader.getCurrentPos();
38+
}
3139
}

packages/tsurlfilter/src/filterlist/reader/line-reader.ts

+7-2
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,14 @@
33
*/
44
export interface ILineReader {
55
/**
6-
* Reads the next line
6+
* Reads the next line.
77
*
8-
* @return line string or null
8+
* @return line string or null.
99
*/
1010
readLine(): string | null;
11+
12+
/**
13+
* Returns the current position of this line reader.
14+
*/
15+
getCurrentPos(): number;
1116
}

0 commit comments

Comments
 (0)