Skip to content

Commit 34edcca

Browse files
authored
feat: added new experimental encoding detection (#831)
1 parent 33e22e0 commit 34edcca

File tree

3 files changed

+35 -1 lines changed

3 files changed

+35 -1 lines changed

package.json

+7
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@
4949
"watch:css": "yarn run build:css -w"
5050
},
5151
"dependencies": {
52+
"chardet": "^0.8.0",
5253
"dayjs": "^1.8.20",
5354
"minimatch": "^3.0.4",
5455
"original-fs": "^1.0.0",
@@ -59,6 +60,7 @@
5960
"devDependencies": {
6061
"@semantic-release/changelog": "^5.0.0",
6162
"@semantic-release/git": "^9.0.0",
63+
"@types/chardet": "^0.8.0",
6264
"@types/glob": "^7.1.1",
6365
"@types/mocha": "^7.0.0",
6466
"@types/node": "^12.11.7",
@@ -1246,6 +1248,11 @@
12461248
"type": "boolean",
12471249
"description": "Set to ignore externals definitions on update (add --ignore-externals)",
12481250
"default": true
1251+
},
1252+
"svn.experimental.detect_encoding": {
1253+
"type": "boolean",
1254+
"description": "Try the experimental encoding detection",
1255+
"default": false
12491256
}
12501257
}
12511258
}

src/encoding.ts

+16 -1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,6 @@
11
import { jschardet } from "./vscodeModules";
2+
import * as chardet from "chardet";
3+
import { configuration } from "./helpers/configuration";
24

35
if (jschardet.Constants) {
46
jschardet.Constants.MINIMUM_THRESHOLD = 0.2;
@@ -51,7 +53,20 @@ export function detectEncoding(buffer: Buffer): string | null {
5153
return result;
5254
}
5355

54-
const detected = jschardet.detect(buffer);
56+
const experimental = configuration.get<boolean>(
57+
"experimental.detect_encoding",
58+
false
59+
);
60+
if (experimental) {
61+
const detected = chardet.detect(buffer);
62+
if (detected) {
63+
return detected.replace(/[^a-zA-Z0-9]/g, "").toLocaleLowerCase();
64+
}
65+
66+
return null;
67+
}
68+
69+
const detected = jschardet.detect(buffer.slice(0, 512 * 128)); // ensure to limit buffer for guessing due to https://github.com/aadsm/jschardet/issues/53
5570

5671
if (!detected || !detected.encoding || detected.confidence < 0.8) {
5772
return null;

yarn.lock

+12
Original file line number | Diff line number | Diff line change
@@ -247,6 +247,13 @@
247247
resolved "https://registry.yarnpkg.com/@tootallnate/once/-/once-1.0.0.tgz#9c13c2574c92d4503b005feca8f2e16cc1611506"
248248
integrity sha512-KYyTT/T6ALPkIRd2Ge080X/BsXvy9O0hcWTtMWkPvwAwF99+vn6Dv4GzrFT/Nn1LePr+FFDbRXXlqmsy9lw2zA==
249249

250+
"@types/chardet@^0.8.0":
251+
version "0.8.0"
252+
resolved "https://registry.yarnpkg.com/@types/chardet/-/chardet-0.8.0.tgz#40932a9d751bb1ff22e7403312faaf81755ad432"
253+
integrity sha512-0PFX0r+bt2W6np4tZzF2Gh28pPLTM7lgjLMs0DJnV/Y4rPI3kLJCwtwRTb4aGl1iHmuF8TEmizzfJOw/7XQRfw==
254+
dependencies:
255+
"@types/node" "*"
256+
250257
"@types/color-name@^1.1.1":
251258
version "1.1.1"
252259
resolved "https://registry.yarnpkg.com/@types/color-name/-/color-name-1.1.1.tgz#1c1261bbeaa10a8055bbc5d8ab84b7b2afc846a0"
@@ -901,6 +908,11 @@ chardet@^0.7.0:
901908
resolved "https://registry.yarnpkg.com/chardet/-/chardet-0.7.0.tgz#90094849f0937f2eedc2425d0d28a9e5f0cbad9e"
902909
integrity sha512-mT8iDcrh03qDGRRmoA2hmBJnxpllMR+0/0qlzjqZES6NdiWDcZkCNAk4rPFZ9Q85r27unkiNNg8ZOiwZXBHwcA==
903910

911+
chardet@^0.8.0:
912+
version "0.8.0"
913+
resolved "https://registry.yarnpkg.com/chardet/-/chardet-0.8.0.tgz#215e9e457296aa88fb0c38b010fd7a7e20482ed3"
914+
integrity sha512-fRAe54sDSPvCz9I3puKUoUpLBEIUjlwBoNyNcD2eAiP5Ybw2iXnrT7w15hfkNywosXFNllWwvOKsxl7UUCKQaQ==
915+
904916
cheerio@^1.0.0-rc.1:
905917
version "1.0.0-rc.3"
906918
resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0-rc.3.tgz#094636d425b2e9c0f4eb91a46c05630c9a1a8bf6"

0 commit comments

Comments (0)