Skip to content

Commit 706dbc1

Browse files
authored
fix: added experimental encoding priority list (#835)
1 parent 609d44d commit 706dbc1

File tree

2 files changed

+29
-6
lines changed

2 files changed

+29
-6
lines changed

package.json

+8
Original file line numberDiff line numberDiff line change
@@ -1071,6 +1071,14 @@
10711071
"description": "Try the experimental encoding detection",
10721072
"default": false
10731073
},
1074+
"svn.experimental.encoding_priority": {
1075+
"type":"array",
1076+
"description": "Encodings to prefer, in priority order, when the experimental encoding detection finds several candidates",
1077+
"default": [],
1078+
"examples": [
1079+
["UTF-8", "GB18030", "windows-1251"]
1080+
]
1081+
},
10741082
"svn.gravatar.icon_url": {
10751083
"type": "string",
10761084
"description": "Url for the gravatar icon using the <AUTHOR>, <AUTHOR_MD5> and <SIZE> placeholders",

src/encoding.ts

+21-6
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ const JSCHARDET_TO_ICONV_ENCODINGS: { [name: string]: string } = {
4646
big5: "cp950"
4747
};
4848

49+
/**
 * Reduces an encoding label to a canonical comparison key: every character
 * other than an ASCII letter or digit is removed and the remainder is
 * lower-cased, so "UTF-8" and "utf8" both become "utf8".
 *
 * @param name Encoding label to normalise.
 * @returns The lower-case, alphanumeric-only form of `name`.
 */
function normaliseEncodingName(name: string): string {
50+
return name.replace(/[^a-zA-Z0-9]/g, "").toLowerCase();
51+
}
52+
4953
export function detectEncoding(buffer: Buffer): string | null {
5054
const result = detectEncodingByBOM(buffer);
5155

@@ -58,9 +62,22 @@ export function detectEncoding(buffer: Buffer): string | null {
5862
false
5963
);
6064
if (experimental) {
61-
const detected = chardet.detect(buffer);
62-
if (detected) {
63-
return detected.replace(/[^a-zA-Z0-9]/g, "").toLocaleLowerCase();
65+
const detected = chardet.detectAll(buffer);
66+
const encodingPriorities = configuration.get<string[]>(
67+
"experimental.encoding_priority",
68+
[]
69+
);
70+
71+
if (!detected) {
72+
return null;
73+
}
74+
75+
for (const pri of encodingPriorities) {
76+
for (const det of detected) {
77+
if (normaliseEncodingName(pri) === normaliseEncodingName(det.name)) {
78+
return normaliseEncodingName(det.name);
79+
}
80+
}
6481
}
6582

6683
return null;
@@ -80,9 +97,7 @@ export function detectEncoding(buffer: Buffer): string | null {
8097
return null;
8198
}
8299

83-
const normalizedEncodingName = encoding
84-
.replace(/[^a-zA-Z0-9]/g, "")
85-
.toLowerCase();
100+
const normalizedEncodingName = normaliseEncodingName(encoding);
86101
const mapped = JSCHARDET_TO_ICONV_ENCODINGS[normalizedEncodingName];
87102

88103
return mapped || normalizedEncodingName;

0 commit comments

Comments
 (0)