Skip to content

Commit 22e40f9

Browse files
edgardmessias authored and JohnstonCode committed
fix: Fixed encoding detection for gutter (close #526) (#590)
1 parent 73f0ec6 commit 22e40f9

File tree

6 files changed

+182
-61
lines changed

6 files changed

+182
-61
lines changed

package-lock.json

+32-12
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@
4646
"style-check": "npx prettylint src/**/*.ts"
4747
},
4848
"dependencies": {
49-
"is-utf8": "^0.2.1",
5049
"minimatch": "^3.0.4",
5150
"original-fs": "^1.0.0",
5251
"semver": "^6.0.0",

src/common/types.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ export enum PropStatus {
190190

191191
export interface ICpOptions extends SpawnOptions {
192192
cwd?: string;
193-
encoding?: string;
193+
encoding?: string | null;
194194
log?: boolean;
195195
username?: string;
196196
password?: string;

src/encoding.ts

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import { jschardet } from "./vscodeModules";
2+
3+
// Module-level jschardet tuning, applied once when this module is loaded.
// NOTE(review): MINIMUM_THRESHOLD is presumably the lowest confidence jschardet
// will report at all; detectEncoding() below applies its own 0.8 cut-off.
jschardet.Constants.MINIMUM_THRESHOLD = 0.2;

// NOTE(review): presumably a small bias for the MacCyrillic model so it
// resolves ties against other Cyrillic models — confirm against jschardet.
jschardet.MacCyrillicModel.mTypicalPositiveRatio += 0.001;
5+
6+
/**
 * Detects a text encoding from a leading byte-order mark (BOM).
 *
 * @param buffer Raw bytes to inspect; at most the first three bytes are read.
 * @returns `"utf16be"`, `"utf16le"` or `"utf8"` when the matching BOM is
 *   present, otherwise `null` (including for a missing or too-short buffer).
 */
function detectEncodingByBOM(buffer: Buffer): string | null {
  // Every BOM recognised here is at least two bytes long.
  if (!buffer || buffer.length < 2) {
    return null;
  }

  const b0 = buffer.readUInt8(0);
  const b1 = buffer.readUInt8(1);

  // UTF-16 BE: FE FF
  if (b0 === 0xfe && b1 === 0xff) {
    return "utf16be";
  }

  // UTF-16 LE: FF FE
  if (b0 === 0xff && b1 === 0xfe) {
    return "utf16le";
  }

  // The UTF-8 BOM needs a third byte.
  if (buffer.length < 3) {
    return null;
  }

  const b2 = buffer.readUInt8(2);

  // UTF-8: EF BB BF
  if (b0 === 0xef && b1 === 0xbb && b2 === 0xbf) {
    return "utf8";
  }

  return null;
}
37+
38+
// Detected encoding names that detectEncoding() discards rather than trusts;
// compared against the lower-cased name reported by jschardet.
const IGNORE_ENCODINGS = ["ascii", "utf-8", "utf-16", "utf-32"];

// Lookup from a normalised detected name (lower-case, alphanumerics only) to
// the name returned to callers when the two differ.
// NOTE(review): values are presumably the iconv spellings — confirm.
const JSCHARDET_TO_ICONV_ENCODINGS: { [name: string]: string } = {
  ibm866: "cp866",
  big5: "cp950"
};
44+
45+
/**
 * Guesses the text encoding of `buffer`.
 *
 * A byte-order mark wins when present. Otherwise jschardet is consulted and
 * its guess is accepted only when the confidence is at least 0.8 and the
 * detected name is not listed in `IGNORE_ENCODINGS`.
 *
 * @param buffer Raw bytes to analyse.
 * @returns The encoding name with non-alphanumerics stripped and lower-cased
 *   (translated through `JSCHARDET_TO_ICONV_ENCODINGS` when an entry exists),
 *   or `null` when no trustworthy guess is available.
 */
export function detectEncoding(buffer: Buffer): string | null {
  const bomEncoding = detectEncodingByBOM(buffer);

  if (bomEncoding) {
    return bomEncoding;
  }

  const detected = jschardet.detect(buffer);

  if (!detected || !detected.encoding || detected.confidence < 0.8) {
    return null;
  }

  const encoding = detected.encoding;

  // Ignore encodings that cannot guess correctly
  // (http://chardet.readthedocs.io/en/latest/supported-encodings.html)
  if (IGNORE_ENCODINGS.includes(encoding.toLowerCase())) {
    return null;
  }

  // Strip punctuation and lower-case, e.g. "windows-1252" -> "windows1252".
  const normalizedEncodingName = encoding
    .replace(/[^a-zA-Z0-9]/g, "")
    .toLowerCase();
  const mapped = JSCHARDET_TO_ICONV_ENCODINGS[normalizedEncodingName];

  return mapped || normalizedEncodingName;
}

src/svn.ts

+21-31
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import * as cp from "child_process";
22
import { EventEmitter } from "events";
3-
import isUtf8 = require("is-utf8");
43
import * as proc from "process";
54
import { Readable } from "stream";
65
import {
@@ -9,12 +8,13 @@ import {
98
IExecutionResult,
109
ISvnOptions
1110
} from "./common/types";
11+
import * as encodeUtil from "./encoding";
1212
import { configuration } from "./helpers/configuration";
1313
import { parseInfoXml } from "./infoParser";
1414
import SvnError from "./svnError";
1515
import { Repository } from "./svnRepository";
1616
import { dispose, IDisposable, toDisposable } from "./util";
17-
import { iconv, jschardet } from "./vscodeModules";
17+
import { iconv } from "./vscodeModules";
1818

1919
export const svnErrorCodes: { [key: string]: string } = {
2020
AuthorizationFailed: "E170001",
@@ -102,9 +102,14 @@ export class Svn {
102102
// Force non interactive environment
103103
args.push("--non-interactive");
104104

105-
let encoding = options.encoding || "";
105+
let encoding: string | undefined | null = options.encoding;
106106
delete options.encoding;
107107

108+
// SVN with '--xml' always return 'UTF-8', and jschardet detects this encoding: 'TIS-620'
109+
if (args.includes("--xml")) {
110+
encoding = "utf8";
111+
}
112+
108113
const defaults: cp.SpawnOptions = {
109114
env: proc.env
110115
};
@@ -156,35 +161,20 @@ export class Svn {
156161

157162
dispose(disposables);
158163

159-
// SVN with '--xml' always return 'UTF-8', and jschardet detects this encoding: 'TIS-620'
160-
if (args.includes("--xml")) {
161-
encoding = "utf8";
162-
} else if (encoding === "") {
163-
encoding = "utf8"; // Initial encoding
164-
165-
const defaultEncoding = configuration.get<string>("default.encoding");
166-
if (defaultEncoding) {
167-
if (!iconv.encodingExists(defaultEncoding)) {
168-
this.logOutput(
169-
"svn.default.encoding: Invalid Parameter: '" +
170-
defaultEncoding +
171-
"'.\n"
172-
);
173-
} else if (!isUtf8(stdout)) {
174-
encoding = defaultEncoding;
175-
}
176-
} else {
177-
jschardet.MacCyrillicModel.mTypicalPositiveRatio += 0.001;
178-
179-
const encodingGuess = jschardet.detect(stdout);
180-
181-
if (
182-
encodingGuess.confidence > 0.8 &&
183-
iconv.encodingExists(encodingGuess.encoding)
184-
) {
185-
encoding = encodingGuess.encoding;
186-
}
164+
if (!encoding) {
165+
encoding = encodeUtil.detectEncoding(stdout);
166+
}
167+
168+
// if not detected
169+
if (!encoding) {
170+
encoding = configuration.get<string>("default.encoding");
171+
}
172+
173+
if (!iconv.encodingExists(encoding)) {
174+
if (encoding) {
175+
console.warn(`SVN: The encoding "${encoding}" is invalid`);
187176
}
177+
encoding = "utf8";
188178
}
189179

190180
const decodedStdout = iconv.decode(stdout, encoding);

0 commit comments

Comments
 (0)