From 4b0cb913d9ac999d60337677f4b0aca7bc7c4dcc Mon Sep 17 00:00:00 2001 From: Blaine Bublitz Date: Mon, 18 Apr 2022 14:20:26 -0700 Subject: [PATCH 1/5] lib: ensure `TextDecoder` only removes `utf8` BOM on `utf8` encoding --- lib/internal/encoding.js | 1 + .../test-whatwg-encoding-custom-textdecoder.js | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index def0e9223b84c7..d1ae814ae1fbe6 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -512,6 +512,7 @@ function makeTextDecoderJS() { this[kHandle].write(input); if (result.length > 0 && + this[kEncoding] === 'utf-8' && !this[kBOMSeen] && !(this[kFlags] & CONVERTER_FLAGS_IGNORE_BOM)) { // If the very first result in the stream is a BOM, and we are not diff --git a/test/parallel/test-whatwg-encoding-custom-textdecoder.js b/test/parallel/test-whatwg-encoding-custom-textdecoder.js index 1fa65164c70678..451a08dbe94bf9 100644 --- a/test/parallel/test-whatwg-encoding-custom-textdecoder.js +++ b/test/parallel/test-whatwg-encoding-custom-textdecoder.js @@ -34,6 +34,21 @@ assert(TextDecoder); }); } +// Test TextDecoder, UTF-16LE, fatal: false, ignoreBOM: false +{ + ['utf-16', 'utf-16le'].forEach((i) => { + // This is a utf16le buffer with a utf8 BOM, + // which should not be removed + const buf = Buffer.from([0xef, 0xbb, 0xbf, 0x74, 0x00, 0x65, + 0x00, 0x73, 0x00, 0x74, 0x00, 0xac, + 0x20]) + const dec = new TextDecoder(i); + assert.strictEqual(dec.encoding, 'utf-16-le'); + const res = dec.decode(buf); + assert.strictEqual(res, '\ufefftest€'); + }); +} + // Test TextDecoder, UTF-8, fatal: false, ignoreBOM: true { ['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => { From cdb76ff34b3020102b560d19265af9fcd0383124 Mon Sep 17 00:00:00 2001 From: Blaine Bublitz Date: Mon, 18 Apr 2022 14:56:29 -0700 Subject: [PATCH 2/5] linting --- test/parallel/test-whatwg-encoding-custom-textdecoder.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/parallel/test-whatwg-encoding-custom-textdecoder.js b/test/parallel/test-whatwg-encoding-custom-textdecoder.js index 451a08dbe94bf9..007ebec8b226b6 100644 --- a/test/parallel/test-whatwg-encoding-custom-textdecoder.js +++ b/test/parallel/test-whatwg-encoding-custom-textdecoder.js @@ -40,8 +40,8 @@ assert(TextDecoder); // This is a utf16le buffer with a utf8 BOM, // which should not be removed const buf = Buffer.from([0xef, 0xbb, 0xbf, 0x74, 0x00, 0x65, - 0x00, 0x73, 0x00, 0x74, 0x00, 0xac, - 0x20]) + 0x00, 0x73, 0x00, 0x74, 0x00, 0xac, + 0x20]); const dec = new TextDecoder(i); assert.strictEqual(dec.encoding, 'utf-16-le'); const res = dec.decode(buf); From 9388ab4622600d8b29541eecad5889a5f0aa4c85 Mon Sep 17 00:00:00 2001 From: Blaine Bublitz Date: Mon, 18 Apr 2022 15:32:50 -0700 Subject: [PATCH 3/5] typo in encoding --- test/parallel/test-whatwg-encoding-custom-textdecoder.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/parallel/test-whatwg-encoding-custom-textdecoder.js b/test/parallel/test-whatwg-encoding-custom-textdecoder.js index 007ebec8b226b6..4afcbc29f9b982 100644 --- a/test/parallel/test-whatwg-encoding-custom-textdecoder.js +++ b/test/parallel/test-whatwg-encoding-custom-textdecoder.js @@ -43,7 +43,7 @@ assert(TextDecoder); 0x00, 0x73, 0x00, 0x74, 0x00, 0xac, 0x20]); const dec = new TextDecoder(i); - assert.strictEqual(dec.encoding, 'utf-16-le'); + assert.strictEqual(dec.encoding, 'utf-16le'); const res = dec.decode(buf); assert.strictEqual(res, '\ufefftest€'); }); From 72ea0dc0633a5fdafd1480ea6cd13a0fe8c7448e Mon Sep 17 00:00:00 2001 From: Blaine Bublitz Date: Tue, 19 Apr 2022 11:16:48 -0700 Subject: [PATCH 4/5] Use correctly decoded string --- test/parallel/test-whatwg-encoding-custom-textdecoder.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/parallel/test-whatwg-encoding-custom-textdecoder.js b/test/parallel/test-whatwg-encoding-custom-textdecoder.js index 4afcbc29f9b982..b36a3770a6989d 100644 --- a/test/parallel/test-whatwg-encoding-custom-textdecoder.js +++ b/test/parallel/test-whatwg-encoding-custom-textdecoder.js @@ -45,7 +45,7 @@ assert(TextDecoder); const dec = new TextDecoder(i); assert.strictEqual(dec.encoding, 'utf-16le'); const res = dec.decode(buf); - assert.strictEqual(res, '\ufefftest€'); + assert.strictEqual(res, '믯璿攀猀琀가'); }); } From be430d586e3108018196dd323b42d9fdaa84f74c Mon Sep 17 00:00:00 2001 From: Blaine Bublitz Date: Tue, 19 Apr 2022 14:23:59 -0700 Subject: [PATCH 5/5] might need encoding for Buffer.from --- test/parallel/test-whatwg-encoding-custom-textdecoder.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/parallel/test-whatwg-encoding-custom-textdecoder.js b/test/parallel/test-whatwg-encoding-custom-textdecoder.js index b36a3770a6989d..9b6c59ff118da1 100644 --- a/test/parallel/test-whatwg-encoding-custom-textdecoder.js +++ b/test/parallel/test-whatwg-encoding-custom-textdecoder.js @@ -41,7 +41,7 @@ assert(TextDecoder); // which should not be removed const buf = Buffer.from([0xef, 0xbb, 0xbf, 0x74, 0x00, 0x65, 0x00, 0x73, 0x00, 0x74, 0x00, 0xac, - 0x20]); + 0x20], 'utf16le'); const dec = new TextDecoder(i); assert.strictEqual(dec.encoding, 'utf-16le'); const res = dec.decode(buf);