From c48c4ec6f062769725d9c7bf1e0a16d8c1230f2f Mon Sep 17 00:00:00 2001
From: Sergey Nechaev
Date: Tue, 13 Aug 2024 01:25:11 +0700
Subject: [PATCH] Percent decode

Replace the `decodeURIComponent`-based deserialization of query
components with a percent-decoder followed by a lenient UTF-8 decoder:
a `%` that does not start a valid escape stays literal, ill-formed
UTF-8 becomes U+FFFD instead of leaving the whole component encoded,
and `+` is still read as a space.

---
 .../web.url-search-params.constructor.js      | 131 ++++++++++++++----
 tests/unit-global/web.url-search-params.js    |  28 ++++
 tests/unit-pure/web.url-search-params.js      |  28 ++++
 3 files changed, 160 insertions(+), 27 deletions(-)

diff --git a/packages/core-js/modules/web.url-search-params.constructor.js b/packages/core-js/modules/web.url-search-params.constructor.js
index e9e3af50d934..2ce425a0605b 100644
--- a/packages/core-js/modules/web.url-search-params.constructor.js
+++ b/packages/core-js/modules/web.url-search-params.constructor.js
@@ -1,9 +1,11 @@
 'use strict';
 // TODO: in core-js@4, move /modules/ dependencies to public entries for better optimization by tools like `preset-env`
 require('../modules/es.array.iterator');
+require('../modules/es.string.from-code-point');
 var $ = require('../internals/export');
 var globalThis = require('../internals/global-this');
 var safeGetBuiltIn = require('../internals/safe-get-built-in');
+var getBuiltIn = require('../internals/get-built-in');
 var call = require('../internals/function-call');
 var uncurryThis = require('../internals/function-uncurry-this');
 var DESCRIPTORS = require('../internals/descriptors');
@@ -31,6 +33,8 @@ var validateArgumentsLength = require('../internals/validate-arguments-length');
 var wellKnownSymbol = require('../internals/well-known-symbol');
 var arraySort = require('../internals/array-sort');
 
+var $includes = require('../internals/array-includes').includes;
+
 var ITERATOR = wellKnownSymbol('iterator');
 var URL_SEARCH_PARAMS = 'URLSearchParams';
 var URL_SEARCH_PARAMS_ITERATOR = URL_SEARCH_PARAMS + 'Iterator';
@@ -43,9 +47,7 @@ var NativeRequest = safeGetBuiltIn('Request');
 var Headers = safeGetBuiltIn('Headers');
 var RequestPrototype = NativeRequest && NativeRequest.prototype;
 var HeadersPrototype = Headers && Headers.prototype;
-var RegExp = globalThis.RegExp;
 var TypeError = globalThis.TypeError;
-var decodeURIComponent = globalThis.decodeURIComponent;
 var encodeURIComponent = globalThis.encodeURIComponent;
 var charAt = uncurryThis(''.charAt);
 var join = uncurryThis([].join);
@@ -56,32 +58,107 @@ var splice = uncurryThis([].splice);
 var split = uncurryThis(''.split);
 var stringSlice = uncurryThis(''.slice);
 
 var plus = /\+/g;
-var sequences = Array(4);
-
-var percentSequence = function (bytes) {
-  return sequences[bytes - 1] || (sequences[bytes - 1] = RegExp('((?:%[\\da-f]{2}){' + bytes + '})', 'gi'));
-};
-
-var percentDecode = function (sequence) {
-  try {
-    return decodeURIComponent(sequence);
-  } catch (error) {
-    return sequence;
-  }
-};
-
-var deserialize = function (it) {
-  var result = replace(it, plus, ' ');
-  var bytes = 4;
-  try {
-    return decodeURIComponent(result);
-  } catch (error) {
-    while (bytes) {
-      result = replace(result, percentSequence(bytes--), percentDecode);
-    }
-    return result;
-  }
+var FALLBACK_REPLACER = '\uFFFD';
+var charCodeAt = uncurryThis(''.charCodeAt);
+var fromCharCode = String.fromCharCode;
+var fromCodePoint = getBuiltIn('String', 'fromCodePoint');
+
+// returns the value of an ASCII hex digit code unit, -1 for anything else (including `NaN`)
+var hexDigitValue = function (code) {
+  if (code >= 0x30 && code <= 0x39) return code - 0x30; // 0-9
+  if (code >= 0x41 && code <= 0x46) return code - 0x37; // A-F
+  if (code >= 0x61 && code <= 0x66) return code - 0x57; // a-f
+  return -1;
+};
+
+// returns the octet written as exactly two hex digits at `start`, -1 if they are missing
+// `parseInt` is too lenient for this: it accepts `2s`, ` 2` and `-2`
+var parseHexOctet = function (string, start) {
+  var high = hexDigitValue(charCodeAt(string, start));
+  var low = hexDigitValue(charCodeAt(string, start + 1));
+  return high < 0 || low < 0 ? -1 : high << 4 | low;
+};
+
+// percent-decode and UTF-8 decode (without BOM) `input` in a single pass
+// https://url.spec.whatwg.org/#percent-decode
+// https://encoding.spec.whatwg.org/#utf-8-decoder
+// `preserveEscapeSet` is an optional array of ASCII characters whose escapes are kept as is
+/* eslint-disable max-statements -- TODO */
+var decode = function (input, preserveEscapeSet) {
+  var length = input.length;
+  var result = '';
+  var i = 0;
+  // UTF-8 decoder state
+  var codePoint = 0;
+  var bytesNeeded = 0;
+  var lowerBoundary = 0x80;
+  var upperBoundary = 0xBF;
+  var chr, octet;
+
+  while (i < length) {
+    chr = charAt(input, i);
+    // -1 for literal characters, including `%` that does not start a valid escape
+    octet = chr === '%' ? parseHexOctet(input, i + 1) : -1;
+
+    if (bytesNeeded) {
+      if (octet >= lowerBoundary && octet <= upperBoundary) {
+        codePoint = codePoint << 6 | octet & 0x3F;
+        lowerBoundary = 0x80;
+        upperBoundary = 0xBF;
+        i += 3;
+        if (!--bytesNeeded) result += fromCodePoint(codePoint);
+        continue;
+      }
+      // truncated sequence: emit one U+FFFD and reprocess the current position from a clean state
+      result += FALLBACK_REPLACER;
+      bytesNeeded = 0;
+      lowerBoundary = 0x80;
+      upperBoundary = 0xBF;
+    }
+
+    if (octet < 0) {
+      result += chr;
+      i++;
+    } else if (octet < 0x80) {
+      chr = fromCharCode(octet);
+      if (preserveEscapeSet && $includes(preserveEscapeSet, chr, undefined)) chr = stringSlice(input, i, i + 3);
+      result += chr;
+      i += 3;
+    } else {
+      i += 3;
+      if (octet >= 0xC2 && octet <= 0xDF) {
+        bytesNeeded = 1;
+        codePoint = octet & 0x1F;
+      } else if (octet >= 0xE0 && octet <= 0xEF) {
+        // the range of the second byte is narrowed to reject overlong forms and surrogates
+        if (octet === 0xE0) lowerBoundary = 0xA0;
+        else if (octet === 0xED) upperBoundary = 0x9F;
+        bytesNeeded = 2;
+        codePoint = octet & 0x0F;
+      } else if (octet >= 0xF0 && octet <= 0xF4) {
+        // the range of the second byte is narrowed to reject overlong forms and code points above U+10FFFF
+        if (octet === 0xF0) lowerBoundary = 0x90;
+        else if (octet === 0xF4) upperBoundary = 0x8F;
+        bytesNeeded = 3;
+        codePoint = octet & 0x07;
+      } else {
+        // a continuation byte without a lead byte or a byte that never occurs in UTF-8
+        result += FALLBACK_REPLACER;
+      }
+    }
+  }
+
+  // the input ended in the middle of a multi-byte sequence
+  if (bytesNeeded) result += FALLBACK_REPLACER;
+
+  return result;
+};
+
+// https://url.spec.whatwg.org/#concept-urlencoded-parser
+// `+` means a space in `application/x-www-form-urlencoded`, `%2B` is decoded to a literal `+` afterwards
+var deserialize = function (it) {
+  return decode(replace(it, plus, ' '));
 };
 
 var find = /[!'()~]|%20/g;
diff --git a/tests/unit-global/web.url-search-params.js b/tests/unit-global/web.url-search-params.js
index 2e74b74a4c07..f4aa62e33d4b 100644
--- a/tests/unit-global/web.url-search-params.js
+++ b/tests/unit-global/web.url-search-params.js
@@ -100,6 +100,34 @@ QUnit.test('URLSearchParams', assert => {
   params = new URLSearchParams(params.toString());
   assert.same(params.get('query'), '+15555555555', 'parse encoded +');
 
+  params = new URLSearchParams('b=%2sf%2a');
+  assert.same(params.get('b'), '%2sf*', 'parse encoded %2sf%2a');
+  params = new URLSearchParams('b=%%2a');
+  assert.same(params.get('b'), '%*', 'parse encoded b=%%2a');
+
+  params = new URLSearchParams('a=b\u2384');
+  assert.same(params.get('a'), 'b\u2384', 'parse \u2384');
+  params = new URLSearchParams('a\u2384b=c');
+  assert.same(params.get('a\u2384b'), 'c', 'parse \u2384');
+
+  params = new URLSearchParams('a=b%e2%8e%84');
+  assert.same(params.get('a'), 'b\u2384', 'parse b%e2%8e%84');
+  params = new URLSearchParams('a%e2%8e%84b=c');
+  assert.same(params.get('a\u2384b'), 'c', 'parse b%e2%8e%84');
+
+  params = new URLSearchParams('a=b\uD83D\uDCA9c');
+  assert.same(params.get('a'), 'b\uD83D\uDCA9c', 'parse \uD83D\uDCA9');
+  params = new URLSearchParams('a\uD83D\uDCA9b=c');
+  assert.same(params.get('a\uD83D\uDCA9b'), 'c', 'parse \uD83D\uDCA9');
+
+  params = new URLSearchParams('a=b%f0%9f%92%a9c');
+  assert.same(params.get('a'), 'b\uD83D\uDCA9c', 'parse %f0%9f%92%a9');
+  params = new URLSearchParams('a%f0%9f%92%a9b=c');
+  assert.same(params.get('a\uD83D\uDCA9b'), 'c', 'parse %f0%9f%92%a9');
+
+  assert.same(String(new URLSearchParams('%C2')), '%EF%BF%BD=');
+  assert.same(String(new URLSearchParams('%F0%9F%D0%90')), '%EF%BF%BD%D0%90=');
+
   const testData = [
     { input: '?a=%', output: [['a', '%']], name: 'handling %' },
     { input: { '+': '%C2' }, output: [['+', '%C2']], name: 'object with +' },
diff --git a/tests/unit-pure/web.url-search-params.js b/tests/unit-pure/web.url-search-params.js
index aa50dd9d3a3c..8d5dc59fd48a 100644
--- a/tests/unit-pure/web.url-search-params.js
+++ b/tests/unit-pure/web.url-search-params.js
@@ -102,6 +102,34 @@ QUnit.test('URLSearchParams', assert => {
   params = new URLSearchParams(params.toString());
   assert.same(params.get('query'), '+15555555555', 'parse encoded +');
 
+  params = new URLSearchParams('b=%2sf%2a');
+  assert.same(params.get('b'), '%2sf*', 'parse encoded %2sf%2a');
+  params = new URLSearchParams('b=%%2a');
+  assert.same(params.get('b'), '%*', 'parse encoded b=%%2a');
+
+  params = new URLSearchParams('a=b\u2384');
+  assert.same(params.get('a'), 'b\u2384', 'parse \u2384');
+  params = new URLSearchParams('a\u2384b=c');
+  assert.same(params.get('a\u2384b'), 'c', 'parse \u2384');
+
+  params = new URLSearchParams('a=b%e2%8e%84');
+  assert.same(params.get('a'), 'b\u2384', 'parse b%e2%8e%84');
+  params = new URLSearchParams('a%e2%8e%84b=c');
+  assert.same(params.get('a\u2384b'), 'c', 'parse b%e2%8e%84');
+
+  params = new URLSearchParams('a=b\uD83D\uDCA9c');
+  assert.same(params.get('a'), 'b\uD83D\uDCA9c', 'parse \uD83D\uDCA9');
+  params = new URLSearchParams('a\uD83D\uDCA9b=c');
+  assert.same(params.get('a\uD83D\uDCA9b'), 'c', 'parse \uD83D\uDCA9');
+
+  params = new URLSearchParams('a=b%f0%9f%92%a9c');
+  assert.same(params.get('a'), 'b\uD83D\uDCA9c', 'parse %f0%9f%92%a9');
+  params = new URLSearchParams('a%f0%9f%92%a9b=c');
+  assert.same(params.get('a\uD83D\uDCA9b'), 'c', 'parse %f0%9f%92%a9');
+
+  assert.same(String(new URLSearchParams('%C2')), '%EF%BF%BD=');
+  assert.same(String(new URLSearchParams('%F0%9F%D0%90')), '%EF%BF%BD%D0%90=');
+
   const testData = [
     { input: '?a=%', output: [['a', '%']], name: 'handling %' },
     { input: { '+': '%C2' }, output: [['+', '%C2']], name: 'object with +' },