Skip to content

Commit

Permalink
Percent decode
Browse files Browse the repository at this point in the history
  • Loading branch information
slowcheetah committed Aug 12, 2024
1 parent 76460ce commit c48c4ec
Show file tree
Hide file tree
Showing 3 changed files with 177 additions and 20 deletions.
141 changes: 121 additions & 20 deletions packages/core-js/modules/web.url-search-params.constructor.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
'use strict';
// TODO: in core-js@4, move /modules/ dependencies to public entries for better optimization by tools like `preset-env`
require('../modules/es.array.iterator');
require('../modules/es.string.from-code-point');
var $ = require('../internals/export');
var globalThis = require('../internals/global-this');
var safeGetBuiltIn = require('../internals/safe-get-built-in');
var getBuiltIn = require('../internals/get-built-in');
var call = require('../internals/function-call');
var uncurryThis = require('../internals/function-uncurry-this');
var DESCRIPTORS = require('../internals/descriptors');
Expand Down Expand Up @@ -31,6 +33,8 @@ var validateArgumentsLength = require('../internals/validate-arguments-length');
var wellKnownSymbol = require('../internals/well-known-symbol');
var arraySort = require('../internals/array-sort');

var $includes = require('../internals/array-includes').includes;

var ITERATOR = wellKnownSymbol('iterator');
var URL_SEARCH_PARAMS = 'URLSearchParams';
var URL_SEARCH_PARAMS_ITERATOR = URL_SEARCH_PARAMS + 'Iterator';
Expand All @@ -43,9 +47,7 @@ var NativeRequest = safeGetBuiltIn('Request');
var Headers = safeGetBuiltIn('Headers');
var RequestPrototype = NativeRequest && NativeRequest.prototype;
var HeadersPrototype = Headers && Headers.prototype;
var RegExp = globalThis.RegExp;
var TypeError = globalThis.TypeError;
var decodeURIComponent = globalThis.decodeURIComponent;
var encodeURIComponent = globalThis.encodeURIComponent;
var charAt = uncurryThis(''.charAt);
var join = uncurryThis([].join);
Expand All @@ -56,32 +58,131 @@ var splice = uncurryThis([].splice);
var split = uncurryThis(''.split);
var stringSlice = uncurryThis(''.slice);

var plus = /\+/g;
var sequences = Array(4);
// U+FFFD REPLACEMENT CHARACTER - appended to the output in place of escape sequences that can't be decoded
var FALLBACK_REPLACER = '\uFFFD';
// string methods wrapped with `uncurryThis`: call sites pass the string as the first argument, e.g. `charCodeAt(input, i)`
var charCodeAt = uncurryThis(''.charCodeAt);
var substring = uncurryThis(''.substring);
var indexOf = uncurryThis(''.indexOf);
// static method, cached once at module load
var fromCharCode = String.fromCharCode;
// NOTE(review): presumably resolves to `String.fromCodePoint`, provided by the `es.string.from-code-point` module required above - confirm
var fromCodePoint = getBuiltIn('String', 'fromCodePoint');
// local alias of the global `parseInt`
var $parseInt = parseInt;

// Reads the two characters of `string` starting at index `start` as one hexadecimal octet.
// Returns the octet value (0..255), or `NaN` unless both characters are hex digits.
// Each digit is parsed on its own: `parseInt` of a two-character slice silently stops at the
// first invalid character (`'2s'` -> 2) and skips leading whitespace and signs, so it can't be
// used to validate an escape. A single character is either a hex digit or yields `NaN`,
// and `NaN` propagates through the arithmetic below.
var parseHexOctet = function (string, start) {
  var high = $parseInt(charAt(string, start), 16);
  var low = $parseInt(charAt(string, start + 1), 16);
  return high * 16 + low;
};

var percentSequence = function (bytes) {
return sequences[bytes - 1] || (sequences[bytes - 1] = RegExp('((?:%[\\da-f]{2}){' + bytes + '})', 'gi'));
// Counts the consecutive 1 bits at the top of an 8-bit `octet`, scanning from bit 7 downwards.
// For a UTF-8 octet this classifies it: 0 - single-octet (ASCII), 1 - continuation octet,
// 2..4 - lead octet of a sequence of that many octets, 5..8 - never valid.
// Bit masks are used instead of a binary string because the string form has no leading
// zeros, so the position of its first `0` says nothing about the top bits of small octets.
var getLeadingOnes = function (octet) {
  var count = 0;
  for (var mask = 0x80; mask > 0 && (octet & mask) !== 0; mask >>= 1) {
    count++;
  }
  return count;
};

var percentDecode = function (sequence) {
try {
return decodeURIComponent(sequence);
} catch (error) {
return sequence;
// Combines the octets of ONE UTF-8 sequence into a code point.
// `octets` - array of 1..4 numbers, lead octet first.
// Returns the code point, or `null` when the sequence does not encode a Unicode scalar value:
//   - it has 0 or more than 4 octets
//   - a trailing octet is not of the form 10xxxxxx
//   - the encoding is overlong (the value would fit into a shorter sequence)
//   - the value is a surrogate (U+D800..U+DFFF) or is above U+10FFFF
// A single octet is returned as is. The marker bits of the lead octet are masked off,
// not validated - the caller chooses the sequence length from them.
var utf8Decode = function (octets) {
  var length = octets.length;
  if (length < 1 || length > 4) return null;

  var codePoint = octets[0];

  if (length > 1) {
    // payload bits of the lead octet: 5, 4 or 3 for sequences of 2, 3 or 4 octets
    codePoint &= 0xFF >> length + 1;

    for (var index = 1; index < length; index++) {
      var octet = octets[index];
      if ((octet & 0xC0) !== 0x80) return null;
      codePoint = codePoint << 6 | octet & 0x3F;
    }

    // smallest value that really needs `length` octets - anything below is an overlong form
    var minimum = length === 2 ? 0x80 : length === 3 ? 0x800 : 0x10000;
    if (codePoint < minimum) return null;
    if (codePoint >= 0xD800 && codePoint <= 0xDFFF) return null;
  }

  return codePoint > 0x10FFFF ? null : codePoint;
};

var deserialize = function (it) {
var result = replace(it, plus, ' ');
var bytes = 4;
try {
return decodeURIComponent(result);
} catch (error) {
while (bytes) {
result = replace(result, percentSequence(bytes--), percentDecode);
/* eslint-disable max-statements -- TODO */
// Percent-decodes `input` and reads the decoded octets as UTF-8.
// `input` - string to decode.
// `preserveEscapeSet` - optional array of single ASCII characters whose escapes are copied
//   to the output unchanged instead of being decoded.
// Returns the decoded string:
//   - a `%` that is not followed by two hex digits is not an escape and is kept literally
//   - a single-octet (ASCII) escape becomes that character, unless it is in `preserveEscapeSet`
//   - consecutive escapes forming a multi-octet UTF-8 sequence become one code point
//   - a malformed or incomplete sequence becomes ONE replacement character; scanning resumes
//     at the first escape that did not fit, so what follows a broken sequence is not lost
var decode = function (input, preserveEscapeSet) {
  var length = input.length;
  var result = '';
  var i = 0;

  while (i < length) {
    var decodedChar = charAt(input, i);

    if (decodedChar !== '%') {
      result += decodedChar;
      i++;
      continue;
    }

    // too short to hold two hex digits -> same handling as invalid digits
    var octet = i + 3 > length ? NaN : parseHexOctet(input, i + 1);

    // eslint-disable-next-line no-self-compare -- NaN check
    if (octet !== octet) {
      result += decodedChar;
      i++;
      continue;
    }

    // index right after the last escape consumed for the current character
    var end = i + 3;
    var byteSequenceLength = getLeadingOnes(octet);

    if (byteSequenceLength === 0) {
      var asciiChar = fromCharCode(octet);
      decodedChar = preserveEscapeSet && $includes(preserveEscapeSet, asciiChar, undefined)
        ? substring(input, i, end)
        : asciiChar;
    } else if (byteSequenceLength === 1 || byteSequenceLength > 4) {
      // continuation octet without a lead octet, or an impossible lead octet
      decodedChar = FALLBACK_REPLACER;
    } else {
      var octets = [octet];

      while (octets.length < byteSequenceLength) {
        if (end + 3 > length || charAt(input, end) !== '%') break;
        var nextByte = parseHexOctet(input, end + 1);
        // only 10xxxxxx may continue a sequence; `NaN & 0xC0` is 0, so invalid hex stops here too
        if ((nextByte & 0xC0) !== 0x80) break;
        octets[octets.length] = nextByte;
        end += 3;
      }

      var codePoint = octets.length === byteSequenceLength ? utf8Decode(octets) : null;
      decodedChar = codePoint === null ? FALLBACK_REPLACER : fromCodePoint(codePoint);
    }

    result += decodedChar;
    i = end;
  }

  return result;
};

// Decodes one `application/x-www-form-urlencoded` name or value.
// `+` stands for a space and has to be replaced BEFORE percent-decoding, otherwise a
// literal plus sign written as `%2B` could not be told apart from an encoded space.
// Every escape is decoded, reserved characters like `&`, `=` and `+` included (empty
// preserve set): the pairs are already split at this point, so nothing has to stay escaped.
var deserialize = function (it) {
  return decode(replace(it, /\+/g, ' '), []);
};

var find = /[!'()~]|%20/g;
Expand Down
28 changes: 28 additions & 0 deletions tests/unit-global/web.url-search-params.js
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,34 @@ QUnit.test('URLSearchParams', assert => {
params = new URLSearchParams(params.toString());
assert.same(params.get('query'), '+15555555555', 'parse encoded +');

params = new URLSearchParams('b=%2sf%2a');
assert.same(params.get('b'), '%2sf*', 'parse encoded %2sf%2a');
params = new URLSearchParams('b=%%2a');
assert.same(params.get('b'), '%*', 'parse encoded b=%%2a');

params = new URLSearchParams('a=b\u2384');
assert.same(params.get('a'), 'b\u2384', 'parse \u2384');
params = new URLSearchParams('a\u2384b=c');
assert.same(params.get('a\u2384b'), 'c', 'parse \u2384');

params = new URLSearchParams('a=b%e2%8e%84');
assert.same(params.get('a'), 'b\u2384', 'parse b%e2%8e%84');
params = new URLSearchParams('a%e2%8e%84b=c');
assert.same(params.get('a\u2384b'), 'c', 'parse b%e2%8e%84');

params = new URLSearchParams('a=b\uD83D\uDCA9c');
assert.same(params.get('a'), 'b\uD83D\uDCA9c', 'parse \uD83D\uDCA9');
params = new URLSearchParams('a\uD83D\uDCA9b=c');
assert.same(params.get('a\uD83D\uDCA9b'), 'c', 'parse \uD83D\uDCA9');

params = new URLSearchParams('a=b%f0%9f%92%a9c');
assert.same(params.get('a'), 'b\uD83D\uDCA9c', 'parse %f0%9f%92%a9');
params = new URLSearchParams('a%f0%9f%92%a9b=c');
assert.same(params.get('a\uD83D\uDCA9b'), 'c', 'parse %f0%9f%92%a9');

assert.same(String(new URLSearchParams('%C2')), '%EF%BF%BD=');
assert.same(String(new URLSearchParams('%F0%9F%D0%90')), '%EF%BF%BD%D0%90=');

const testData = [
{ input: '?a=%', output: [['a', '%']], name: 'handling %' },
{ input: { '+': '%C2' }, output: [['+', '%C2']], name: 'object with +' },
Expand Down
28 changes: 28 additions & 0 deletions tests/unit-pure/web.url-search-params.js
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,34 @@ QUnit.test('URLSearchParams', assert => {
params = new URLSearchParams(params.toString());
assert.same(params.get('query'), '+15555555555', 'parse encoded +');

params = new URLSearchParams('b=%2sf%2a');
assert.same(params.get('b'), '%2sf*', 'parse encoded %2sf%2a');
params = new URLSearchParams('b=%%2a');
assert.same(params.get('b'), '%*', 'parse encoded b=%%2a');

params = new URLSearchParams('a=b\u2384');
assert.same(params.get('a'), 'b\u2384', 'parse \u2384');
params = new URLSearchParams('a\u2384b=c');
assert.same(params.get('a\u2384b'), 'c', 'parse \u2384');

params = new URLSearchParams('a=b%e2%8e%84');
assert.same(params.get('a'), 'b\u2384', 'parse b%e2%8e%84');
params = new URLSearchParams('a%e2%8e%84b=c');
assert.same(params.get('a\u2384b'), 'c', 'parse b%e2%8e%84');

params = new URLSearchParams('a=b\uD83D\uDCA9c');
assert.same(params.get('a'), 'b\uD83D\uDCA9c', 'parse \uD83D\uDCA9');
params = new URLSearchParams('a\uD83D\uDCA9b=c');
assert.same(params.get('a\uD83D\uDCA9b'), 'c', 'parse \uD83D\uDCA9');

params = new URLSearchParams('a=b%f0%9f%92%a9c');
assert.same(params.get('a'), 'b\uD83D\uDCA9c', 'parse %f0%9f%92%a9');
params = new URLSearchParams('a%f0%9f%92%a9b=c');
assert.same(params.get('a\uD83D\uDCA9b'), 'c', 'parse %f0%9f%92%a9');

assert.same(String(new URLSearchParams('%C2')), '%EF%BF%BD=');
assert.same(String(new URLSearchParams('%F0%9F%D0%90')), '%EF%BF%BD%D0%90=');

const testData = [
{ input: '?a=%', output: [['a', '%']], name: 'handling %' },
{ input: { '+': '%C2' }, output: [['+', '%C2']], name: 'object with +' },
Expand Down

0 comments on commit c48c4ec

Please sign in to comment.