From 60da4b085ea707ff9cf11dca679b7ad6289323d0 Mon Sep 17 00:00:00 2001
From: joyeecheung <joyeec9h3@gmail.com>
Date: Thu, 1 Dec 2016 19:11:43 -0600
Subject: [PATCH] test, url: improve escaping in url.parse

- rename variables in autoEscapeStr so they are easier to understand
- comment the escaping algorithm
- increase coverage for autoEscapeStr
---
 lib/url.js                | 136 ++++++++++++++++++++------------------
 test/parallel/test-url.js |  14 ++++
 2 files changed, 85 insertions(+), 65 deletions(-)

diff --git a/lib/url.js b/lib/url.js
index 5f87c0c36c43e5..03a9fb03dfcca7 100644
--- a/lib/url.js
+++ b/lib/url.js
@@ -437,105 +437,111 @@ function validateHostname(self, rest, hostname) {
   }
 }
 
+// Automatically escape all delimiters and unwise characters from RFC 2396.
+// Also escape single quotes in case of an XSS attack.
+// Return undefined if the string doesn't need escaping,
+// otherwise return the escaped string.
 function autoEscapeStr(rest) {
-  var newRest = '';
-  var lastPos = 0;
+  var escaped = '';
+  var lastEscapedPos = 0;
   for (var i = 0; i < rest.length; ++i) {
-    // Automatically escape all delimiters and unwise characters from RFC 2396
-    // Also escape single quotes in case of an XSS attack
+    // Manual switching is faster than using a Map/Object.
+    // `escaped` contains substring up to the last escaped cahracter.
     switch (rest.charCodeAt(i)) {
       case 9:   // '\t'
-        if (i - lastPos > 0)
-          newRest += rest.slice(lastPos, i);
-        newRest += '%09';
-        lastPos = i + 1;
+        // Concat if there are ordinary characters in the middle.
+        if (i > lastEscapedPos)
+          escaped += rest.slice(lastEscapedPos, i);
+        escaped += '%09';
+        lastEscapedPos = i + 1;
         break;
       case 10:  // '\n'
-        if (i - lastPos > 0)
-          newRest += rest.slice(lastPos, i);
-        newRest += '%0A';
-        lastPos = i + 1;
+        if (i > lastEscapedPos)
+          escaped += rest.slice(lastEscapedPos, i);
+        escaped += '%0A';
+        lastEscapedPos = i + 1;
         break;
       case 13:  // '\r'
-        if (i - lastPos > 0)
-          newRest += rest.slice(lastPos, i);
-        newRest += '%0D';
-        lastPos = i + 1;
+        if (i > lastEscapedPos)
+          escaped += rest.slice(lastEscapedPos, i);
+        escaped += '%0D';
+        lastEscapedPos = i + 1;
         break;
       case 32:  // ' '
-        if (i - lastPos > 0)
-          newRest += rest.slice(lastPos, i);
-        newRest += '%20';
-        lastPos = i + 1;
+        if (i > lastEscapedPos)
+          escaped += rest.slice(lastEscapedPos, i);
+        escaped += '%20';
+        lastEscapedPos = i + 1;
         break;
       case 34:  // '"'
-        if (i - lastPos > 0)
-          newRest += rest.slice(lastPos, i);
-        newRest += '%22';
-        lastPos = i + 1;
+        if (i > lastEscapedPos)
+          escaped += rest.slice(lastEscapedPos, i);
+        escaped += '%22';
+        lastEscapedPos = i + 1;
         break;
       case 39:  // '\''
-        if (i - lastPos > 0)
-          newRest += rest.slice(lastPos, i);
-        newRest += '%27';
-        lastPos = i + 1;
+        if (i > lastEscapedPos)
+          escaped += rest.slice(lastEscapedPos, i);
+        escaped += '%27';
+        lastEscapedPos = i + 1;
         break;
       case 60:  // '<'
-        if (i - lastPos > 0)
-          newRest += rest.slice(lastPos, i);
-        newRest += '%3C';
-        lastPos = i + 1;
+        if (i > lastEscapedPos)
+          escaped += rest.slice(lastEscapedPos, i);
+        escaped += '%3C';
+        lastEscapedPos = i + 1;
         break;
       case 62:  // '>'
-        if (i - lastPos > 0)
-          newRest += rest.slice(lastPos, i);
-        newRest += '%3E';
-        lastPos = i + 1;
+        if (i > lastEscapedPos)
+          escaped += rest.slice(lastEscapedPos, i);
+        escaped += '%3E';
+        lastEscapedPos = i + 1;
         break;
       case 92:  // '\\'
-        if (i - lastPos > 0)
-          newRest += rest.slice(lastPos, i);
-        newRest += '%5C';
-        lastPos = i + 1;
+        if (i > lastEscapedPos)
+          escaped += rest.slice(lastEscapedPos, i);
+        escaped += '%5C';
+        lastEscapedPos = i + 1;
         break;
       case 94:  // '^'
-        if (i - lastPos > 0)
-          newRest += rest.slice(lastPos, i);
-        newRest += '%5E';
-        lastPos = i + 1;
+        if (i > lastEscapedPos)
+          escaped += rest.slice(lastEscapedPos, i);
+        escaped += '%5E';
+        lastEscapedPos = i + 1;
         break;
       case 96:  // '`'
-        if (i - lastPos > 0)
-          newRest += rest.slice(lastPos, i);
-        newRest += '%60';
-        lastPos = i + 1;
+        if (i > lastEscapedPos)
+          escaped += rest.slice(lastEscapedPos, i);
+        escaped += '%60';
+        lastEscapedPos = i + 1;
         break;
       case 123: // '{'
-        if (i - lastPos > 0)
-          newRest += rest.slice(lastPos, i);
-        newRest += '%7B';
-        lastPos = i + 1;
+        if (i > lastEscapedPos)
+          escaped += rest.slice(lastEscapedPos, i);
+        escaped += '%7B';
+        lastEscapedPos = i + 1;
         break;
       case 124: // '|'
-        if (i - lastPos > 0)
-          newRest += rest.slice(lastPos, i);
-        newRest += '%7C';
-        lastPos = i + 1;
+        if (i > lastEscapedPos)
+          escaped += rest.slice(lastEscapedPos, i);
+        escaped += '%7C';
+        lastEscapedPos = i + 1;
         break;
       case 125: // '}'
-        if (i - lastPos > 0)
-          newRest += rest.slice(lastPos, i);
-        newRest += '%7D';
-        lastPos = i + 1;
+        if (i > lastEscapedPos)
+          escaped += rest.slice(lastEscapedPos, i);
+        escaped += '%7D';
+        lastEscapedPos = i + 1;
         break;
     }
   }
-  if (lastPos === 0)
+  if (lastEscapedPos === 0)  // Nothing has been escaped.
     return;
-  if (lastPos < rest.length)
-    return newRest + rest.slice(lastPos);
-  else
-    return newRest;
+  // There are ordinary characters at the end.
+  if (lastEscapedPos < rest.length)
+    return escaped + rest.slice(lastEscapedPos);
+  else  // The last character is escaped.
+    return escaped;
 }
 
 // format a parsed object into a url string
diff --git a/test/parallel/test-url.js b/test/parallel/test-url.js
index 8bbc7194732368..c97caa36429a9c 100644
--- a/test/parallel/test-url.js
+++ b/test/parallel/test-url.js
@@ -834,6 +834,20 @@ var parseTests = {
     query: '@c'
   },
 
+  'http://a.b/\tbc\ndr\ref g"hq\'j<kl>?mn\\op^q=r`99{st|uv}wz': {
+    protocol: 'http:',
+    slashes: true,
+    host: 'a.b',
+    port: null,
+    hostname: 'a.b',
+    hash: null,
+    pathname: '/%09bc%0Adr%0Def%20g%22hq%27j%3Ckl%3E',
+    path: '/%09bc%0Adr%0Def%20g%22hq%27j%3Ckl%3E?mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz',
+    search: '?mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz',
+    query: 'mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz',
+    href: 'http://a.b/%09bc%0Adr%0Def%20g%22hq%27j%3Ckl%3E?mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz'
+  },
+
   'http://a\r" \t\n<\'b:b@c\r\nd/e?f': {
     protocol: 'http:',
     slashes: true,