From 677e6f3439b91769a9b54c18afe5c136c14d9e8a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pol=20Valletb=C3=B3?= <pol.valletbo@glovoapp.com>
Date: Wed, 11 Oct 2023 13:09:12 +0200
Subject: [PATCH 1/5] fix: use unescape_byte function for Byte literals

---
 crates/parser/src/lexed_str.rs                |  2 +-
 .../lexer/err/byte_char_literals.rast         | 28 ++++++++-----------
 .../test_data/lexer/err/byte_char_literals.rs |  3 --
 3 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/crates/parser/src/lexed_str.rs b/crates/parser/src/lexed_str.rs
index 8e8bdce1eef26..84cedc1fa3f0f 100644
--- a/crates/parser/src/lexed_str.rs
+++ b/crates/parser/src/lexed_str.rs
@@ -274,7 +274,7 @@ impl<'a> Converter<'a> {
                     let text = &self.res.text[self.offset + 2..][..len - 2];
                     let i = text.rfind('\'').unwrap();
                     let text = &text[..i];
-                    if let Err(e) = rustc_lexer::unescape::unescape_char(text) {
+                    if let Err(e) = rustc_lexer::unescape::unescape_byte(text) {
                         err = error_to_diagnostic_message(e, Mode::Byte);
                     }
                 }
diff --git a/crates/parser/test_data/lexer/err/byte_char_literals.rast b/crates/parser/test_data/lexer/err/byte_char_literals.rast
index 24892bc239486..7603c9099daad 100644
--- a/crates/parser/test_data/lexer/err/byte_char_literals.rast
+++ b/crates/parser/test_data/lexer/err/byte_char_literals.rast
@@ -22,9 +22,9 @@ BYTE "b'\\'a'" error: character literal may only contain one codepoint
 WHITESPACE "\n"
 BYTE "b'\\0a'" error: character literal may only contain one codepoint
 WHITESPACE "\n"
-BYTE "b'\\u{0}x'" error: character literal may only contain one codepoint
+BYTE "b'\\u{0}x'" error: unicode escape in byte string
 WHITESPACE "\n"
-BYTE "b'\\u{1F63b}}'" error: character literal may only contain one codepoint
+BYTE "b'\\u{1F63b}}'" error: unicode escape in byte string
 WHITESPACE "\n"
 BYTE "b'\\v'" error: unknown byte escape
 WHITESPACE "\n"
@@ -50,12 +50,6 @@ BYTE "b'\\x🦀'" error: invalid character in numeric character escape
 WHITESPACE "\n"
 BYTE "b'\\xtt'" error: invalid character in numeric character escape
 WHITESPACE "\n"
-BYTE "b'\\xff'" error: out of range hex escape
-WHITESPACE "\n"
-BYTE "b'\\xFF'" error: out of range hex escape
-WHITESPACE "\n"
-BYTE "b'\\x80'" error: out of range hex escape
-WHITESPACE "\n"
 BYTE "b'\\u'" error: incorrect unicode escape sequence
 WHITESPACE "\n"
 BYTE "b'\\u[0123]'" error: incorrect unicode escape sequence
@@ -72,21 +66,21 @@ BYTE "b'\\u{_0000}'" error: invalid start of unicode escape
 WHITESPACE "\n"
 BYTE "b'\\u{0000000}'" error: overlong unicode escape
 WHITESPACE "\n"
-BYTE "b'\\u{FFFFFF}'" error: invalid unicode character escape
+BYTE "b'\\u{FFFFFF}'" error: unicode escape in byte string
 WHITESPACE "\n"
-BYTE "b'\\u{ffffff}'" error: invalid unicode character escape
+BYTE "b'\\u{ffffff}'" error: unicode escape in byte string
 WHITESPACE "\n"
-BYTE "b'\\u{ffffff}'" error: invalid unicode character escape
+BYTE "b'\\u{ffffff}'" error: unicode escape in byte string
 WHITESPACE "\n"
-BYTE "b'\\u{DC00}'" error: invalid unicode character escape
+BYTE "b'\\u{DC00}'" error: unicode escape in byte string
 WHITESPACE "\n"
-BYTE "b'\\u{DDDD}'" error: invalid unicode character escape
+BYTE "b'\\u{DDDD}'" error: unicode escape in byte string
 WHITESPACE "\n"
-BYTE "b'\\u{DFFF}'" error: invalid unicode character escape
+BYTE "b'\\u{DFFF}'" error: unicode escape in byte string
 WHITESPACE "\n"
-BYTE "b'\\u{D800}'" error: invalid unicode character escape
+BYTE "b'\\u{D800}'" error: unicode escape in byte string
 WHITESPACE "\n"
-BYTE "b'\\u{DAAA}'" error: invalid unicode character escape
+BYTE "b'\\u{DAAA}'" error: unicode escape in byte string
 WHITESPACE "\n"
-BYTE "b'\\u{DBFF}'" error: invalid unicode character escape
+BYTE "b'\\u{DBFF}'" error: unicode escape in byte string
 WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/byte_char_literals.rs b/crates/parser/test_data/lexer/err/byte_char_literals.rs
index 9f2f4309e7692..b2d06e490bd6f 100644
--- a/crates/parser/test_data/lexer/err/byte_char_literals.rs
+++ b/crates/parser/test_data/lexer/err/byte_char_literals.rs
@@ -25,9 +25,6 @@ b'\xx'
 b'\xы'
 b'\x🦀'
 b'\xtt'
-b'\xff'
-b'\xFF'
-b'\x80'
 b'\u'
 b'\u[0123]'
 b'\u{0x}'

From e1aeb7fa794e228ca9099ac7679e8a1d0b22238a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pol=20Valletb=C3=B3?= <pol.valletbo@glovoapp.com>
Date: Wed, 11 Oct 2023 15:25:52 +0200
Subject: [PATCH 2/5] fix: handle errors for string, byte string and c_string

---
 crates/parser/src/lexed_str.rs                | 42 ++++++++++++++++++-
 .../test_data/lexer/err/byte_strings.rast     | 28 +++++++++++++
 .../test_data/lexer/err/byte_strings.rs       | 14 +++++++
 .../parser/test_data/lexer/err/c_strings.rast | 28 +++++++++++++
 .../parser/test_data/lexer/err/c_strings.rs   | 14 +++++++
 .../parser/test_data/lexer/err/strings.rast   | 28 +++++++++++++
 crates/parser/test_data/lexer/err/strings.rs  | 14 +++++++
 7 files changed, 167 insertions(+), 1 deletion(-)
 create mode 100644 crates/parser/test_data/lexer/err/byte_strings.rast
 create mode 100644 crates/parser/test_data/lexer/err/byte_strings.rs
 create mode 100644 crates/parser/test_data/lexer/err/c_strings.rast
 create mode 100644 crates/parser/test_data/lexer/err/c_strings.rs
 create mode 100644 crates/parser/test_data/lexer/err/strings.rast
 create mode 100644 crates/parser/test_data/lexer/err/strings.rs

diff --git a/crates/parser/src/lexed_str.rs b/crates/parser/src/lexed_str.rs
index 84cedc1fa3f0f..c2e25daf37f6e 100644
--- a/crates/parser/src/lexed_str.rs
+++ b/crates/parser/src/lexed_str.rs
@@ -8,7 +8,10 @@
 //! Note that these tokens, unlike the tokens we feed into the parser, do
 //! include info about comments and whitespace.
 
-use rustc_dependencies::lexer as rustc_lexer;
+use rustc_dependencies::lexer::{
+    self as rustc_lexer,
+    unescape::{unescape_c_string, unescape_literal},
+};
 
 use std::ops;
 
@@ -284,18 +287,45 @@ impl<'a> Converter<'a> {
             rustc_lexer::LiteralKind::Str { terminated } => {
                 if !terminated {
                     err = "Missing trailing `\"` symbol to terminate the string literal";
+                } else {
+                    let text = &self.res.text[self.offset + 1..][..len - 1];
+                    let i = text.rfind('"').unwrap();
+                    let text = &text[..i];
+                    rustc_lexer::unescape::unescape_literal(text, Mode::Str, &mut |_, res| {
+                        if let Err(e) = res {
+                            err = error_to_diagnostic_message(e, Mode::Str);
+                        }
+                    });
                 }
                 STRING
             }
             rustc_lexer::LiteralKind::ByteStr { terminated } => {
                 if !terminated {
                     err = "Missing trailing `\"` symbol to terminate the byte string literal";
+                } else {
+                    let text = &self.res.text[self.offset + 2..][..len - 2];
+                    let i = text.rfind('"').unwrap();
+                    let text = &text[..i];
+                    rustc_lexer::unescape::unescape_literal(text, Mode::ByteStr, &mut |_, res| {
+                        if let Err(e) = res {
+                            err = error_to_diagnostic_message(e, Mode::ByteStr);
+                        }
+                    })
                 }
                 BYTE_STRING
             }
             rustc_lexer::LiteralKind::CStr { terminated } => {
                 if !terminated {
                     err = "Missing trailing `\"` symbol to terminate the string literal";
+                } else {
+                    let text = &self.res.text[self.offset + 2..][..len - 2];
+                    let i = text.rfind('"').unwrap();
+                    let text = &text[..i];
+                    rustc_lexer::unescape::unescape_c_string(text, Mode::CStr, &mut |_, res| {
+                        if let Err(e) = res {
+                            err = error_to_diagnostic_message(e, Mode::CStr);
+                        }
+                    })
                 }
                 C_STRING
             }
@@ -360,3 +390,13 @@ fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
         EscapeError::MultipleSkippedLinesWarning => "",
     }
 }
+
+fn fill_unescape_string_error(text: &str, mode: Mode, mut error_message: &str) {
+
+    rustc_lexer::unescape::unescape_c_string(text, mode, &mut |_, res| {
+        if let Err(e) = res {
+            error_message = error_to_diagnostic_message(e, mode);
+        }
+    });
+}
+
diff --git a/crates/parser/test_data/lexer/err/byte_strings.rast b/crates/parser/test_data/lexer/err/byte_strings.rast
new file mode 100644
index 0000000000000..e8d8ff8cefb42
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/byte_strings.rast
@@ -0,0 +1,28 @@
+BYTE_STRING "b\"\\💩\"" error: unknown byte escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\●\"" error: unknown byte escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{_0000}\"" error: invalid start of unicode escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{0000000}\"" error: overlong unicode escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{FFFFFF}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{ffffff}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{ffffff}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DC00}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DDDD}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DFFF}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{D800}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DAAA}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DBFF}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\xы\"" error: invalid character in numeric character escape
+WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/byte_strings.rs b/crates/parser/test_data/lexer/err/byte_strings.rs
new file mode 100644
index 0000000000000..e74847137b1ea
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/byte_strings.rs
@@ -0,0 +1,14 @@
+b"\💩"
+b"\●"
+b"\u{_0000}"
+b"\u{0000000}"
+b"\u{FFFFFF}"
+b"\u{ffffff}"
+b"\u{ffffff}"
+b"\u{DC00}"
+b"\u{DDDD}"
+b"\u{DFFF}"
+b"\u{D800}"
+b"\u{DAAA}"
+b"\u{DBFF}"
+b"\xы"
diff --git a/crates/parser/test_data/lexer/err/c_strings.rast b/crates/parser/test_data/lexer/err/c_strings.rast
new file mode 100644
index 0000000000000..1b4424ba5c781
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/c_strings.rast
@@ -0,0 +1,28 @@
+C_STRING "c\"\\💩\"" error: unknown character escape
+WHITESPACE "\n"
+C_STRING "c\"\\●\"" error: unknown character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{_0000}\"" error: invalid start of unicode escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{0000000}\"" error: overlong unicode escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{FFFFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DC00}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DDDD}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{D800}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DAAA}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DBFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\xы\"" error: invalid character in numeric character escape
+WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/c_strings.rs b/crates/parser/test_data/lexer/err/c_strings.rs
new file mode 100644
index 0000000000000..1b78ffc28a00d
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/c_strings.rs
@@ -0,0 +1,14 @@
+c"\💩"
+c"\●"
+c"\u{_0000}"
+c"\u{0000000}"
+c"\u{FFFFFF}"
+c"\u{ffffff}"
+c"\u{ffffff}"
+c"\u{DC00}"
+c"\u{DDDD}"
+c"\u{DFFF}"
+c"\u{D800}"
+c"\u{DAAA}"
+c"\u{DBFF}"
+c"\xы"
diff --git a/crates/parser/test_data/lexer/err/strings.rast b/crates/parser/test_data/lexer/err/strings.rast
new file mode 100644
index 0000000000000..0cd1747208e4d
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/strings.rast
@@ -0,0 +1,28 @@
+STRING "\"\\💩\"" error: unknown character escape
+WHITESPACE "\n"
+STRING "\"\\●\"" error: unknown character escape
+WHITESPACE "\n"
+STRING "\"\\u{_0000}\"" error: invalid start of unicode escape
+WHITESPACE "\n"
+STRING "\"\\u{0000000}\"" error: overlong unicode escape
+WHITESPACE "\n"
+STRING "\"\\u{FFFFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DC00}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DDDD}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{D800}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DAAA}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DBFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\xы\"" error: invalid character in numeric character escape
+WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/strings.rs b/crates/parser/test_data/lexer/err/strings.rs
new file mode 100644
index 0000000000000..2499516d3fa9f
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/strings.rs
@@ -0,0 +1,14 @@
+"\💩"
+"\●"
+"\u{_0000}"
+"\u{0000000}"
+"\u{FFFFFF}"
+"\u{ffffff}"
+"\u{ffffff}"
+"\u{DC00}"
+"\u{DDDD}"
+"\u{DFFF}"
+"\u{D800}"
+"\u{DAAA}"
+"\u{DBFF}"
+"\xы"

From b769f34f6371b13f7ce81cefe65579911331eb16 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pol=20Valletb=C3=B3?= <pol.valletbo@glovoapp.com>
Date: Wed, 11 Oct 2023 15:45:45 +0200
Subject: [PATCH 3/5] chore: move common code to function

---
 crates/parser/src/lexed_str.rs | 46 ++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/crates/parser/src/lexed_str.rs b/crates/parser/src/lexed_str.rs
index c2e25daf37f6e..4d322f21ae70a 100644
--- a/crates/parser/src/lexed_str.rs
+++ b/crates/parser/src/lexed_str.rs
@@ -291,11 +291,7 @@ impl<'a> Converter<'a> {
                     let text = &self.res.text[self.offset + 1..][..len - 1];
                     let i = text.rfind('"').unwrap();
                     let text = &text[..i];
-                    rustc_lexer::unescape::unescape_literal(text, Mode::Str, &mut |_, res| {
-                        if let Err(e) = res {
-                            err = error_to_diagnostic_message(e, Mode::Str);
-                        }
-                    });
+                    err = unescape_string_error_message(text, Mode::Str);
                 }
                 STRING
             }
@@ -306,11 +302,7 @@ impl<'a> Converter<'a> {
                     let text = &self.res.text[self.offset + 2..][..len - 2];
                     let i = text.rfind('"').unwrap();
                     let text = &text[..i];
-                    rustc_lexer::unescape::unescape_literal(text, Mode::ByteStr, &mut |_, res| {
-                        if let Err(e) = res {
-                            err = error_to_diagnostic_message(e, Mode::ByteStr);
-                        }
-                    })
+                    err = unescape_string_error_message(text, Mode::ByteStr);
                 }
                 BYTE_STRING
             }
@@ -321,11 +313,7 @@ impl<'a> Converter<'a> {
                     let text = &self.res.text[self.offset + 2..][..len - 2];
                     let i = text.rfind('"').unwrap();
                     let text = &text[..i];
-                    rustc_lexer::unescape::unescape_c_string(text, Mode::CStr, &mut |_, res| {
-                        if let Err(e) = res {
-                            err = error_to_diagnostic_message(e, Mode::CStr);
-                        }
-                    })
+                    err = unescape_string_error_message(text, Mode::CStr);
                 }
                 C_STRING
             }
@@ -391,12 +379,26 @@ fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
     }
 }
 
-fn fill_unescape_string_error(text: &str, mode: Mode, mut error_message: &str) {
-
-    rustc_lexer::unescape::unescape_c_string(text, mode, &mut |_, res| {
-        if let Err(e) = res {
-            error_message = error_to_diagnostic_message(e, mode);
+fn unescape_string_error_message(text: &str, mode: Mode) -> &'static str {
+    let mut error_message = "";
+    match mode {
+        Mode::CStr => {
+            rustc_lexer::unescape::unescape_c_string(text, mode, &mut |_, res| {
+                if let Err(e) = res {
+                    error_message = error_to_diagnostic_message(e, mode);
+                }
+            });
+        }
+        Mode::ByteStr | Mode::Str => {
+            rustc_lexer::unescape::unescape_literal(text, mode, &mut |_, res| {
+                if let Err(e) = res {
+                    error_message = error_to_diagnostic_message(e, mode);
+                }
+            });
         }
-    });
+        _ => {
+            // Other Modes are not supported yet or do not apply
+        }
+    }
+    error_message
 }
-

From 4b281ffdf2c0315722729ec090f74c0d49feca1f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pol=20Valletb=C3=B3?= <pol.valletbo@glovoapp.com>
Date: Wed, 11 Oct 2023 15:52:05 +0200
Subject: [PATCH 4/5] chore: format imports

---
 crates/parser/src/lexed_str.rs | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/crates/parser/src/lexed_str.rs b/crates/parser/src/lexed_str.rs
index 4d322f21ae70a..13189b8bd003d 100644
--- a/crates/parser/src/lexed_str.rs
+++ b/crates/parser/src/lexed_str.rs
@@ -7,11 +7,9 @@
 //!
 //! Note that these tokens, unlike the tokens we feed into the parser, do
 //! include info about comments and whitespace.
+//
 
-use rustc_dependencies::lexer::{
-    self as rustc_lexer,
-    unescape::{unescape_c_string, unescape_literal},
-};
+use rustc_dependencies::lexer as rustc_lexer;
 
 use std::ops;
 

From 6845c80a2fd52e2d8c58bda0e55c39c4bb836ad1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pol=20Valletb=C3=B3?= <pol.valletbo@glovoapp.com>
Date: Wed, 11 Oct 2023 15:52:22 +0200
Subject: [PATCH 5/5] fix: format

---
 crates/parser/src/lexed_str.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/crates/parser/src/lexed_str.rs b/crates/parser/src/lexed_str.rs
index 13189b8bd003d..b9e7566fdf9bc 100644
--- a/crates/parser/src/lexed_str.rs
+++ b/crates/parser/src/lexed_str.rs
@@ -7,7 +7,6 @@
 //!
 //! Note that these tokens, unlike the tokens we feed into the parser, do
 //! include info about comments and whitespace.
-//
 
 use rustc_dependencies::lexer as rustc_lexer;