diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index c4c85570484cf..f5b15ca5d54c2 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -3,11 +3,11 @@ use crate::ast::{self, LitKind, MetaItemLit, StrStyle}; use crate::token::{self, Token}; use rustc_lexer::unescape::{ - byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit, - Mode, + byte_from_char, unescape_c_string, unescape_literal, CStrUnit, EscapeError, Mode, }; use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::Span; +use std::ops::Range; use std::{ascii, fmt, str}; // Escapes a string, represented as a symbol. Reuses the original symbol, @@ -33,6 +33,14 @@ pub fn escape_byte_str_symbol(bytes: &[u8]) -> Symbol { #[derive(Debug)] pub enum LitError { LexerError, + EscapeError { + mode: Mode, + // Length before the string content, e.g. 1 for "a", 5 for br##"a"## + prefix_len: u32, + // The range is the byte range of the bad character, using a zero index. + range: Range, + err: EscapeError, + }, InvalidSuffix, InvalidIntSuffix, InvalidFloatSuffix, @@ -41,155 +49,253 @@ pub enum LitError { } impl LitKind { - /// Converts literal token into a semantic literal. - pub fn from_token_lit(lit: token::Lit) -> Result { + /// Converts literal token into a semantic literal. The return value has + /// two parts: + /// - The `Result` indicates success or failure. + /// - The `Vec` contains all found errors and warnings. + /// + /// If we only had to deal with errors, we could use the more obvious + /// `Result>`; on failure the caller would just + /// (optionally) print errors and take the error path and stop early. But + /// it's possible to succeed with zero errors and one or more warnings, and + /// in that case the caller should (optionally) print the warnings, but + /// also proceed with a valid `LitKind`. This return type facilitates that. + pub fn from_token_lit_with_errs(lit: token::Lit) -> (Result, Vec) { let token::Lit { kind, symbol, suffix } = lit; if suffix.is_some() && !kind.may_have_suffix() { - return Err(LitError::InvalidSuffix); + // Note: we return a single error here. We could instead continue + // processing, possibly returning multiple errors. + return (Err(()), vec![LitError::InvalidSuffix]); } - Ok(match kind { + let mut errs = vec![]; + let mut has_fatal = false; + + let res = match kind { token::Bool => { assert!(symbol.is_bool_lit()); - LitKind::Bool(symbol == kw::True) + Ok(LitKind::Bool(symbol == kw::True)) } token::Byte => { - return unescape_byte(symbol.as_str()) - .map(LitKind::Byte) - .map_err(|_| LitError::LexerError); + let mode = Mode::Byte; + let mut res = None; + unescape_literal(symbol.as_str(), mode, &mut |range, unescaped_char| { + match unescaped_char { + Ok(c) => res = Some(c), + Err(err) => { + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 2, // b' + range, + err, + }); + } + } + }); + if !has_fatal { Ok(LitKind::Byte(byte_from_char(res.unwrap()))) } else { Err(()) } } token::Char => { - return unescape_char(symbol.as_str()) - .map(LitKind::Char) - .map_err(|_| LitError::LexerError); + let mode = Mode::Char; + let mut res = None; + unescape_literal(symbol.as_str(), mode, &mut |range, unescaped_char| { + match unescaped_char { + Ok(c) => res = Some(c), + Err(err) => { + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 1, // ' + range, + err, + }); + } + } + }); + if !has_fatal { Ok(LitKind::Char(res.unwrap())) } else { Err(()) } } // There are some valid suffixes for integer and float literals, // so all the handling is done internally. - token::Integer => return integer_lit(symbol, suffix), - token::Float => return float_lit(symbol, suffix), + token::Integer => { + return match integer_lit(symbol, suffix) { + Ok(lit_kind) => (Ok(lit_kind), vec![]), + Err(err) => (Err(()), vec![err]), + }; + } + token::Float => { + return match float_lit(symbol, suffix) { + Ok(lit_kind) => (Ok(lit_kind), vec![]), + Err(err) => (Err(()), vec![err]), + }; + } token::Str => { // If there are no characters requiring special treatment we can // reuse the symbol from the token. Otherwise, we must generate a // new symbol because the string in the LitKind is different to the // string in the token. + let mode = Mode::Str; let s = symbol.as_str(); // Vanilla strings are so common we optimize for the common case where no chars // requiring special behaviour are present. - let symbol = if s.contains(['\\', '\r']) { + if s.contains(['\\', '\r']) { let mut buf = String::with_capacity(s.len()); - let mut error = Ok(()); // Force-inlining here is aggressive but the closure is // called on every char in the string, so it can be // hot in programs with many long strings. unescape_literal( s, - Mode::Str, + mode, &mut #[inline(always)] - |_, unescaped_char| match unescaped_char { + |range, unescaped_char| match unescaped_char { Ok(c) => buf.push(c), Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 1, // " + range, + err, + }); } }, ); - error?; - Symbol::intern(&buf) + if !has_fatal { + Ok(LitKind::Str(Symbol::intern(&buf), ast::StrStyle::Cooked)) + } else { + Err(()) + } } else { - symbol - }; - LitKind::Str(symbol, ast::StrStyle::Cooked) + Ok(LitKind::Str(symbol, ast::StrStyle::Cooked)) + } } token::StrRaw(n) => { // Raw strings have no escapes, so we only need to check for invalid chars, and we // can reuse the symbol on success. - let mut error = Ok(()); - unescape_literal(symbol.as_str(), Mode::RawStr, &mut |_, unescaped_char| { - match unescaped_char { - Ok(_) => {} - Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } - } + let mode = Mode::RawStr; + let s = symbol.as_str(); + unescape_literal(s, mode, &mut |range, unescaped_char| match unescaped_char { + Ok(_) => {} + Err(err) => { + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 2 + n as u32, // r", r#", r##", etc. + range, + err, + }); } }); - error?; - LitKind::Str(symbol, ast::StrStyle::Raw(n)) + if !has_fatal { Ok(LitKind::Str(symbol, ast::StrStyle::Raw(n))) } else { Err(()) } } token::ByteStr => { + let mode = Mode::ByteStr; let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); - let mut error = Ok(()); - unescape_literal(s, Mode::ByteStr, &mut |_, c| match c { + unescape_literal(s, mode, &mut |range, c| match c { Ok(c) => buf.push(byte_from_char(c)), Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 2, // b" + range, + err, + }); } }); - error?; - LitKind::ByteStr(buf.into(), StrStyle::Cooked) + if !has_fatal { + Ok(LitKind::ByteStr(buf.into(), StrStyle::Cooked)) + } else { + Err(()) + } } token::ByteStrRaw(n) => { // Raw strings have no escapes, so we only need to check for invalid chars, and we // can convert the symbol directly to a `Lrc` on success. + let mode = Mode::RawByteStr; let s = symbol.as_str(); - let mut error = Ok(()); - unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c { + unescape_literal(s, mode, &mut |range, c| match c { Ok(_) => {} Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 3 + n as u32, // br", br#", br##", etc. + range, + err, + }); } }); - LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n)) + if !has_fatal { + Ok(LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n))) + } else { + Err(()) + } } token::CStr => { + let mode = Mode::CStr; let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); - let mut error = Ok(()); - unescape_c_string(s, Mode::CStr, &mut |_span, c| match c { + unescape_c_string(s, mode, &mut |range, c| match c { Ok(CStrUnit::Byte(b)) => buf.push(b), Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8), Ok(CStrUnit::Char(c)) => { buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) } Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 2, // c" + range, + err, + }); } }); - error?; - buf.push(0); - LitKind::CStr(buf.into(), StrStyle::Cooked) + if !has_fatal { + buf.push(0); + Ok(LitKind::CStr(buf.into(), StrStyle::Cooked)) + } else { + Err(()) + } } token::CStrRaw(n) => { // Raw strings have no escapes, so we only need to check for invalid chars, and we - // can convert the symbol directly to a `Lrc` on success. + // can convert the symbol directly to a `Lrc` (after appending a nul char) on + // success. + let mode = Mode::RawCStr; let s = symbol.as_str(); - let mut error = Ok(()); - unescape_c_string(s, Mode::RawCStr, &mut |_, c| match c { + unescape_c_string(s, mode, &mut |range, c| match c { Ok(_) => {} Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 3 + n as u32, // cr", cr#", cr##", etc. + range, + err, + }); } }); - error?; - let mut buf = s.to_owned().into_bytes(); - buf.push(0); - LitKind::CStr(buf.into(), StrStyle::Raw(n)) + if !has_fatal { + let mut buf = s.to_owned().into_bytes(); + buf.push(0); + Ok(LitKind::CStr(buf.into(), StrStyle::Raw(n))) + } else { + Err(()) + } } - token::Err => LitKind::Err, - }) + token::Err => Ok(LitKind::Err), + }; + (res, errs) + } + + // Use this one for call sites where we don't need to print error messages + // about invalid literals. + pub fn from_token_lit(lit: token::Lit) -> Result { + LitKind::from_token_lit_with_errs(lit).0 } } @@ -257,14 +363,26 @@ impl fmt::Display for LitKind { } impl MetaItemLit { - /// Converts a token literal into a meta item literal. - pub fn from_token_lit(token_lit: token::Lit, span: Span) -> Result { - Ok(MetaItemLit { + /// Converts a token literal into a meta item literal. See + /// `LitKind::from_token_lit` for an explanation of the return type. + pub fn from_token_lit_with_errs( + token_lit: token::Lit, + span: Span, + ) -> (Result, Vec) { + let (lit, errs) = LitKind::from_token_lit_with_errs(token_lit); + let lit = lit.map(|kind| MetaItemLit { symbol: token_lit.symbol, suffix: token_lit.suffix, - kind: LitKind::from_token_lit(token_lit)?, + kind, span, - }) + }); + (lit, errs) + } + + // Use this one for call sites where we don't need to print error messages + // about invalid literals. + pub fn from_token_lit(token_lit: token::Lit, span: Span) -> Result { + MetaItemLit::from_token_lit_with_errs(token_lit, span).0 } /// Cheaply converts a meta item literal into a token literal. diff --git a/compiler/rustc_ast_lowering/src/expr.rs b/compiler/rustc_ast_lowering/src/expr.rs index b60028119ceb7..1127acf62968e 100644 --- a/compiler/rustc_ast_lowering/src/expr.rs +++ b/compiler/rustc_ast_lowering/src/expr.rs @@ -14,7 +14,7 @@ use rustc_data_structures::stack::ensure_sufficient_stack; use rustc_hir as hir; use rustc_hir::def::{DefKind, Res}; use rustc_middle::span_bug; -use rustc_parse::parser::report_lit_error; +use rustc_parse::parser::token_lit_to_lit_kind_and_report_errs; use rustc_span::source_map::{respan, Spanned}; use rustc_span::symbol::{kw, sym, Ident, Symbol}; use rustc_span::DUMMY_SP; @@ -119,13 +119,12 @@ impl<'hir> LoweringContext<'_, 'hir> { hir::ExprKind::Unary(op, ohs) } ExprKind::Lit(token_lit) => { - let lit_kind = match LitKind::from_token_lit(*token_lit) { - Ok(lit_kind) => lit_kind, - Err(err) => { - report_lit_error(&self.tcx.sess.parse_sess, err, *token_lit, e.span); - LitKind::Err - } - }; + let lit_kind = token_lit_to_lit_kind_and_report_errs( + &self.tcx.sess.parse_sess, + *token_lit, + e.span, + ) + .unwrap_or(LitKind::Err); let lit = self.arena.alloc(respan(self.lower_span(e.span), lit_kind)); hir::ExprKind::Lit(lit) } diff --git a/compiler/rustc_builtin_macros/src/concat.rs b/compiler/rustc_builtin_macros/src/concat.rs index 6c83e8868bd31..e926418b7d065 100644 --- a/compiler/rustc_builtin_macros/src/concat.rs +++ b/compiler/rustc_builtin_macros/src/concat.rs @@ -1,7 +1,7 @@ use rustc_ast as ast; use rustc_ast::tokenstream::TokenStream; use rustc_expand::base::{self, DummyResult}; -use rustc_parse::parser::report_lit_error; +use rustc_parse::parser::token_lit_to_lit_kind_and_report_errs; use rustc_span::symbol::Symbol; use crate::errors; @@ -19,44 +19,43 @@ pub fn expand_concat( let mut has_errors = false; for e in es { match e.kind { - ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) { - Ok(ast::LitKind::Str(s, _) | ast::LitKind::Float(s, _)) => { - accumulator.push_str(s.as_str()); - } - Ok(ast::LitKind::Char(c)) => { - accumulator.push(c); - } - Ok(ast::LitKind::Int(i, _)) => { - accumulator.push_str(&i.to_string()); - } - Ok(ast::LitKind::Bool(b)) => { - accumulator.push_str(&b.to_string()); - } - Ok(ast::LitKind::CStr(..)) => { - cx.emit_err(errors::ConcatCStrLit { span: e.span }); - has_errors = true; - } - Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => { - cx.emit_err(errors::ConcatBytestr { span: e.span }); - has_errors = true; - } - Ok(ast::LitKind::Err) => { - has_errors = true; - } - Err(err) => { - report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span); - has_errors = true; + ast::ExprKind::Lit(token_lit) => { + match token_lit_to_lit_kind_and_report_errs(&cx.sess.parse_sess, token_lit, e.span) + { + Ok(ast::LitKind::Str(s, _) | ast::LitKind::Float(s, _)) => { + accumulator.push_str(s.as_str()); + } + Ok(ast::LitKind::Char(c)) => { + accumulator.push(c); + } + Ok(ast::LitKind::Int(i, _)) => { + accumulator.push_str(&i.to_string()); + } + Ok(ast::LitKind::Bool(b)) => { + accumulator.push_str(&b.to_string()); + } + Ok(ast::LitKind::CStr(..)) => { + cx.emit_err(errors::ConcatCStrLit { span: e.span }); + has_errors = true; + } + Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => { + cx.emit_err(errors::ConcatBytestr { span: e.span }); + has_errors = true; + } + Ok(ast::LitKind::Err) | Err(()) => { + has_errors = true; + } } - }, + } // We also want to allow negative numeric literals. ast::ExprKind::Unary(ast::UnOp::Neg, ref expr) if let ast::ExprKind::Lit(token_lit) = expr.kind => { - match ast::LitKind::from_token_lit(token_lit) { + match token_lit_to_lit_kind_and_report_errs(&cx.sess.parse_sess, token_lit, e.span) + { Ok(ast::LitKind::Int(i, _)) => accumulator.push_str(&format!("-{i}")), Ok(ast::LitKind::Float(f, _)) => accumulator.push_str(&format!("-{f}")), - Err(err) => { - report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span); + Err(()) => { has_errors = true; } _ => missing_literal.push(e.span), diff --git a/compiler/rustc_builtin_macros/src/concat_bytes.rs b/compiler/rustc_builtin_macros/src/concat_bytes.rs index 4ae328160f0c8..f4d3bd458fc19 100644 --- a/compiler/rustc_builtin_macros/src/concat_bytes.rs +++ b/compiler/rustc_builtin_macros/src/concat_bytes.rs @@ -1,7 +1,7 @@ use rustc_ast as ast; use rustc_ast::{ptr::P, tokenstream::TokenStream}; use rustc_expand::base::{self, DummyResult}; -use rustc_parse::parser::report_lit_error; +use rustc_parse::parser::token_lit_to_lit_kind_and_report_errs; use rustc_span::Span; use crate::errors; @@ -17,7 +17,7 @@ fn invalid_type_err( ConcatBytesInvalid, ConcatBytesInvalidSuggestion, ConcatBytesNonU8, ConcatBytesOob, }; let snippet = cx.sess.source_map().span_to_snippet(span).ok(); - match ast::LitKind::from_token_lit(token_lit) { + match token_lit_to_lit_kind_and_report_errs(&cx.sess.parse_sess, token_lit, span) { Ok(ast::LitKind::CStr(_, _)) => { // Avoid ambiguity in handling of terminal `NUL` by refusing to // concatenate C string literals as bytes. @@ -60,9 +60,7 @@ fn invalid_type_err( cx.emit_err(ConcatBytesNonU8 { span }); } Ok(ast::LitKind::ByteStr(..) | ast::LitKind::Byte(_)) => unreachable!(), - Err(err) => { - report_lit_error(&cx.sess.parse_sess, err, token_lit, span); - } + Err(()) => {} } } diff --git a/compiler/rustc_expand/src/base.rs b/compiler/rustc_expand/src/base.rs index c9bbea47185b7..f28387dd9c0dd 100644 --- a/compiler/rustc_expand/src/base.rs +++ b/compiler/rustc_expand/src/base.rs @@ -1235,26 +1235,30 @@ pub fn expr_to_spanned_string<'a>( let expr = cx.expander().fully_expand_fragment(AstFragment::Expr(expr)).make_expr(); Err(match expr.kind { - ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) { - Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)), - Ok(ast::LitKind::ByteStr(..)) => { - let mut err = cx.struct_span_err(expr.span, err_msg); - let span = expr.span.shrink_to_lo(); - err.span_suggestion( - span.with_hi(span.lo() + BytePos(1)), - "consider removing the leading `b`", - "", - Applicability::MaybeIncorrect, - ); - Some((err, true)) - } - Ok(ast::LitKind::Err) => None, - Err(err) => { - parser::report_lit_error(&cx.sess.parse_sess, err, token_lit, expr.span); - None - } - _ => Some((cx.struct_span_err(expr.span, err_msg), false)), - }, + ast::ExprKind::Lit(token_lit) => { + let res = match parser::token_lit_to_lit_kind_and_report_errs( + &cx.sess.parse_sess, + token_lit, + expr.span, + ) { + Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)), + Ok(ast::LitKind::ByteStr(..)) => { + let mut err = cx.struct_span_err(expr.span, err_msg); + let span = expr.span.shrink_to_lo(); + err.span_suggestion( + span.with_hi(span.lo() + BytePos(1)), + "consider removing the leading `b`", + "", + Applicability::MaybeIncorrect, + ); + Some((err, true)) + } + Ok(ast::LitKind::Err) => None, + Err(()) => None, + _ => Some((cx.struct_span_err(expr.span, err_msg), false)), + }; + res + } ast::ExprKind::Err => None, _ => Some((cx.struct_span_err(expr.span, err_msg), false)), }) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 7c8065f3cb9b9..4941531e3ce44 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -347,7 +347,7 @@ where // them in the range computation. while let Some(c) = chars.next() { let start = src.len() - chars.as_str().len() - c.len_utf8(); - let res = match c { + let res: Result = match c { '\\' => { match chars.clone().next() { Some('\n') => { diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index b1dc1f98777a7..40a1e8703554f 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -1,5 +1,3 @@ -use std::ops::Range; - use crate::errors; use crate::lexer::unicode_chars::UNICODE_ARRAY; use crate::make_unclosed_delims_error; @@ -8,7 +6,6 @@ use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind}; use rustc_ast::tokenstream::TokenStream; use rustc_ast::util::unicode::contains_text_flow_control_chars; use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey}; -use rustc_lexer::unescape::{self, EscapeError, Mode}; use rustc_lexer::{Base, DocStyle, RawStrError}; use rustc_lexer::{Cursor, LiteralKind}; use rustc_session::lint::builtin::{ @@ -21,10 +18,10 @@ use rustc_span::{edition::Edition, BytePos, Pos, Span}; mod diagnostics; mod tokentrees; -mod unescape_error_reporting; +pub(crate) mod unescape_error_reporting; mod unicode_chars; -use unescape_error_reporting::{emit_unescape_error, escaped_char}; +use unescape_error_reporting::escaped_char; // This type is used a lot. Make sure it doesn't unintentionally get bigger. // @@ -409,7 +406,7 @@ impl<'a> StringReader<'a> { error_code!(E0762), ) } - self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' ' + self.cook_quoted(token::Char, start, end, 1, 1) // ' ' } rustc_lexer::LiteralKind::Byte { terminated } => { if !terminated { @@ -419,7 +416,7 @@ impl<'a> StringReader<'a> { error_code!(E0763), ) } - self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' ' + self.cook_quoted(token::Byte, start, end, 2, 1) // b' ' } rustc_lexer::LiteralKind::Str { terminated } => { if !terminated { @@ -429,7 +426,7 @@ impl<'a> StringReader<'a> { error_code!(E0765), ) } - self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " " + self.cook_quoted(token::Str, start, end, 1, 1) // " " } rustc_lexer::LiteralKind::ByteStr { terminated } => { if !terminated { @@ -439,7 +436,7 @@ impl<'a> StringReader<'a> { error_code!(E0766), ) } - self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" " + self.cook_quoted(token::ByteStr, start, end, 2, 1) // b" " } rustc_lexer::LiteralKind::CStr { terminated } => { if !terminated { @@ -449,13 +446,13 @@ impl<'a> StringReader<'a> { error_code!(E0767), ) } - self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" " + self.cook_quoted(token::CStr, start, end, 2, 1) // c" " } rustc_lexer::LiteralKind::RawStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); let kind = token::StrRaw(n_hashes); - self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "## + self.cook_quoted(kind, start, end, 2 + n, 1 + n) // r##" "## } else { self.report_raw_str_error(start, 1); } @@ -464,7 +461,7 @@ impl<'a> StringReader<'a> { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); let kind = token::ByteStrRaw(n_hashes); - self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "## + self.cook_quoted(kind, start, end, 3 + n, 1 + n) // br##" "## } else { self.report_raw_str_error(start, 2); } @@ -473,7 +470,7 @@ impl<'a> StringReader<'a> { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); let kind = token::CStrRaw(n_hashes); - self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "## + self.cook_quoted(kind, start, end, 3 + n, 1 + n) // cr##" "## } else { self.report_raw_str_error(start, 2); } @@ -693,82 +690,18 @@ impl<'a> StringReader<'a> { self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num }); } - fn cook_common( + fn cook_quoted( &self, kind: token::LitKind, - mode: Mode, start: BytePos, end: BytePos, prefix_len: u32, postfix_len: u32, - unescape: fn(&str, Mode, &mut dyn FnMut(Range, Result<(), EscapeError>)), ) -> (token::LitKind, Symbol) { - let mut has_fatal_err = false; let content_start = start + BytePos(prefix_len); let content_end = end - BytePos(postfix_len); let lit_content = self.str_from_to(content_start, content_end); - unescape(lit_content, mode, &mut |range, result| { - // Here we only check for errors. The actual unescaping is done later. - if let Err(err) = result { - let span_with_quotes = self.mk_sp(start, end); - let (start, end) = (range.start as u32, range.end as u32); - let lo = content_start + BytePos(start); - let hi = lo + BytePos(end - start); - let span = self.mk_sp(lo, hi); - if err.is_fatal() { - has_fatal_err = true; - } - emit_unescape_error( - &self.sess.span_diagnostic, - lit_content, - span_with_quotes, - span, - mode, - range, - err, - ); - } - }); - - // We normally exclude the quotes for the symbol, but for errors we - // include it because it results in clearer error messages. - if !has_fatal_err { - (kind, Symbol::intern(lit_content)) - } else { - (token::Err, self.symbol_from_to(start, end)) - } - } - - fn cook_quoted( - &self, - kind: token::LitKind, - mode: Mode, - start: BytePos, - end: BytePos, - prefix_len: u32, - postfix_len: u32, - ) -> (token::LitKind, Symbol) { - self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_literal(src, mode, &mut |span, result| { - callback(span, result.map(drop)) - }) - }) - } - - fn cook_c_string( - &self, - kind: token::LitKind, - mode: Mode, - start: BytePos, - end: BytePos, - prefix_len: u32, - postfix_len: u32, - ) -> (token::LitKind, Symbol) { - self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_c_string(src, mode, &mut |span, result| { - callback(span, result.map(drop)) - }) - }) + (kind, Symbol::intern(lit_content)) } } diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 65a46ec6c476b..ea8c588cc03cc 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -11,26 +11,32 @@ use crate::errors::{MoreThanOneCharNote, MoreThanOneCharSugg, NoBraceUnicodeSub, pub(crate) fn emit_unescape_error( handler: &Handler, - // interior part of the literal, without quotes + // interior part of the literal, between quotes lit: &str, // full span of the literal, including quotes and any prefix full_lit_span: Span, - // span of the error part of the literal - err_span: Span, mode: Mode, + prefix_len: u32, // range of the error inside `lit` range: Range, error: EscapeError, ) { + let (start, end) = (range.start as u32, range.end as u32); + let lo = full_lit_span.lo() + BytePos(prefix_len) + BytePos(start); + let hi = lo + BytePos(end - start); + let err_span = full_lit_span.with_lo(lo).with_hi(hi); + debug!( - "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}", - lit, full_lit_span, mode, range, error + "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}, {:?}", + lit, full_lit_span, err_span, mode, range, error ); + let last_char = || { let c = lit[range.clone()].chars().next_back().unwrap(); let span = err_span.with_lo(err_span.hi() - BytePos(c.len_utf8() as u32)); (c, span) }; + match error { EscapeError::LoneSurrogateUnicodeEscape => { handler diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index e48e1c1a7be74..dfd8a47ad1d95 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -8,6 +8,7 @@ use super::{ }; use crate::errors; +use crate::lexer::unescape_error_reporting::emit_unescape_error; use crate::maybe_recover_from_interpolated_ty_qpath; use ast::mut_visit::{noop_visit_expr, MutVisitor}; use ast::{CoroutineKind, GenBlockKind, Pat, Path, PathSegment}; @@ -2047,27 +2048,30 @@ impl<'a> Parser<'a> { let recovered = self.recover_after_dot(); let token = recovered.as_ref().unwrap_or(&self.token); match token::Lit::from_token(token) { - Some(lit) => { - match MetaItemLit::from_token_lit(lit, token.span) { + Some(token_lit) => { + let err_span = token.uninterpolated_span(); + let lit = token_lit_to_meta_item_lit_and_report_errs( + self.sess, token_lit, token.span, err_span, + ); + + let res = match lit { Ok(lit) => { self.bump(); - Some(lit) + lit } - Err(err) => { - let span = token.uninterpolated_span(); + Err(()) => { self.bump(); - report_lit_error(self.sess, err, lit, span); // Pack possible quotes and prefixes from the original literal into // the error literal's symbol so they can be pretty-printed faithfully. - let suffixless_lit = token::Lit::new(lit.kind, lit.symbol, None); + let suffixless_lit = + token::Lit::new(token_lit.kind, token_lit.symbol, None); let symbol = Symbol::intern(&suffixless_lit.to_string()); - let lit = token::Lit::new(token::Err, symbol, lit.suffix); - Some( - MetaItemLit::from_token_lit(lit, span) - .unwrap_or_else(|_| unreachable!()), - ) + let lit = token::Lit::new(token::Err, symbol, token_lit.suffix); + MetaItemLit::from_token_lit(lit, err_span) + .unwrap_or_else(|_| unreachable!()) } - } + }; + Some(res) } None => None, } @@ -3667,7 +3671,34 @@ impl<'a> Parser<'a> { } } -pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) { +// Use this for call sites where we need to print errors about invalid literals. +pub fn token_lit_to_lit_kind_and_report_errs( + sess: &ParseSess, + token_lit: token::Lit, + span: Span, +) -> Result { + let (lit, errs) = ast::LitKind::from_token_lit_with_errs(token_lit); + for err in errs { + report_lit_error(sess, err, token_lit, span); + } + lit +} + +// Use this for call sites where we need to print errors about invalid literals. +pub fn token_lit_to_meta_item_lit_and_report_errs( + sess: &ParseSess, + token_lit: token::Lit, + lit_span: Span, + err_span: Span, +) -> Result { + let (lit, errs) = ast::MetaItemLit::from_token_lit_with_errs(token_lit, lit_span); + for err in errs { + report_lit_error(sess, err, token_lit, err_span); + } + lit +} + +fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) { // Checks if `s` looks like i32 or u1234 etc. fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) @@ -3700,6 +3731,17 @@ pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: // `LexerError` is an error, but it was already reported // by lexer, so here we don't report it the second time. LitError::LexerError => {} + LitError::EscapeError { mode, prefix_len, range, err } => { + emit_unescape_error( + &sess.span_diagnostic, + symbol.as_str(), + span, + mode, + prefix_len, + range, + err, + ); + } LitError::InvalidSuffix => { if let Some(suffix) = suffix { sess.emit_err(errors::InvalidLiteralSuffix { span, kind: kind.descr(), suffix }); diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 2acbf0e948c7b..95548bc28c721 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -11,7 +11,9 @@ mod stmt; mod ty; use crate::lexer::UnmatchedDelim; -pub use crate::parser::expr::report_lit_error; +pub use crate::parser::expr::{ + token_lit_to_lit_kind_and_report_errs, token_lit_to_meta_item_lit_and_report_errs, +}; pub use attr_wrapper::AttrWrapper; pub use diagnostics::AttemptLocalParseRecovery; pub(crate) use expr::ForbiddenLetReason; diff --git a/compiler/rustc_parse/src/validate_attr.rs b/compiler/rustc_parse/src/validate_attr.rs index 81055431f649d..13c6a2ae22fe2 100644 --- a/compiler/rustc_parse/src/validate_attr.rs +++ b/compiler/rustc_parse/src/validate_attr.rs @@ -1,14 +1,13 @@ //! Meta-syntax validation logic of attributes for post-expansion. -use crate::{errors, parse_in}; +use crate::{errors, parse_in, parser}; use rustc_ast::token::Delimiter; use rustc_ast::tokenstream::DelimSpan; -use rustc_ast::MetaItemKind; use rustc_ast::{self as ast, AttrArgs, AttrArgsEq, Attribute, DelimArgs, MetaItem}; +use rustc_ast::{LitKind, MetaItemKind, MetaItemLit}; use rustc_errors::{Applicability, FatalError, PResult}; use rustc_feature::{AttributeTemplate, BuiltinAttribute, BUILTIN_ATTRIBUTE_MAP}; -use rustc_session::errors::report_lit_error; use rustc_session::lint::builtin::ILL_FORMED_ATTRIBUTE_INPUT; use rustc_session::parse::ParseSess; use rustc_span::{sym, Span, Symbol}; @@ -52,8 +51,10 @@ pub fn parse_meta<'a>(sess: &'a ParseSess, attr: &Attribute) -> PResult<'a, Meta } AttrArgs::Eq(_, AttrArgsEq::Ast(expr)) => { if let ast::ExprKind::Lit(token_lit) = expr.kind { - let res = ast::MetaItemLit::from_token_lit(token_lit, expr.span); - let res = match res { + let lit = parser::token_lit_to_meta_item_lit_and_report_errs( + sess, token_lit, expr.span, expr.span, + ); + match lit { Ok(lit) => { if token_lit.suffix.is_some() { let mut err = sess.span_diagnostic.struct_span_err( @@ -69,18 +70,16 @@ pub fn parse_meta<'a>(sess: &'a ParseSess, attr: &Attribute) -> PResult<'a, Meta MetaItemKind::NameValue(lit) } } - Err(err) => { - report_lit_error(sess, err, token_lit, expr.span); - let lit = ast::MetaItemLit { + Err(()) => { + let lit = MetaItemLit { symbol: token_lit.symbol, suffix: token_lit.suffix, - kind: ast::LitKind::Err, + kind: LitKind::Err, span: expr.span, }; MetaItemKind::NameValue(lit) } - }; - res + } } else { // Example cases: // - `#[foo = 1+1]`: results in `ast::ExprKind::BinOp`. diff --git a/tests/rustdoc-ui/ignore-block-help.rs b/tests/rustdoc-ui/ignore-block-help.rs index 86f6a2868fb56..fb27d954f9a5a 100644 --- a/tests/rustdoc-ui/ignore-block-help.rs +++ b/tests/rustdoc-ui/ignore-block-help.rs @@ -1,10 +1,10 @@ // check-pass /// ```ignore (to-prevent-tidy-error) -/// let heart = '❤️'; +/// let unterminated = ' /// ``` //~^^^ WARNING could not parse code block //~| NOTE on by default -//~| NOTE character literal may only contain one codepoint +//~| NOTE unterminated character literal //~| HELP `ignore` code blocks require valid Rust code pub struct X; diff --git a/tests/rustdoc-ui/ignore-block-help.stderr b/tests/rustdoc-ui/ignore-block-help.stderr index a30ea51dd8a7f..f5ed287a99834 100644 --- a/tests/rustdoc-ui/ignore-block-help.stderr +++ b/tests/rustdoc-ui/ignore-block-help.stderr @@ -3,7 +3,7 @@ warning: could not parse code block as Rust code | LL | /// ```ignore (to-prevent-tidy-error) | _____^ -LL | | /// let heart = '❤️'; +LL | | /// let unterminated = ' LL | | /// ``` | |_______^ | @@ -12,7 +12,7 @@ help: `ignore` code blocks require valid Rust code for syntax highlighting; mark | LL | /// ```ignore (to-prevent-tidy-error) | ^^^ - = note: error from rustc: character literal may only contain one codepoint + = note: error from rustc: unterminated character literal = note: `#[warn(rustdoc::invalid_rust_codeblocks)]` on by default warning: 1 warning emitted diff --git a/tests/ui/fmt/format-string-error-2.stderr b/tests/ui/fmt/format-string-error-2.stderr index dfd24bf60ad52..50ead59e4e911 100644 --- a/tests/ui/fmt/format-string-error-2.stderr +++ b/tests/ui/fmt/format-string-error-2.stderr @@ -1,9 +1,3 @@ -error: incorrect unicode escape sequence - --> $DIR/format-string-error-2.rs:77:20 - | -LL | println!("\x7B}\u8 {", 1); - | ^^^ help: format of unicode escape sequences uses braces: `\u{8}` - error: invalid format string: expected `'}'`, found `'a'` --> $DIR/format-string-error-2.rs:5:5 | @@ -155,6 +149,12 @@ LL | println!("\x7B}\u{8} {", 1); | = note: if you intended to print `{`, you can escape it using `{{` +error: incorrect unicode escape sequence + --> $DIR/format-string-error-2.rs:77:20 + | +LL | println!("\x7B}\u8 {", 1); + | ^^^ help: format of unicode escape sequences uses braces: `\u{8}` + error: invalid format string: unmatched `}` found --> $DIR/format-string-error-2.rs:81:21 | diff --git a/tests/ui/lexer/error-stage.rs b/tests/ui/lexer/error-stage.rs index c8d88f745a1f0..5edb334c109dd 100644 --- a/tests/ui/lexer/error-stage.rs +++ b/tests/ui/lexer/error-stage.rs @@ -1,3 +1,5 @@ +// edition:2021 + // This test is about the treatment of invalid literals. In particular, some // literals are only considered invalid if they survive to HIR lowering. // @@ -41,6 +43,11 @@ // https://doc.rust-lang.org/reference/tokens.html#integer-literals says that // literals like `128_i8` and `256_u8` "are too big for their type, but are // still valid tokens". +// +// String literals, etc. +// --------------------- +// There are various ways that char, byte, and string literals can be invalid, +// mostly involving invalid escape sequences. macro_rules! sink { ($($x:tt;)*) => {()} @@ -48,7 +55,7 @@ macro_rules! sink { // The invalid literals are ignored because the macro consumes them. Except for // `0b10.0f32` because it's a lexer error. -const _: () = sink! { +const c1: () = sink! { "string"any_suffix; // OK 10u123; // OK 10.0f123; // OK @@ -60,7 +67,7 @@ const _: () = sink! { // The invalid literals used to cause errors, but this was changed by #102944. // Except for `0b010.0f32`, because it's a lexer error. #[cfg(FALSE)] -fn configured_out() { +fn configured_out1() { "string"any_suffix; // OK 10u123; // OK 10.0f123; // OK @@ -70,7 +77,7 @@ fn configured_out() { } // All the invalid literals cause errors. -fn main() { +fn f1() { "string"any_suffix; //~ ERROR suffixes on string literals are invalid 10u123; //~ ERROR invalid width `123` for integer literal 10.0f123; //~ ERROR invalid width `123` for float literal @@ -78,3 +85,36 @@ fn main() { 0b10.0f32; //~ ERROR binary float literal is not supported 999340282366920938463463374607431768211455999; //~ ERROR integer literal is too large } + +// These invalid literals used to cause errors, but this was changed by #118699. +const c2: () = sink! { + ''; + b'ab'; + "\a"; + b"\xzz"; + "\u20"; + c"\u{999999}"; +}; + +// These invalid literals used to cause errors, but this was changed by #118699. +#[cfg(FALSE)] +fn configured_out2() { + ''; + b'ab'; + "\a"; + b"\xzz"; + "\u20"; + c"\u{999999}"; +} + +// These invalid literals cause errors. +fn f2() { + ''; //~ ERROR empty character literal + b'ab'; //~ ERROR character literal may only contain one codepoint + "\a"; //~ ERROR unknown character escape: `a` + b"\xzz"; //~ ERROR invalid character in numeric character escape + "\u20"; //~ ERROR incorrect unicode escape sequence + c"\u{999999}"; //~ ERROR invalid unicode character escape +} + +fn main() {} diff --git a/tests/ui/lexer/error-stage.stderr b/tests/ui/lexer/error-stage.stderr index ecbdb14dc868e..f3572600ac193 100644 --- a/tests/ui/lexer/error-stage.stderr +++ b/tests/ui/lexer/error-stage.stderr @@ -1,29 +1,29 @@ error: binary float literal is not supported - --> $DIR/error-stage.rs:56:5 + --> $DIR/error-stage.rs:63:5 | LL | 0b10.0f32; | ^^^^^^ error: binary float literal is not supported - --> $DIR/error-stage.rs:68:5 + --> $DIR/error-stage.rs:75:5 | LL | 0b10.0f32; | ^^^^^^ error: binary float literal is not supported - --> $DIR/error-stage.rs:78:5 + --> $DIR/error-stage.rs:85:5 | LL | 0b10.0f32; | ^^^^^^ error: suffixes on string literals are invalid - --> $DIR/error-stage.rs:74:5 + --> $DIR/error-stage.rs:81:5 | LL | "string"any_suffix; | ^^^^^^^^^^^^^^^^^^ invalid suffix `any_suffix` error: invalid width `123` for integer literal - --> $DIR/error-stage.rs:75:5 + --> $DIR/error-stage.rs:82:5 | LL | 10u123; | ^^^^^^ @@ -31,7 +31,7 @@ LL | 10u123; = help: valid widths are 8, 16, 32, 64 and 128 error: invalid width `123` for float literal - --> $DIR/error-stage.rs:76:5 + --> $DIR/error-stage.rs:83:5 | LL | 10.0f123; | ^^^^^^^^ @@ -39,18 +39,69 @@ LL | 10.0f123; = help: valid widths are 32 and 64 error: binary float literal is not supported - --> $DIR/error-stage.rs:77:5 + --> $DIR/error-stage.rs:84:5 | LL | 0b10f32; | ^^^^^^^ not supported error: integer literal is too large - --> $DIR/error-stage.rs:79:5 + --> $DIR/error-stage.rs:86:5 | LL | 999340282366920938463463374607431768211455999; | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = note: value exceeds limit of `340282366920938463463374607431768211455` -error: aborting due to 8 previous errors +error: empty character literal + --> $DIR/error-stage.rs:112:6 + | +LL | ''; + | ^ empty character literal + +error: character literal may only contain one codepoint + --> $DIR/error-stage.rs:113:5 + | +LL | b'ab'; + | ^^^^^ + | +help: if you meant to write a byte string literal, use double quotes + | +LL | b"ab"; + | ~~~~~ + +error: unknown character escape: `a` + --> $DIR/error-stage.rs:114:7 + | +LL | "\a"; + | ^ unknown character escape + | + = help: for more information, visit +help: if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal + | +LL | r"\a"; + | ~~~~~ + +error: invalid character in numeric character escape: `z` + --> $DIR/error-stage.rs:115:9 + | +LL | b"\xzz"; + | ^ invalid character in numeric character escape + +error: incorrect unicode escape sequence + --> $DIR/error-stage.rs:116:6 + | +LL | "\u20"; + | ^^^- + | | + | help: format of unicode escape sequences uses braces: `\u{20}` + +error: invalid unicode character escape + --> $DIR/error-stage.rs:117:7 + | +LL | c"\u{999999}"; + | ^^^^^^^^^^ invalid escape + | + = help: unicode escape must be at most 10FFFF + +error: aborting due to 14 previous errors diff --git a/tests/ui/lexer/lex-bad-char-literals-7.rs b/tests/ui/lexer/lex-bad-char-literals-7.rs index c675df2f3ccd0..55484a610141b 100644 --- a/tests/ui/lexer/lex-bad-char-literals-7.rs +++ b/tests/ui/lexer/lex-bad-char-literals-7.rs @@ -7,7 +7,4 @@ fn main() { // Next two are OK, but may befool error recovery let _ = '/'; let _ = b'/'; - - let _ = ' hello // here's a comment - //~^ ERROR: unterminated character literal } diff --git a/tests/ui/lexer/lex-bad-char-literals-7.stderr b/tests/ui/lexer/lex-bad-char-literals-7.stderr index 255b9c6899999..16ba7676932fd 100644 --- a/tests/ui/lexer/lex-bad-char-literals-7.stderr +++ b/tests/ui/lexer/lex-bad-char-literals-7.stderr @@ -10,12 +10,5 @@ error: empty unicode escape LL | let _: char = '\u{}'; | ^^^^ this escape must have at least 1 hex digit -error[E0762]: unterminated character literal - --> $DIR/lex-bad-char-literals-7.rs:11:13 - | -LL | let _ = ' hello // here's a comment - | ^^^^^^^^ - -error: aborting due to 3 previous errors +error: aborting due to 2 previous errors -For more information about this error, try `rustc --explain E0762`. diff --git a/tests/ui/lexer/lex-bad-char-literals-8.rs b/tests/ui/lexer/lex-bad-char-literals-8.rs new file mode 100644 index 0000000000000..6c8cbd3a82a85 --- /dev/null +++ b/tests/ui/lexer/lex-bad-char-literals-8.rs @@ -0,0 +1,4 @@ +fn main() { + let _ = ' hello // here's a comment + //~^ ERROR: unterminated character literal +} diff --git a/tests/ui/lexer/lex-bad-char-literals-8.stderr b/tests/ui/lexer/lex-bad-char-literals-8.stderr new file mode 100644 index 0000000000000..04c95df0d0601 --- /dev/null +++ b/tests/ui/lexer/lex-bad-char-literals-8.stderr @@ -0,0 +1,9 @@ +error[E0762]: unterminated character literal + --> $DIR/lex-bad-char-literals-8.rs:2:13 + | +LL | let _ = ' hello // here's a comment + | ^^^^^^^^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0762`. diff --git a/tests/ui/parser/byte-literals-2.rs b/tests/ui/parser/byte-literals-2.rs new file mode 100644 index 0000000000000..fb9e2ac69944a --- /dev/null +++ b/tests/ui/parser/byte-literals-2.rs @@ -0,0 +1,3 @@ +pub fn main() { + b'a //~ ERROR unterminated byte constant [E0763] +} diff --git a/tests/ui/parser/byte-literals-2.stderr b/tests/ui/parser/byte-literals-2.stderr new file mode 100644 index 0000000000000..f0e042ad605db --- /dev/null +++ b/tests/ui/parser/byte-literals-2.stderr @@ -0,0 +1,9 @@ +error[E0763]: unterminated byte constant + --> $DIR/byte-literals-2.rs:2:6 + | +LL | b'a + | ^^^^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0763`. diff --git a/tests/ui/parser/byte-literals.rs b/tests/ui/parser/byte-literals.rs index 896dc1a1a5fba..963a0bb608d84 100644 --- a/tests/ui/parser/byte-literals.rs +++ b/tests/ui/parser/byte-literals.rs @@ -8,5 +8,4 @@ pub fn main() { b' '; //~ ERROR byte constant must be escaped b'''; //~ ERROR byte constant must be escaped b'é'; //~ ERROR non-ASCII character in byte literal - b'a //~ ERROR unterminated byte constant [E0763] } diff --git a/tests/ui/parser/byte-literals.stderr b/tests/ui/parser/byte-literals.stderr index 5b414c8927e2c..97805e01db49f 100644 --- a/tests/ui/parser/byte-literals.stderr +++ b/tests/ui/parser/byte-literals.stderr @@ -43,12 +43,5 @@ help: if you meant to use the unicode code point for 'é', use a \xHH escape LL | b'\xE9'; | ~~~~ -error[E0763]: unterminated byte constant - --> $DIR/byte-literals.rs:11:6 - | -LL | b'a - | ^^^^ - -error: aborting due to 7 previous errors +error: aborting due to 6 previous errors -For more information about this error, try `rustc --explain E0763`. diff --git a/tests/ui/parser/byte-string-literals-2.rs b/tests/ui/parser/byte-string-literals-2.rs new file mode 100644 index 0000000000000..7eb52b854e358 --- /dev/null +++ b/tests/ui/parser/byte-string-literals-2.rs @@ -0,0 +1,3 @@ +pub fn main() { + b"a //~ ERROR unterminated double quote byte string +} diff --git a/tests/ui/parser/byte-string-literals-2.stderr b/tests/ui/parser/byte-string-literals-2.stderr new file mode 100644 index 0000000000000..6fdb3c64ba783 --- /dev/null +++ b/tests/ui/parser/byte-string-literals-2.stderr @@ -0,0 +1,11 @@ +error[E0766]: unterminated double quote byte string + --> $DIR/byte-string-literals-2.rs:2:6 + | +LL | b"a + | ______^ +LL | | } + | |__^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0766`. diff --git a/tests/ui/parser/byte-string-literals.rs b/tests/ui/parser/byte-string-literals.rs index 30a4f50c4e40b..c14488dcb6689 100644 --- a/tests/ui/parser/byte-string-literals.rs +++ b/tests/ui/parser/byte-string-literals.rs @@ -5,5 +5,4 @@ pub fn main() { b"\x0Z"; //~ ERROR invalid character in numeric character escape: `Z` b"é"; //~ ERROR non-ASCII character in byte string literal br##"é"##; //~ ERROR non-ASCII character in raw byte string literal - b"a //~ ERROR unterminated double quote byte string } diff --git a/tests/ui/parser/byte-string-literals.stderr b/tests/ui/parser/byte-string-literals.stderr index 655b6998e85ff..2a2830c346825 100644 --- a/tests/ui/parser/byte-string-literals.stderr +++ b/tests/ui/parser/byte-string-literals.stderr @@ -37,14 +37,5 @@ error: non-ASCII character in raw byte string literal LL | br##"é"##; | ^ must be ASCII -error[E0766]: unterminated double quote byte string - --> $DIR/byte-string-literals.rs:8:6 - | -LL | b"a - | ______^ -LL | | } - | |__^ - -error: aborting due to 6 previous errors +error: aborting due to 5 previous errors -For more information about this error, try `rustc --explain E0766`. diff --git a/tests/ui/parser/issues/issue-62913.rs b/tests/ui/parser/issues/issue-62913.rs index a55ef5ac71030..c77ef61a97b10 100644 --- a/tests/ui/parser/issues/issue-62913.rs +++ b/tests/ui/parser/issues/issue-62913.rs @@ -1,4 +1,5 @@ -"\u\\" -//~^ ERROR incorrect unicode escape sequence -//~| ERROR invalid trailing slash in literal -//~| ERROR expected item, found `"\u\"` +fn main() { + _ = "\u\\"; + //~^ ERROR incorrect unicode escape sequence + //~| ERROR invalid trailing slash in literal +} diff --git a/tests/ui/parser/issues/issue-62913.stderr b/tests/ui/parser/issues/issue-62913.stderr index c33e46837287f..bee6dd4580037 100644 --- a/tests/ui/parser/issues/issue-62913.stderr +++ b/tests/ui/parser/issues/issue-62913.stderr @@ -1,24 +1,16 @@ error: incorrect unicode escape sequence - --> $DIR/issue-62913.rs:1:2 + --> $DIR/issue-62913.rs:2:10 | -LL | "\u\" - | ^^^ incorrect unicode escape sequence +LL | _ = "\u\"; + | ^^^ incorrect unicode escape sequence | = help: format of unicode escape sequences is `\u{...}` error: invalid trailing slash in literal - --> $DIR/issue-62913.rs:1:5 + --> $DIR/issue-62913.rs:2:13 | -LL | "\u\" - | ^ invalid trailing slash in literal +LL | _ = "\u\"; + | ^ invalid trailing slash in literal -error: expected item, found `"\u\"` - --> $DIR/issue-62913.rs:1:1 - | -LL | "\u\" - | ^^^^^^ expected item - | - = note: for a full list of items that can appear in modules, see - -error: aborting due to 3 previous errors +error: aborting due to 2 previous errors diff --git a/tests/ui/parser/macro/literals-are-validated-before-expansion.rs b/tests/ui/parser/macro/literals-are-validated-before-expansion.rs deleted file mode 100644 index c3fc754b5567f..0000000000000 --- a/tests/ui/parser/macro/literals-are-validated-before-expansion.rs +++ /dev/null @@ -1,10 +0,0 @@ -macro_rules! black_hole { - ($($tt:tt)*) => {} -} - -fn main() { - black_hole! { '\u{FFFFFF}' } - //~^ ERROR: invalid unicode character escape - black_hole! { "this is surrogate: \u{DAAA}" } - //~^ ERROR: invalid unicode character escape -} diff --git a/tests/ui/parser/macro/literals-are-validated-before-expansion.stderr b/tests/ui/parser/macro/literals-are-validated-before-expansion.stderr deleted file mode 100644 index e874f62497ea8..0000000000000 --- a/tests/ui/parser/macro/literals-are-validated-before-expansion.stderr +++ /dev/null @@ -1,18 +0,0 @@ -error: invalid unicode character escape - --> $DIR/literals-are-validated-before-expansion.rs:6:20 - | -LL | black_hole! { '\u{FFFFFF}' } - | ^^^^^^^^^^ invalid escape - | - = help: unicode escape must be at most 10FFFF - -error: invalid unicode character escape - --> $DIR/literals-are-validated-before-expansion.rs:8:39 - | -LL | black_hole! { "this is surrogate: \u{DAAA}" } - | ^^^^^^^^ invalid escape - | - = help: unicode escape must not be a surrogate - -error: aborting due to 2 previous errors - diff --git a/tests/ui/parser/raw/raw-byte-string-literals-2.rs b/tests/ui/parser/raw/raw-byte-string-literals-2.rs new file mode 100644 index 0000000000000..8ffda513dbf6f --- /dev/null +++ b/tests/ui/parser/raw/raw-byte-string-literals-2.rs @@ -0,0 +1,3 @@ +pub fn main() { + br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation +} diff --git a/tests/ui/parser/raw/raw-byte-string-literals-2.stderr b/tests/ui/parser/raw/raw-byte-string-literals-2.stderr new file mode 100644 index 0000000000000..b4151eeef7017 --- /dev/null +++ b/tests/ui/parser/raw/raw-byte-string-literals-2.stderr @@ -0,0 +1,8 @@ +error: found invalid character; only `#` is allowed in raw string delimitation: ~ + --> $DIR/raw-byte-string-literals-2.rs:2:5 + | +LL | br##~"a"~##; + | ^^^^^ + +error: aborting due to 1 previous error + diff --git a/tests/ui/parser/raw/raw-byte-string-literals.rs b/tests/ui/parser/raw/raw-byte-string-literals.rs index 1b859fee596ad..3f91c381a9039 100644 --- a/tests/ui/parser/raw/raw-byte-string-literals.rs +++ b/tests/ui/parser/raw/raw-byte-string-literals.rs @@ -3,5 +3,4 @@ pub fn main() { br"a "; //~ ERROR bare CR not allowed in raw string br"é"; //~ ERROR non-ASCII character in raw byte string literal - br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation } diff --git a/tests/ui/parser/raw/raw-byte-string-literals.stderr b/tests/ui/parser/raw/raw-byte-string-literals.stderr index a2f27d1ed70ae..2a4073243cbca 100644 --- a/tests/ui/parser/raw/raw-byte-string-literals.stderr +++ b/tests/ui/parser/raw/raw-byte-string-literals.stderr @@ -10,11 +10,5 @@ error: non-ASCII character in raw byte string literal LL | br"é"; | ^ must be ASCII -error: found invalid character; only `#` is allowed in raw string delimitation: ~ - --> $DIR/raw-byte-string-literals.rs:6:5 - | -LL | br##~"a"~##; - | ^^^^^ - -error: aborting due to 3 previous errors +error: aborting due to 2 previous errors diff --git a/tests/ui/parser/unicode-control-codepoints.stderr b/tests/ui/parser/unicode-control-codepoints.stderr index fc071a9419142..806e222507f6e 100644 --- a/tests/ui/parser/unicode-control-codepoints.stderr +++ b/tests/ui/parser/unicode-control-codepoints.stderr @@ -1,87 +1,3 @@ -error: unicode escape in byte string - --> $DIR/unicode-control-codepoints.rs:6:26 - | -LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); - | ^^^^^^^^ unicode escape in byte string - | - = help: unicode escape sequences cannot be used as a byte or in a byte string - -error: unicode escape in byte string - --> $DIR/unicode-control-codepoints.rs:6:35 - | -LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); - | ^^^^^^^^ unicode escape in byte string - | - = help: unicode escape sequences cannot be used as a byte or in a byte string - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:26 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{202e}' - | -help: if you meant to use the UTF-8 encoding of '\u{202e}', use \xHH escapes - | -LL | println!("{:?}", b"/*\xE2\x80\xAE } if isAdmin begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:30 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{2066}' - | -help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes - | -LL | println!("{:?}", b"/* } \xE2\x81\xA6if isAdmin begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:41 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{2069}' - | -help: if you meant to use the UTF-8 encoding of '\u{2069}', use \xHH escapes - | -LL | println!("{:?}", b"/* } if isAdmin\xE2\x81\xA9 begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:43 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{2066}' - | -help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes - | -LL | println!("{:?}", b"/* } if isAdmin \xE2\x81\xA6 begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:29 - | -LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); - | ^ must be ASCII but is '\u{202e}' - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:33 - | -LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); - | ^ must be ASCII but is '\u{2066}' - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:44 - | -LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); - | ^ must be ASCII but is '\u{2069}' - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:46 - | -LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); - | ^ must be ASCII but is '\u{2066}' - error: unicode codepoint changing visible direction of text present in comment --> $DIR/unicode-control-codepoints.rs:2:5 | @@ -188,5 +104,89 @@ LL | | * ''); */fn bar() {} = note: if their presence wasn't intentional, you can remove them = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}' +error: unicode escape in byte string + --> $DIR/unicode-control-codepoints.rs:6:26 + | +LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); + | ^^^^^^^^ unicode escape in byte string + | + = help: unicode escape sequences cannot be used as a byte or in a byte string + +error: unicode escape in byte string + --> $DIR/unicode-control-codepoints.rs:6:35 + | +LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); + | ^^^^^^^^ unicode escape in byte string + | + = help: unicode escape sequences cannot be used as a byte or in a byte string + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:26 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ must be ASCII but is '\u{202e}' + | +help: if you meant to use the UTF-8 encoding of '\u{202e}', use \xHH escapes + | +LL | println!("{:?}", b"/*\xE2\x80\xAE } if isAdmin begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:30 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ must be ASCII but is '\u{2066}' + | +help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes + | +LL | println!("{:?}", b"/* } \xE2\x81\xA6if isAdmin begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:41 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ must be ASCII but is '\u{2069}' + | +help: if you meant to use the UTF-8 encoding of '\u{2069}', use \xHH escapes + | +LL | println!("{:?}", b"/* } if isAdmin\xE2\x81\xA9 begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:43 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ must be ASCII but is '\u{2066}' + | +help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes + | +LL | println!("{:?}", b"/* } if isAdmin \xE2\x81\xA6 begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:29 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{202e}' + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:33 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{2066}' + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:44 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{2069}' + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:46 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{2066}' + error: aborting due to 17 previous errors