Skip to content

Commit

Permalink
Rollup merge of #134366 - harrisonkaiser:no-break-space, r=davidtwco
Browse files Browse the repository at this point in the history
Fix logical error with what text is considered whitespace.

There appears to be a logical issue around what counts as leading whitespace. There is code which does a subtraction assuming that no errors will be reported inside the leading whitespace. However, we compute the length of that whitespace with std::char::is_whitespace and not rustc_lexer::is_whitespace. The former will include a no-break space while the latter will exclude it. We can only safely make the assumption that no errors will be reported in whitespace if it is all "Rust Standard" whitespace. Indeed, an error does occur in Unicode whitespace if it contains a no-break space. In that case the subtraction will cause an ICE (for a compiler in debug mode) as described in #132918.
  • Loading branch information
DianQK authored Dec 20, 2024
2 parents 8a1f803 + 1e33dd1 commit 1652e3a
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 2 deletions.
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3722,6 +3722,7 @@ dependencies = [
"rustc_fluent_macro",
"rustc_hir",
"rustc_index",
"rustc_lexer",
"rustc_lint_defs",
"rustc_macros",
"rustc_serialize",
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_errors/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ rustc_error_messages = { path = "../rustc_error_messages" }
rustc_fluent_macro = { path = "../rustc_fluent_macro" }
rustc_hir = { path = "../rustc_hir" }
rustc_index = { path = "../rustc_index" }
rustc_lexer = { path = "../rustc_lexer" }
rustc_lint_defs = { path = "../rustc_lint_defs" }
rustc_macros = { path = "../rustc_macros" }
rustc_serialize = { path = "../rustc_serialize" }
Expand Down
10 changes: 8 additions & 2 deletions compiler/rustc_errors/src/emitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use derive_setters::Setters;
use rustc_data_structures::fx::{FxHashMap, FxIndexMap, FxIndexSet};
use rustc_data_structures::sync::{DynSend, IntoDynSyncSend, Lrc};
use rustc_error_messages::{FluentArgs, SpanLabel};
use rustc_lexer;
use rustc_lint_defs::pluralize;
use rustc_span::hygiene::{ExpnKind, MacroKind};
use rustc_span::source_map::SourceMap;
Expand Down Expand Up @@ -1698,9 +1699,14 @@ impl HumanEmitter {
if let Some(source_string) =
line.line_index.checked_sub(1).and_then(|l| file.get_line(l))
{
// Whitespace can only be removed (i.e. considered leading)
// if the lexer considers it whitespace.
// Characters for which rustc_lexer::is_whitespace() is false are
// reported as an error (e.g. a no-break space, \u{a0}), and thus can't
// be considered for removal during error reporting.
let leading_whitespace = source_string
.chars()
.take_while(|c| c.is_whitespace())
.take_while(|c| rustc_lexer::is_whitespace(*c))
.map(|c| {
match c {
// Tabs are displayed as 4 spaces
Expand All @@ -1709,7 +1715,7 @@ impl HumanEmitter {
}
})
.sum();
if source_string.chars().any(|c| !c.is_whitespace()) {
if source_string.chars().any(|c| !rustc_lexer::is_whitespace(c)) {
whitespace_margin = min(whitespace_margin, leading_whitespace);
}
}
Expand Down
11 changes: 11 additions & 0 deletions tests/ui/errors/emitter-overflow-bad-whitespace.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Invalid whitespace (not listed here: https://doc.rust-lang.org/reference/whitespace.html
// e.g. \u{a0}) before any other syntax on the line should not cause any integer overflow
// in the emitter, even when the terminal width causes the line to be truncated.
//
// issue #132918

//@ check-fail
//@ needs-rustc-debug-assertions
//@ compile-flags: --diagnostic-width=1
                                        fn main() { return; }
//~^ ERROR unknown start of token: \u{a0}
13 changes: 13 additions & 0 deletions tests/ui/errors/emitter-overflow-bad-whitespace.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
error: unknown start of token: \u{a0}
--> $DIR/emitter-overflow-bad-whitespace.rs:10:1
|
LL |     ...
| ^
|
help: Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
|
LL |                                       fn main() { return; }
| +

error: aborting due to 1 previous error

0 comments on commit 1652e3a

Please sign in to comment.