Skip to content

Commit

Permalink
AstGen: improve error for invalid bytes in strings and comments
Browse files Browse the repository at this point in the history
  • Loading branch information
WillLillis authored Feb 5, 2025
1 parent d72f3d3 commit cf059ee
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 4 deletions.
17 changes: 17 additions & 0 deletions lib/std/zig/Ast.zig
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,19 @@ pub fn renderError(tree: Ast, parse_error: Error, stream: anytype) !void {
return stream.writeAll("for input is not captured");
},

.invalid_byte => {
const tok_slice = tree.source[tree.tokens.items(.start)[parse_error.token]..];
return stream.print("{s} contains invalid byte: '{'}'", .{
switch (tok_slice[0]) {
'\'' => "character literal",
'"', '\\' => "string literal",
'/' => "comment",
else => unreachable,
},
std.zig.fmtEscapes(tok_slice[parse_error.extra.offset..][0..1]),
});
},

.expected_token => {
const found_tag = token_tags[parse_error.token + @intFromBool(parse_error.token_is_prev)];
const expected_symbol = parse_error.extra.expected_tag.symbol();
Expand Down Expand Up @@ -2926,6 +2939,7 @@ pub const Error = struct {
extra: union {
none: void,
expected_tag: Token.Tag,
offset: usize,
} = .{ .none = {} },

pub const Tag = enum {
Expand Down Expand Up @@ -2996,6 +3010,9 @@ pub const Error = struct {

/// `expected_tag` is populated.
expected_token,

/// `offset` is populated.
invalid_byte,
};
};

Expand Down
33 changes: 33 additions & 0 deletions lib/std/zig/AstGen.zig
Original file line number Diff line number Diff line change
Expand Up @@ -14017,6 +14017,39 @@ fn lowerAstErrors(astgen: *AstGen) !void {
var notes: std.ArrayListUnmanaged(u32) = .empty;
defer notes.deinit(gpa);

const token_starts = tree.tokens.items(.start);
const token_tags = tree.tokens.items(.tag);
const parse_err = tree.errors[0];
const tok = parse_err.token + @intFromBool(parse_err.token_is_prev);
const tok_start = token_starts[tok];
const start_char = tree.source[tok_start];

if (token_tags[tok] == .invalid and
(start_char == '\"' or start_char == '\'' or start_char == '/' or mem.startsWith(u8, tree.source[tok_start..], "\\\\")))
{
const tok_len: u32 = @intCast(tree.tokenSlice(tok).len);
const tok_end = tok_start + tok_len;
const bad_off = blk: {
var idx = tok_start;
while (idx < tok_end) : (idx += 1) {
switch (tree.source[idx]) {
0x00...0x09, 0x0b...0x1f, 0x7f => break,
else => {},
}
}
break :blk idx - tok_start;
};

const err: Ast.Error = .{
.tag = Ast.Error.Tag.invalid_byte,
.token = tok,
.extra = .{ .offset = bad_off },
};
msg.clearRetainingCapacity();
try tree.renderError(err, msg.writer(gpa));
return try astgen.appendErrorTokNotesOff(tok, bad_off, "{s}", .{msg.items}, notes.items);
}

var cur_err = tree.errors[0];
for (tree.errors[1..]) |err| {
if (err.is_note) {
Expand Down
2 changes: 1 addition & 1 deletion test/cases/compile_errors/normal_string_with_newline.zig
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ b";
// backend=stage2
// target=native
//
// :1:13: error: expected expression, found 'invalid token'
// :1:15: error: string literal contains invalid byte: '\n'
8 changes: 8 additions & 0 deletions test/cases/compile_errors/tab_inside_comment.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Some comment
export fn entry() void {} // Empty body; the expected error below is reported at line 1 (the comment above).

// error
// backend=stage2
// target=native
//
// :1:8: error: comment contains invalid byte: '\t'
8 changes: 8 additions & 0 deletions test/cases/compile_errors/tab_inside_doc_comment.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/// Some doc comment
export fn entry() void {} // Empty body; the expected error below is reported at line 1 (the doc comment above).

// error
// backend=stage2
// target=native
//
// :1:13: error: comment contains invalid byte: '\t'
13 changes: 13 additions & 0 deletions test/cases/compile_errors/tab_inside_multiline_string.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
export fn entry() void { // Fixture body: the expected error below points into the multiline string (line 4).
const foo =
\\const S = struct {
\\ // hello
\\}
;
// Reference `foo` so it is not reported as an unused local.
_ = foo;
}
// error
// backend=stage2
// target=native
//
// :4:11: error: string literal contains invalid byte: '\t'
10 changes: 10 additions & 0 deletions test/cases/compile_errors/tab_inside_string.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
export fn entry() void { // Fixture body: the expected error below points into the string literal on line 2.
const foo = " hello";
// Reference `foo` so it is not reported as an unused local.
_ = foo;
}

// error
// backend=stage2
// target=native
//
// :2:18: error: string literal contains invalid byte: '\t'
6 changes: 3 additions & 3 deletions test/compile_errors.zig
Original file line number Diff line number Diff line change
Expand Up @@ -217,23 +217,23 @@ pub fn addCases(ctx: *Cases, b: *std.Build) !void {
const case = ctx.obj("invalid byte in string", b.graph.host);

case.addError("_ = \"\x01Q\";", &[_][]const u8{
":1:5: error: expected expression, found 'invalid token'",
":1:6: error: string literal contains invalid byte: '\\x01'",
});
}

{
const case = ctx.obj("invalid byte in comment", b.graph.host);

case.addError("//\x01Q", &[_][]const u8{
":1:1: error: expected type expression, found 'invalid token'",
":1:3: error: comment contains invalid byte: '\\x01'",
});
}

{
const case = ctx.obj("control character in character literal", b.graph.host);

case.addError("const c = '\x01';", &[_][]const u8{
":1:11: error: expected expression, found 'invalid token'",
":1:12: error: character literal contains invalid byte: '\\x01'",
});
}

Expand Down

0 comments on commit cf059ee

Please sign in to comment.