From 5e0073c898b74f03f0c0e7f8cb859b9ac719bf43 Mon Sep 17 00:00:00 2001 From: David Rubin Date: Tue, 17 Dec 2024 03:46:56 -0800 Subject: [PATCH 01/22] ubsan: add a basic runtime --- lib/std/std.zig | 1 + lib/std/ubsan.zig | 390 ++++++++++++++++++++++++++++++++++++++++++++ src/Compilation.zig | 2 +- 3 files changed, 392 insertions(+), 1 deletion(-) create mode 100644 lib/std/ubsan.zig diff --git a/lib/std/std.zig b/lib/std/std.zig index 558710015c09..638b52b21732 100644 --- a/lib/std/std.zig +++ b/lib/std/std.zig @@ -44,6 +44,7 @@ pub const Thread = @import("Thread.zig"); pub const Treap = @import("treap.zig").Treap; pub const Tz = tz.Tz; pub const Uri = @import("Uri.zig"); +pub const ubsan = @import("ubsan.zig"); pub const array_hash_map = @import("array_hash_map.zig"); pub const atomic = @import("atomic.zig"); diff --git a/lib/std/ubsan.zig b/lib/std/ubsan.zig new file mode 100644 index 000000000000..83652795eafb --- /dev/null +++ b/lib/std/ubsan.zig @@ -0,0 +1,390 @@ +//! Minimal UBSan Runtime + +const std = @import("std"); +const builtin = @import("builtin"); +const assert = std.debug.assert; + +const SourceLocation = extern struct { + file_name: ?[*:0]const u8, + line: u32, + col: u32, +}; + +const TypeDescriptor = extern struct { + kind: Kind, + info: Info, + // name: [?:0]u8 + + const Kind = enum(u16) { + integer = 0x0000, + float = 0x0001, + unknown = 0xFFFF, + }; + + const Info = extern union { + integer: packed struct(u16) { + signed: bool, + bit_width: u15, + }, + }; + + fn getIntegerSize(desc: TypeDescriptor) u64 { + assert(desc.kind == .integer); + const bit_width = desc.info.integer.bit_width; + return @as(u64, 1) << @intCast(bit_width); + } + + fn isSigned(desc: TypeDescriptor) bool { + return desc.kind == .integer and desc.info.integer.signed; + } + + fn getName(desc: *const TypeDescriptor) [:0]const u8 { + return std.mem.span(@as([*:0]const u8, @ptrCast(desc)) + @sizeOf(TypeDescriptor)); + } +}; + +const ValueHandle = *const opaque { + fn getValue(handle: ValueHandle, data: anytype) Value { + return .{ .handle = handle, .type_descriptor = data.type_descriptor }; + } +}; + +const Value = extern struct { + type_descriptor: *const TypeDescriptor, + handle: ValueHandle, + + fn getUnsignedInteger(value: Value) u128 { + assert(!value.type_descriptor.isSigned()); + const size = value.type_descriptor.getIntegerSize(); + const max_inline_size = @bitSizeOf(ValueHandle); + if (size <= max_inline_size) { + return @intFromPtr(value.handle); + } + + return switch (size) { + 64 => @as(*const u64, @alignCast(@ptrCast(value.handle))).*, + 128 => @as(*const u128, @alignCast(@ptrCast(value.handle))).*, + else => unreachable, + }; + } + + fn getSignedInteger(value: Value) i128 { + assert(value.type_descriptor.isSigned()); + const size = value.type_descriptor.getIntegerSize(); + const max_inline_size = @bitSizeOf(ValueHandle); + if (size <= max_inline_size) { + const extra_bits: u6 = @intCast(max_inline_size - size); + const handle: i64 = @bitCast(@intFromPtr(value.handle)); + return (handle << extra_bits) >> extra_bits; + } + return switch (size) { + 64 => @as(*const i64, @alignCast(@ptrCast(value.handle))).*, + 128 => @as(*const i128, @alignCast(@ptrCast(value.handle))).*, + else => unreachable, + }; + } + + fn isMinusOne(value: Value) bool { + return value.type_descriptor.isSigned() and + value.getSignedInteger() == -1; + } + + fn isNegative(value: Value) bool { + return value.type_descriptor.isSigned() and + value.getSignedInteger() < 0; + } + + fn getPositiveInteger(value: Value) u128 { + if (value.type_descriptor.isSigned()) { + const signed = value.getSignedInteger(); + assert(signed >= 0); + return @intCast(signed); + } else { + return value.getUnsignedInteger(); + } + } + + pub fn format( + value: Value, + comptime fmt: []const u8, + _: std.fmt.FormatOptions, + writer: anytype, + ) !void { + comptime assert(fmt.len == 0); + + switch (value.type_descriptor.kind) { + .integer => { + if (value.type_descriptor.isSigned()) { + try writer.print("{}", .{value.getSignedInteger()}); + } else { + try writer.print("{}", .{value.getUnsignedInteger()}); + } + }, + .float => @panic("TODO: write float"), + .unknown => try writer.writeAll("(unknown)"), + } + } +}; + +const OverflowData = extern struct { + loc: SourceLocation, + type_descriptor: *const TypeDescriptor, +}; + +fn overflowHandler( + comptime sym_name: []const u8, + comptime operator: []const u8, +) void { + const S = struct { + fn handler( + data: *OverflowData, + lhs_handle: ValueHandle, + rhs_handle: ValueHandle, + ) callconv(.C) noreturn { + const lhs = lhs_handle.getValue(data); + const rhs = rhs_handle.getValue(data); + + const is_signed = data.type_descriptor.isSigned(); + const fmt = "{s} integer overflow: " ++ "{} " ++ + operator ++ " {} cannot be represented in type {s}\n"; + + logMessage(fmt, .{ + if (is_signed) "signed" else "unsigned", + lhs, + rhs, + data.type_descriptor.getName(), + }); + } + }; + + exportHandler(&S.handler, sym_name, true); +} + +fn negationHandler( + data: *const OverflowData, + old_value_handle: ValueHandle, +) callconv(.C) noreturn { + const old_value = old_value_handle.getValue(data); + logMessage( + "negation of {} cannot be represented in type {s}\n", + .{ old_value, data.type_descriptor.getName() }, + ); +} + +fn divRemHandler( + data: *const OverflowData, + lhs_handle: ValueHandle, + rhs_handle: ValueHandle, +) callconv(.C) noreturn { + const is_signed = data.type_descriptor.isSigned(); + const lhs = lhs_handle.getValue(data); + const rhs = rhs_handle.getValue(data); + + if (is_signed and rhs.getSignedInteger() == -1) { + logMessage( + "division of {} by -1 cannot be represented in type {s}\n", + .{ lhs, data.type_descriptor.getName() }, + ); + } else logMessage("division by zero\n", .{}); +} + +const AlignmentAssumptionData = extern struct { + loc: SourceLocation, + assumption_loc: SourceLocation, + type_descriptor: *const TypeDescriptor, +}; + +fn alignmentAssumptionHandler( + data: *const AlignmentAssumptionData, + pointer: ValueHandle, + alignment: ValueHandle, + maybe_offset: ?ValueHandle, +) callconv(.C) noreturn { + _ = pointer; + // TODO: add the hint here? + // const real_pointer = @intFromPtr(pointer) - @intFromPtr(maybe_offset); + // const lsb = @ctz(real_pointer); + // const actual_alignment = @as(u64, 1) << @intCast(lsb); + // const mask = @intFromPtr(alignment) - 1; + // const misalignment_offset = real_pointer & mask; + // _ = actual_alignment; + // _ = misalignment_offset; + + if (maybe_offset) |offset| { + logMessage( + "assumption of {} byte alignment (with offset of {} byte) for pointer of type {s} failed\n", + .{ alignment.getValue(data), @intFromPtr(offset), data.type_descriptor.getName() }, + ); + } else { + logMessage( + "assumption of {} byte alignment for pointer of type {s} failed\n", + .{ alignment.getValue(data), data.type_descriptor.getName() }, + ); + } +} + +const ShiftOobData = extern struct { + loc: SourceLocation, + lhs_type: *const TypeDescriptor, + rhs_type: *const TypeDescriptor, +}; + +fn shiftOob( + data: *const ShiftOobData, + lhs_handle: ValueHandle, + rhs_handle: ValueHandle, +) callconv(.C) noreturn { + const lhs: Value = .{ .handle = lhs_handle, .type_descriptor = data.lhs_type }; + const rhs: Value = .{ .handle = rhs_handle, .type_descriptor = data.rhs_type }; + + if (rhs.isNegative() or + rhs.getPositiveInteger() >= data.lhs_type.getIntegerSize()) + { + if (rhs.isNegative()) { + logMessage("shift exponent {} is negative\n", .{rhs}); + } else { + logMessage( + "shift exponent {} is too large for {}-bit type {s}\n", + .{ rhs, data.lhs_type.getIntegerSize(), data.lhs_type.getName() }, + ); + } + } else { + if (lhs.isNegative()) { + logMessage("left shift of negative value {}\n", .{lhs}); + } else { + logMessage( + "left shift of {} by {} places cannot be represented in type {s}\n", + .{ lhs, rhs, data.lhs_type.getName() }, + ); + } + } +} + +const OutOfBoundsData = extern struct { + loc: SourceLocation, + array_type: *const TypeDescriptor, + index_type: *const TypeDescriptor, +}; + +fn outOfBounds(data: *const OutOfBoundsData, index_handle: ValueHandle) callconv(.C) noreturn { + const index: Value = .{ .handle = index_handle, .type_descriptor = data.index_type }; + logMessage( + "index {} out of bounds for type {s}\n", + .{ index, data.array_type.getName() }, + ); +} + +const PointerOverflowData = extern struct { + loc: SourceLocation, +}; + +fn pointerOverflow( + _: *const PointerOverflowData, + base: usize, + result: usize, +) callconv(.C) noreturn { + if (base == 0) { + if (result == 0) { + logMessage("applying zero offset to null pointer\n", .{}); + } else { + logMessage("applying non-zero offset {} to null pointer\n", .{result}); + } + } else { + if (result == 0) { + logMessage( + "applying non-zero offset to non-null pointer 0x{x} produced null pointer\n", + .{base}, + ); + } else { + @panic("TODO"); + } + } +} + +const TypeMismatchData = extern struct { + loc: SourceLocation, + type_descriptor: *const TypeDescriptor, + log_alignment: u8, + kind: enum(u8) { + load, + store, + reference_binding, + member_access, + member_call, + constructor_call, + downcast_pointer, + downcast_reference, + upcast, + upcast_to_virtual_base, + nonnull_assign, + dynamic_operation, + }, +}; + +fn simpleHandler( + comptime sym_name: []const u8, + comptime error_name: []const u8, + comptime abort: bool, +) void { + const S = struct { + fn handler() callconv(.C) noreturn { + logMessage("{s}", .{error_name}); + } + }; + exportHandler(&S.handler, sym_name, abort); +} + +inline fn logMessage(comptime fmt: []const u8, args: anytype) noreturn { + std.debug.print(fmt, args); + std.debug.dumpCurrentStackTrace(@returnAddress()); + std.posix.abort(); +} + +fn exportHandler( + handler: anytype, + comptime sym_name: []const u8, + comptime abort: bool, +) void { + const linkage = if (builtin.is_test) .internal else .weak; + { + const N = "__ubsan_handle_" ++ sym_name; + @export(handler, .{ .name = N, .linkage = linkage }); + } + if (abort) { + const N = "__ubsan_handle_" ++ sym_name ++ "_abort"; + @export(handler, .{ .name = N, .linkage = linkage }); + } +} + +comptime { + overflowHandler("add_overflow", "+"); + overflowHandler("sub_overflow", "-"); + overflowHandler("mul_overflow", "*"); + exportHandler(&negationHandler, "negate_overflow", true); + exportHandler(&divRemHandler, "divrem_overflow", true); + exportHandler(&alignmentAssumptionHandler, "alignment_assumption", true); + exportHandler(&shiftOob, "shift_out_of_bounds", true); + exportHandler(&outOfBounds, "out_of_bounds", true); + exportHandler(&pointerOverflow, "pointer_overflow", true); + + simpleHandler("type_mismatch_v1", "type-mismatch-v1", true); + simpleHandler("builtin_unreachable", "builtin-unreachable", false); + simpleHandler("missing_return", "missing-return", false); + simpleHandler("vla_bound_not_positive", "vla-bound-not-positive", true); + simpleHandler("float_cast_overflow", "float-cast-overflow", true); + simpleHandler("load_invalid_value", "load-invalid-value", true); + simpleHandler("invalid_builtin", "invalid-builtin", true); + simpleHandler("function_type_mismatch", "function-type-mismatch", true); + simpleHandler("implicit_conversion", "implicit-conversion", true); + simpleHandler("nonnull_arg", "nonnull-arg", true); + simpleHandler("nonnull_return", "nonnull-return", true); + simpleHandler("nullability_arg", "nullability-arg", true); + simpleHandler("nullability_return", "nullability-return", true); + simpleHandler("cfi_check_fail", "cfi-check-fail", true); + simpleHandler("function_type_mismatch_v1", "function-type-mismatch-v1", true); + + // these checks are nearly impossible to duplicate in zig, as they rely on nuances + // in the Itanium C++ ABI. + simpleHandler("dynamic_type_cache_miss", "dynamic-type-cache-miss", true); + simpleHandler("vptr_type_cache", "vptr-type-cache", true); +} diff --git a/src/Compilation.zig b/src/Compilation.zig index bec6696c92f0..4d475f77548f 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -5916,7 +5916,7 @@ pub fn addCCArgs( // These args have to be added after the `-fsanitize` arg or // they won't take effect. if (mod.sanitize_c) { - try argv.append("-fsanitize-trap=undefined"); + try argv.append("-fno-sanitize=vptr"); // It is very common, and well-defined, for a pointer on one side of a C ABI // to have a different but compatible element type. Examples include: // `char*` vs `uint8_t*` on a system with 8-bit bytes From eef8d4ff4fff849c7b214bed05e49e7906cc7809 Mon Sep 17 00:00:00 2001 From: David Rubin Date: Tue, 17 Dec 2024 04:29:39 -0800 Subject: [PATCH 02/22] ubsan: switch to using `std.builtin.panicExtra` to log errors --- lib/std/ubsan.zig | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/lib/std/ubsan.zig b/lib/std/ubsan.zig index 83652795eafb..cbfb2bbc62db 100644 --- a/lib/std/ubsan.zig +++ b/lib/std/ubsan.zig @@ -146,7 +146,7 @@ fn overflowHandler( const is_signed = data.type_descriptor.isSigned(); const fmt = "{s} integer overflow: " ++ "{} " ++ - operator ++ " {} cannot be represented in type {s}\n"; + operator ++ " {} cannot be represented in type {s}"; logMessage(fmt, .{ if (is_signed) "signed" else "unsigned", @@ -166,7 +166,7 @@ fn negationHandler( ) callconv(.C) noreturn { const old_value = old_value_handle.getValue(data); logMessage( - "negation of {} cannot be represented in type {s}\n", + "negation of {} cannot be represented in type {s}", .{ old_value, data.type_descriptor.getName() }, ); } @@ -182,10 +182,10 @@ fn divRemHandler( if (is_signed and rhs.getSignedInteger() == -1) { logMessage( - "division of {} by -1 cannot be represented in type {s}\n", + "division of {} by -1 cannot be represented in type {s}", .{ lhs, data.type_descriptor.getName() }, ); - } else logMessage("division by zero\n", .{}); + } else logMessage("division by zero", .{}); } const AlignmentAssumptionData = extern struct { @@ -212,12 +212,12 @@ fn alignmentAssumptionHandler( if (maybe_offset) |offset| { logMessage( - "assumption of {} byte alignment (with offset of {} byte) for pointer of type {s} failed\n", + "assumption of {} byte alignment (with offset of {} byte) for pointer of type {s} failed", .{ alignment.getValue(data), @intFromPtr(offset), data.type_descriptor.getName() }, ); } else { logMessage( - "assumption of {} byte alignment for pointer of type {s} failed\n", + "assumption of {} byte alignment for pointer of type {s} failed", .{ alignment.getValue(data), data.type_descriptor.getName() }, ); } @@ -241,19 +241,19 @@ fn shiftOob( rhs.getPositiveInteger() >= data.lhs_type.getIntegerSize()) { if (rhs.isNegative()) { - logMessage("shift exponent {} is negative\n", .{rhs}); + logMessage("shift exponent {} is negative", .{rhs}); } else { logMessage( - "shift exponent {} is too large for {}-bit type {s}\n", + "shift exponent {} is too large for {}-bit type {s}", .{ rhs, data.lhs_type.getIntegerSize(), data.lhs_type.getName() }, ); } } else { if (lhs.isNegative()) { - logMessage("left shift of negative value {}\n", .{lhs}); + logMessage("left shift of negative value {}", .{lhs}); } else { logMessage( - "left shift of {} by {} places cannot be represented in type {s}\n", + "left shift of {} by {} places cannot be represented in type {s}", .{ lhs, rhs, data.lhs_type.getName() }, ); } @@ -269,7 +269,7 @@ const OutOfBoundsData = extern struct { fn outOfBounds(data: *const OutOfBoundsData, index_handle: ValueHandle) callconv(.C) noreturn { const index: Value = .{ .handle = index_handle, .type_descriptor = data.index_type }; logMessage( - "index {} out of bounds for type {s}\n", + "index {} out of bounds for type {s}", .{ index, data.array_type.getName() }, ); } @@ -285,14 +285,14 @@ fn pointerOverflow( ) callconv(.C) noreturn { if (base == 0) { if (result == 0) { - logMessage("applying zero offset to null pointer\n", .{}); + logMessage("applying zero offset to null pointer", .{}); } else { - logMessage("applying non-zero offset {} to null pointer\n", .{result}); + logMessage("applying non-zero offset {} to null pointer", .{result}); } } else { if (result == 0) { logMessage( - "applying non-zero offset to non-null pointer 0x{x} produced null pointer\n", + "applying non-zero offset to non-null pointer 0x{x} produced null pointer", .{base}, ); } else { @@ -335,9 +335,7 @@ fn simpleHandler( } inline fn logMessage(comptime fmt: []const u8, args: anytype) noreturn { - std.debug.print(fmt, args); - std.debug.dumpCurrentStackTrace(@returnAddress()); - std.posix.abort(); + std.debug.panicExtra(null, @returnAddress(), fmt, args); } fn exportHandler( From c27b7973c9a54cd4ae0f8c9aa489fca50e13542b Mon Sep 17 00:00:00 2001 From: David Rubin Date: Wed, 18 Dec 2024 08:19:06 -0800 Subject: [PATCH 03/22] Compilation: use the minimal runtime in `ReleaseSafe` --- lib/std/ubsan.zig | 80 +++++++++++++++++++++++++++++++-------------- src/Compilation.zig | 4 +++ 2 files changed, 60 insertions(+), 24 deletions(-) diff --git a/lib/std/ubsan.zig b/lib/std/ubsan.zig index cbfb2bbc62db..42da3c4cd5e4 100644 --- a/lib/std/ubsan.zig +++ b/lib/std/ubsan.zig @@ -321,17 +321,12 @@ const TypeMismatchData = extern struct { }, }; -fn simpleHandler( - comptime sym_name: []const u8, - comptime error_name: []const u8, - comptime abort: bool, -) void { - const S = struct { +fn SimpleHandler(comptime error_name: []const u8) type { + return struct { fn handler() callconv(.C) noreturn { logMessage("{s}", .{error_name}); } }; - exportHandler(&S.handler, sym_name, abort); } inline fn logMessage(comptime fmt: []const u8, args: anytype) noreturn { @@ -354,6 +349,31 @@ fn exportHandler( } } +fn exportMinimal( + handler: anytype, + comptime sym_name: []const u8, + comptime abort: bool, +) void { + const linkage = if (builtin.is_test) .internal else .weak; + { + const N = "__ubsan_handle_" ++ sym_name ++ "_minimal"; + @export(handler, .{ .name = N, .linkage = linkage }); + } + if (abort) { + const N = "__ubsan_handle_" ++ sym_name ++ "_minimal_abort"; + @export(handler, .{ .name = N, .linkage = linkage }); + } +} + +fn exportHelper( + comptime err_name: []const u8, + comptime sym_name: []const u8, + comptime abort: bool, +) void { + exportHandler(&SimpleHandler(err_name).handler, sym_name, abort); + exportMinimal(&SimpleHandler(err_name).handler, sym_name, abort); +} + comptime { overflowHandler("add_overflow", "+"); overflowHandler("sub_overflow", "-"); @@ -365,24 +385,36 @@ comptime { exportHandler(&outOfBounds, "out_of_bounds", true); exportHandler(&pointerOverflow, "pointer_overflow", true); - simpleHandler("type_mismatch_v1", "type-mismatch-v1", true); - simpleHandler("builtin_unreachable", "builtin-unreachable", false); - simpleHandler("missing_return", "missing-return", false); - simpleHandler("vla_bound_not_positive", "vla-bound-not-positive", true); - simpleHandler("float_cast_overflow", "float-cast-overflow", true); - simpleHandler("load_invalid_value", "load-invalid-value", true); - simpleHandler("invalid_builtin", "invalid-builtin", true); - simpleHandler("function_type_mismatch", "function-type-mismatch", true); - simpleHandler("implicit_conversion", "implicit-conversion", true); - simpleHandler("nonnull_arg", "nonnull-arg", true); - simpleHandler("nonnull_return", "nonnull-return", true); - simpleHandler("nullability_arg", "nullability-arg", true); - simpleHandler("nullability_return", "nullability-return", true); - simpleHandler("cfi_check_fail", "cfi-check-fail", true); - simpleHandler("function_type_mismatch_v1", "function-type-mismatch-v1", true); + exportMinimal("add-overflow", "add_overflow", true); + exportMinimal("sub-overflow", "sub_overflow", true); + exportMinimal("mul-overflow", "mul_overflow", true); + exportMinimal("negation-handler", "negate_overflow", true); + exportMinimal("divrem-handler", "divrem_overflow", true); + exportMinimal("alignment-assumption-handler", "alignment_assumption", true); + exportMinimal("shift-oob", "shift_out_of_bounds", true); + exportMinimal("out-of-bounds", "out_of_bounds", true); + exportMinimal("pointer-overflow", "pointer_overflow", true); + + exportHandler(&SimpleHandler("type-mismatch-v1").handler, "type_mismatch_v1", true); + exportMinimal(&SimpleHandler("type-mismatch").handler, "type_mismatch", true); + + exportHelper("builtin-unreachable", "builtin_unreachable", true); + exportHelper("missing-return", "missing_return", false); + exportHelper("vla-bound-not-positive", "vla_bound_not_positive", true); + exportHelper("float-cast-overflow", "float_cast_overflow", true); + exportHelper("load-invalid-value", "load_invalid_value", true); + exportHelper("invalid-builtin", "invalid_builtin", true); + exportHelper("function-type-mismatch", "function_type_mismatch", true); + exportHelper("implicit-conversion", "implicit_conversion", true); + exportHelper("nonnull-arg", "nonnull_arg", true); + exportHelper("nonnull-return", "nonnull_return", true); + exportHelper("nullability-arg", "nullability_arg", true); + exportHelper("nullability-return", "nullability_return", true); + exportHelper("cfi-check-fail", "cfi_check_fail", true); + exportHelper("function-type-mismatch-v1", "function_type_mismatch_v1", true); // these checks are nearly impossible to duplicate in zig, as they rely on nuances // in the Itanium C++ ABI. - simpleHandler("dynamic_type_cache_miss", "dynamic-type-cache-miss", true); - simpleHandler("vptr_type_cache", "vptr-type-cache", true); + // exportHelper("dynamic_type_cache_miss", "dynamic-type-cache-miss", true); + // exportHelper("vptr_type_cache", "vptr-type-cache", true); } diff --git a/src/Compilation.zig b/src/Compilation.zig index 4d475f77548f..473006c2ea99 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -5925,6 +5925,10 @@ pub fn addCCArgs( // Without this flag, Clang would invoke UBSAN when such an extern // function was called. try argv.append("-fno-sanitize=function"); + + if (mod.optimize_mode == .ReleaseSafe) { + try argv.append("-fsanitize-minimal-runtime"); + } } } From babee5f73c01c220e3fb3901eb1a70149f94258b Mon Sep 17 00:00:00 2001 From: David Rubin Date: Wed, 25 Dec 2024 03:52:12 -0800 Subject: [PATCH 04/22] ubsan: implement some more checks --- lib/std/ubsan.zig | 145 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 117 insertions(+), 28 deletions(-) diff --git a/lib/std/ubsan.zig b/lib/std/ubsan.zig index 42da3c4cd5e4..0e2518947995 100644 --- a/lib/std/ubsan.zig +++ b/lib/std/ubsan.zig @@ -140,7 +140,7 @@ fn overflowHandler( data: *OverflowData, lhs_handle: ValueHandle, rhs_handle: ValueHandle, - ) callconv(.C) noreturn { + ) callconv(.c) noreturn { const lhs = lhs_handle.getValue(data); const rhs = rhs_handle.getValue(data); @@ -163,7 +163,7 @@ fn overflowHandler( fn negationHandler( data: *const OverflowData, old_value_handle: ValueHandle, -) callconv(.C) noreturn { +) callconv(.c) noreturn { const old_value = old_value_handle.getValue(data); logMessage( "negation of {} cannot be represented in type {s}", @@ -175,7 +175,7 @@ fn divRemHandler( data: *const OverflowData, lhs_handle: ValueHandle, rhs_handle: ValueHandle, -) callconv(.C) noreturn { +) callconv(.c) noreturn { const is_signed = data.type_descriptor.isSigned(); const lhs = lhs_handle.getValue(data); const rhs = rhs_handle.getValue(data); @@ -199,7 +199,7 @@ fn alignmentAssumptionHandler( pointer: ValueHandle, alignment: ValueHandle, maybe_offset: ?ValueHandle, -) callconv(.C) noreturn { +) callconv(.c) noreturn { _ = pointer; // TODO: add the hint here? // const real_pointer = @intFromPtr(pointer) - @intFromPtr(maybe_offset); @@ -233,7 +233,7 @@ fn shiftOob( data: *const ShiftOobData, lhs_handle: ValueHandle, rhs_handle: ValueHandle, -) callconv(.C) noreturn { +) callconv(.c) noreturn { const lhs: Value = .{ .handle = lhs_handle, .type_descriptor = data.lhs_type }; const rhs: Value = .{ .handle = rhs_handle, .type_descriptor = data.rhs_type }; @@ -266,7 +266,7 @@ const OutOfBoundsData = extern struct { index_type: *const TypeDescriptor, }; -fn outOfBounds(data: *const OutOfBoundsData, index_handle: ValueHandle) callconv(.C) noreturn { +fn outOfBounds(data: *const OutOfBoundsData, index_handle: ValueHandle) callconv(.c) noreturn { const index: Value = .{ .handle = index_handle, .type_descriptor = data.index_type }; logMessage( "index {} out of bounds for type {s}", @@ -282,7 +282,7 @@ fn pointerOverflow( _: *const PointerOverflowData, base: usize, result: usize, -) callconv(.C) noreturn { +) callconv(.c) noreturn { if (base == 0) { if (result == 0) { logMessage("applying zero offset to null pointer", .{}); @@ -318,12 +318,100 @@ const TypeMismatchData = extern struct { upcast_to_virtual_base, nonnull_assign, dynamic_operation, + + fn getName(kind: @This()) []const u8 { + return switch (kind) { + .load => "load of", + .store => "store of", + .reference_binding => "reference binding to", + .member_access => "member access within", + .member_call => "member call on", + .constructor_call => "constructor call on", + .downcast_pointer, .downcast_reference => "downcast of", + .upcast => "upcast of", + .upcast_to_virtual_base => "cast to virtual base of", + .nonnull_assign => "_Nonnull binding to", + .dynamic_operation => "dynamic operation on", + }; + } }, }; +fn typeMismatch( + data: *const TypeMismatchData, + pointer: ?ValueHandle, +) callconv(.c) noreturn { + const alignment = @as(usize, 1) << @intCast(data.log_alignment); + const handle: usize = @intFromPtr(pointer); + + if (pointer == null) { + logMessage( + "{s} null pointer of type {s}", + .{ data.kind.getName(), data.type_descriptor.getName() }, + ); + } else if (!std.mem.isAligned(handle, alignment)) { + logMessage( + "{s} misaligned address 0x{x} for type {s}, which requires {} byte alignment", + .{ data.kind.getName(), handle, data.type_descriptor.getName(), alignment }, + ); + } else { + logMessage( + "{s} address 0x{x} with insufficient space for an object of type {s}", + .{ data.kind.getName(), handle, data.type_descriptor.getName() }, + ); + } +} + +const UnreachableData = extern struct { + loc: SourceLocation, +}; + +fn builtinUnreachable(_: *const UnreachableData) callconv(.c) noreturn { + logMessage("execution reached an unreachable program point", .{}); +} + +fn missingReturn(_: *const UnreachableData) callconv(.c) noreturn { + logMessage("execution reached the end of a value-returning function without returning a value", .{}); +} + +const NonNullReturnData = extern struct { + attribute_loc: SourceLocation, +}; + +fn nonNullReturn(_: *const NonNullReturnData) callconv(.c) noreturn { + logMessage("null pointer returned from function declared to never return null", .{}); +} + +const NonNullArgData = extern struct { + loc: SourceLocation, + attribute_loc: SourceLocation, + arg_index: i32, +}; + +fn nonNullArg(data: *const NonNullArgData) callconv(.c) noreturn { + logMessage( + "null pointer passed as argument {}, which is declared to never be null", + .{data.arg_index}, + ); +} + +const InvalidValueData = extern struct { + loc: SourceLocation, + type_descriptor: *const TypeDescriptor, +}; + +fn loadInvalidValue( + data: *const InvalidValueData, + value_handle: ValueHandle, +) callconv(.c) noreturn { + logMessage("load of value {}, which is not valid for type {s}", .{ + value_handle.getValue(data), data.type_descriptor.getName(), + }); +} + fn SimpleHandler(comptime error_name: []const u8) type { return struct { - fn handler() callconv(.C) noreturn { + fn handler() callconv(.c) noreturn { logMessage("{s}", .{error_name}); } }; @@ -350,10 +438,11 @@ fn exportHandler( } fn exportMinimal( - handler: anytype, + err_name: anytype, comptime sym_name: []const u8, comptime abort: bool, ) void { + const handler = &SimpleHandler(err_name).handler; const linkage = if (builtin.is_test) .internal else .weak; { const N = "__ubsan_handle_" ++ sym_name ++ "_minimal"; @@ -371,7 +460,7 @@ fn exportHelper( comptime abort: bool, ) void { exportHandler(&SimpleHandler(err_name).handler, sym_name, abort); - exportMinimal(&SimpleHandler(err_name).handler, sym_name, abort); + exportMinimal(err_name, sym_name, abort); } comptime { @@ -384,35 +473,35 @@ comptime { exportHandler(&shiftOob, "shift_out_of_bounds", true); exportHandler(&outOfBounds, "out_of_bounds", true); exportHandler(&pointerOverflow, "pointer_overflow", true); + exportHandler(&typeMismatch, "type_mismatch_v1", true); + exportHandler(&builtinUnreachable, "builtin_unreachable", false); + exportHandler(&missingReturn, "missing_return", false); + exportHandler(&nonNullReturn, "nonnull_return_v1", true); + exportHandler(&nonNullArg, "nonnull_arg", true); + exportHandler(&loadInvalidValue, "load_invalid_value", true); - exportMinimal("add-overflow", "add_overflow", true); - exportMinimal("sub-overflow", "sub_overflow", true); - exportMinimal("mul-overflow", "mul_overflow", true); - exportMinimal("negation-handler", "negate_overflow", true); - exportMinimal("divrem-handler", "divrem_overflow", true); - exportMinimal("alignment-assumption-handler", "alignment_assumption", true); - exportMinimal("shift-oob", "shift_out_of_bounds", true); - exportMinimal("out-of-bounds", "out_of_bounds", true); - exportMinimal("pointer-overflow", "pointer_overflow", true); - - exportHandler(&SimpleHandler("type-mismatch-v1").handler, "type_mismatch_v1", true); - exportMinimal(&SimpleHandler("type-mismatch").handler, "type_mismatch", true); - - exportHelper("builtin-unreachable", "builtin_unreachable", true); - exportHelper("missing-return", "missing_return", false); exportHelper("vla-bound-not-positive", "vla_bound_not_positive", true); exportHelper("float-cast-overflow", "float_cast_overflow", true); - exportHelper("load-invalid-value", "load_invalid_value", true); exportHelper("invalid-builtin", "invalid_builtin", true); exportHelper("function-type-mismatch", "function_type_mismatch", true); exportHelper("implicit-conversion", "implicit_conversion", true); - exportHelper("nonnull-arg", "nonnull_arg", true); - exportHelper("nonnull-return", "nonnull_return", true); exportHelper("nullability-arg", "nullability_arg", true); exportHelper("nullability-return", "nullability_return", true); exportHelper("cfi-check-fail", "cfi_check_fail", true); exportHelper("function-type-mismatch-v1", "function_type_mismatch_v1", true); + exportMinimal("builtin-unreachable", "builtin_unreachable", false); + exportMinimal("add-overflow", "add_overflow", true); + exportMinimal("sub-overflow", "sub_overflow", true); + exportMinimal("mul-overflow", "mul_overflow", true); + exportMinimal("negation-handler", "negate_overflow", true); + exportMinimal("divrem-handler", "divrem_overflow", true); + exportMinimal("alignment-assumption-handler", "alignment_assumption", true); + exportMinimal("shift-oob", "shift_out_of_bounds", true); + exportMinimal("out-of-bounds", "out_of_bounds", true); + exportMinimal("pointer-overflow", "pointer_overflow", true); + exportMinimal("type-mismatch", "type_mismatch", true); + // these checks are nearly impossible to duplicate in zig, as they rely on nuances // in the Itanium C++ ABI. // exportHelper("dynamic_type_cache_miss", "dynamic-type-cache-miss", true); From 95720f007bb0dacc8a7cecb621c322d574c61d00 Mon Sep 17 00:00:00 2001 From: David Rubin Date: Wed, 25 Dec 2024 21:10:02 -0800 Subject: [PATCH 05/22] move libubsan to `lib/` and integrate it into `-fubsan-rt` --- lib/std/std.zig | 1 - lib/{std => }/ubsan.zig | 6 +-- src/Compilation.zig | 76 ++++++++++++++++++++++++++++++++++ src/link.zig | 7 ++++ src/link/Coff.zig | 9 ++++ src/link/Elf.zig | 10 +++++ src/link/MachO.zig | 26 +++++++++++- src/link/MachO/relocatable.zig | 4 ++ src/link/Wasm.zig | 10 +++++ src/main.zig | 6 +++ 10 files changed, 149 insertions(+), 6 deletions(-) rename lib/{std => }/ubsan.zig (98%) diff --git a/lib/std/std.zig b/lib/std/std.zig index 638b52b21732..558710015c09 100644 --- a/lib/std/std.zig +++ b/lib/std/std.zig @@ -44,7 +44,6 @@ pub const Thread = @import("Thread.zig"); pub const Treap = @import("treap.zig").Treap; pub const Tz = tz.Tz; pub const Uri = @import("Uri.zig"); -pub const ubsan = @import("ubsan.zig"); pub const array_hash_map = @import("array_hash_map.zig"); pub const atomic = @import("atomic.zig"); diff --git a/lib/std/ubsan.zig b/lib/ubsan.zig similarity index 98% rename from lib/std/ubsan.zig rename to lib/ubsan.zig index 0e2518947995..74b8f6273d47 100644 --- a/lib/std/ubsan.zig +++ b/lib/ubsan.zig @@ -73,8 +73,8 @@ const Value = extern struct { const size = value.type_descriptor.getIntegerSize(); const max_inline_size = @bitSizeOf(ValueHandle); if (size <= max_inline_size) { - const extra_bits: u6 = @intCast(max_inline_size - size); - const handle: i64 = @bitCast(@intFromPtr(value.handle)); + const extra_bits: std.math.Log2Int(usize) = @intCast(max_inline_size - size); + const handle: isize = @bitCast(@intFromPtr(value.handle)); return (handle << extra_bits) >> extra_bits; } return switch (size) { @@ -137,7 +137,7 @@ fn overflowHandler( ) void { const S = struct { fn handler( - data: *OverflowData, + data: *const OverflowData, lhs_handle: ValueHandle, rhs_handle: ValueHandle, ) callconv(.c) noreturn { diff --git a/src/Compilation.zig b/src/Compilation.zig index 473006c2ea99..e4bccdc2d8b6 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -79,6 +79,7 @@ implib_emit: ?Path, docs_emit: ?Path, root_name: [:0]const u8, include_compiler_rt: bool, +include_ubsan_rt: bool, /// Resolved into known paths, any GNU ld scripts already resolved. link_inputs: []const link.Input, /// Needed only for passing -F args to clang. @@ -226,6 +227,12 @@ libunwind_static_lib: ?CrtFile = null, /// Populated when we build the TSAN library. A Job to build this is placed in the queue /// and resolved before calling linker.flush(). tsan_lib: ?CrtFile = null, +/// Populated when we build the UBSAN library. A Job to build this is placed in the queue +/// and resolved before calling linker.flush(). +ubsan_rt_lib: ?CrtFile = null, +/// Populated when we build the UBSAN object. A Job to build this is placed in the queue +/// and resolved before calling linker.flush(). +ubsan_rt_obj: ?CrtFile = null, /// Populated when we build the libc static library. A Job to build this is placed in the queue /// and resolved before calling linker.flush(). libc_static_lib: ?CrtFile = null, @@ -283,6 +290,8 @@ digest: ?[Cache.bin_digest_len]u8 = null, const QueuedJobs = struct { compiler_rt_lib: bool = false, compiler_rt_obj: bool = false, + ubsan_rt_lib: bool = false, + ubsan_rt_obj: bool = false, fuzzer_lib: bool = false, update_builtin_zig: bool, musl_crt_file: [@typeInfo(musl.CrtFile).@"enum".fields.len]bool = @splat(false), @@ -789,6 +798,7 @@ pub const MiscTask = enum { libcxx, libcxxabi, libtsan, + libubsan, libfuzzer, wasi_libc_crt_file, compiler_rt, @@ -1064,6 +1074,7 @@ pub const CreateOptions = struct { /// Position Independent Executable. If the output mode is not an /// executable this field is ignored. want_compiler_rt: ?bool = null, + want_ubsan_rt: ?bool = null, want_lto: ?bool = null, function_sections: bool = false, data_sections: bool = false, @@ -1297,6 +1308,9 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil const include_compiler_rt = options.want_compiler_rt orelse (!options.skip_linker_dependencies and is_exe_or_dyn_lib); + const include_ubsan_rt = options.want_ubsan_rt orelse + (!options.skip_linker_dependencies and is_exe_or_dyn_lib); + if (include_compiler_rt and output_mode == .Obj) { // For objects, this mechanism relies on essentially `_ = @import("compiler-rt");` // injected into the object. @@ -1323,6 +1337,26 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil try options.root_mod.deps.putNoClobber(arena, "compiler_rt", compiler_rt_mod); } + if (include_ubsan_rt and output_mode == .Obj) { + const ubsan_rt_mod = try Package.Module.create(arena, .{ + .global_cache_directory = options.global_cache_directory, + .paths = .{ + .root = .{ + .root_dir = options.zig_lib_directory, + }, + .root_src_path = "ubsan.zig", + }, + .fully_qualified_name = "ubsan_rt", + .cc_argv = &.{}, + .inherited = .{}, + .global = options.config, + .parent = options.root_mod, + .builtin_mod = options.root_mod.getBuiltinDependency(), + .builtin_modules = null, // `builtin_mod` is set + }); + try options.root_mod.deps.putNoClobber(arena, "ubsan_rt", ubsan_rt_mod); + } + if (options.verbose_llvm_cpu_features) { if (options.root_mod.resolved_target.llvm_cpu_features) |cf| print: { const target = options.root_mod.resolved_target.result; @@ -1500,6 +1534,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil .version = options.version, .libc_installation = libc_dirs.libc_installation, .include_compiler_rt = include_compiler_rt, + .include_ubsan_rt = include_ubsan_rt, .link_inputs = options.link_inputs, .framework_dirs = options.framework_dirs, .llvm_opt_bisect_limit = options.llvm_opt_bisect_limit, @@ -1885,6 +1920,16 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil } } + if (comp.include_ubsan_rt and capable_of_building_compiler_rt) { + if (is_exe_or_dyn_lib) { + log.debug("queuing a job to build ubsan_rt_lib", .{}); + comp.job_queued_ubsan_rt_lib = true; + } else if (output_mode != .Obj) { + log.debug("queuing a job to build ubsan_rt_obj", .{}); + comp.job_queued_ubsan_rt_obj = true; + } + } + if (is_exe_or_dyn_lib and comp.config.any_fuzz and capable_of_building_compiler_rt) { log.debug("queuing a job to build libfuzzer", .{}); comp.queued_jobs.fuzzer_lib = true; @@ -1937,9 +1982,16 @@ pub fn destroy(comp: *Compilation) void { if (comp.compiler_rt_obj) |*crt_file| { crt_file.deinit(gpa); } + if (comp.ubsan_rt_lib) |*crt_file| { + crt_file.deinit(gpa); + } + if (comp.ubsan_rt_obj) |*crt_file| { + crt_file.deinit(gpa); + } if (comp.fuzzer_lib) |*crt_file| { crt_file.deinit(gpa); } + if (comp.libc_static_lib) |*crt_file| { crt_file.deinit(gpa); } @@ -2207,6 +2259,10 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) !void { _ = try pt.importPkg(zcu.main_mod); } + if (zcu.root_mod.deps.get("ubsan_rt")) |ubsan_rt_mod| { + _ = try pt.importPkg(ubsan_rt_mod); + } + if (zcu.root_mod.deps.get("compiler_rt")) |compiler_rt_mod| { _ = try pt.importPkg(compiler_rt_mod); } @@ -2248,6 +2304,11 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) !void { try comp.queueJob(.{ .analyze_mod = compiler_rt_mod }); zcu.analysis_roots.appendAssumeCapacity(compiler_rt_mod); } + + if (zcu.root_mod.deps.get("ubsan_rt")) |ubsan_rt_mod| { + try comp.queueJob(.{ .analyze_mod = ubsan_rt_mod }); + zcu.analysis_roots.appendAssumeCapacity(ubsan_rt_mod); + } } try comp.performAllTheWork(main_progress_node); @@ -2593,6 +2654,7 @@ fn addNonIncrementalStuffToCacheManifest( man.hash.add(comp.link_eh_frame_hdr); man.hash.add(comp.skip_linker_dependencies); man.hash.add(comp.include_compiler_rt); + man.hash.add(comp.include_ubsan_rt); man.hash.add(comp.rc_includes); man.hash.addListOfBytes(comp.force_undefined_symbols.keys()); man.hash.addListOfBytes(comp.framework_dirs); @@ -3683,6 +3745,14 @@ fn performAllTheWorkInner( comp.link_task_wait_group.spawnManager(buildRt, .{ comp, "fuzzer.zig", .libfuzzer, .Lib, true, &comp.fuzzer_lib, main_progress_node }); } + if (comp.queued_jobs.ubsan_rt_lib and comp.ubsan_rt_lib == null) { + comp.link_task_wait_group.spawnManager(buildRt, .{ comp, "ubsan.zig", .libubsan, .Lib, &comp.ubsan_rt_lib, main_progress_node }); + } + + if (comp.queued_jobs.ubsan_rt_obj and comp.ubsan_rt_obj == null) { + comp.link_task_wait_group.spawnManager(buildRt, .{ comp, "ubsan.zig", .libubsan, .Obj, &comp.ubsan_rt_obj, main_progress_node }); + } + if (comp.queued_jobs.glibc_shared_objects) { comp.link_task_wait_group.spawnManager(buildGlibcSharedObjects, .{ comp, main_progress_node }); } @@ -5916,7 +5986,11 @@ pub fn addCCArgs( // These args have to be added after the `-fsanitize` arg or // they won't take effect. if (mod.sanitize_c) { + // This check requires implementing the Itanium C++ ABI. + // We would make it `-fsanitize-trap=vptr`, however this check requires + // a full runtime due to the type hashing involved. try argv.append("-fno-sanitize=vptr"); + // It is very common, and well-defined, for a pointer on one side of a C ABI // to have a different but compatible element type. Examples include: // `char*` vs `uint8_t*` on a system with 8-bit bytes @@ -5926,6 +6000,8 @@ pub fn addCCArgs( // function was called. try argv.append("-fno-sanitize=function"); + // It's recommended to use the minimal runtime in production environments + // due to the security implications of the full runtime. if (mod.optimize_mode == .ReleaseSafe) { try argv.append("-fsanitize-minimal-runtime"); } diff --git a/src/link.zig b/src/link.zig index 4ab71493d086..82b4e6339798 100644 --- a/src/link.zig +++ b/src/link.zig @@ -1107,6 +1107,11 @@ pub const File = struct { else null; + const ubsan_rt_path: ?Path = if (comp.include_ubsan_rt) + comp.ubsan_rt_obj.?.full_object_path + else + null; + // This function follows the same pattern as link.Elf.linkWithLLD so if you want some // insight as to what's going on here you can read that function body which is more // well-commented. @@ -1136,6 +1141,7 @@ pub const File = struct { } try man.addOptionalFile(zcu_obj_path); try man.addOptionalFilePath(compiler_rt_path); + try man.addOptionalFilePath(ubsan_rt_path); // We don't actually care whether it's a cache hit or miss; we just need the digest and the lock. _ = try man.hit(); @@ -1181,6 +1187,7 @@ pub const File = struct { } if (zcu_obj_path) |p| object_files.appendAssumeCapacity(try arena.dupeZ(u8, p)); if (compiler_rt_path) |p| object_files.appendAssumeCapacity(try p.toStringZ(arena)); + if (ubsan_rt_path) |p| object_files.appendAssumeCapacity(try p.toStringZ(arena)); if (comp.verbose_link) { std.debug.print("ar rcs {s}", .{full_out_path_z}); diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 44f07959692e..9307c23669c8 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -2162,6 +2162,15 @@ fn linkWithLLD(coff: *Coff, arena: Allocator, tid: Zcu.PerThread.Id, prog_node: try argv.append(try comp.fuzzer_lib.?.full_object_path.toString(arena)); } + const ubsan_rt_path: ?Path = blk: { + if (comp.ubsan_rt_lib) |x| break :blk x.full_object_path; + if (comp.ubsan_rt_obj) |x| break :blk x.full_object_path; + break :blk null; + }; + if (ubsan_rt_path) |path| { + try argv.append(try path.toString(arena)); + } + if (is_exe_or_dyn_lib and !comp.skip_linker_dependencies) { if (!comp.config.link_libc) { if (comp.libc_static_lib) |lib| { diff --git a/src/link/Elf.zig b/src/link/Elf.zig index 990dacf67fb5..53f88101b199 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -1541,6 +1541,11 @@ fn linkWithLLD(self: *Elf, arena: Allocator, tid: Zcu.PerThread.Id, prog_node: s if (comp.compiler_rt_obj) |x| break :blk x.full_object_path; break :blk null; }; + const ubsan_rt_path: ?Path = blk: { + if (comp.ubsan_rt_lib) |x| break :blk x.full_object_path; + if (comp.ubsan_rt_obj) |x| break :blk x.full_object_path; + break :blk null; + }; // Here we want to determine whether we can save time by not invoking LLD when the // output is unchanged. None of the linker options or the object files that are being @@ -1575,6 +1580,7 @@ fn linkWithLLD(self: *Elf, arena: Allocator, tid: Zcu.PerThread.Id, prog_node: s } try man.addOptionalFile(module_obj_path); try man.addOptionalFilePath(compiler_rt_path); + try man.addOptionalFilePath(ubsan_rt_path); try man.addOptionalFilePath(if (comp.tsan_lib) |l| l.full_object_path else null); try man.addOptionalFilePath(if (comp.fuzzer_lib) |l| l.full_object_path else null); @@ -1974,6 +1980,10 @@ fn linkWithLLD(self: *Elf, arena: Allocator, tid: Zcu.PerThread.Id, prog_node: s try argv.append(try lib.full_object_path.toString(arena)); } + if (ubsan_rt_path) |p| { + try argv.append(try p.toString(arena)); + } + // libc if (is_exe_or_dyn_lib and !comp.skip_linker_dependencies and diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 486bca185716..fb30788e690b 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -344,11 +344,21 @@ pub fn deinit(self: *MachO) void { self.thunks.deinit(gpa); } -pub fn flush(self: *MachO, arena: Allocator, tid: Zcu.PerThread.Id, prog_node: std.Progress.Node) link.File.FlushError!void { +pub fn flush( + self: *MachO, + arena: Allocator, + tid: Zcu.PerThread.Id, + prog_node: std.Progress.Node, +) link.File.FlushError!void { try self.flushModule(arena, tid, prog_node); } -pub fn flushModule(self: *MachO, arena: Allocator, tid: Zcu.PerThread.Id, prog_node: std.Progress.Node) link.File.FlushError!void { +pub fn flushModule( + self: *MachO, + arena: Allocator, + tid: Zcu.PerThread.Id, + prog_node: std.Progress.Node, +) link.File.FlushError!void { const tracy = trace(@src()); defer tracy.end(); @@ -409,6 +419,16 @@ pub fn flushModule(self: *MachO, arena: Allocator, tid: Zcu.PerThread.Id, prog_n try positionals.append(try link.openObjectInput(diags, comp.fuzzer_lib.?.full_object_path)); } + if (comp.ubsan_rt_lib) |crt_file| { + const path = crt_file.full_object_path; + self.classifyInputFile(try link.openArchiveInput(diags, path, false, false)) catch |err| + diags.addParseError(path, "failed to parse archive: {s}", .{@errorName(err)}); + } else if (comp.ubsan_rt_obj) |crt_file| { + const path = crt_file.full_object_path; + self.classifyInputFile(try link.openObjectInput(diags, path)) catch |err| + diags.addParseError(path, "failed to parse archive: {s}", .{@errorName(err)}); + } + for (positionals.items) |link_input| { self.classifyInputFile(link_input) catch |err| diags.addParseError(link_input.path().?, "failed to read input file: {s}", .{@errorName(err)}); @@ -813,6 +833,8 @@ fn dumpArgv(self: *MachO, comp: *Compilation) !void { if (comp.compiler_rt_lib) |lib| try argv.append(try lib.full_object_path.toString(arena)); if (comp.compiler_rt_obj) |obj| try argv.append(try obj.full_object_path.toString(arena)); + if (comp.ubsan_rt_lib) |lib| try argv.append(try lib.full_object_path.toString(arena)); + if (comp.ubsan_rt_obj) |obj| try argv.append(try obj.full_object_path.toString(arena)); } Compilation.dump_argv(argv.items); diff --git a/src/link/MachO/relocatable.zig b/src/link/MachO/relocatable.zig index d090a2c9adfd..6b9557dfd4f1 100644 --- a/src/link/MachO/relocatable.zig +++ b/src/link/MachO/relocatable.zig @@ -97,6 +97,10 @@ pub fn flushStaticLib(macho_file: *MachO, comp: *Compilation, module_obj_path: ? try positionals.append(try link.openObjectInput(diags, comp.compiler_rt_obj.?.full_object_path)); } + if (comp.include_ubsan_rt) { + try positionals.append(try link.openObjectInput(diags, comp.ubsan_rt_obj.?.full_object_path)); + } + for (positionals.items) |link_input| { macho_file.classifyInputFile(link_input) catch |err| diags.addParseError(link_input.path().?, "failed to read input file: {s}", .{@errorName(err)}); diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 80efa6b79ac0..dda48b09d52e 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -3879,6 +3879,11 @@ fn linkWithLLD(wasm: *Wasm, arena: Allocator, tid: Zcu.PerThread.Id, prog_node: if (comp.compiler_rt_obj) |obj| break :blk obj.full_object_path; break :blk null; }; + const ubsan_rt_path: ?Path = blk: { + if (comp.ubsan_rt_lib) |lib| break :blk lib.full_object_path; + if (comp.ubsan_rt_obj) |obj| break :blk obj.full_object_path; + break :blk null; + }; const id_symlink_basename = "lld.id"; @@ -3901,6 +3906,7 @@ fn linkWithLLD(wasm: *Wasm, arena: Allocator, tid: Zcu.PerThread.Id, prog_node: } try man.addOptionalFile(module_obj_path); try man.addOptionalFilePath(compiler_rt_path); + try man.addOptionalFilePath(ubsan_rt_path); man.hash.addOptionalBytes(wasm.entry_name.slice(wasm)); man.hash.add(wasm.base.stack_size); man.hash.add(wasm.base.build_id); @@ -4148,6 +4154,10 @@ fn linkWithLLD(wasm: *Wasm, arena: Allocator, tid: Zcu.PerThread.Id, prog_node: try argv.append(try p.toString(arena)); } + if (ubsan_rt_path) |p| { + try argv.append(try p.toStringZ(arena)); + } + if (comp.verbose_link) { // Skip over our own name so that the LLD linker name is the first argv item. Compilation.dump_argv(argv.items[1..]); diff --git a/src/main.zig b/src/main.zig index 97a25a39fae3..82c20681c6bf 100644 --- a/src/main.zig +++ b/src/main.zig @@ -849,6 +849,7 @@ fn buildOutputType( var emit_h: Emit = .no; var soname: SOName = undefined; var want_compiler_rt: ?bool = null; + var want_ubsan_rt: ?bool = null; var linker_script: ?[]const u8 = null; var version_script: ?[]const u8 = null; var linker_repro: ?bool = null; @@ -1376,6 +1377,10 @@ fn buildOutputType( want_compiler_rt = true; } else if (mem.eql(u8, arg, "-fno-compiler-rt")) { want_compiler_rt = false; + } else if (mem.eql(u8, arg, "-fubsan-rt")) { + want_ubsan_rt = true; + } else if (mem.eql(u8, arg, "-fno-ubsan-rt")) { + want_ubsan_rt = false; } else if (mem.eql(u8, arg, "-feach-lib-rpath")) { create_module.each_lib_rpath = true; } else if (mem.eql(u8, arg, "-fno-each-lib-rpath")) { @@ -3504,6 +3509,7 @@ fn buildOutputType( .windows_lib_names = create_module.windows_libs.keys(), .wasi_emulated_libs = create_module.wasi_emulated_libs.items, .want_compiler_rt = want_compiler_rt, + .want_ubsan_rt = want_ubsan_rt, .hash_style = hash_style, .linker_script = linker_script, .version_script = version_script, From fc776783391b16ef77df05581412746bdb83ffa6 Mon Sep 17 00:00:00 2001 From: David Rubin Date: Thu, 26 Dec 2024 02:23:21 -0800 Subject: [PATCH 06/22] mem: add `@branchHint` to `indexOfSentinel` also seems to work around aarch64 LLVM miscompilation :thinking: --- lib/std/mem.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index be1153935af6..0fa7deadc3d0 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -1119,6 +1119,7 @@ pub fn indexOfSentinel(comptime T: type, comptime sentinel: T, p: [*:sentinel]co i += @divExact(std.mem.alignForward(usize, start_addr, block_size) - start_addr, @sizeOf(T)); } else { + @branchHint(.unlikely); // Would read over a page boundary. Per-byte at a time until aligned or found. // 0.39% chance this branch is taken for 4K pages at 16b block length. // From 590c613182d096e150dd9c882ab3bc6b20028011 Mon Sep 17 00:00:00 2001 From: David Rubin Date: Thu, 26 Dec 2024 02:27:22 -0800 Subject: [PATCH 07/22] ubsan: implement more checks --- lib/ubsan.zig | 91 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 84 insertions(+), 7 deletions(-) diff --git a/lib/ubsan.zig b/lib/ubsan.zig index 74b8f6273d47..0142adddc3c3 100644 --- a/lib/ubsan.zig +++ b/lib/ubsan.zig @@ -26,6 +26,7 @@ const TypeDescriptor = extern struct { signed: bool, bit_width: u15, }, + float: u16, }; fn getIntegerSize(desc: TypeDescriptor) u64 { @@ -80,10 +81,25 @@ const Value = extern struct { return switch (size) { 64 => @as(*const i64, @alignCast(@ptrCast(value.handle))).*, 128 => @as(*const i128, @alignCast(@ptrCast(value.handle))).*, - else => unreachable, + else => @trap(), }; } + fn getFloat(value: Value) c_longdouble { + assert(value.type_descriptor.kind == .float); + const size = value.type_descriptor.info.float; + const max_inline_size = @bitSizeOf(ValueHandle); + if (size <= max_inline_size) { + return @bitCast(@intFromPtr(value.handle)); + } + return @floatCast(switch (size) { + 64 => @as(*const f64, @alignCast(@ptrCast(value.handle))).*, + 80 => @as(*const f80, @alignCast(@ptrCast(value.handle))).*, + 128 => @as(*const f128, @alignCast(@ptrCast(value.handle))).*, + else => @trap(), + }); + } + fn isMinusOne(value: Value) bool { return value.type_descriptor.isSigned() and value.getSignedInteger() == -1; @@ -120,7 +136,7 @@ const Value = extern struct { try writer.print("{}", .{value.getUnsignedInteger()}); } }, - .float => @panic("TODO: write float"), + .float => try writer.print("{}", .{value.getFloat()}), .unknown => try writer.writeAll("(unknown)"), } } @@ -176,11 +192,10 @@ fn divRemHandler( lhs_handle: ValueHandle, rhs_handle: ValueHandle, ) callconv(.c) noreturn { - const is_signed = data.type_descriptor.isSigned(); const lhs = lhs_handle.getValue(data); const rhs = rhs_handle.getValue(data); - if (is_signed and rhs.getSignedInteger() == -1) { + if (rhs.isMinusOne()) { logMessage( "division of {} by -1 cannot be represented in type {s}", .{ lhs, data.type_descriptor.getName() }, @@ -409,6 +424,68 @@ fn loadInvalidValue( }); } +const InvalidBuiltinData = extern struct { + loc: SourceLocation, + kind: enum(u8) { + ctz, + clz, + }, +}; + +fn invalidBuiltin(data: *const InvalidBuiltinData) callconv(.c) noreturn { + logMessage( + "passing zero to {s}(), which is not a valid argument", + .{@tagName(data.kind)}, + ); +} + +const VlaBoundNotPositive = extern struct { + loc: SourceLocation, + type_descriptor: *const TypeDescriptor, +}; + +fn vlaBoundNotPositive( + data: *const VlaBoundNotPositive, + bound_handle: ValueHandle, +) callconv(.c) noreturn { + logMessage("variable length array bound evaluates to non-positive value {}", .{ + bound_handle.getValue(data), + }); +} + +const FloatCastOverflowData = extern struct { + from: *const TypeDescriptor, + to: *const TypeDescriptor, +}; + +const FloatCastOverflowDataV2 = extern struct { + loc: SourceLocation, + from: *const TypeDescriptor, + to: *const TypeDescriptor, +}; + +fn floatCastOverflow( + data_handle: *align(8) const anyopaque, + from_handle: ValueHandle, +) callconv(.c) noreturn { + // See: https://github.com/llvm/llvm-project/blob/release/19.x/compiler-rt/lib/ubsan/ubsan_handlers.cpp#L463 + // for more information on this check. + const ptr: [*]const u8 = @ptrCast(data_handle); + if (ptr[0] + ptr[1] < 2 or ptr[0] == 0xFF or ptr[1] == 0xFF) { + const data: *const FloatCastOverflowData = @ptrCast(data_handle); + const from_value: Value = .{ .handle = from_handle, .type_descriptor = data.from }; + logMessage("{} is outside the range of representable values of type {s}", .{ + from_value, data.to.getName(), + }); + } else { + const data: *const FloatCastOverflowDataV2 = @ptrCast(data_handle); + const from_value: Value = .{ .handle = from_handle, .type_descriptor = data.from }; + logMessage("{} is outside the range of representable values of type {s}", .{ + from_value, data.to.getName(), + }); + } +} + fn SimpleHandler(comptime error_name: []const u8) type { return struct { fn handler() callconv(.c) noreturn { @@ -479,10 +556,10 @@ comptime { exportHandler(&nonNullReturn, "nonnull_return_v1", true); exportHandler(&nonNullArg, "nonnull_arg", true); exportHandler(&loadInvalidValue, "load_invalid_value", true); + exportHandler(&invalidBuiltin, "invalid_builtin", true); + exportHandler(&vlaBoundNotPositive, "vla_bound_not_positive", true); + exportHandler(&floatCastOverflow, "float_cast_overflow", true); - exportHelper("vla-bound-not-positive", "vla_bound_not_positive", true); - exportHelper("float-cast-overflow", "float_cast_overflow", true); - exportHelper("invalid-builtin", "invalid_builtin", true); exportHelper("function-type-mismatch", "function_type_mismatch", true); exportHelper("implicit-conversion", "implicit_conversion", true); exportHelper("nullability-arg", "nullability_arg", true); From 658fba982ce7280721e770df1d7ce4ab3f4eaf40 Mon Sep 17 00:00:00 2001 From: David Rubin Date: Thu, 26 Dec 2024 03:05:49 -0800 Subject: [PATCH 08/22] ubsan: extend `ptr` before adding to avoid overflow --- lib/ubsan.zig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/ubsan.zig b/lib/ubsan.zig index 0142adddc3c3..e7132aad98fc 100644 --- a/lib/ubsan.zig +++ b/lib/ubsan.zig @@ -1,5 +1,3 @@ -//! Minimal UBSan Runtime - const std = @import("std"); const builtin = @import("builtin"); const assert = std.debug.assert; @@ -471,7 +469,7 @@ fn floatCastOverflow( // See: https://github.com/llvm/llvm-project/blob/release/19.x/compiler-rt/lib/ubsan/ubsan_handlers.cpp#L463 // for more information on this check. const ptr: [*]const u8 = @ptrCast(data_handle); - if (ptr[0] + ptr[1] < 2 or ptr[0] == 0xFF or ptr[1] == 0xFF) { + if (@as(u16, ptr[0]) + @as(u16, ptr[1]) < 2 or ptr[0] == 0xFF or ptr[1] == 0xFF) { const data: *const FloatCastOverflowData = @ptrCast(data_handle); const from_value: Value = .{ .handle = from_handle, .type_descriptor = data.from }; logMessage("{} is outside the range of representable values of type {s}", .{ From 50b95562fde68147c66eb8e8624628094ae7e029 Mon Sep 17 00:00:00 2001 From: David Rubin Date: Thu, 26 Dec 2024 03:40:02 -0800 Subject: [PATCH 09/22] ubsan: clean-up and remove the unused handlers --- lib/ubsan.zig | 76 ++++++++++++++++++++++----------------------------- 1 file changed, 33 insertions(+), 43 deletions(-) diff --git a/lib/ubsan.zig b/lib/ubsan.zig index e7132aad98fc..215a1cd0ac27 100644 --- a/lib/ubsan.zig +++ b/lib/ubsan.zig @@ -484,14 +484,6 @@ fn floatCastOverflow( } } -fn SimpleHandler(comptime error_name: []const u8) type { - return struct { - fn handler() callconv(.c) noreturn { - logMessage("{s}", .{error_name}); - } - }; -} - inline fn logMessage(comptime fmt: []const u8, args: anytype) noreturn { std.debug.panicExtra(null, @returnAddress(), fmt, args); } @@ -513,72 +505,70 @@ fn exportHandler( } fn exportMinimal( - err_name: anytype, + comptime err_name: []const u8, comptime sym_name: []const u8, comptime abort: bool, ) void { - const handler = &SimpleHandler(err_name).handler; + const S = struct { + fn handler() callconv(.c) noreturn { + logMessage("{s}", .{err_name}); + } + }; const linkage = if (builtin.is_test) .internal else .weak; { const N = "__ubsan_handle_" ++ sym_name ++ "_minimal"; - @export(handler, .{ .name = N, .linkage = linkage }); + @export(&S.handler, .{ .name = N, .linkage = linkage }); } if (abort) { const N = "__ubsan_handle_" ++ sym_name ++ "_minimal_abort"; - @export(handler, .{ .name = N, .linkage = linkage }); + @export(&S.handler, .{ .name = N, .linkage = linkage }); } } -fn exportHelper( - comptime err_name: []const u8, - comptime sym_name: []const u8, - comptime abort: bool, -) void { - exportHandler(&SimpleHandler(err_name).handler, sym_name, abort); - exportMinimal(err_name, sym_name, abort); -} - comptime { overflowHandler("add_overflow", "+"); - overflowHandler("sub_overflow", "-"); overflowHandler("mul_overflow", "*"); - exportHandler(&negationHandler, "negate_overflow", true); - exportHandler(&divRemHandler, "divrem_overflow", true); + overflowHandler("sub_overflow", "-"); exportHandler(&alignmentAssumptionHandler, "alignment_assumption", true); - exportHandler(&shiftOob, "shift_out_of_bounds", true); - exportHandler(&outOfBounds, "out_of_bounds", true); - exportHandler(&pointerOverflow, "pointer_overflow", true); - exportHandler(&typeMismatch, "type_mismatch_v1", true); exportHandler(&builtinUnreachable, "builtin_unreachable", false); + exportHandler(&divRemHandler, "divrem_overflow", true); + exportHandler(&floatCastOverflow, "float_cast_overflow", true); + exportHandler(&invalidBuiltin, "invalid_builtin", true); + exportHandler(&loadInvalidValue, "load_invalid_value", true); exportHandler(&missingReturn, "missing_return", false); - exportHandler(&nonNullReturn, "nonnull_return_v1", true); + exportHandler(&negationHandler, "negate_overflow", true); exportHandler(&nonNullArg, "nonnull_arg", true); - exportHandler(&loadInvalidValue, "load_invalid_value", true); - exportHandler(&invalidBuiltin, "invalid_builtin", true); + exportHandler(&nonNullReturn, "nonnull_return_v1", true); + exportHandler(&outOfBounds, "out_of_bounds", true); + exportHandler(&pointerOverflow, "pointer_overflow", true); + exportHandler(&shiftOob, "shift_out_of_bounds", true); + exportHandler(&typeMismatch, "type_mismatch_v1", true); exportHandler(&vlaBoundNotPositive, "vla_bound_not_positive", true); - exportHandler(&floatCastOverflow, "float_cast_overflow", true); - - exportHelper("function-type-mismatch", "function_type_mismatch", true); - exportHelper("implicit-conversion", "implicit_conversion", true); - exportHelper("nullability-arg", "nullability_arg", true); - exportHelper("nullability-return", "nullability_return", true); - exportHelper("cfi-check-fail", "cfi_check_fail", true); - exportHelper("function-type-mismatch-v1", "function_type_mismatch_v1", true); - exportMinimal("builtin-unreachable", "builtin_unreachable", false); exportMinimal("add-overflow", "add_overflow", true); exportMinimal("sub-overflow", "sub_overflow", true); exportMinimal("mul-overflow", "mul_overflow", true); - exportMinimal("negation-handler", "negate_overflow", true); - exportMinimal("divrem-handler", "divrem_overflow", true); exportMinimal("alignment-assumption-handler", "alignment_assumption", true); - exportMinimal("shift-oob", "shift_out_of_bounds", true); + exportMinimal("builtin-unreachable", "builtin_unreachable", false); + exportMinimal("divrem-handler", "divrem_overflow", true); + exportMinimal("float-cast-overflow", "float_cast_overflow", true); + exportMinimal("invalid-builtin", "invalid_builtin", true); + exportMinimal("load-invalid-value", "load_invalid_value", true); + exportMinimal("missing-return", "missing_return", true); + exportMinimal("negation-handler", "negate_overflow", true); + exportMinimal("nonnull-arg", "nonnull_arg", true); exportMinimal("out-of-bounds", "out_of_bounds", true); exportMinimal("pointer-overflow", "pointer_overflow", true); + exportMinimal("shift-oob", "shift_out_of_bounds", true); exportMinimal("type-mismatch", "type_mismatch", true); + exportMinimal("vla-bound-not-positive", "vla_bound_not_positive", true); // these checks are nearly impossible to duplicate in zig, as they rely on nuances // in the Itanium C++ ABI. // exportHelper("dynamic_type_cache_miss", "dynamic-type-cache-miss", true); // exportHelper("vptr_type_cache", "vptr-type-cache", true); + + // we disable -fsanitize=function for reasons explained in src/Compilation.zig + // exportHelper("function-type-mismatch", "function_type_mismatch", true); + // exportHelper("function-type-mismatch-v1", "function_type_mismatch_v1", true); } From a468929519157cf7a2070a0368357cb73ad80b61 Mon Sep 17 00:00:00 2001 From: David Rubin Date: Thu, 26 Dec 2024 03:59:27 -0800 Subject: [PATCH 10/22] ubsan: resolve the last of the TODOs --- lib/ubsan.zig | 56 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/lib/ubsan.zig b/lib/ubsan.zig index 215a1cd0ac27..5b94649cd7f0 100644 --- a/lib/ubsan.zig +++ b/lib/ubsan.zig @@ -213,25 +213,34 @@ fn alignmentAssumptionHandler( alignment: ValueHandle, maybe_offset: ?ValueHandle, ) callconv(.c) noreturn { - _ = pointer; - // TODO: add the hint here? - // const real_pointer = @intFromPtr(pointer) - @intFromPtr(maybe_offset); - // const lsb = @ctz(real_pointer); - // const actual_alignment = @as(u64, 1) << @intCast(lsb); - // const mask = @intFromPtr(alignment) - 1; - // const misalignment_offset = real_pointer & mask; - // _ = actual_alignment; - // _ = misalignment_offset; + const real_pointer = @intFromPtr(pointer) - @intFromPtr(maybe_offset); + const lsb = @ctz(real_pointer); + const actual_alignment = @as(u64, 1) << @intCast(lsb); + const mask = @intFromPtr(alignment) - 1; + const misalignment_offset = real_pointer & mask; if (maybe_offset) |offset| { logMessage( - "assumption of {} byte alignment (with offset of {} byte) for pointer of type {s} failed", - .{ alignment.getValue(data), @intFromPtr(offset), data.type_descriptor.getName() }, + "assumption of {} byte alignment (with offset of {} byte) for pointer of type {s} failed\n" ++ + "offset address is {} aligned, misalignment offset is {} bytes", + .{ + alignment.getValue(data), + @intFromPtr(offset), + data.type_descriptor.getName(), + actual_alignment, + misalignment_offset, + }, ); } else { logMessage( - "assumption of {} byte alignment for pointer of type {s} failed", - .{ alignment.getValue(data), data.type_descriptor.getName() }, + "assumption of {} byte alignment for pointer of type {s} failed\n" ++ + "address is {} aligned, misalignment offset is {} bytes", + .{ + alignment.getValue(data), + data.type_descriptor.getName(), + actual_alignment, + misalignment_offset, + }, ); } } @@ -309,7 +318,26 @@ fn pointerOverflow( .{base}, ); } else { - @panic("TODO"); + const signed_base: isize = @bitCast(base); + const signed_result: isize = @bitCast(result); + if ((signed_base >= 0) == (signed_result >= 0)) { + if (base > result) { + logMessage( + "addition of unsigned offset to 0x{x} overflowed to 0x{x}", + .{ base, result }, + ); + } else { + logMessage( + "subtraction of unsigned offset to 0x{x} overflowed to 0x{x}", + .{ base, result }, + ); + } + } else { + logMessage( + "pointer index expression with base 0x{x} overflowed to 0x{x}", + .{ base, result }, + ); + } } } } From 2d4574aafbf95f357ad6306b98d9c8ebebbafe2e Mon Sep 17 00:00:00 2001 From: David Rubin Date: Thu, 26 Dec 2024 05:30:56 -0800 Subject: [PATCH 11/22] Compilation: always import ubsan if a ZCU exists Unlike `compiler-rt`, `ubsan` uses the standard library quite a lot. Using a similar approach to how `compiler-rt` is handled today, where it's compiled into its own object and then linked would be sub-optimal as we'd be introducing a lot of code bloat. This approach always "imports" `ubsan` if the ZCU, if it exists. If it doesn't such as the case where we're compiling only C code, then we have no choice other than to compile it down to an object and link. There's still a tiny optimization we can do in that case, which is when compiling to a static library, there's no need to construct an archive with a single object. We'd only go back and parse out ubsan from the archive later in the pipeline. So we compile it to an object instead and link that to the static library. TLDR; - `zig build-exe foo.c` -> build `libubsan.a` and links - `zig build-obj foo.c` -> doesn't build anything, just emits references to ubsan runtime - `zig build-lib foo.c -static` -> build `ubsan.o` and link it - `zig build-exe foo.zig bar.c` -> import `ubsan-rt` into the ZCU - `zig build-obj foo.zig bar.c` -> import `ubsan-rt` into the ZCU - `zig build-lib foo.zig bar.c` -> import `ubsan-rt` into the ZCU --- src/Compilation.zig | 7 +++++-- src/Zcu.zig | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Compilation.zig b/src/Compilation.zig index e4bccdc2d8b6..1fc43b85a9b4 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -1309,7 +1309,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil (!options.skip_linker_dependencies and is_exe_or_dyn_lib); const include_ubsan_rt = options.want_ubsan_rt orelse - (!options.skip_linker_dependencies and is_exe_or_dyn_lib); + (!options.skip_linker_dependencies and is_exe_or_dyn_lib and !have_zcu); if (include_compiler_rt and output_mode == .Obj) { // For objects, this mechanism relies on essentially `_ = @import("compiler-rt");` @@ -1337,7 +1337,10 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil try options.root_mod.deps.putNoClobber(arena, "compiler_rt", compiler_rt_mod); } - if (include_ubsan_rt and output_mode == .Obj) { + // unlike compiler_rt, we always want to go through the `_ = @import("ubsan-rt")` + // approach, since the ubsan runtime uses quite a lot of the standard library + // and this reduces unnecessary bloat. + if (!options.skip_linker_dependencies and have_zcu) { const ubsan_rt_mod = try Package.Module.create(arena, .{ .global_cache_directory = options.global_cache_directory, .paths = .{ diff --git a/src/Zcu.zig b/src/Zcu.zig index fd0b0dba642a..f262177da19e 100644 --- a/src/Zcu.zig +++ b/src/Zcu.zig @@ -175,7 +175,7 @@ nav_val_analysis_queued: std.AutoArrayHashMapUnmanaged(InternPool.Nav.Index, voi /// These are the modules which we initially queue for analysis in `Compilation.update`. /// `resolveReferences` will use these as the root of its reachability traversal. -analysis_roots: std.BoundedArray(*Package.Module, 3) = .{}, +analysis_roots: std.BoundedArray(*Package.Module, 4) = .{}, /// This is the cached result of `Zcu.resolveReferences`. It is computed on-demand, and /// reset to `null` when any semantic analysis occurs (since this invalidates the data). /// Allocated into `gpa`. From 14178475e35674122c65d11c8f0957c89e10506c Mon Sep 17 00:00:00 2001 From: David Rubin Date: Thu, 26 Dec 2024 06:36:09 -0800 Subject: [PATCH 12/22] main: add `-f{no-}ubsan-rt` to the usage text --- src/main.zig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main.zig b/src/main.zig index 82c20681c6bf..5e66244484b9 100644 --- a/src/main.zig +++ b/src/main.zig @@ -561,6 +561,8 @@ const usage_build_generic = \\ -fno-lld Prevent using LLD as the linker \\ -fcompiler-rt Always include compiler-rt symbols in output \\ -fno-compiler-rt Prevent including compiler-rt symbols in output + \\ -fubsan-rt Always include ubsan-rt symbols in the output + \\ -fno-ubsan-rt Prevent including ubsan-rt symbols in the output \\ -rdynamic Add all symbols to the dynamic symbol table \\ -feach-lib-rpath Ensure adding rpath for each used dynamic library \\ -fno-each-lib-rpath Prevent adding rpath for each used dynamic library From d669b9520b85aa5e892fe073b2c485f1bf1afef5 Mon Sep 17 00:00:00 2001 From: David Rubin Date: Thu, 26 Dec 2024 17:46:28 -0800 Subject: [PATCH 13/22] ubsan: clean-up a bit more --- lib/ubsan.zig | 105 +++++++++++++++++++++++++------------------------- 1 file changed, 53 insertions(+), 52 deletions(-) diff --git a/lib/ubsan.zig b/lib/ubsan.zig index 5b94649cd7f0..05950299f948 100644 --- a/lib/ubsan.zig +++ b/lib/ubsan.zig @@ -42,19 +42,15 @@ const TypeDescriptor = extern struct { } }; -const ValueHandle = *const opaque { - fn getValue(handle: ValueHandle, data: anytype) Value { - return .{ .handle = handle, .type_descriptor = data.type_descriptor }; - } -}; +const ValueHandle = *const opaque {}; const Value = extern struct { - type_descriptor: *const TypeDescriptor, + td: *const TypeDescriptor, handle: ValueHandle, fn getUnsignedInteger(value: Value) u128 { - assert(!value.type_descriptor.isSigned()); - const size = value.type_descriptor.getIntegerSize(); + assert(!value.td.isSigned()); + const size = value.td.getIntegerSize(); const max_inline_size = @bitSizeOf(ValueHandle); if (size <= max_inline_size) { return @intFromPtr(value.handle); @@ -68,8 +64,8 @@ const Value = extern struct { } fn getSignedInteger(value: Value) i128 { - assert(value.type_descriptor.isSigned()); - const size = value.type_descriptor.getIntegerSize(); + assert(value.td.isSigned()); + const size = value.td.getIntegerSize(); const max_inline_size = @bitSizeOf(ValueHandle); if (size <= max_inline_size) { const extra_bits: std.math.Log2Int(usize) = @intCast(max_inline_size - size); @@ -84,8 +80,8 @@ const Value = extern struct { } fn getFloat(value: Value) c_longdouble { - assert(value.type_descriptor.kind == .float); - const size = value.type_descriptor.info.float; + assert(value.td.kind == .float); + const size = value.td.info.float; const max_inline_size = @bitSizeOf(ValueHandle); if (size <= max_inline_size) { return @bitCast(@intFromPtr(value.handle)); @@ -99,17 +95,17 @@ const Value = extern struct { } fn isMinusOne(value: Value) bool { - return value.type_descriptor.isSigned() and + return value.td.isSigned() and value.getSignedInteger() == -1; } fn isNegative(value: Value) bool { - return value.type_descriptor.isSigned() and + return value.td.isSigned() and value.getSignedInteger() < 0; } fn getPositiveInteger(value: Value) u128 { - if (value.type_descriptor.isSigned()) { + if (value.td.isSigned()) { const signed = value.getSignedInteger(); assert(signed >= 0); return @intCast(signed); @@ -126,9 +122,9 @@ const Value = extern struct { ) !void { comptime assert(fmt.len == 0); - switch (value.type_descriptor.kind) { + switch (value.td.kind) { .integer => { - if (value.type_descriptor.isSigned()) { + if (value.td.isSigned()) { try writer.print("{}", .{value.getSignedInteger()}); } else { try writer.print("{}", .{value.getUnsignedInteger()}); @@ -142,7 +138,7 @@ const Value = extern struct { const OverflowData = extern struct { loc: SourceLocation, - type_descriptor: *const TypeDescriptor, + td: *const TypeDescriptor, }; fn overflowHandler( @@ -155,10 +151,10 @@ fn overflowHandler( lhs_handle: ValueHandle, rhs_handle: ValueHandle, ) callconv(.c) noreturn { - const lhs = lhs_handle.getValue(data); - const rhs = rhs_handle.getValue(data); + const lhs: Value = .{ .handle = lhs_handle, .td = data.td }; + const rhs: Value = .{ .handle = rhs_handle, .td = data.td }; - const is_signed = data.type_descriptor.isSigned(); + const is_signed = data.td.isSigned(); const fmt = "{s} integer overflow: " ++ "{} " ++ operator ++ " {} cannot be represented in type {s}"; @@ -166,7 +162,7 @@ fn overflowHandler( if (is_signed) "signed" else "unsigned", lhs, rhs, - data.type_descriptor.getName(), + data.td.getName(), }); } }; @@ -176,12 +172,12 @@ fn overflowHandler( fn negationHandler( data: *const OverflowData, - old_value_handle: ValueHandle, + value_handle: ValueHandle, ) callconv(.c) noreturn { - const old_value = old_value_handle.getValue(data); + const value: Value = .{ .handle = value_handle, .td = data.td }; logMessage( "negation of {} cannot be represented in type {s}", - .{ old_value, data.type_descriptor.getName() }, + .{ value, data.td.getName() }, ); } @@ -190,13 +186,13 @@ fn divRemHandler( lhs_handle: ValueHandle, rhs_handle: ValueHandle, ) callconv(.c) noreturn { - const lhs = lhs_handle.getValue(data); - const rhs = rhs_handle.getValue(data); + const lhs: Value = .{ .handle = lhs_handle, .td = data.lhs_type }; + const rhs: Value = .{ .handle = rhs_handle, .td = data.rhs_type }; if (rhs.isMinusOne()) { logMessage( "division of {} by -1 cannot be represented in type {s}", - .{ lhs, data.type_descriptor.getName() }, + .{ lhs, data.td.getName() }, ); } else logMessage("division by zero", .{}); } @@ -204,29 +200,30 @@ fn divRemHandler( const AlignmentAssumptionData = extern struct { loc: SourceLocation, assumption_loc: SourceLocation, - type_descriptor: *const TypeDescriptor, + td: *const TypeDescriptor, }; fn alignmentAssumptionHandler( data: *const AlignmentAssumptionData, pointer: ValueHandle, - alignment: ValueHandle, + alignment_handle: ValueHandle, maybe_offset: ?ValueHandle, ) callconv(.c) noreturn { const real_pointer = @intFromPtr(pointer) - @intFromPtr(maybe_offset); const lsb = @ctz(real_pointer); const actual_alignment = @as(u64, 1) << @intCast(lsb); - const mask = @intFromPtr(alignment) - 1; + const mask = @intFromPtr(alignment_handle) - 1; const misalignment_offset = real_pointer & mask; + const alignment: Value = .{ .handle = alignment_handle, .td = data.td }; if (maybe_offset) |offset| { logMessage( "assumption of {} byte alignment (with offset of {} byte) for pointer of type {s} failed\n" ++ "offset address is {} aligned, misalignment offset is {} bytes", .{ - alignment.getValue(data), + alignment, @intFromPtr(offset), - data.type_descriptor.getName(), + data.td.getName(), actual_alignment, misalignment_offset, }, @@ -236,8 +233,8 @@ fn alignmentAssumptionHandler( "assumption of {} byte alignment for pointer of type {s} failed\n" ++ "address is {} aligned, misalignment offset is {} bytes", .{ - alignment.getValue(data), - data.type_descriptor.getName(), + alignment, + data.td.getName(), actual_alignment, misalignment_offset, }, @@ -256,8 +253,8 @@ fn shiftOob( lhs_handle: ValueHandle, rhs_handle: ValueHandle, ) callconv(.c) noreturn { - const lhs: Value = .{ .handle = lhs_handle, .type_descriptor = data.lhs_type }; - const rhs: Value = .{ .handle = rhs_handle, .type_descriptor = data.rhs_type }; + const lhs: Value = .{ .handle = lhs_handle, .td = data.lhs_type }; + const rhs: Value = .{ .handle = rhs_handle, .td = data.rhs_type }; if (rhs.isNegative() or rhs.getPositiveInteger() >= data.lhs_type.getIntegerSize()) @@ -289,7 +286,7 @@ const OutOfBoundsData = extern struct { }; fn outOfBounds(data: *const OutOfBoundsData, index_handle: ValueHandle) callconv(.c) noreturn { - const index: Value = .{ .handle = index_handle, .type_descriptor = data.index_type }; + const index: Value = .{ .handle = index_handle, .td = data.index_type }; logMessage( "index {} out of bounds for type {s}", .{ index, data.array_type.getName() }, @@ -344,7 +341,7 @@ fn pointerOverflow( const TypeMismatchData = extern struct { loc: SourceLocation, - type_descriptor: *const TypeDescriptor, + td: *const TypeDescriptor, log_alignment: u8, kind: enum(u8) { load, @@ -388,17 +385,17 @@ fn typeMismatch( if (pointer == null) { logMessage( "{s} null pointer of type {s}", - .{ data.kind.getName(), data.type_descriptor.getName() }, + .{ data.kind.getName(), data.td.getName() }, ); } else if (!std.mem.isAligned(handle, alignment)) { logMessage( "{s} misaligned address 0x{x} for type {s}, which requires {} byte alignment", - .{ data.kind.getName(), handle, data.type_descriptor.getName(), alignment }, + .{ data.kind.getName(), handle, data.td.getName(), alignment }, ); } else { logMessage( "{s} address 0x{x} with insufficient space for an object of type {s}", - .{ data.kind.getName(), handle, data.type_descriptor.getName() }, + .{ data.kind.getName(), handle, data.td.getName() }, ); } } @@ -438,16 +435,18 @@ fn nonNullArg(data: *const NonNullArgData) callconv(.c) noreturn { const InvalidValueData = extern struct { loc: SourceLocation, - type_descriptor: *const TypeDescriptor, + td: *const TypeDescriptor, }; fn loadInvalidValue( data: *const InvalidValueData, value_handle: ValueHandle, ) callconv(.c) noreturn { - logMessage("load of value {}, which is not valid for type {s}", .{ - value_handle.getValue(data), data.type_descriptor.getName(), - }); + const value: Value = .{ .handle = value_handle, .td = data.td }; + logMessage( + "load of value {}, which is not valid for type {s}", + .{ value, data.td.getName() }, + ); } const InvalidBuiltinData = extern struct { @@ -467,16 +466,18 @@ fn invalidBuiltin(data: *const InvalidBuiltinData) callconv(.c) noreturn { const VlaBoundNotPositive = extern struct { loc: SourceLocation, - type_descriptor: *const TypeDescriptor, + td: *const TypeDescriptor, }; fn vlaBoundNotPositive( data: *const VlaBoundNotPositive, bound_handle: ValueHandle, ) callconv(.c) noreturn { - logMessage("variable length array bound evaluates to non-positive value {}", .{ - bound_handle.getValue(data), - }); + const bound: Value = .{ .handle = bound_handle, .td = data.td }; + logMessage( + "variable length array bound evaluates to non-positive value {}", + .{bound}, + ); } const FloatCastOverflowData = extern struct { @@ -499,13 +500,13 @@ fn floatCastOverflow( const ptr: [*]const u8 = @ptrCast(data_handle); if (@as(u16, ptr[0]) + @as(u16, ptr[1]) < 2 or ptr[0] == 0xFF or ptr[1] == 0xFF) { const data: *const FloatCastOverflowData = @ptrCast(data_handle); - const from_value: Value = .{ .handle = from_handle, .type_descriptor = data.from }; + const from_value: Value = .{ .handle = from_handle, .td = data.from }; logMessage("{} is outside the range of representable values of type {s}", .{ from_value, data.to.getName(), }); } else { const data: *const FloatCastOverflowDataV2 = @ptrCast(data_handle); - const from_value: Value = .{ .handle = from_handle, .type_descriptor = data.from }; + const from_value: Value = .{ .handle = from_handle, .td = data.from }; logMessage("{} is outside the range of representable values of type {s}", .{ from_value, data.to.getName(), }); From 9432a9b6e1d2bd340afac6b29e49f1094cf1ba93 Mon Sep 17 00:00:00 2001 From: David Rubin Date: Fri, 27 Dec 2024 01:13:15 -0800 Subject: [PATCH 14/22] build: add `bundle_ubsan_rt` --- lib/std/Build/Step/Compile.zig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/std/Build/Step/Compile.zig b/lib/std/Build/Step/Compile.zig index 65030fde32f4..616e8f76b450 100644 --- a/lib/std/Build/Step/Compile.zig +++ b/lib/std/Build/Step/Compile.zig @@ -40,6 +40,7 @@ compress_debug_sections: enum { none, zlib, zstd } = .none, verbose_link: bool, verbose_cc: bool, bundle_compiler_rt: ?bool = null, +bundle_ubsan_rt: ?bool = null, rdynamic: bool, import_memory: bool = false, export_memory: bool = false, @@ -1563,6 +1564,7 @@ fn getZigArgs(compile: *Compile, fuzz: bool) ![][]const u8 { } try addFlag(&zig_args, "compiler-rt", compile.bundle_compiler_rt); + try addFlag(&zig_args, "ubsan-rt", compile.bundle_ubsan_rt); try addFlag(&zig_args, "dll-export-fns", compile.dll_export_fns); if (compile.rdynamic) { try zig_args.append("-rdynamic"); From 931178494f4c77631e5eb9c567f64492d6592eeb Mon Sep 17 00:00:00 2001 From: David Rubin Date: Wed, 1 Jan 2025 03:47:17 -0800 Subject: [PATCH 15/22] Compilation: correct when to include ubsan --- lib/{ubsan.zig => ubsan_rt.zig} | 48 +++---------------- src/Compilation.zig | 81 +++++++++++++++++++++++---------- src/Compilation/Config.zig | 3 ++ src/link.zig | 4 +- src/link/MachO/relocatable.zig | 4 +- 5 files changed, 71 insertions(+), 69 deletions(-) rename lib/{ubsan.zig => ubsan_rt.zig} (90%) diff --git a/lib/ubsan.zig b/lib/ubsan_rt.zig similarity index 90% rename from lib/ubsan.zig rename to lib/ubsan_rt.zig index 05950299f948..e7030c99ac40 100644 --- a/lib/ubsan.zig +++ b/lib/ubsan_rt.zig @@ -59,7 +59,7 @@ const Value = extern struct { return switch (size) { 64 => @as(*const u64, @alignCast(@ptrCast(value.handle))).*, 128 => @as(*const u128, @alignCast(@ptrCast(value.handle))).*, - else => unreachable, + else => @trap(), }; } @@ -186,8 +186,8 @@ fn divRemHandler( lhs_handle: ValueHandle, rhs_handle: ValueHandle, ) callconv(.c) noreturn { - const lhs: Value = .{ .handle = lhs_handle, .td = data.lhs_type }; - const rhs: Value = .{ .handle = rhs_handle, .td = data.rhs_type }; + const lhs: Value = .{ .handle = lhs_handle, .td = data.td }; + const rhs: Value = .{ .handle = rhs_handle, .td = data.td }; if (rhs.isMinusOne()) { logMessage( @@ -533,26 +533,10 @@ fn exportHandler( } } -fn exportMinimal( - comptime err_name: []const u8, - comptime sym_name: []const u8, - comptime abort: bool, -) void { - const S = struct { - fn handler() callconv(.c) noreturn { - logMessage("{s}", .{err_name}); - } - }; - const linkage = if (builtin.is_test) .internal else .weak; - { - const N = "__ubsan_handle_" ++ sym_name ++ "_minimal"; - @export(&S.handler, .{ .name = N, .linkage = linkage }); - } - if (abort) { - const N = "__ubsan_handle_" ++ sym_name ++ "_minimal_abort"; - @export(&S.handler, .{ .name = N, .linkage = linkage }); - } -} +const can_build_ubsan = switch (builtin.zig_backend) { + .stage2_riscv64 => false, + else => true, +}; comptime { overflowHandler("add_overflow", "+"); @@ -574,24 +558,6 @@ comptime { exportHandler(&typeMismatch, "type_mismatch_v1", true); exportHandler(&vlaBoundNotPositive, "vla_bound_not_positive", true); - exportMinimal("add-overflow", "add_overflow", true); - exportMinimal("sub-overflow", "sub_overflow", true); - exportMinimal("mul-overflow", "mul_overflow", true); - exportMinimal("alignment-assumption-handler", "alignment_assumption", true); - exportMinimal("builtin-unreachable", "builtin_unreachable", false); - exportMinimal("divrem-handler", "divrem_overflow", true); - exportMinimal("float-cast-overflow", "float_cast_overflow", true); - exportMinimal("invalid-builtin", "invalid_builtin", true); - exportMinimal("load-invalid-value", "load_invalid_value", true); - exportMinimal("missing-return", "missing_return", true); - exportMinimal("negation-handler", "negate_overflow", true); - exportMinimal("nonnull-arg", "nonnull_arg", true); - exportMinimal("out-of-bounds", "out_of_bounds", true); - exportMinimal("pointer-overflow", "pointer_overflow", true); - exportMinimal("shift-oob", "shift_out_of_bounds", true); - exportMinimal("type-mismatch", "type_mismatch", true); - exportMinimal("vla-bound-not-positive", "vla_bound_not_positive", true); - // these checks are nearly impossible to duplicate in zig, as they rely on nuances // in the Itanium C++ ABI. // exportHelper("dynamic_type_cache_miss", "dynamic-type-cache-miss", true); diff --git a/src/Compilation.zig b/src/Compilation.zig index 1fc43b85a9b4..60586450ca92 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -78,8 +78,8 @@ implib_emit: ?Path, /// This is non-null when `-femit-docs` is provided. docs_emit: ?Path, root_name: [:0]const u8, -include_compiler_rt: bool, -include_ubsan_rt: bool, +compiler_rt_strat: RtStrat, +ubsan_rt_strat: RtStrat, /// Resolved into known paths, any GNU ld scripts already resolved. link_inputs: []const link.Input, /// Needed only for passing -F args to clang. @@ -1256,6 +1256,8 @@ fn addModuleTableToCacheHash( } } +const RtStrat = enum { none, lib, obj, zcu }; + pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compilation { const output_mode = options.config.output_mode; const is_dyn_lib = switch (output_mode) { @@ -1287,6 +1289,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil const any_unwind_tables = options.config.any_unwind_tables or options.root_mod.unwind_tables != .none; const any_non_single_threaded = options.config.any_non_single_threaded or !options.root_mod.single_threaded; const any_sanitize_thread = options.config.any_sanitize_thread or options.root_mod.sanitize_thread; + const any_sanitize_c = options.config.any_sanitize_c or options.root_mod.sanitize_c; const any_fuzz = options.config.any_fuzz or options.root_mod.fuzz; const link_eh_frame_hdr = options.link_eh_frame_hdr or any_unwind_tables; @@ -1305,13 +1308,25 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil const sysroot = options.sysroot orelse libc_dirs.sysroot; - const include_compiler_rt = options.want_compiler_rt orelse - (!options.skip_linker_dependencies and is_exe_or_dyn_lib); + const compiler_rt_strat: RtStrat = s: { + if (options.skip_linker_dependencies) break :s .none; + const want = options.want_compiler_rt orelse is_exe_or_dyn_lib; + if (!want) break :s .none; + if (have_zcu and output_mode == .Obj) break :s .zcu; + if (is_exe_or_dyn_lib) break :s .lib; + break :s .obj; + }; - const include_ubsan_rt = options.want_ubsan_rt orelse - (!options.skip_linker_dependencies and is_exe_or_dyn_lib and !have_zcu); + const ubsan_rt_strat: RtStrat = s: { + const want_ubsan_rt = options.want_ubsan_rt orelse (any_sanitize_c and output_mode != .Obj); + if (!want_ubsan_rt) break :s .none; + if (options.skip_linker_dependencies) break :s .none; + if (have_zcu) break :s .zcu; + if (is_exe_or_dyn_lib) break :s .lib; + break :s .obj; + }; - if (include_compiler_rt and output_mode == .Obj) { + if (compiler_rt_strat == .zcu) { // For objects, this mechanism relies on essentially `_ = @import("compiler-rt");` // injected into the object. const compiler_rt_mod = try Package.Module.create(arena, .{ @@ -1340,7 +1355,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil // unlike compiler_rt, we always want to go through the `_ = @import("ubsan-rt")` // approach, since the ubsan runtime uses quite a lot of the standard library // and this reduces unnecessary bloat. - if (!options.skip_linker_dependencies and have_zcu) { + if (ubsan_rt_strat == .zcu) { const ubsan_rt_mod = try Package.Module.create(arena, .{ .global_cache_directory = options.global_cache_directory, .paths = .{ @@ -1536,8 +1551,8 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil .windows_libs = windows_libs, .version = options.version, .libc_installation = libc_dirs.libc_installation, - .include_compiler_rt = include_compiler_rt, - .include_ubsan_rt = include_ubsan_rt, + .compiler_rt_strat = compiler_rt_strat, + .ubsan_rt_strat = ubsan_rt_strat, .link_inputs = options.link_inputs, .framework_dirs = options.framework_dirs, .llvm_opt_bisect_limit = options.llvm_opt_bisect_limit, @@ -1563,6 +1578,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil comp.config.any_unwind_tables = any_unwind_tables; comp.config.any_non_single_threaded = any_non_single_threaded; comp.config.any_sanitize_thread = any_sanitize_thread; + comp.config.any_sanitize_c = any_sanitize_c; comp.config.any_fuzz = any_fuzz; const lf_open_opts: link.File.OpenOptions = .{ @@ -1909,34 +1925,51 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil comp.remaining_prelink_tasks += 1; } +<<<<<<< HEAD if (comp.include_compiler_rt and capable_of_building_compiler_rt) { if (is_exe_or_dyn_lib) { +======= + if (target.isMinGW() and comp.config.any_non_single_threaded) { + // LLD might drop some symbols as unused during LTO and GCing, therefore, + // we force mark them for resolution here. + + const tls_index_sym = switch (target.cpu.arch) { + .x86 => "__tls_index", + else => "_tls_index", + }; + + try comp.force_undefined_symbols.put(comp.gpa, tls_index_sym, {}); + } + + if (capable_of_building_compiler_rt) { + if (comp.compiler_rt_strat == .lib) { +>>>>>>> 050e3e69ac (Compilation: correct when to include ubsan) log.debug("queuing a job to build compiler_rt_lib", .{}); comp.queued_jobs.compiler_rt_lib = true; comp.remaining_prelink_tasks += 1; - } else if (output_mode != .Obj) { + } else if (comp.compiler_rt_strat == .obj) { log.debug("queuing a job to build compiler_rt_obj", .{}); // In this case we are making a static library, so we ask // for a compiler-rt object to put in it. comp.queued_jobs.compiler_rt_obj = true; comp.remaining_prelink_tasks += 1; } - } - if (comp.include_ubsan_rt and capable_of_building_compiler_rt) { - if (is_exe_or_dyn_lib) { + if (comp.ubsan_rt_strat == .lib) { log.debug("queuing a job to build ubsan_rt_lib", .{}); comp.job_queued_ubsan_rt_lib = true; - } else if (output_mode != .Obj) { + comp.remaining_prelink_tasks += 1; + } else if (comp.ubsan_rt_strat == .obj) { log.debug("queuing a job to build ubsan_rt_obj", .{}); comp.job_queued_ubsan_rt_obj = true; + comp.remaining_prelink_tasks += 1; } - } - if (is_exe_or_dyn_lib and comp.config.any_fuzz and capable_of_building_compiler_rt) { - log.debug("queuing a job to build libfuzzer", .{}); - comp.queued_jobs.fuzzer_lib = true; - comp.remaining_prelink_tasks += 1; + if (is_exe_or_dyn_lib and comp.config.any_fuzz) { + log.debug("queuing a job to build libfuzzer", .{}); + comp.queued_jobs.fuzzer_lib = true; + comp.remaining_prelink_tasks += 1; + } } } @@ -2656,8 +2689,8 @@ fn addNonIncrementalStuffToCacheManifest( man.hash.addOptional(comp.version); man.hash.add(comp.link_eh_frame_hdr); man.hash.add(comp.skip_linker_dependencies); - man.hash.add(comp.include_compiler_rt); - man.hash.add(comp.include_ubsan_rt); + man.hash.add(comp.compiler_rt_strat); + man.hash.add(comp.ubsan_rt_strat); man.hash.add(comp.rc_includes); man.hash.addListOfBytes(comp.force_undefined_symbols.keys()); man.hash.addListOfBytes(comp.framework_dirs); @@ -3749,11 +3782,11 @@ fn performAllTheWorkInner( } if (comp.queued_jobs.ubsan_rt_lib and comp.ubsan_rt_lib == null) { - comp.link_task_wait_group.spawnManager(buildRt, .{ comp, "ubsan.zig", .libubsan, .Lib, &comp.ubsan_rt_lib, main_progress_node }); + comp.link_task_wait_group.spawnManager(buildRt, .{ comp, "ubsan_rt.zig", .libubsan, .Lib, &comp.ubsan_rt_lib, main_progress_node }); } if (comp.queued_jobs.ubsan_rt_obj and comp.ubsan_rt_obj == null) { - comp.link_task_wait_group.spawnManager(buildRt, .{ comp, "ubsan.zig", .libubsan, .Obj, &comp.ubsan_rt_obj, main_progress_node }); + comp.link_task_wait_group.spawnManager(buildRt, .{ comp, "ubsan_rt.zig", .libubsan, .Obj, &comp.ubsan_rt_obj, main_progress_node }); } if (comp.queued_jobs.glibc_shared_objects) { diff --git a/src/Compilation/Config.zig b/src/Compilation/Config.zig index ee175cce1161..bc648ae4ad26 100644 --- a/src/Compilation/Config.zig +++ b/src/Compilation/Config.zig @@ -32,6 +32,7 @@ any_non_single_threaded: bool, /// per-Module setting. any_error_tracing: bool, any_sanitize_thread: bool, +any_sanitize_c: bool, any_fuzz: bool, pie: bool, /// If this is true then linker code is responsible for making an LLVM IR @@ -87,6 +88,7 @@ pub const Options = struct { ensure_libcpp_on_non_freestanding: bool = false, any_non_single_threaded: bool = false, any_sanitize_thread: bool = false, + any_sanitize_c: bool = false, any_fuzz: bool = false, any_unwind_tables: bool = false, any_dyn_libs: bool = false, @@ -476,6 +478,7 @@ pub fn resolve(options: Options) ResolveError!Config { .any_non_single_threaded = options.any_non_single_threaded, .any_error_tracing = any_error_tracing, .any_sanitize_thread = options.any_sanitize_thread, + .any_sanitize_c = options.any_sanitize_c, .any_fuzz = options.any_fuzz, .san_cov_trace_pc_guard = options.san_cov_trace_pc_guard, .root_error_tracing = root_error_tracing, diff --git a/src/link.zig b/src/link.zig index 82b4e6339798..27711db41d6f 100644 --- a/src/link.zig +++ b/src/link.zig @@ -1102,12 +1102,12 @@ pub const File = struct { log.debug("zcu_obj_path={s}", .{if (zcu_obj_path) |s| s else "(null)"}); - const compiler_rt_path: ?Path = if (comp.include_compiler_rt) + const compiler_rt_path: ?Path = if (comp.compiler_rt_strat == .obj) comp.compiler_rt_obj.?.full_object_path else null; - const ubsan_rt_path: ?Path = if (comp.include_ubsan_rt) + const ubsan_rt_path: ?Path = if (comp.ubsan_rt_strat == .obj) comp.ubsan_rt_obj.?.full_object_path else null; diff --git a/src/link/MachO/relocatable.zig b/src/link/MachO/relocatable.zig index 6b9557dfd4f1..4edf6e043ddb 100644 --- a/src/link/MachO/relocatable.zig +++ b/src/link/MachO/relocatable.zig @@ -93,11 +93,11 @@ pub fn flushStaticLib(macho_file: *MachO, comp: *Compilation, module_obj_path: ? if (module_obj_path) |path| try positionals.append(try link.openObjectInput(diags, path)); - if (comp.include_compiler_rt) { + if (comp.compiler_rt_strat == .obj) { try positionals.append(try link.openObjectInput(diags, comp.compiler_rt_obj.?.full_object_path)); } - if (comp.include_ubsan_rt) { + if (comp.ubsan_rt_strat == .obj) { try positionals.append(try link.openObjectInput(diags, comp.ubsan_rt_obj.?.full_object_path)); } From 35b9db3b1549668c5a3a464f97aed1441b18d791 Mon Sep 17 00:00:00 2001 From: David Rubin Date: Mon, 13 Jan 2025 21:27:24 -0800 Subject: [PATCH 16/22] correct some bugs --- lib/ubsan_rt.zig | 178 ++++++++++++++++++++---- src/Compilation.zig | 56 +++----- test/link/elf.zig | 4 + test/link/glibc_compat/build.zig | 6 + test/link/wasm/export-data/build.zig | 1 + test/link/wasm/export/build.zig | 4 + test/link/wasm/function-table/build.zig | 2 + test/link/wasm/shared-memory/build.zig | 1 + test/link/wasm/type/build.zig | 1 + test/src/StackTrace.zig | 1 + tools/incr-check.zig | 1 + 11 files changed, 195 insertions(+), 60 deletions(-) diff --git a/lib/ubsan_rt.zig b/lib/ubsan_rt.zig index e7030c99ac40..f74f1a11860f 100644 --- a/lib/ubsan_rt.zig +++ b/lib/ubsan_rt.zig @@ -79,12 +79,16 @@ const Value = extern struct { }; } - fn getFloat(value: Value) c_longdouble { + fn getFloat(value: Value) f128 { assert(value.td.kind == .float); const size = value.td.info.float; const max_inline_size = @bitSizeOf(ValueHandle); if (size <= max_inline_size) { - return @bitCast(@intFromPtr(value.handle)); + return @as(switch (@bitSizeOf(usize)) { + 32 => f32, + 64 => f64, + else => @compileError("unsupported target"), + }, @bitCast(@intFromPtr(value.handle))); } return @floatCast(switch (size) { 64 => @as(*const f64, @alignCast(@ptrCast(value.handle))).*, @@ -122,6 +126,11 @@ const Value = extern struct { ) !void { comptime assert(fmt.len == 0); + if (builtin.zig_backend == .stage2_x86_64 and builtin.os.tag == .windows) { + try writer.writeAll("(unknown)"); + return; + } + switch (value.td.kind) { .integer => { if (value.td.isSigned()) { @@ -146,6 +155,14 @@ fn overflowHandler( comptime operator: []const u8, ) void { const S = struct { + fn abort( + data: *const OverflowData, + lhs_handle: ValueHandle, + rhs_handle: ValueHandle, + ) callconv(.c) noreturn { + handler(data, lhs_handle, rhs_handle); + } + fn handler( data: *const OverflowData, lhs_handle: ValueHandle, @@ -167,7 +184,14 @@ fn overflowHandler( } }; - exportHandler(&S.handler, sym_name, true); + exportHandlerWithAbort(&S.handler, &S.abort, sym_name); +} + +fn negationHandlerAbort( + data: *const OverflowData, + value_handle: ValueHandle, +) callconv(.c) noreturn { + negationHandler(data, value_handle); } fn negationHandler( @@ -181,6 +205,14 @@ fn negationHandler( ); } +fn divRemHandlerAbort( + data: *const OverflowData, + lhs_handle: ValueHandle, + rhs_handle: ValueHandle, +) callconv(.c) noreturn { + divRemHandler(data, lhs_handle, rhs_handle); +} + fn divRemHandler( data: *const OverflowData, lhs_handle: ValueHandle, @@ -203,6 +235,20 @@ const AlignmentAssumptionData = extern struct { td: *const TypeDescriptor, }; +fn alignmentAssumptionHandlerAbort( + data: *const AlignmentAssumptionData, + pointer: ValueHandle, + alignment_handle: ValueHandle, + maybe_offset: ?ValueHandle, +) callconv(.c) noreturn { + alignmentAssumptionHandler( + data, + pointer, + alignment_handle, + maybe_offset, + ); +} + fn alignmentAssumptionHandler( data: *const AlignmentAssumptionData, pointer: ValueHandle, @@ -248,6 +294,14 @@ const ShiftOobData = extern struct { rhs_type: *const TypeDescriptor, }; +fn shiftOobAbort( + data: *const ShiftOobData, + lhs_handle: ValueHandle, + rhs_handle: ValueHandle, +) callconv(.c) noreturn { + shiftOob(data, lhs_handle, rhs_handle); +} + fn shiftOob( data: *const ShiftOobData, lhs_handle: ValueHandle, @@ -285,7 +339,17 @@ const OutOfBoundsData = extern struct { index_type: *const TypeDescriptor, }; -fn outOfBounds(data: *const OutOfBoundsData, index_handle: ValueHandle) callconv(.c) noreturn { +fn outOfBoundsAbort( + data: *const OutOfBoundsData, + index_handle: ValueHandle, +) callconv(.c) noreturn { + outOfBounds(data, index_handle); +} + +fn outOfBounds( + data: *const OutOfBoundsData, + index_handle: ValueHandle, +) callconv(.c) noreturn { const index: Value = .{ .handle = index_handle, .td = data.index_type }; logMessage( "index {} out of bounds for type {s}", @@ -297,6 +361,14 @@ const PointerOverflowData = extern struct { loc: SourceLocation, }; +fn pointerOverflowAbort( + data: *const PointerOverflowData, + base: usize, + result: usize, +) callconv(.c) noreturn { + pointerOverflow(data, base, result); +} + fn pointerOverflow( _: *const PointerOverflowData, base: usize, @@ -375,6 +447,13 @@ const TypeMismatchData = extern struct { }, }; +fn typeMismatchAbort( + data: *const TypeMismatchData, + pointer: ?ValueHandle, +) callconv(.c) noreturn { + typeMismatch(data, pointer); +} + fn typeMismatch( data: *const TypeMismatchData, pointer: ?ValueHandle, @@ -416,6 +495,9 @@ const NonNullReturnData = extern struct { attribute_loc: SourceLocation, }; +fn nonNullReturnAbort(data: *const NonNullReturnData) callconv(.c) noreturn { + nonNullReturn(data); +} fn nonNullReturn(_: *const NonNullReturnData) callconv(.c) noreturn { logMessage("null pointer returned from function declared to never return null", .{}); } @@ -426,6 +508,10 @@ const NonNullArgData = extern struct { arg_index: i32, }; +fn nonNullArgAbort(data: *const NonNullArgData) callconv(.c) noreturn { + nonNullArg(data); +} + fn nonNullArg(data: *const NonNullArgData) callconv(.c) noreturn { logMessage( "null pointer passed as argument {}, which is declared to never be null", @@ -438,6 +524,13 @@ const InvalidValueData = extern struct { td: *const TypeDescriptor, }; +fn loadInvalidValueAbort( + data: *const InvalidValueData, + value_handle: ValueHandle, +) callconv(.c) noreturn { + loadInvalidValue(data, value_handle); +} + fn loadInvalidValue( data: *const InvalidValueData, value_handle: ValueHandle, @@ -456,6 +549,9 @@ const InvalidBuiltinData = extern struct { clz, }, }; +fn invalidBuiltinAbort(data: *const InvalidBuiltinData) callconv(.c) noreturn { + invalidBuiltin(data); +} fn invalidBuiltin(data: *const InvalidBuiltinData) callconv(.c) noreturn { logMessage( @@ -469,6 +565,13 @@ const VlaBoundNotPositive = extern struct { td: *const TypeDescriptor, }; +fn vlaBoundNotPositiveAbort( + data: *const VlaBoundNotPositive, + bound_handle: ValueHandle, +) callconv(.c) noreturn { + vlaBoundNotPositive(data, bound_handle); +} + fn vlaBoundNotPositive( data: *const VlaBoundNotPositive, bound_handle: ValueHandle, @@ -491,6 +594,13 @@ const FloatCastOverflowDataV2 = extern struct { to: *const TypeDescriptor, }; +fn floatCastOverflowAbort( + data_handle: *align(8) const anyopaque, + from_handle: ValueHandle, +) callconv(.c) noreturn { + floatCastOverflow(data_handle, from_handle); +} + fn floatCastOverflow( data_handle: *align(8) const anyopaque, from_handle: ValueHandle, @@ -514,22 +624,31 @@ fn floatCastOverflow( } inline fn logMessage(comptime fmt: []const u8, args: anytype) noreturn { - std.debug.panicExtra(null, @returnAddress(), fmt, args); + std.debug.panicExtra(@returnAddress(), fmt, args); } fn exportHandler( handler: anytype, comptime sym_name: []const u8, - comptime abort: bool, ) void { - const linkage = if (builtin.is_test) .internal else .weak; + const linkage = if (builtin.zig_backend == .stage2_x86_64 and builtin.os.tag == .windows) .internal else .weak; + const N = "__ubsan_handle_" ++ sym_name; + @export(handler, .{ .name = N, .linkage = linkage }); +} + +fn exportHandlerWithAbort( + handler: anytype, + abort_handler: anytype, + comptime sym_name: []const u8, +) void { + const linkage = if (builtin.zig_backend == .stage2_x86_64 and builtin.os.tag == .windows) .internal else .weak; { const N = "__ubsan_handle_" ++ sym_name; @export(handler, .{ .name = N, .linkage = linkage }); } - if (abort) { + { const N = "__ubsan_handle_" ++ sym_name ++ "_abort"; - @export(handler, .{ .name = N, .linkage = linkage }); + @export(abort_handler, .{ .name = N, .linkage = linkage }); } } @@ -539,24 +658,29 @@ const can_build_ubsan = switch (builtin.zig_backend) { }; comptime { - overflowHandler("add_overflow", "+"); - overflowHandler("mul_overflow", "*"); - overflowHandler("sub_overflow", "-"); - exportHandler(&alignmentAssumptionHandler, "alignment_assumption", true); - exportHandler(&builtinUnreachable, "builtin_unreachable", false); - exportHandler(&divRemHandler, "divrem_overflow", true); - exportHandler(&floatCastOverflow, "float_cast_overflow", true); - exportHandler(&invalidBuiltin, "invalid_builtin", true); - exportHandler(&loadInvalidValue, "load_invalid_value", true); - exportHandler(&missingReturn, "missing_return", false); - exportHandler(&negationHandler, "negate_overflow", true); - exportHandler(&nonNullArg, "nonnull_arg", true); - exportHandler(&nonNullReturn, "nonnull_return_v1", true); - exportHandler(&outOfBounds, "out_of_bounds", true); - exportHandler(&pointerOverflow, "pointer_overflow", true); - exportHandler(&shiftOob, "shift_out_of_bounds", true); - exportHandler(&typeMismatch, "type_mismatch_v1", true); - exportHandler(&vlaBoundNotPositive, "vla_bound_not_positive", true); + if (can_build_ubsan) { + overflowHandler("add_overflow", "+"); + overflowHandler("mul_overflow", "*"); + overflowHandler("sub_overflow", "-"); + exportHandlerWithAbort(&alignmentAssumptionHandler, &alignmentAssumptionHandlerAbort, "alignment_assumption"); + + exportHandlerWithAbort(&divRemHandler, &divRemHandlerAbort, "divrem_overflow"); + exportHandlerWithAbort(&floatCastOverflow, &floatCastOverflowAbort, "float_cast_overflow"); + exportHandlerWithAbort(&invalidBuiltin, &invalidBuiltinAbort, "invalid_builtin"); + exportHandlerWithAbort(&loadInvalidValue, &loadInvalidValueAbort, "load_invalid_value"); + + exportHandlerWithAbort(&negationHandler, &negationHandlerAbort, "negate_overflow"); + exportHandlerWithAbort(&nonNullArg, &nonNullArgAbort, "nonnull_arg"); + exportHandlerWithAbort(&nonNullReturn, &nonNullReturnAbort, "nonnull_return_v1"); + exportHandlerWithAbort(&outOfBounds, &outOfBoundsAbort, "out_of_bounds"); + exportHandlerWithAbort(&pointerOverflow, &pointerOverflowAbort, "pointer_overflow"); + exportHandlerWithAbort(&shiftOob, &shiftOobAbort, "shift_out_of_bounds"); + exportHandlerWithAbort(&typeMismatch, &typeMismatchAbort, "type_mismatch_v1"); + exportHandlerWithAbort(&vlaBoundNotPositive, &vlaBoundNotPositiveAbort, "vla_bound_not_positive"); + + exportHandler(&builtinUnreachable, "builtin_unreachable"); + exportHandler(&missingReturn, "missing_return"); + } // these checks are nearly impossible to duplicate in zig, as they rely on nuances // in the Itanium C++ ABI. diff --git a/src/Compilation.zig b/src/Compilation.zig index 60586450ca92..1dc7916a5488 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -1317,15 +1317,6 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil break :s .obj; }; - const ubsan_rt_strat: RtStrat = s: { - const want_ubsan_rt = options.want_ubsan_rt orelse (any_sanitize_c and output_mode != .Obj); - if (!want_ubsan_rt) break :s .none; - if (options.skip_linker_dependencies) break :s .none; - if (have_zcu) break :s .zcu; - if (is_exe_or_dyn_lib) break :s .lib; - break :s .obj; - }; - if (compiler_rt_strat == .zcu) { // For objects, this mechanism relies on essentially `_ = @import("compiler-rt");` // injected into the object. @@ -1355,6 +1346,15 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil // unlike compiler_rt, we always want to go through the `_ = @import("ubsan-rt")` // approach, since the ubsan runtime uses quite a lot of the standard library // and this reduces unnecessary bloat. + const ubsan_rt_strat: RtStrat = s: { + const want_ubsan_rt = options.want_ubsan_rt orelse (any_sanitize_c and output_mode != .Obj); + if (!want_ubsan_rt) break :s .none; + if (options.skip_linker_dependencies) break :s .none; + if (have_zcu) break :s .zcu; + if (is_exe_or_dyn_lib) break :s .lib; + break :s .obj; + }; + if (ubsan_rt_strat == .zcu) { const ubsan_rt_mod = try Package.Module.create(arena, .{ .global_cache_directory = options.global_cache_directory, @@ -1362,7 +1362,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil .root = .{ .root_dir = options.zig_lib_directory, }, - .root_src_path = "ubsan.zig", + .root_src_path = "ubsan_rt.zig", }, .fully_qualified_name = "ubsan_rt", .cc_argv = &.{}, @@ -1925,25 +1925,8 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil comp.remaining_prelink_tasks += 1; } -<<<<<<< HEAD - if (comp.include_compiler_rt and capable_of_building_compiler_rt) { - if (is_exe_or_dyn_lib) { -======= - if (target.isMinGW() and comp.config.any_non_single_threaded) { - // LLD might drop some symbols as unused during LTO and GCing, therefore, - // we force mark them for resolution here. - - const tls_index_sym = switch (target.cpu.arch) { - .x86 => "__tls_index", - else => "_tls_index", - }; - - try comp.force_undefined_symbols.put(comp.gpa, tls_index_sym, {}); - } - if (capable_of_building_compiler_rt) { if (comp.compiler_rt_strat == .lib) { ->>>>>>> 050e3e69ac (Compilation: correct when to include ubsan) log.debug("queuing a job to build compiler_rt_lib", .{}); comp.queued_jobs.compiler_rt_lib = true; comp.remaining_prelink_tasks += 1; @@ -1957,11 +1940,11 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil if (comp.ubsan_rt_strat == .lib) { log.debug("queuing a job to build ubsan_rt_lib", .{}); - comp.job_queued_ubsan_rt_lib = true; + comp.queued_jobs.ubsan_rt_lib = true; comp.remaining_prelink_tasks += 1; } else if (comp.ubsan_rt_strat == .obj) { log.debug("queuing a job to build ubsan_rt_obj", .{}); - comp.job_queued_ubsan_rt_obj = true; + comp.queued_jobs.ubsan_rt_obj = true; comp.remaining_prelink_tasks += 1; } @@ -3782,11 +3765,11 @@ fn performAllTheWorkInner( } if (comp.queued_jobs.ubsan_rt_lib and comp.ubsan_rt_lib == null) { - comp.link_task_wait_group.spawnManager(buildRt, .{ comp, "ubsan_rt.zig", .libubsan, .Lib, &comp.ubsan_rt_lib, main_progress_node }); + comp.link_task_wait_group.spawnManager(buildRt, .{ comp, "ubsan_rt.zig", .libubsan, .Lib, false, &comp.ubsan_rt_lib, main_progress_node }); } if (comp.queued_jobs.ubsan_rt_obj and comp.ubsan_rt_obj == null) { - comp.link_task_wait_group.spawnManager(buildRt, .{ comp, "ubsan_rt.zig", .libubsan, .Obj, &comp.ubsan_rt_obj, main_progress_node }); + comp.link_task_wait_group.spawnManager(buildRt, .{ comp, "ubsan_rt.zig", .libubsan, .Obj, false, &comp.ubsan_rt_obj, main_progress_node }); } if (comp.queued_jobs.glibc_shared_objects) { @@ -6037,9 +6020,16 @@ pub fn addCCArgs( try argv.append("-fno-sanitize=function"); // It's recommended to use the minimal runtime in production environments - // due to the security implications of the full runtime. + // due to the security implications of the full runtime. The minimal runtime + // doesn't provide much benefit over simply trapping. if (mod.optimize_mode == .ReleaseSafe) { - try argv.append("-fsanitize-minimal-runtime"); + try argv.append("-fsanitize-trap=undefined"); + } + + // This is necessary because, by default, Clang instructs LLVM to embed a COFF link + // dependency on `libclang_rt.ubsan_standalone.a` when the UBSan runtime is used. + if (target.os.tag == .windows) { + try argv.append("-fno-rtlib-defaultlib"); } } } diff --git a/test/link/elf.zig b/test/link/elf.zig index dc48db98362d..11286af455a0 100644 --- a/test/link/elf.zig +++ b/test/link/elf.zig @@ -2049,6 +2049,8 @@ fn testLargeBss(b: *Build, opts: Options) *Step { \\} , &.{}); exe.linkLibC(); + // Disabled to work around an ELF linker bug. + exe.root_module.sanitize_c = false; const run = addRunArtifact(exe); run.expectExitCode(0); @@ -3552,6 +3554,8 @@ fn testTlsLargeTbss(b: *Build, opts: Options) *Step { \\} , &.{}); exe.linkLibC(); + // Disabled to work around an ELF linker bug. + exe.root_module.sanitize_c = false; const run = addRunArtifact(exe); run.expectStdOutEqual("3 0 5 0 0 0\n"); diff --git a/test/link/glibc_compat/build.zig b/test/link/glibc_compat/build.zig index 972a7a6ae9f0..2e5d0d7d389c 100644 --- a/test/link/glibc_compat/build.zig +++ b/test/link/glibc_compat/build.zig @@ -22,6 +22,8 @@ pub fn build(b: *std.Build) void { .link_libc = true, }), }); + exe.bundle_ubsan_rt = false; + exe.root_module.sanitize_c = false; exe.root_module.addCSourceFile(.{ .file = b.path("main.c") }); // TODO: actually test the output _ = exe.getEmittedBin(); @@ -62,6 +64,8 @@ pub fn build(b: *std.Build) void { .link_libc = true, }), }); + exe.bundle_ubsan_rt = false; + exe.root_module.sanitize_c = false; exe.root_module.addCSourceFile(.{ .file = b.path("glibc_runtime_check.c") }); // Only try running the test if the host glibc is known to be good enough. Ideally, the Zig @@ -161,6 +165,8 @@ pub fn build(b: *std.Build) void { .link_libc = true, }), }); + exe.bundle_ubsan_rt = false; + exe.root_module.sanitize_c = false; // Only try running the test if the host glibc is known to be good enough. Ideally, the Zig // test runner would be able to check this, but see https://github.com/ziglang/zig/pull/17702#issuecomment-1831310453 diff --git a/test/link/wasm/export-data/build.zig b/test/link/wasm/export-data/build.zig index 865017a22f8e..1213792a7552 100644 --- a/test/link/wasm/export-data/build.zig +++ b/test/link/wasm/export-data/build.zig @@ -13,6 +13,7 @@ pub fn build(b: *std.Build) void { }), }); lib.entry = .disabled; + lib.bundle_ubsan_rt = false; lib.use_lld = false; lib.root_module.export_symbol_names = &.{ "foo", "bar" }; // Object being linked has neither functions nor globals named "foo" or "bar" and diff --git a/test/link/wasm/export/build.zig b/test/link/wasm/export/build.zig index cf2c75e3b42a..eb6e70de258e 100644 --- a/test/link/wasm/export/build.zig +++ b/test/link/wasm/export/build.zig @@ -19,6 +19,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize no_export.entry = .disabled; no_export.use_llvm = false; no_export.use_lld = false; + no_export.bundle_ubsan_rt = false; const dynamic_export = b.addExecutable(.{ .name = "dynamic", @@ -32,6 +33,8 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize dynamic_export.rdynamic = true; dynamic_export.use_llvm = false; dynamic_export.use_lld = false; + // don't pull in ubsan, since we're just expecting a minimal executable + dynamic_export.bundle_ubsan_rt = false; const force_export = b.addExecutable(.{ .name = "force", @@ -45,6 +48,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize force_export.root_module.export_symbol_names = &.{"foo"}; force_export.use_llvm = false; force_export.use_lld = false; + force_export.bundle_ubsan_rt = false; const check_no_export = no_export.checkObject(); check_no_export.checkInHeaders(); diff --git a/test/link/wasm/function-table/build.zig b/test/link/wasm/function-table/build.zig index f922b06aecb3..cb3d3bfd3780 100644 --- a/test/link/wasm/function-table/build.zig +++ b/test/link/wasm/function-table/build.zig @@ -21,6 +21,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize export_table.use_lld = false; export_table.export_table = true; export_table.link_gc_sections = false; + export_table.bundle_ubsan_rt = false; const regular_table = b.addExecutable(.{ .name = "regular_table", @@ -34,6 +35,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize regular_table.use_llvm = false; regular_table.use_lld = false; regular_table.link_gc_sections = false; // Ensure function table is not empty + regular_table.bundle_ubsan_rt = false; const check_export = export_table.checkObject(); const check_regular = regular_table.checkObject(); diff --git a/test/link/wasm/shared-memory/build.zig b/test/link/wasm/shared-memory/build.zig index 02dc08a282c7..76c06ae0602f 100644 --- a/test/link/wasm/shared-memory/build.zig +++ b/test/link/wasm/shared-memory/build.zig @@ -31,6 +31,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize_mode: std.builtin.Opt exe.shared_memory = true; exe.max_memory = 67108864; exe.root_module.export_symbol_names = &.{"foo"}; + exe.bundle_ubsan_rt = false; const check_exe = exe.checkObject(); diff --git a/test/link/wasm/type/build.zig b/test/link/wasm/type/build.zig index 063fd779b378..4d08877386b5 100644 --- a/test/link/wasm/type/build.zig +++ b/test/link/wasm/type/build.zig @@ -21,6 +21,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize exe.use_llvm = false; exe.use_lld = false; exe.root_module.export_symbol_names = &.{"foo"}; + exe.bundle_ubsan_rt = false; b.installArtifact(exe); const check_exe = exe.checkObject(); diff --git a/test/src/StackTrace.zig b/test/src/StackTrace.zig index 5151447a436e..2ca5869eaf1e 100644 --- a/test/src/StackTrace.zig +++ b/test/src/StackTrace.zig @@ -81,6 +81,7 @@ fn addExpect( }), .use_llvm = use_llvm, }); + exe.bundle_ubsan_rt = false; const run = b.addRunArtifact(exe); run.removeEnvironmentVariable("CLICOLOR_FORCE"); diff --git a/tools/incr-check.zig b/tools/incr-check.zig index 93d16e1e583b..639d49725618 100644 --- a/tools/incr-check.zig +++ b/tools/incr-check.zig @@ -108,6 +108,7 @@ pub fn main() !void { "build-exe", case.root_source_file, "-fincremental", + "-fno-ubsan-rt", "-target", target.query, "--cache-dir", From 44d3b5a6e47c0caa3b5b3fe998ad52ef1d83e032 Mon Sep 17 00:00:00 2001 From: David Rubin Date: Mon, 3 Feb 2025 00:04:12 -0800 Subject: [PATCH 17/22] build: add comments explaining why we disable ubsan --- test/link/glibc_compat/build.zig | 6 ++++++ test/link/wasm/export-data/build.zig | 1 + test/link/wasm/export/build.zig | 4 +++- test/link/wasm/function-table/build.zig | 2 ++ test/link/wasm/shared-memory/build.zig | 1 + test/link/wasm/type/build.zig | 1 + 6 files changed, 14 insertions(+), 1 deletion(-) diff --git a/test/link/glibc_compat/build.zig b/test/link/glibc_compat/build.zig index 2e5d0d7d389c..4d83d34c9e4e 100644 --- a/test/link/glibc_compat/build.zig +++ b/test/link/glibc_compat/build.zig @@ -22,6 +22,8 @@ pub fn build(b: *std.Build) void { .link_libc = true, }), }); + // We disable UBSAN for these tests as the libc being tested here is + // so old, it doesn't even support compiling our UBSAN implementation. exe.bundle_ubsan_rt = false; exe.root_module.sanitize_c = false; exe.root_module.addCSourceFile(.{ .file = b.path("main.c") }); @@ -64,6 +66,8 @@ pub fn build(b: *std.Build) void { .link_libc = true, }), }); + // We disable UBSAN for these tests as the libc being tested here is + // so old, it doesn't even support compiling our UBSAN implementation. exe.bundle_ubsan_rt = false; exe.root_module.sanitize_c = false; exe.root_module.addCSourceFile(.{ .file = b.path("glibc_runtime_check.c") }); @@ -165,6 +169,8 @@ pub fn build(b: *std.Build) void { .link_libc = true, }), }); + // We disable UBSAN for these tests as the libc being tested here is + // so old, it doesn't even support compiling our UBSAN implementation. exe.bundle_ubsan_rt = false; exe.root_module.sanitize_c = false; diff --git a/test/link/wasm/export-data/build.zig b/test/link/wasm/export-data/build.zig index 1213792a7552..80ca1ebeea51 100644 --- a/test/link/wasm/export-data/build.zig +++ b/test/link/wasm/export-data/build.zig @@ -13,6 +13,7 @@ pub fn build(b: *std.Build) void { }), }); lib.entry = .disabled; + // Disabling due to self-hosted wasm linker bug. lib.bundle_ubsan_rt = false; lib.use_lld = false; lib.root_module.export_symbol_names = &.{ "foo", "bar" }; diff --git a/test/link/wasm/export/build.zig b/test/link/wasm/export/build.zig index eb6e70de258e..a3fa2c345c46 100644 --- a/test/link/wasm/export/build.zig +++ b/test/link/wasm/export/build.zig @@ -19,6 +19,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize no_export.entry = .disabled; no_export.use_llvm = false; no_export.use_lld = false; + // Don't pull in ubsan, since we're just expecting a very minimal executable. no_export.bundle_ubsan_rt = false; const dynamic_export = b.addExecutable(.{ @@ -33,7 +34,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize dynamic_export.rdynamic = true; dynamic_export.use_llvm = false; dynamic_export.use_lld = false; - // don't pull in ubsan, since we're just expecting a minimal executable + // Don't pull in ubsan, since we're just expecting a very minimal executable. dynamic_export.bundle_ubsan_rt = false; const force_export = b.addExecutable(.{ @@ -48,6 +49,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize force_export.root_module.export_symbol_names = &.{"foo"}; force_export.use_llvm = false; force_export.use_lld = false; + // Don't pull in ubsan, since we're just expecting a very minimal executable. force_export.bundle_ubsan_rt = false; const check_no_export = no_export.checkObject(); diff --git a/test/link/wasm/function-table/build.zig b/test/link/wasm/function-table/build.zig index cb3d3bfd3780..156ea788053c 100644 --- a/test/link/wasm/function-table/build.zig +++ b/test/link/wasm/function-table/build.zig @@ -21,6 +21,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize export_table.use_lld = false; export_table.export_table = true; export_table.link_gc_sections = false; + // Don't pull in ubsan, since we're just expecting a very minimal executable. export_table.bundle_ubsan_rt = false; const regular_table = b.addExecutable(.{ @@ -35,6 +36,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize regular_table.use_llvm = false; regular_table.use_lld = false; regular_table.link_gc_sections = false; // Ensure function table is not empty + // Don't pull in ubsan, since we're just expecting a very minimal executable. regular_table.bundle_ubsan_rt = false; const check_export = export_table.checkObject(); diff --git a/test/link/wasm/shared-memory/build.zig b/test/link/wasm/shared-memory/build.zig index 76c06ae0602f..1cb26deb5471 100644 --- a/test/link/wasm/shared-memory/build.zig +++ b/test/link/wasm/shared-memory/build.zig @@ -31,6 +31,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize_mode: std.builtin.Opt exe.shared_memory = true; exe.max_memory = 67108864; exe.root_module.export_symbol_names = &.{"foo"}; + // Don't pull in ubsan, since we're just expecting a very minimal executable. exe.bundle_ubsan_rt = false; const check_exe = exe.checkObject(); diff --git a/test/link/wasm/type/build.zig b/test/link/wasm/type/build.zig index 4d08877386b5..1644ddfa7020 100644 --- a/test/link/wasm/type/build.zig +++ b/test/link/wasm/type/build.zig @@ -21,6 +21,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize exe.use_llvm = false; exe.use_lld = false; exe.root_module.export_symbol_names = &.{"foo"}; + // Don't pull in ubsan, since we're just expecting a very minimal executable. exe.bundle_ubsan_rt = false; b.installArtifact(exe); From d4413e3504cf7b260a2fd29546ab29d6fd3de079 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 23 Feb 2025 20:25:07 -0800 Subject: [PATCH 18/22] ubsan: avoid depending on `@returnAddress` combined with `inline` --- lib/ubsan_rt.zig | 81 ++++++++++++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 33 deletions(-) diff --git a/lib/ubsan_rt.zig b/lib/ubsan_rt.zig index f74f1a11860f..08c3e6b18005 100644 --- a/lib/ubsan_rt.zig +++ b/lib/ubsan_rt.zig @@ -1,6 +1,7 @@ const std = @import("std"); const builtin = @import("builtin"); const assert = std.debug.assert; +const panic = std.debug.panicExtra; const SourceLocation = extern struct { file_name: ?[*:0]const u8, @@ -175,7 +176,7 @@ fn overflowHandler( const fmt = "{s} integer overflow: " ++ "{} " ++ operator ++ " {} cannot be represented in type {s}"; - logMessage(fmt, .{ + panic(@returnAddress(), fmt, .{ if (is_signed) "signed" else "unsigned", lhs, rhs, @@ -199,7 +200,8 @@ fn negationHandler( value_handle: ValueHandle, ) callconv(.c) noreturn { const value: Value = .{ .handle = value_handle, .td = data.td }; - logMessage( + panic( + @returnAddress(), "negation of {} cannot be represented in type {s}", .{ value, data.td.getName() }, ); @@ -222,11 +224,12 @@ fn divRemHandler( const rhs: Value = .{ .handle = rhs_handle, .td = data.td }; if (rhs.isMinusOne()) { - logMessage( + panic( + @returnAddress(), "division of {} by -1 cannot be represented in type {s}", .{ lhs, data.td.getName() }, ); - } else logMessage("division by zero", .{}); + } else panic(@returnAddress(), "division by zero", .{}); } const AlignmentAssumptionData = extern struct { @@ -263,7 +266,8 @@ fn alignmentAssumptionHandler( const alignment: Value = .{ .handle = alignment_handle, .td = data.td }; if (maybe_offset) |offset| { - logMessage( + panic( + @returnAddress(), "assumption of {} byte alignment (with offset of {} byte) for pointer of type {s} failed\n" ++ "offset address is {} aligned, misalignment offset is {} bytes", .{ @@ -275,7 +279,8 @@ fn alignmentAssumptionHandler( }, ); } else { - logMessage( + panic( + @returnAddress(), "assumption of {} byte alignment for pointer of type {s} failed\n" ++ "address is {} aligned, misalignment offset is {} bytes", .{ @@ -314,18 +319,20 @@ fn shiftOob( rhs.getPositiveInteger() >= data.lhs_type.getIntegerSize()) { if (rhs.isNegative()) { - logMessage("shift exponent {} is negative", .{rhs}); + panic(@returnAddress(), "shift exponent {} is negative", .{rhs}); } else { - logMessage( + panic( + @returnAddress(), "shift exponent {} is too large for {}-bit type {s}", .{ rhs, data.lhs_type.getIntegerSize(), data.lhs_type.getName() }, ); } } else { if (lhs.isNegative()) { - logMessage("left shift of negative value {}", .{lhs}); + panic(@returnAddress(), "left shift of negative value {}", .{lhs}); } else { - logMessage( + panic( + @returnAddress(), "left shift of {} by {} places cannot be represented in type {s}", .{ lhs, rhs, data.lhs_type.getName() }, ); @@ -351,7 +358,8 @@ fn outOfBounds( index_handle: ValueHandle, ) callconv(.c) noreturn { const index: Value = .{ .handle = index_handle, .td = data.index_type }; - logMessage( + panic( + @returnAddress(), "index {} out of bounds for type {s}", .{ index, data.array_type.getName() }, ); @@ -376,13 +384,14 @@ fn pointerOverflow( ) callconv(.c) noreturn { if (base == 0) { if (result == 0) { - logMessage("applying zero offset to null pointer", .{}); + panic(@returnAddress(), "applying zero offset to null pointer", .{}); } else { - logMessage("applying non-zero offset {} to null pointer", .{result}); + panic(@returnAddress(), "applying non-zero offset {} to null pointer", .{result}); } } else { if (result == 0) { - logMessage( + panic( + @returnAddress(), "applying non-zero offset to non-null pointer 0x{x} produced null pointer", .{base}, ); @@ -391,18 +400,21 @@ fn pointerOverflow( const signed_result: isize = @bitCast(result); if ((signed_base >= 0) == (signed_result >= 0)) { if (base > result) { - logMessage( + panic( + @returnAddress(), "addition of unsigned offset to 0x{x} overflowed to 0x{x}", .{ base, result }, ); } else { - logMessage( + panic( + @returnAddress(), "subtraction of unsigned offset to 0x{x} overflowed to 0x{x}", .{ base, result }, ); } } else { - logMessage( + panic( + @returnAddress(), "pointer index expression with base 0x{x} overflowed to 0x{x}", .{ base, result }, ); @@ -462,17 +474,20 @@ fn typeMismatch( const handle: usize = @intFromPtr(pointer); if (pointer == null) { - logMessage( + panic( + @returnAddress(), "{s} null pointer of type {s}", .{ data.kind.getName(), data.td.getName() }, ); } else if (!std.mem.isAligned(handle, alignment)) { - logMessage( + panic( + @returnAddress(), "{s} misaligned address 0x{x} for type {s}, which requires {} byte alignment", .{ data.kind.getName(), handle, data.td.getName(), alignment }, ); } else { - logMessage( + panic( + @returnAddress(), "{s} address 0x{x} with insufficient space for an object of type {s}", .{ data.kind.getName(), handle, data.td.getName() }, ); @@ -484,11 +499,11 @@ const UnreachableData = extern struct { }; fn builtinUnreachable(_: *const UnreachableData) callconv(.c) noreturn { - logMessage("execution reached an unreachable program point", .{}); + panic(@returnAddress(), "execution reached an unreachable program point", .{}); } fn missingReturn(_: *const UnreachableData) callconv(.c) noreturn { - logMessage("execution reached the end of a value-returning function without returning a value", .{}); + panic(@returnAddress(), "execution reached the end of a value-returning function without returning a value", .{}); } const NonNullReturnData = extern struct { @@ -499,7 +514,7 @@ fn nonNullReturnAbort(data: *const NonNullReturnData) callconv(.c) noreturn { nonNullReturn(data); } fn nonNullReturn(_: *const NonNullReturnData) callconv(.c) noreturn { - logMessage("null pointer returned from function declared to never return null", .{}); + panic(@returnAddress(), "null pointer returned from function declared to never return null", .{}); } const NonNullArgData = extern struct { @@ -513,7 +528,8 @@ fn nonNullArgAbort(data: *const NonNullArgData) callconv(.c) noreturn { } fn nonNullArg(data: *const NonNullArgData) callconv(.c) noreturn { - logMessage( + panic( + @returnAddress(), "null pointer passed as argument {}, which is declared to never be null", .{data.arg_index}, ); @@ -536,7 +552,8 @@ fn loadInvalidValue( value_handle: ValueHandle, ) callconv(.c) noreturn { const value: Value = .{ .handle = value_handle, .td = data.td }; - logMessage( + panic( + @returnAddress(), "load of value {}, which is not valid for type {s}", .{ value, data.td.getName() }, ); @@ -554,7 +571,8 @@ fn invalidBuiltinAbort(data: *const InvalidBuiltinData) callconv(.c) noreturn { } fn invalidBuiltin(data: *const InvalidBuiltinData) callconv(.c) noreturn { - logMessage( + panic( + @returnAddress(), "passing zero to {s}(), which is not a valid argument", .{@tagName(data.kind)}, ); @@ -577,7 +595,8 @@ fn vlaBoundNotPositive( bound_handle: ValueHandle, ) callconv(.c) noreturn { const bound: Value = .{ .handle = bound_handle, .td = data.td }; - logMessage( + panic( + @returnAddress(), "variable length array bound evaluates to non-positive value {}", .{bound}, ); @@ -611,22 +630,18 @@ fn floatCastOverflow( if (@as(u16, ptr[0]) + @as(u16, ptr[1]) < 2 or ptr[0] == 0xFF or ptr[1] == 0xFF) { const data: *const FloatCastOverflowData = @ptrCast(data_handle); const from_value: Value = .{ .handle = from_handle, .td = data.from }; - logMessage("{} is outside the range of representable values of type {s}", .{ + panic(@returnAddress(), "{} is outside the range of representable values of type {s}", .{ from_value, data.to.getName(), }); } else { const data: *const FloatCastOverflowDataV2 = @ptrCast(data_handle); const from_value: Value = .{ .handle = from_handle, .td = data.from }; - logMessage("{} is outside the range of representable values of type {s}", .{ + panic(@returnAddress(), "{} is outside the range of representable values of type {s}", .{ from_value, data.to.getName(), }); } } -inline fn logMessage(comptime fmt: []const u8, args: anytype) noreturn { - std.debug.panicExtra(@returnAddress(), fmt, args); -} - fn exportHandler( handler: anytype, comptime sym_name: []const u8, From e18c7f9cca42731792cb9700744380197812747e Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 23 Feb 2025 20:25:39 -0800 Subject: [PATCH 19/22] ubsan: don't create ubsan in every static lib by default Problem here is if zig is asked to create multiple static libraries, it will build the runtime multiple times and then they will conflict. Instead we want to build the runtime exactly once. --- src/Compilation.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Compilation.zig b/src/Compilation.zig index 1dc7916a5488..a88063b95242 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -1347,7 +1347,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil // approach, since the ubsan runtime uses quite a lot of the standard library // and this reduces unnecessary bloat. const ubsan_rt_strat: RtStrat = s: { - const want_ubsan_rt = options.want_ubsan_rt orelse (any_sanitize_c and output_mode != .Obj); + const want_ubsan_rt = options.want_ubsan_rt orelse (any_sanitize_c and is_exe_or_dyn_lib); if (!want_ubsan_rt) break :s .none; if (options.skip_linker_dependencies) break :s .none; if (have_zcu) break :s .zcu; From faf256e429914a816dd5ab1555671b7ee69ecc4e Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 23 Feb 2025 21:27:51 -0800 Subject: [PATCH 20/22] std.mem.indexOfSentinel: don't ask the OS the page size simply use page_size_min instead. better yet, this logic would avoid depending on page size entirely... --- lib/std/mem.zig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 0fa7deadc3d0..016f3ab9da7a 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -1098,12 +1098,12 @@ pub fn indexOfSentinel(comptime T: type, comptime sentinel: T, p: [*:sentinel]co // as we don't read into a new page. This should be the case for most architectures // which use paged memory, however should be confirmed before adding a new arch below. .aarch64, .x86, .x86_64 => if (std.simd.suggestVectorLength(T)) |block_len| { - const page_size = std.heap.pageSize(); + const page_size = std.heap.page_size_min; const block_size = @sizeOf(T) * block_len; const Block = @Vector(block_len, T); const mask: Block = @splat(sentinel); - comptime assert(std.heap.page_size_max % @sizeOf(Block) == 0); + comptime assert(std.heap.page_size_min % @sizeOf(Block) == 0); assert(page_size % @sizeOf(Block) == 0); // First block may be unaligned @@ -1153,7 +1153,7 @@ pub fn indexOfSentinel(comptime T: type, comptime sentinel: T, p: [*:sentinel]co test "indexOfSentinel vector paths" { const Types = [_]type{ u8, u16, u32, u64 }; const allocator = std.testing.allocator; - const page_size = std.heap.pageSize(); + const page_size = std.heap.page_size_min; inline for (Types) |T| { const block_len = std.simd.suggestVectorLength(T) orelse continue; From 2447b87d98ee4b0fe13938c9b2acf2bb06cab572 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 23 Feb 2025 21:28:26 -0800 Subject: [PATCH 21/22] std.heap.page_size_min: relax freestanding restriction x86_64 and aarch64 have safe values for page_size_min --- lib/std/heap.zig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/std/heap.zig b/lib/std/heap.zig index 6fbc3d8b75ff..0d35c98c1c2f 100644 --- a/lib/std/heap.zig +++ b/lib/std/heap.zig @@ -42,10 +42,8 @@ pub var next_mmap_addr_hint: ?[*]align(page_size_min) u8 = null; /// /// On many systems, the actual page size can only be determined at runtime /// with `pageSize`. -pub const page_size_min: usize = std.options.page_size_min orelse (page_size_min_default orelse if (builtin.os.tag == .freestanding or builtin.os.tag == .other) - @compileError("freestanding/other page_size_min must provided with std.options.page_size_min") -else - @compileError(@tagName(builtin.cpu.arch) ++ "-" ++ @tagName(builtin.os.tag) ++ " has unknown page_size_min; populate std.options.page_size_min")); +pub const page_size_min: usize = std.options.page_size_min orelse page_size_min_default orelse + @compileError(@tagName(builtin.cpu.arch) ++ "-" ++ @tagName(builtin.os.tag) ++ " has unknown page_size_min; populate std.options.page_size_min"); /// comptime-known maximum page size of the target. /// @@ -831,8 +829,10 @@ const page_size_min_default: ?usize = switch (builtin.os.tag) { .xtensa => 4 << 10, else => null, }, - .freestanding => switch (builtin.cpu.arch) { + .freestanding, .other => switch (builtin.cpu.arch) { .wasm32, .wasm64 => 64 << 10, + .x86, .x86_64 => 4 << 10, + .aarch64, .aarch64_be => 4 << 10, else => null, }, else => null, From ca83f52fd95fa6ebf28b7d51d4ef396a2ccf4be4 Mon Sep 17 00:00:00 2001 From: David Rubin Date: Mon, 24 Feb 2025 03:49:45 -0800 Subject: [PATCH 22/22] ubsan: update wording --- lib/ubsan_rt.zig | 13 ++++++++----- test/link/elf.zig | 6 ++++-- test/link/wasm/export-data/build.zig | 3 ++- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/lib/ubsan_rt.zig b/lib/ubsan_rt.zig index 08c3e6b18005..20b0cc294223 100644 --- a/lib/ubsan_rt.zig +++ b/lib/ubsan_rt.zig @@ -127,6 +127,7 @@ const Value = extern struct { ) !void { comptime assert(fmt.len == 0); + // Work around x86_64 backend limitation. if (builtin.zig_backend == .stage2_x86_64 and builtin.os.tag == .windows) { try writer.writeAll("(unknown)"); return; @@ -646,6 +647,7 @@ fn exportHandler( handler: anytype, comptime sym_name: []const u8, ) void { + // Work around x86_64 backend limitation. const linkage = if (builtin.zig_backend == .stage2_x86_64 and builtin.os.tag == .windows) .internal else .weak; const N = "__ubsan_handle_" ++ sym_name; @export(handler, .{ .name = N, .linkage = linkage }); @@ -656,6 +658,7 @@ fn exportHandlerWithAbort( abort_handler: anytype, comptime sym_name: []const u8, ) void { + // Work around x86_64 backend limitation. const linkage = if (builtin.zig_backend == .stage2_x86_64 and builtin.os.tag == .windows) .internal else .weak; { const N = "__ubsan_handle_" ++ sym_name; @@ -697,12 +700,12 @@ comptime { exportHandler(&missingReturn, "missing_return"); } - // these checks are nearly impossible to duplicate in zig, as they rely on nuances + // these checks are nearly impossible to replicate in zig, as they rely on nuances // in the Itanium C++ ABI. - // exportHelper("dynamic_type_cache_miss", "dynamic-type-cache-miss", true); - // exportHelper("vptr_type_cache", "vptr-type-cache", true); + // exportHandlerWithAbort(&dynamicTypeCacheMiss, &dynamicTypeCacheMissAbort, "dynamic-type-cache-miss"); + // exportHandlerWithAbort(&vptrTypeCache, &vptrTypeCacheAbort, "vptr-type-cache"); // we disable -fsanitize=function for reasons explained in src/Compilation.zig - // exportHelper("function-type-mismatch", "function_type_mismatch", true); - // exportHelper("function-type-mismatch-v1", "function_type_mismatch_v1", true); + // exportHandlerWithAbort(&functionTypeMismatch, &functionTypeMismatchAbort, "function-type-mismatch"); + // exportHandlerWithAbort(&functionTypeMismatchV1, &functionTypeMismatchV1Abort, "function-type-mismatch-v1"); } diff --git a/test/link/elf.zig b/test/link/elf.zig index 11286af455a0..60203f0a7834 100644 --- a/test/link/elf.zig +++ b/test/link/elf.zig @@ -2049,7 +2049,8 @@ fn testLargeBss(b: *Build, opts: Options) *Step { \\} , &.{}); exe.linkLibC(); - // Disabled to work around an ELF linker bug. + // Disabled to work around the ELF linker crashing. + // Can be reproduced on a x86_64-linux host by commenting out the line below. exe.root_module.sanitize_c = false; const run = addRunArtifact(exe); @@ -3554,7 +3555,8 @@ fn testTlsLargeTbss(b: *Build, opts: Options) *Step { \\} , &.{}); exe.linkLibC(); - // Disabled to work around an ELF linker bug. + // Disabled to work around the ELF linker crashing. + // Can be reproduced on a x86_64-linux host by commenting out the line below. exe.root_module.sanitize_c = false; const run = addRunArtifact(exe); diff --git a/test/link/wasm/export-data/build.zig b/test/link/wasm/export-data/build.zig index 80ca1ebeea51..1cddde208d42 100644 --- a/test/link/wasm/export-data/build.zig +++ b/test/link/wasm/export-data/build.zig @@ -13,7 +13,8 @@ pub fn build(b: *std.Build) void { }), }); lib.entry = .disabled; - // Disabling due to self-hosted wasm linker bug. + // Disabled to work around the Wasm linker crashing. + // Can be reproduced by commenting out the line below. lib.bundle_ubsan_rt = false; lib.use_lld = false; lib.root_module.export_symbol_names = &.{ "foo", "bar" };