// Consolidated, reformatted result of a five-patch series by Jakub Konka
// (2023-07-13) that teaches the CheckObject build step to dump ELF files:
//
//   1/5 4c3625d74546  check-object: dump ELF header
//   2/5 0627ca527ac9  elf: add ELF and GNU-specific missing defs
//   3/5 76dc0d516089  check-object: dump some info on SHDRs
//   4/5 33154b511cbf  check-object: dump more info on SHDRs
//   5/5 77026c67a420  check-object: dump info on PHDRs
//
// Besides the definitions below, the series adds `const elf = std.elf;` to
// lib/std/Build/Step/CheckObject.zig and makes `make` dispatch `.elf` objects
// to `ElfDumper.parseAndDump(step, contents, .{ .dump_symtab = self.dump_symtab })`
// in place of the former `@panic("TODO elf parser")`.

// ---------------------------------------------------------------------------
// lib/std/elf.zig: definitions added by patch 2/5.
// (`VER_FLG_BASE` / `VER_FLG_WEAK` already existed; the patch moves them next
// to the other version constants and adds doc comments.)
// ---------------------------------------------------------------------------

pub const DF_ORIGIN = 0x00000001;
pub const DF_SYMBOLIC = 0x00000002;
pub const DF_TEXTREL = 0x00000004;
pub const DF_BIND_NOW = 0x00000008;
pub const DF_STATIC_TLS = 0x00000010;

pub const DF_1_NOW = 0x00000001;
pub const DF_1_GLOBAL = 0x00000002;
pub const DF_1_GROUP = 0x00000004;
pub const DF_1_NODELETE = 0x00000008;
pub const DF_1_LOADFLTR = 0x00000010;
pub const DF_1_INITFIRST = 0x00000020;
pub const DF_1_NOOPEN = 0x00000040;
pub const DF_1_ORIGIN = 0x00000080;
pub const DF_1_DIRECT = 0x00000100;
pub const DF_1_TRANS = 0x00000200;
pub const DF_1_INTERPOSE = 0x00000400;
pub const DF_1_NODEFLIB = 0x00000800;
pub const DF_1_NODUMP = 0x00001000;
pub const DF_1_CONFALT = 0x00002000;
pub const DF_1_ENDFILTEE = 0x00004000;
pub const DF_1_DISPRELDNE = 0x00008000;
pub const DF_1_DISPRELPND = 0x00010000;
pub const DF_1_NODIRECT = 0x00020000;
pub const DF_1_IGNMULDEF = 0x00040000;
pub const DF_1_NOKSYMS = 0x00080000;
pub const DF_1_NOHDR = 0x00100000;
pub const DF_1_EDITED = 0x00200000;
pub const DF_1_NORELOC = 0x00400000;
pub const DF_1_SYMINTPOSE = 0x00800000;
pub const DF_1_GLOBAUDIT = 0x01000000;
pub const DF_1_SINGLETON = 0x02000000;
pub const DF_1_STUB = 0x04000000;
pub const DF_1_PIE = 0x08000000;

pub const VERSYM_HIDDEN = 0x8000;
pub const VERSYM_VERSION = 0x7fff;

/// Symbol is local
pub const VER_NDX_LOCAL = 0;
/// Symbol is global
pub const VER_NDX_GLOBAL = 1;
/// Beginning of reserved entries
pub const VER_NDX_LORESERVE = 0xff00;
/// Symbol is to be eliminated
pub const VER_NDX_ELIMINATE = 0xff01;

/// Version definition of the file itself
pub const VER_FLG_BASE = 1;
/// Weak version identifier
pub const VER_FLG_WEAK = 2;

/// GNU hash table
pub const SHT_GNU_HASH = 0x6ffffff6;
/// GNU version definition table
pub const SHT_GNU_VERDEF = 0x6ffffffd;
/// GNU needed versions table
pub const SHT_GNU_VERNEED = 0x6ffffffe;
/// GNU symbol version table
pub const SHT_GNU_VERSYM = 0x6fffffff;

// ---------------------------------------------------------------------------
// lib/std/Build/Step/CheckObject.zig: `ElfDumper` as of patch 5/5.
// Relies on names declared by the enclosing file: `std`, `assert`, `elf`,
// `math`, `mem`, `Allocator`, `Step` and `Opts`.
// ---------------------------------------------------------------------------

/// Renders a textual, line-oriented dump of an ELF image: the file header,
/// then every section header, then every program header.
///
/// Only the `Elf64_*` record layouts are read, and fields are used exactly as
/// stored in the file (no class or endianness dispatch is performed here).
const ElfDumper = struct {
    const symtab_label = "symtab";

    /// A symbol table together with the string table its `st_name` offsets
    /// point into.
    const Symtab = struct {
        symbols: []align(1) const elf.Elf64_Sym,
        strings: []const u8,

        /// Returns the symbol at `index`, or null when `index` is out of range.
        fn get(st: Symtab, index: usize) ?elf.Elf64_Sym {
            if (index >= st.symbols.len) return null;
            return st.symbols[index];
        }

        /// Returns the NUL-terminated name of the symbol at `index`, or null
        /// when `index` is out of range. Asserts that the symbol's name offset
        /// lies inside `strings`.
        fn getName(st: Symtab, index: usize) ?[]const u8 {
            const sym = st.get(index) orelse return null;
            assert(sym.st_name < st.strings.len);
            // `mem.sliceTo` takes `anytype`, so the cast needs an explicit
            // destination type (same form as `getSectionName`).
            return mem.sliceTo(@as([*:0]const u8, @ptrCast(st.strings.ptr + sym.st_name)), 0);
        }
    };

    /// Everything the dump functions need; all slices are views into `data`.
    const Context = struct {
        gpa: Allocator,
        data: []const u8,
        hdr: elf.Elf64_Ehdr,
        shdrs: []align(1) const elf.Elf64_Shdr,
        phdrs: []align(1) const elf.Elf64_Phdr,
        shstrtab: []const u8,
        symtab: ?Symtab = null,
        dysymtab: ?Symtab = null,
    };

    /// Parses `bytes` as an ELF image and returns its textual dump.
    ///
    /// The returned slice is allocated with `step.owner.allocator`; the caller
    /// owns it.
    ///
    /// Errors:
    /// * `error.InvalidMagicNumber` - `bytes` does not start with `\x7fELF`.
    /// * `error.InvalidHeaderTable` - the section or program header table
    ///   described by the file header does not fit inside `bytes`.
    /// * `error.InvalidSymbolTable` - a symbol table's size is not a multiple
    ///   of `@sizeOf(elf.Elf64_Sym)`.
    /// * any error from reading the header (e.g. truncated input) or from
    ///   allocating the output.
    fn parseAndDump(step: *Step, bytes: []const u8, opts: Opts) ![]const u8 {
        const gpa = step.owner.allocator;
        var stream = std.io.fixedBufferStream(bytes);
        const reader = stream.reader();

        const hdr = try reader.readStruct(elf.Elf64_Ehdr);
        if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) {
            return error.InvalidMagicNumber;
        }

        // Validate the table extents before forming slices over them.
        const shdrs = try sliceTable(elf.Elf64_Shdr, bytes, hdr.e_shoff, hdr.e_shnum);
        const phdrs = try sliceTable(elf.Elf64_Phdr, bytes, hdr.e_phoff, hdr.e_phnum);

        var ctx = Context{
            .gpa = gpa,
            .data = bytes,
            .hdr = hdr,
            .shdrs = shdrs,
            .phdrs = phdrs,
            .shstrtab = &.{},
        };
        // A file without section headers has no section-name table to load;
        // indexing `shdrs` unconditionally would go out of bounds.
        if (hdr.e_shstrndx < shdrs.len) {
            ctx.shstrtab = getSectionContents(ctx, hdr.e_shstrndx);
        }

        if (opts.dump_symtab) {
            // The tables are recorded on `ctx`; no dump function in this
            // struct prints them yet.
            for (ctx.shdrs, 0..) |shdr, i| {
                const is_symtab = shdr.sh_type == elf.SHT_SYMTAB;
                if (!is_symtab and shdr.sh_type != elf.SHT_DYNSYM) continue;

                const raw = getSectionContents(ctx, i);
                if (raw.len % @sizeOf(elf.Elf64_Sym) != 0) return error.InvalidSymbolTable;
                const nsyms = raw.len / @sizeOf(elf.Elf64_Sym);

                const table = Symtab{
                    .symbols = @as([*]align(1) const elf.Elf64_Sym, @ptrCast(raw.ptr))[0..nsyms],
                    // `sh_link` names the section holding this table's strings.
                    .strings = getSectionContents(ctx, shdr.sh_link),
                };
                if (is_symtab) ctx.symtab = table else ctx.dysymtab = table;
            }
        }

        var output = std.ArrayList(u8).init(gpa);
        errdefer output.deinit();
        const writer = output.writer();

        try dumpHeader(ctx, writer);
        try dumpShdrs(ctx, writer);
        try dumpPhdrs(ctx, writer);

        return output.toOwnedSlice();
    }

    /// Returns `count` unaligned records of type `T` that start `offset` bytes
    /// into `bytes`, or `error.InvalidHeaderTable` when that range does not
    /// fit inside `bytes`. An empty table is accepted regardless of `offset`.
    fn sliceTable(
        comptime T: type,
        bytes: []const u8,
        offset: u64,
        count: usize,
    ) ![]align(1) const T {
        if (count == 0) return @as([*]align(1) const T, @ptrCast(bytes.ptr))[0..0];

        const start = math.cast(usize, offset) orelse return error.InvalidHeaderTable;
        const size = math.mul(usize, count, @sizeOf(T)) catch return error.InvalidHeaderTable;
        const end = math.add(usize, start, size) catch return error.InvalidHeaderTable;
        if (end > bytes.len) return error.InvalidHeaderTable;

        return @as([*]align(1) const T, @ptrCast(bytes.ptr + start))[0..count];
    }

    /// Returns the name of section `shndx`, read from the section-name string
    /// table. Asserts that the section's name offset lies inside that table.
    fn getSectionName(ctx: Context, shndx: usize) []const u8 {
        const shdr = ctx.shdrs[shndx];
        assert(shdr.sh_name < ctx.shstrtab.len);
        return mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.shstrtab.ptr + shdr.sh_name)), 0);
    }

    /// Returns the file bytes `[sh_offset, sh_offset + sh_size)` of section
    /// `shndx`. Asserts that the range lies inside the image.
    fn getSectionContents(ctx: Context, shndx: usize) []const u8 {
        const shdr = ctx.shdrs[shndx];
        assert(shdr.sh_offset < ctx.data.len);
        assert(shdr.sh_offset + shdr.sh_size <= ctx.data.len);
        return ctx.data[shdr.sh_offset..][0..shdr.sh_size];
    }

    /// Writes the `header` record: the file type's tag name and the entry
    /// address in hex.
    fn dumpHeader(ctx: Context, writer: anytype) !void {
        try writer.writeAll("header\n");
        try writer.print("type {s}\n", .{@tagName(ctx.hdr.e_type)});
        try writer.print("entry {x}\n", .{ctx.hdr.e_entry});
    }

    /// Writes one `shdr N` record per section header: name, type, and the
    /// address, file offset, size and alignment in hex.
    fn dumpShdrs(ctx: Context, writer: anytype) !void {
        for (ctx.shdrs, 0..) |shdr, shndx| {
            try writer.print("shdr {d}\n", .{shndx});
            try writer.print("name {s}\n", .{getSectionName(ctx, shndx)});
            try writer.print("type {s}\n", .{fmtShType(shdr.sh_type)});
            try writer.print("addr {x}\n", .{shdr.sh_addr});
            try writer.print("offset {x}\n", .{shdr.sh_offset});
            try writer.print("size {x}\n", .{shdr.sh_size});
            try writer.print("addralign {x}\n", .{shdr.sh_addralign});
            // TODO dump formatted sh_flags
        }
    }

    /// Wraps a raw `sh_type` so it can be passed to `print`.
    fn fmtShType(sh_type: u32) std.fmt.Formatter(formatShType) {
        return .{ .data = sh_type };
    }

    /// Returns the short name of a section type this dumper knows, or null.
    fn shTypeName(sh_type: u32) ?[]const u8 {
        return switch (sh_type) {
            elf.SHT_NULL => "NULL",
            elf.SHT_PROGBITS => "PROGBITS",
            elf.SHT_SYMTAB => "SYMTAB",
            elf.SHT_STRTAB => "STRTAB",
            elf.SHT_RELA => "RELA",
            elf.SHT_HASH => "HASH",
            elf.SHT_DYNAMIC => "DYNAMIC",
            elf.SHT_NOTE => "NOTE",
            elf.SHT_NOBITS => "NOBITS",
            elf.SHT_REL => "REL",
            elf.SHT_SHLIB => "SHLIB",
            elf.SHT_DYNSYM => "DYNSYM",
            elf.SHT_INIT_ARRAY => "INIT_ARRAY",
            elf.SHT_FINI_ARRAY => "FINI_ARRAY",
            elf.SHT_PREINIT_ARRAY => "PREINIT_ARRAY",
            elf.SHT_GROUP => "GROUP",
            elf.SHT_SYMTAB_SHNDX => "SYMTAB_SHNDX",
            elf.SHT_X86_64_UNWIND => "X86_64_UNWIND",
            elf.SHT_LLVM_ADDRSIG => "LLVM_ADDRSIG",
            elf.SHT_GNU_HASH => "GNU_HASH",
            elf.SHT_GNU_VERDEF => "VERDEF",
            elf.SHT_GNU_VERNEED => "VERNEED",
            elf.SHT_GNU_VERSYM => "VERSYM",
            else => null,
        };
    }

    /// Prints a section type as its short name; unnamed values print as
    /// `LOOS+0x..`, `LOPROC+0x..` or `LOUSER+0x..` relative to the start of
    /// the reserved range they fall in, and anything else as `UNKNOWN`.
    fn formatShType(
        sh_type: u32,
        comptime unused_fmt_string: []const u8,
        options: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        _ = unused_fmt_string;
        _ = options;
        // Known names must win: several of them (the GNU and LLVM types,
        // X86_64_UNWIND) are themselves values inside the reserved ranges.
        if (shTypeName(sh_type)) |name| return writer.writeAll(name);

        if (elf.SHT_LOOS <= sh_type and sh_type <= elf.SHT_HIOS) {
            try writer.print("LOOS+0x{x}", .{sh_type - elf.SHT_LOOS});
        } else if (elf.SHT_LOPROC <= sh_type and sh_type <= elf.SHT_HIPROC) {
            try writer.print("LOPROC+0x{x}", .{sh_type - elf.SHT_LOPROC});
        } else if (elf.SHT_LOUSER <= sh_type and sh_type <= elf.SHT_HIUSER) {
            try writer.print("LOUSER+0x{x}", .{sh_type - elf.SHT_LOUSER});
        } else {
            try writer.writeAll("UNKNOWN");
        }
    }

    /// Writes one `phdr N` record per program header: type, then the virtual
    /// and physical addresses, file offset, memory and file sizes, and
    /// alignment in hex.
    fn dumpPhdrs(ctx: Context, writer: anytype) !void {
        for (ctx.phdrs, 0..) |phdr, phndx| {
            try writer.print("phdr {d}\n", .{phndx});
            try writer.print("type {s}\n", .{fmtPhType(phdr.p_type)});
            try writer.print("vaddr {x}\n", .{phdr.p_vaddr});
            try writer.print("paddr {x}\n", .{phdr.p_paddr});
            try writer.print("offset {x}\n", .{phdr.p_offset});
            try writer.print("memsz {x}\n", .{phdr.p_memsz});
            try writer.print("filesz {x}\n", .{phdr.p_filesz});
            try writer.print("align {x}\n", .{phdr.p_align});
            // TODO dump formatted p_flags
        }
    }

    /// Wraps a raw `p_type` so it can be passed to `print`.
    fn fmtPhType(ph_type: u32) std.fmt.Formatter(formatPhType) {
        return .{ .data = ph_type };
    }

    /// Returns the short name of a segment type this dumper knows, or null.
    fn phTypeName(ph_type: u32) ?[]const u8 {
        return switch (ph_type) {
            elf.PT_NULL => "NULL",
            elf.PT_LOAD => "LOAD",
            elf.PT_DYNAMIC => "DYNAMIC",
            elf.PT_INTERP => "INTERP",
            elf.PT_NOTE => "NOTE",
            elf.PT_SHLIB => "SHLIB",
            elf.PT_PHDR => "PHDR",
            elf.PT_TLS => "TLS",
            elf.PT_NUM => "NUM",
            elf.PT_GNU_EH_FRAME => "GNU_EH_FRAME",
            elf.PT_GNU_STACK => "GNU_STACK",
            elf.PT_GNU_RELRO => "GNU_RELRO",
            else => null,
        };
    }

    /// Prints a segment type as its short name; unnamed values print as
    /// `LOOS+0x..` or `LOPROC+0x..` relative to the start of the reserved
    /// range they fall in, and anything else as `UNKNOWN`.
    fn formatPhType(
        ph_type: u32,
        comptime unused_fmt_string: []const u8,
        options: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        _ = unused_fmt_string;
        _ = options;
        // Known names must win: the PT_GNU_* types are themselves values
        // inside the OS-specific range.
        if (phTypeName(ph_type)) |name| return writer.writeAll(name);

        if (elf.PT_LOOS <= ph_type and ph_type <= elf.PT_HIOS) {
            try writer.print("LOOS+0x{x}", .{ph_type - elf.PT_LOOS});
        } else if (elf.PT_LOPROC <= ph_type and ph_type <= elf.PT_HIPROC) {
            try writer.print("LOPROC+0x{x}", .{ph_type - elf.PT_LOPROC});
        } else {
            try writer.writeAll("UNKNOWN");
        }
    }
};