Skip to content

Commit

Permalink
Merge pull request #16398 from ziglang/check-object-elf
Browse files Browse the repository at this point in the history
std: add ELF parse'n'dump functionality to std.Build.Step.CheckObject
  • Loading branch information
kubkon authored Jul 14, 2023
2 parents 3ec3374 + 77026c6 commit 546212f
Show file tree
Hide file tree
Showing 2 changed files with 292 additions and 4 deletions.
233 changes: 232 additions & 1 deletion lib/std/Build/Step/CheckObject.zig
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
const std = @import("std");
const assert = std.debug.assert;
const elf = std.elf;
const fs = std.fs;
const macho = std.macho;
const math = std.math;
Expand Down Expand Up @@ -338,7 +339,9 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void {
.macho => try MachODumper.parseAndDump(step, contents, .{
.dump_symtab = self.dump_symtab,
}),
.elf => @panic("TODO elf parser"),
.elf => try ElfDumper.parseAndDump(step, contents, .{
.dump_symtab = self.dump_symtab,
}),
.coff => @panic("TODO coff parser"),
.wasm => try WasmDumper.parseAndDump(step, contents, .{
.dump_symtab = self.dump_symtab,
Expand Down Expand Up @@ -695,6 +698,234 @@ const MachODumper = struct {
}
};

/// Parses a 64-bit ELF image held in memory and renders its file header,
/// section headers and program headers as plain text, one "key value" pair
/// per line.
const ElfDumper = struct {
    const symtab_label = "symtab";

    /// A symbol table section together with the string table that its
    /// `st_name` offsets index into.
    const Symtab = struct {
        symbols: []align(1) const elf.Elf64_Sym,
        strings: []const u8,

        /// Returns the symbol at `index`, or null when `index` is out of range.
        fn get(st: Symtab, index: usize) ?elf.Elf64_Sym {
            if (index >= st.symbols.len) return null;
            return st.symbols[index];
        }

        /// Returns the NUL-terminated name of the symbol at `index`, or null
        /// when `index` is out of range.
        fn getName(st: Symtab, index: usize) ?[]const u8 {
            const sym = st.get(index) orelse return null;
            assert(sym.st_name < st.strings.len);
            // `mem.sliceTo` takes `anytype`, so the cast needs an explicit
            // destination type (same form as `getSectionName`).
            return mem.sliceTo(@as([*:0]const u8, @ptrCast(st.strings.ptr + sym.st_name)), 0);
        }
    };

    /// Everything the dump routines need: the raw file bytes plus views into
    /// the header tables located inside those bytes.
    const Context = struct {
        gpa: Allocator,
        data: []const u8,
        hdr: elf.Elf64_Ehdr,
        shdrs: []align(1) const elf.Elf64_Shdr,
        phdrs: []align(1) const elf.Elf64_Phdr,
        shstrtab: []const u8,
        symtab: ?Symtab = null,
        dysymtab: ?Symtab = null,
    };

    /// Parses `bytes` as an ELF64 file and returns the textual dump.
    /// The returned slice is allocated with `step.owner.allocator`.
    ///
    /// Errors:
    /// * `error.InvalidMagicNumber` when the file does not start with "\x7fELF".
    /// * `error.InvalidSectionHeaderTable` / `error.InvalidProgramHeaderTable`
    ///   when the respective table does not fit inside `bytes`.
    /// * `error.InvalidSectionNameStringTable` when `e_shstrndx` does not name
    ///   an existing section.
    /// * Any error from reading the header or writing the output.
    fn parseAndDump(step: *Step, bytes: []const u8, opts: Opts) ![]const u8 {
        const gpa = step.owner.allocator;
        var stream = std.io.fixedBufferStream(bytes);
        const reader = stream.reader();

        const hdr = try reader.readStruct(elf.Elf64_Ehdr);
        if (!mem.eql(u8, hdr.e_ident[0..4], "\x7fELF")) {
            return error.InvalidMagicNumber;
        }

        // The offsets come straight from the file; validate them before
        // turning them into pointers so a malformed file yields an error
        // instead of an out-of-bounds read.
        if (!tableFits(bytes.len, hdr.e_shoff, hdr.e_shnum, @sizeOf(elf.Elf64_Shdr))) {
            return error.InvalidSectionHeaderTable;
        }
        if (!tableFits(bytes.len, hdr.e_phoff, hdr.e_phnum, @sizeOf(elf.Elf64_Phdr))) {
            return error.InvalidProgramHeaderTable;
        }

        const shdrs = @as([*]align(1) const elf.Elf64_Shdr, @ptrCast(bytes.ptr + hdr.e_shoff))[0..hdr.e_shnum];
        const phdrs = @as([*]align(1) const elf.Elf64_Phdr, @ptrCast(bytes.ptr + hdr.e_phoff))[0..hdr.e_phnum];

        var ctx = Context{
            .gpa = gpa,
            .data = bytes,
            .hdr = hdr,
            .shdrs = shdrs,
            .phdrs = phdrs,
            .shstrtab = &[_]u8{},
        };
        // A file without section headers has no section name table either;
        // only look it up when there is something to index.
        if (shdrs.len > 0) {
            if (hdr.e_shstrndx >= shdrs.len) return error.InvalidSectionNameStringTable;
            ctx.shstrtab = getSectionContents(ctx, hdr.e_shstrndx);
        }

        if (opts.dump_symtab) {
            for (ctx.shdrs, 0..) |shdr, i| switch (shdr.sh_type) {
                elf.SHT_SYMTAB, elf.SHT_DYNSYM => {
                    const raw = getSectionContents(ctx, i);
                    const nsyms = @divExact(raw.len, @sizeOf(elf.Elf64_Sym));
                    const symbols = @as([*]align(1) const elf.Elf64_Sym, @ptrCast(raw.ptr))[0..nsyms];
                    // `sh_link` of a symbol table names its string table section.
                    const strings = getSectionContents(ctx, shdr.sh_link);

                    switch (shdr.sh_type) {
                        elf.SHT_SYMTAB => {
                            ctx.symtab = .{
                                .symbols = symbols,
                                .strings = strings,
                            };
                        },
                        elf.SHT_DYNSYM => {
                            ctx.dysymtab = .{
                                .symbols = symbols,
                                .strings = strings,
                            };
                        },
                        else => unreachable,
                    }
                },

                else => {},
            };
        }

        var output = std.ArrayList(u8).init(gpa);
        // Release the partially written dump if any of the writes below fail.
        errdefer output.deinit();
        const writer = output.writer();

        try dumpHeader(ctx, writer);
        try dumpShdrs(ctx, writer);
        try dumpPhdrs(ctx, writer);

        return output.toOwnedSlice();
    }

    /// Reports whether `count` entries of `entry_size` bytes each, starting at
    /// file offset `offset`, lie entirely within a file of `file_len` bytes.
    fn tableFits(file_len: usize, offset: u64, count: u16, entry_size: usize) bool {
        if (offset > file_len) return false;
        const table_size = @as(u64, count) * entry_size;
        return table_size <= file_len - offset;
    }

    /// Returns the name of section `shndx`, read from the section name string table.
    fn getSectionName(ctx: Context, shndx: usize) []const u8 {
        const shdr = ctx.shdrs[shndx];
        assert(shdr.sh_name < ctx.shstrtab.len);
        return mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.shstrtab.ptr + shdr.sh_name)), 0);
    }

    /// Returns the bytes of section `shndx` as a view into the file data.
    fn getSectionContents(ctx: Context, shndx: usize) []const u8 {
        const shdr = ctx.shdrs[shndx];
        assert(shdr.sh_offset < ctx.data.len);
        assert(shdr.sh_offset + shdr.sh_size <= ctx.data.len);
        return ctx.data[shdr.sh_offset..][0..shdr.sh_size];
    }

    /// Writes the "header" record: the file type and the entry point address.
    fn dumpHeader(ctx: Context, writer: anytype) !void {
        try writer.writeAll("header\n");
        try writer.print("type {s}\n", .{@tagName(ctx.hdr.e_type)});
        try writer.print("entry {x}\n", .{ctx.hdr.e_entry});
    }

    /// Writes one "shdr N" record per section header.
    fn dumpShdrs(ctx: Context, writer: anytype) !void {
        if (ctx.shdrs.len == 0) return;

        for (ctx.shdrs, 0..) |shdr, shndx| {
            try writer.print("shdr {d}\n", .{shndx});
            try writer.print("name {s}\n", .{getSectionName(ctx, shndx)});
            try writer.print("type {s}\n", .{fmtShType(shdr.sh_type)});
            try writer.print("addr {x}\n", .{shdr.sh_addr});
            try writer.print("offset {x}\n", .{shdr.sh_offset});
            try writer.print("size {x}\n", .{shdr.sh_size});
            try writer.print("addralign {x}\n", .{shdr.sh_addralign});
            // TODO dump formatted sh_flags
        }
    }

    /// Wraps a raw `sh_type` value so it can be printed via `formatShType`.
    fn fmtShType(sh_type: u32) std.fmt.Formatter(formatShType) {
        return .{ .data = sh_type };
    }

    /// Prints a symbolic name for a section header type.
    ///
    /// Known types are matched first: several of them (for example
    /// `SHT_GNU_HASH` and `SHT_X86_64_UNWIND`) have values inside the
    /// OS-specific or processor-specific ranges, so testing the ranges first
    /// would hide their names. Remaining values inside a reserved range are
    /// printed as an offset from the start of that range; anything else is
    /// printed as "UNKNOWN".
    fn formatShType(
        sh_type: u32,
        comptime unused_fmt_string: []const u8,
        options: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        _ = unused_fmt_string;
        _ = options;
        const name: []const u8 = switch (sh_type) {
            elf.SHT_NULL => "NULL",
            elf.SHT_PROGBITS => "PROGBITS",
            elf.SHT_SYMTAB => "SYMTAB",
            elf.SHT_STRTAB => "STRTAB",
            elf.SHT_RELA => "RELA",
            elf.SHT_HASH => "HASH",
            elf.SHT_DYNAMIC => "DYNAMIC",
            elf.SHT_NOTE => "NOTE",
            elf.SHT_NOBITS => "NOBITS",
            elf.SHT_REL => "REL",
            elf.SHT_SHLIB => "SHLIB",
            elf.SHT_DYNSYM => "DYNSYM",
            elf.SHT_INIT_ARRAY => "INIT_ARRAY",
            elf.SHT_FINI_ARRAY => "FINI_ARRAY",
            elf.SHT_PREINIT_ARRAY => "PREINIT_ARRAY",
            elf.SHT_GROUP => "GROUP",
            elf.SHT_SYMTAB_SHNDX => "SYMTAB_SHNDX",
            elf.SHT_X86_64_UNWIND => "X86_64_UNWIND",
            elf.SHT_LLVM_ADDRSIG => "LLVM_ADDRSIG",
            elf.SHT_GNU_HASH => "GNU_HASH",
            elf.SHT_GNU_VERDEF => "VERDEF",
            elf.SHT_GNU_VERNEED => "VERNEED",
            elf.SHT_GNU_VERSYM => "VERSYM",
            else => {
                // The LO*/HI* bounds are inclusive on both ends.
                if (elf.SHT_LOOS <= sh_type and sh_type <= elf.SHT_HIOS) {
                    return writer.print("LOOS+0x{x}", .{sh_type - elf.SHT_LOOS});
                }
                if (elf.SHT_LOPROC <= sh_type and sh_type <= elf.SHT_HIPROC) {
                    return writer.print("LOPROC+0x{x}", .{sh_type - elf.SHT_LOPROC});
                }
                if (elf.SHT_LOUSER <= sh_type and sh_type <= elf.SHT_HIUSER) {
                    return writer.print("LOUSER+0x{x}", .{sh_type - elf.SHT_LOUSER});
                }
                return writer.writeAll("UNKNOWN");
            },
        };
        try writer.writeAll(name);
    }

    /// Writes one "phdr N" record per program header.
    fn dumpPhdrs(ctx: Context, writer: anytype) !void {
        if (ctx.phdrs.len == 0) return;

        for (ctx.phdrs, 0..) |phdr, phndx| {
            try writer.print("phdr {d}\n", .{phndx});
            try writer.print("type {s}\n", .{fmtPhType(phdr.p_type)});
            try writer.print("vaddr {x}\n", .{phdr.p_vaddr});
            try writer.print("paddr {x}\n", .{phdr.p_paddr});
            try writer.print("offset {x}\n", .{phdr.p_offset});
            try writer.print("memsz {x}\n", .{phdr.p_memsz});
            try writer.print("filesz {x}\n", .{phdr.p_filesz});
            try writer.print("align {x}\n", .{phdr.p_align});
            // TODO dump formatted p_flags
        }
    }

    /// Wraps a raw `p_type` value so it can be printed via `formatPhType`.
    fn fmtPhType(ph_type: u32) std.fmt.Formatter(formatPhType) {
        return .{ .data = ph_type };
    }

    /// Prints a symbolic name for a program header type.
    ///
    /// Known types are matched first: the `PT_GNU_*` values lie inside the
    /// OS-specific range, so testing the ranges first would hide their names.
    /// Remaining values inside a reserved range are printed as an offset from
    /// the start of that range; anything else is printed as "UNKNOWN".
    fn formatPhType(
        ph_type: u32,
        comptime unused_fmt_string: []const u8,
        options: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        _ = unused_fmt_string;
        _ = options;
        const p_type: []const u8 = switch (ph_type) {
            elf.PT_NULL => "NULL",
            elf.PT_LOAD => "LOAD",
            elf.PT_DYNAMIC => "DYNAMIC",
            elf.PT_INTERP => "INTERP",
            elf.PT_NOTE => "NOTE",
            elf.PT_SHLIB => "SHLIB",
            elf.PT_PHDR => "PHDR",
            elf.PT_TLS => "TLS",
            elf.PT_NUM => "NUM",
            elf.PT_GNU_EH_FRAME => "GNU_EH_FRAME",
            elf.PT_GNU_STACK => "GNU_STACK",
            elf.PT_GNU_RELRO => "GNU_RELRO",
            else => {
                // The LO*/HI* bounds are inclusive on both ends.
                if (elf.PT_LOOS <= ph_type and ph_type <= elf.PT_HIOS) {
                    return writer.print("LOOS+0x{x}", .{ph_type - elf.PT_LOOS});
                }
                if (elf.PT_LOPROC <= ph_type and ph_type <= elf.PT_HIPROC) {
                    return writer.print("LOPROC+0x{x}", .{ph_type - elf.PT_LOPROC});
                }
                return writer.writeAll("UNKNOWN");
            },
        };
        try writer.writeAll(p_type);
    }
};

const WasmDumper = struct {
const symtab_label = "symbols";

Expand Down
63 changes: 60 additions & 3 deletions lib/std/elf.zig
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,58 @@ pub const DT_IA_64_NUM = 1;

pub const DT_NIOS2_GP = 0x70000002;

// DF_* flags: each constant has exactly one bit set (bits 0 through 4), so
// they can be OR-ed together into a single flags word.
// NOTE(review): presumably these are the values of the DT_FLAGS dynamic
// entry — confirm against the ELF gABI.
pub const DF_ORIGIN = 0x00000001;
pub const DF_SYMBOLIC = 0x00000002;
pub const DF_TEXTREL = 0x00000004;
pub const DF_BIND_NOW = 0x00000008;
pub const DF_STATIC_TLS = 0x00000010;

// DF_1_* flags: each constant has exactly one bit set (bits 0 through 27).
// Their values overlap numerically with the DF_* flags above, so the two
// sets cannot share one flags word.
// NOTE(review): presumably these are the values of the DT_FLAGS_1 dynamic
// entry — confirm against the ELF gABI / GNU extensions.
pub const DF_1_NOW = 0x00000001;
pub const DF_1_GLOBAL = 0x00000002;
pub const DF_1_GROUP = 0x00000004;
pub const DF_1_NODELETE = 0x00000008;
pub const DF_1_LOADFLTR = 0x00000010;
pub const DF_1_INITFIRST = 0x00000020;
pub const DF_1_NOOPEN = 0x00000040;
pub const DF_1_ORIGIN = 0x00000080;
pub const DF_1_DIRECT = 0x00000100;
pub const DF_1_TRANS = 0x00000200;
pub const DF_1_INTERPOSE = 0x00000400;
pub const DF_1_NODEFLIB = 0x00000800;
pub const DF_1_NODUMP = 0x00001000;
pub const DF_1_CONFALT = 0x00002000;
pub const DF_1_ENDFILTEE = 0x00004000;
pub const DF_1_DISPRELDNE = 0x00008000;
pub const DF_1_DISPRELPND = 0x00010000;
pub const DF_1_NODIRECT = 0x00020000;
pub const DF_1_IGNMULDEF = 0x00040000;
pub const DF_1_NOKSYMS = 0x00080000;
pub const DF_1_NOHDR = 0x00100000;
pub const DF_1_EDITED = 0x00200000;
pub const DF_1_NORELOC = 0x00400000;
pub const DF_1_SYMINTPOSE = 0x00800000;
pub const DF_1_GLOBAUDIT = 0x01000000;
pub const DF_1_SINGLETON = 0x02000000;
pub const DF_1_STUB = 0x04000000;
pub const DF_1_PIE = 0x08000000;

// The two VERSYM_* values are complementary masks over 16 bits
// (0x8000 | 0x7fff == 0xffff): the top bit versus the low 15 bits.
// NOTE(review): presumably applied to entries of the symbol version table
// (hidden flag vs. version index) — confirm against the GNU symbol
// versioning documentation.
pub const VERSYM_HIDDEN = 0x8000;
pub const VERSYM_VERSION = 0x7fff;

/// Symbol is local
pub const VER_NDX_LOCAL = 0;
/// Symbol is global
pub const VER_NDX_GLOBAL = 1;
/// Beginning of reserved entries
pub const VER_NDX_LORESERVE = 0xff00;
/// Symbol is to be eliminated
pub const VER_NDX_ELIMINATE = 0xff01;

/// Version definition of the file itself
pub const VER_FLG_BASE = 1;
/// Weak version identifier
pub const VER_FLG_WEAK = 2;

/// Program header table entry unused
pub const PT_NULL = 0;
/// Loadable program segment
Expand Down Expand Up @@ -298,6 +350,14 @@ pub const SHT_SYMTAB_SHNDX = 18;
pub const SHT_LOOS = 0x60000000;
/// LLVM address-significance table
pub const SHT_LLVM_ADDRSIG = 0x6fff4c03;
/// GNU hash table
pub const SHT_GNU_HASH = 0x6ffffff6;
/// GNU version definition table
pub const SHT_GNU_VERDEF = 0x6ffffffd;
/// GNU needed versions table
pub const SHT_GNU_VERNEED = 0x6ffffffe;
/// GNU symbol version table
pub const SHT_GNU_VERSYM = 0x6fffffff;
/// End of OS-specific
pub const SHT_HIOS = 0x6fffffff;
/// Start of processor-specific
Expand Down Expand Up @@ -369,9 +429,6 @@ pub const STT_HP_STUB = (STT_LOOS + 0x2);
pub const STT_ARM_TFUNC = STT_LOPROC;
pub const STT_ARM_16BIT = STT_HIPROC;

pub const VER_FLG_BASE = 0x1;
pub const VER_FLG_WEAK = 0x2;

pub const MAGIC = "\x7fELF";

/// File types
Expand Down

0 comments on commit 546212f

Please sign in to comment.