coff & pdb: improved correctness of our implementation, it is now able to handle stage1's pdb and print its stack traces

2019-07-28 19:03:36 +02:00 · 2019-07-28 19:03:36 +02:00 · 05032c8693
parent d08425a0a5
commit 05032c8693
3 changed files with 113 additions and 45 deletions
--- a/std/coff.zig
+++ b/std/coff.zig
@ -19,6 +19,7 @@ const IMAGE_NT_OPTIONAL_HDR32_MAGIC = 0x10b;
 const IMAGE_NT_OPTIONAL_HDR64_MAGIC = 0x20b;

 const IMAGE_NUMBEROF_DIRECTORY_ENTRIES = 16;
+const IMAGE_DEBUG_TYPE_CODEVIEW = 2;
 const DEBUG_DIRECTORY = 6;

 pub const CoffError = error{
@ -28,6 +29,7 @@ pub const CoffError = error{
    MissingCoffSection,
 };

+// Official documentation of the format: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format
 pub const Coff = struct {
    in_file: File,
    allocator: *mem.Allocator,
@ -120,6 +122,7 @@ pub const Coff = struct {

    pub fn getPdbPath(self: *Coff, buffer: []u8) !usize {
        try self.loadSections();
+
        const header = blk: {
            if (self.getSection(".buildid")) |section| {
                break :blk section.header;
@ -130,14 +133,32 @@ pub const Coff = struct {
            }
        };

-        // The linker puts a chunk that contains the .pdb path right after the
-        // debug_directory.
        const debug_dir = &self.pe_header.data_directory[DEBUG_DIRECTORY];
        const file_offset = debug_dir.virtual_address - header.virtual_address + header.pointer_to_raw_data;
-        try self.in_file.seekTo(file_offset + debug_dir.size);

        var file_stream = self.in_file.inStream();
        const in = &file_stream.stream;
+        try self.in_file.seekTo(file_offset);
+
+        // Find the CodeView DebugDirectoryEntry and seek to where its data is stored.
+        // The data can live in any section, so every section is checked.
+        const debug_dir_entry_count = debug_dir.size / @sizeOf(DebugDirectoryEntry);
+        var i: u32 = 0;
+        blk: while (i < debug_dir_entry_count) : (i += 1) {
+            const debug_dir_entry = try in.readStruct(DebugDirectoryEntry);
+            if (debug_dir_entry.type == IMAGE_DEBUG_TYPE_CODEVIEW) {
+                const rva = debug_dir_entry.address_of_raw_data;
+                for (self.sections.toSlice()) |*section| {
+                    const section_start = section.header.virtual_address;
+                    const section_size = section.header.misc.virtual_size;
+                    // Sections starting after the RVA cannot contain it. Skip them before
+                    // subtracting: `rva - section_start` on u32 would otherwise overflow,
+                    // which is a panic in safe build modes.
+                    if (rva < section_start) continue;
+                    const offset = rva - section_start;
+                    if (offset < section_size and debug_dir_entry.size_of_data <= section_size - offset) {
+                        // Translate the RVA into a file offset inside this section.
+                        try self.in_file.seekTo(section.header.pointer_to_raw_data + offset);
+                        break :blk;
+                    }
+                }
+            }
+        }

        var cv_signature: [4]u8 = undefined; // CodeView signature
        try in.readNoEof(cv_signature[0..]);
@ -149,7 +170,7 @@ pub const Coff = struct {

        // Finally read the null-terminated string.
        var byte = try in.readByte();
-        var i: usize = 0;
+        i = 0;
        while (byte != 0 and i < buffer.len) : (i += 1) {
            buffer[i] = byte;
            byte = try in.readByte();
@ -178,7 +199,7 @@ pub const Coff = struct {
            try self.sections.append(Section{
                .header = SectionHeader{
                    .name = name,
-                    .misc = SectionHeader.Misc{ .physical_address = try in.readIntLittle(u32) },
+                    .misc = SectionHeader.Misc{ .virtual_size = try in.readIntLittle(u32) },
                    .virtual_address = try in.readIntLittle(u32),
                    .size_of_raw_data = try in.readIntLittle(u32),
                    .pointer_to_raw_data = try in.readIntLittle(u32),
@ -222,6 +243,17 @@ const OptionalHeader = struct {
    data_directory: [IMAGE_NUMBEROF_DIRECTORY_ENTRIES]DataDirectory,
 };

+/// One entry of the PE debug directory (IMAGE_DEBUG_DIRECTORY), read verbatim
+/// from the file with `readStruct`, so field order and widths must match the
+/// on-disk layout.
+const DebugDirectoryEntry = packed struct {
+    characteristics: u32,
+    time_date_stamp: u32,
+    major_version: u16,
+    minor_version: u16,
+    /// Format of the debug data, e.g. IMAGE_DEBUG_TYPE_CODEVIEW.
+    @"type": u32,
+    /// Size in bytes of the debug data, not counting this entry.
+    size_of_data: u32,
+    /// RVA of the debug data.
+    address_of_raw_data: u32,
+    /// File offset of the debug data.
+    pointer_to_raw_data: u32,
+};
+
 pub const Section = struct {
    header: SectionHeader,
 };
--- a/std/debug.zig
+++ b/std/debug.zig
@ -375,7 +375,7 @@ fn printSourceAtAddressWindows(di: *DebugInfo, out_stream: var, relocated_addres
    const obj_basename = fs.path.basename(mod.obj_file_name);

    var symbol_i: usize = 0;
-    const symbol_name = while (symbol_i != mod.symbols.len) {
+    const symbol_name = if (!mod.populated) "???" else while (symbol_i != mod.symbols.len) {
        const prefix = @ptrCast(*pdb.RecordPrefix, &mod.symbols[symbol_i]);
        if (prefix.RecordLen < 2)
            return error.InvalidDebugInfo;
@ -858,8 +858,10 @@ fn openSelfDebugInfoWindows(allocator: *mem.Allocator) !DebugInfo {
    const age = try pdb_stream.stream.readIntLittle(u32);
    var guid: [16]u8 = undefined;
    try pdb_stream.stream.readNoEof(guid[0..]);
+    if (version != 20000404) // VC70, only value observed by LLVM team
+        return error.UnknownPDBVersion;
    if (!mem.eql(u8, di.coff.guid, guid) or di.coff.age != age)
-        return error.InvalidDebugInfo;
+        return error.PDBMismatch;
    // We validated the executable and pdb match.

    const string_table_index = str_tab_index: {
@ -903,13 +905,18 @@ fn openSelfDebugInfoWindows(allocator: *mem.Allocator) !DebugInfo {
        return error.MissingDebugInfo;
    };

-    di.pdb.string_table = di.pdb.getStreamById(string_table_index) orelse return error.InvalidDebugInfo;
+    di.pdb.string_table = di.pdb.getStreamById(string_table_index) orelse return error.MissingDebugInfo;
    di.pdb.dbi = di.pdb.getStream(pdb.StreamType.Dbi) orelse return error.MissingDebugInfo;

    const dbi = di.pdb.dbi;

    // Dbi Header
    const dbi_stream_header = try dbi.stream.readStruct(pdb.DbiStreamHeader);
+    if (dbi_stream_header.VersionHeader != 19990903) // V70, only value observed by LLVM team
+        return error.UnknownPDBVersion;
+    if (dbi_stream_header.Age != age)
+        return error.UnmatchingPDB;
+
    const mod_info_size = dbi_stream_header.ModInfoSize;
    const section_contrib_size = dbi_stream_header.SectionContributionSize;

--- a/std/pdb.zig
+++ b/std/pdb.zig
@ -499,45 +499,78 @@ const Msf = struct {

        const superblock = try in.readStruct(SuperBlock);

+        // Sanity checks
        if (!mem.eql(u8, superblock.FileMagic, SuperBlock.file_magic))
            return error.InvalidDebugInfo;
-
+        if (superblock.FreeBlockMapBlock != 1 and superblock.FreeBlockMapBlock != 2)
+            return error.InvalidDebugInfo;
+        if (superblock.NumBlocks * superblock.BlockSize != try file.getEndPos())
+            return error.InvalidDebugInfo;
        switch (superblock.BlockSize) {
            // llvm only supports 4096 but we can handle any of these values
            512, 1024, 2048, 4096 => {},
            else => return error.InvalidDebugInfo,
        }

-        if (superblock.NumBlocks * superblock.BlockSize != try file.getEndPos())
-            return error.InvalidDebugInfo;
+        const dir_block_count = blockCountFromSize(superblock.NumDirectoryBytes, superblock.BlockSize);
+        if (dir_block_count > superblock.BlockSize / @sizeOf(u32))
+            return error.UnhandledBigDirectoryStream; // cf. BlockMapAddr comment.

-        self.directory = try MsfStream.init(
+        try file.seekTo(superblock.BlockSize * superblock.BlockMapAddr);
+        var dir_blocks = try allocator.alloc(u32, dir_block_count);
+        for (dir_blocks) |*b| {
+            b.* = try in.readIntLittle(u32);
+        }
+        self.directory = MsfStream.init(
            superblock.BlockSize,
-            blockCountFromSize(superblock.NumDirectoryBytes, superblock.BlockSize),
-            superblock.BlockSize * superblock.BlockMapAddr,
            file,
-            allocator,
+            dir_blocks,
        );

+        const begin = self.directory.pos;
        const stream_count = try self.directory.stream.readIntLittle(u32);
-
        const stream_sizes = try allocator.alloc(u32, stream_count);
-        for (stream_sizes) |*s| {
+        defer allocator.free(stream_sizes);
+
+        // Microsoft's implementation uses u32(-1) for nonexistent streams.
+        // These streams are not used, but still participate in the file
+        // and must be taken into account when resolving stream indices.
+        const Nil = 0xFFFFFFFF;
+        for (stream_sizes) |*s, i| {
            const size = try self.directory.stream.readIntLittle(u32);
-            s.* = blockCountFromSize(size, superblock.BlockSize);
+            s.* = if (size == Nil) 0 else blockCountFromSize(size, superblock.BlockSize);
        }

        self.streams = try allocator.alloc(MsfStream, stream_count);
        for (self.streams) |*stream, i| {
-            stream.* = try MsfStream.init(
-                superblock.BlockSize,
-                stream_sizes[i],
-                // MsfStream.init expects the file to be at the part where it reads [N]u32
-                try file.getPos(),
-                file,
-                allocator,
-            );
+            // Note: stream_sizes was filled with block counts, not byte sizes.
+            const size = stream_sizes[i];
+            if (size == 0) {
+                // Empty or nonexistent stream: it owns no blocks but keeps its index.
+                stream.* = MsfStream{
+                    .blocks = [_]u32{},
+                };
+            } else {
+                var blocks = try allocator.alloc(u32, size);
+                // Don't leak this stream's block list if a bad index aborts the load.
+                errdefer allocator.free(blocks);
+                var j: u32 = 0;
+                while (j < size) : (j += 1) {
+                    const block_id = try self.directory.stream.readIntLittle(u32);
+                    const n = (block_id % superblock.BlockSize);
+                    // 0 is for SuperBlock, 1 and 2 for FPMs.
+                    // The file length was checked above to be exactly NumBlocks blocks,
+                    // so valid indices are < NumBlocks. Comparing indices rather than
+                    // byte offsets avoids overflowing the u32 product and also rejects
+                    // a block that would start exactly at end of file.
+                    if (block_id == 0 or n == 1 or n == 2 or block_id >= superblock.NumBlocks)
+                        return error.InvalidBlockIndex;
+                    blocks[j] = block_id;
+                }
+
+                stream.* = MsfStream.init(
+                    superblock.BlockSize,
+                    file,
+                    blocks,
+                );
+            }
        }
+
+        const end = self.directory.pos;
+        if (end - begin != superblock.NumDirectoryBytes)
+            return error.InvalidStreamDirectory;
    }
 };

@ -574,7 +607,6 @@ const SuperBlock = packed struct {
    NumDirectoryBytes: u32,

    Unknown: u32,
-
    /// The index of a block within the MSF file. At this block is an array of
    /// ulittle32_t’s listing the blocks that the stream directory resides on.
    /// For large MSF files, the stream directory (which describes the block
@ -584,45 +616,41 @@ const SuperBlock = packed struct {
    /// and the stream directory itself can be stitched together accordingly.
    /// The number of ulittle32_t’s in this array is given by
    /// ceil(NumDirectoryBytes / BlockSize).
+    // Note: microsoft-pdb code actually suggests this is a variable-length
+    // array. If the indices of the blocks occupied by the Stream Directory
+    // didn't fit in one page, more u32 values would follow it.
+    // That would mean the Stream Directory is bigger than BlockSize / sizeof(u32)
+    // blocks. We're not even close to this with a 1GB pdb file, and LLVM didn't
+    // implement it, so we're reasonably safe making this assumption for now.
    BlockMapAddr: u32,
 };

 const MsfStream = struct {
-    in_file: File,
-    pos: u64,
-    blocks: []u32,
-    block_size: u32,
+    in_file: File = undefined,
+    pos: u64 = undefined,
+    blocks: []u32 = undefined,
+    block_size: u32 = undefined,

    /// Implementation of InStream trait for Pdb.MsfStream
-    stream: Stream,
+    stream: Stream = undefined,

    pub const Error = @typeOf(read).ReturnType.ErrorSet;
    pub const Stream = io.InStream(Error);

-    fn init(block_size: u32, block_count: u32, pos: u64, file: File, allocator: *mem.Allocator) !MsfStream {
-        var stream = MsfStream{
+    fn init(block_size: u32, file: File, blocks: []u32) MsfStream {
+        const stream = MsfStream{
            .in_file = file,
            .pos = 0,
-            .blocks = try allocator.alloc(u32, block_count),
+            .blocks = blocks,
            .block_size = block_size,
            .stream = Stream{ .readFn = readFn },
        };

-        var file_stream = file.inStream();
-        const in = &file_stream.stream;
-        try file.seekTo(pos);
-
-        var i: u32 = 0;
-        while (i < block_count) : (i += 1) {
-            stream.blocks[i] = try in.readIntLittle(u32);
-        }
-
        return stream;
    }

    fn readNullTermString(self: *MsfStream, allocator: *mem.Allocator) ![]u8 {
        var list = ArrayList(u8).init(allocator);
-        defer list.deinit();
        while (true) {
            const byte = try self.stream.readByte();
            if (byte == 0) {
@ -633,6 +661,7 @@ const MsfStream = struct {
    }

    fn read(self: *MsfStream, buffer: []u8) !usize {
+
        var block_id = @intCast(usize, self.pos / self.block_size);
        var block = self.blocks[block_id];
        var offset = self.pos % self.block_size;