From 4d8f96dd88c97fa175952f8dd7ab548409c78b0e Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Thu, 9 May 2019 23:46:12 +0200 Subject: [PATCH] Fix minor bug in LEB128 parsing --- CMakeLists.txt | 1 + std/debug.zig | 155 +++++++----------------------- std/debug/leb128.zig | 220 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 256 insertions(+), 120 deletions(-) create mode 100644 std/debug/leb128.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index 991dc8519..456945a6c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -487,6 +487,7 @@ set(ZIG_STD_FILES "crypto/x25519.zig" "cstr.zig" "debug.zig" + "debug/leb128.zig" "debug/failing_allocator.zig" "dwarf.zig" "dynamic_library.zig" diff --git a/std/debug.zig b/std/debug.zig index 7f1a249f7..61d6e648d 100644 --- a/std/debug.zig +++ b/std/debug.zig @@ -13,6 +13,8 @@ const ArrayList = std.ArrayList; const builtin = @import("builtin"); const maxInt = std.math.maxInt; +const leb = @import("debug/leb128.zig"); + pub const FailingAllocator = @import("debug/failing_allocator.zig").FailingAllocator; pub const failing_allocator = &FailingAllocator.init(global_allocator, 0).allocator; @@ -1460,7 +1462,7 @@ fn parseFormValueConstant(allocator: *mem.Allocator, in_stream: var, signed: boo 2 => try in_stream.readIntLittle(u16), 4 => try in_stream.readIntLittle(u32), 8 => try in_stream.readIntLittle(u64), - -1 => if (signed) @bitCast(u64, try readILeb128(in_stream)) else try readULeb128(in_stream), + -1 => if (signed) @bitCast(u64, try leb.readILEB128(i64, in_stream)) else try leb.readULEB128(u64, in_stream), else => @compileError("Invalid size"), }, }, @@ -1482,7 +1484,7 @@ fn parseFormValueRef(allocator: *mem.Allocator, in_stream: var, size: i32) !Form 2 => try in_stream.readIntLittle(u16), 4 => try in_stream.readIntLittle(u32), 8 => try in_stream.readIntLittle(u64), - -1 => try readULeb128(in_stream), + -1 => try leb.readULEB128(u64, in_stream), else => unreachable, }, }; @@ -1495,7 +1497,7 @@ fn 
parseFormValue(allocator: *mem.Allocator, in_stream: var, form_id: u64, is_64 DW.FORM_block2 => parseFormValueBlock(allocator, in_stream, 2), DW.FORM_block4 => parseFormValueBlock(allocator, in_stream, 4), DW.FORM_block => x: { - const block_len = try readULeb128(in_stream); + const block_len = try leb.readULEB128(usize, in_stream); return parseFormValueBlockLen(allocator, in_stream, block_len); }, DW.FORM_data1 => parseFormValueConstant(allocator, in_stream, false, 1), @@ -1507,7 +1509,7 @@ fn parseFormValue(allocator: *mem.Allocator, in_stream: var, form_id: u64, is_64 return parseFormValueConstant(allocator, in_stream, signed, -1); }, DW.FORM_exprloc => { - const size = try readULeb128(in_stream); + const size = try leb.readULEB128(usize, in_stream); const buf = try readAllocBytes(allocator, in_stream, size); return FormValue{ .ExprLoc = buf }; }, @@ -1527,7 +1529,7 @@ fn parseFormValue(allocator: *mem.Allocator, in_stream: var, form_id: u64, is_64 DW.FORM_string => FormValue{ .String = try readStringRaw(allocator, in_stream) }, DW.FORM_strp => FormValue{ .StrPtr = try parseFormValueDwarfOffsetSize(in_stream, is_64) }, DW.FORM_indirect => { - const child_form_id = try readULeb128(in_stream); + const child_form_id = try leb.readULEB128(u64, in_stream); return parseFormValue(allocator, in_stream, child_form_id, is_64); }, else => error.InvalidDebugInfo, @@ -1537,19 +1539,19 @@ fn parseFormValue(allocator: *mem.Allocator, in_stream: var, form_id: u64, is_64 fn parseAbbrevTable(di: *DwarfInfo) !AbbrevTable { var result = AbbrevTable.init(di.allocator()); while (true) { - const abbrev_code = try readULeb128(di.dwarf_in_stream); + const abbrev_code = try leb.readULEB128(u64, di.dwarf_in_stream); if (abbrev_code == 0) return result; try result.append(AbbrevTableEntry{ .abbrev_code = abbrev_code, - .tag_id = try readULeb128(di.dwarf_in_stream), + .tag_id = try leb.readULEB128(u64, di.dwarf_in_stream), .has_children = (try di.dwarf_in_stream.readByte()) == 
DW.CHILDREN_yes, .attrs = ArrayList(AbbrevAttr).init(di.allocator()), }); const attrs = &result.items[result.len - 1].attrs; while (true) { - const attr_id = try readULeb128(di.dwarf_in_stream); - const form_id = try readULeb128(di.dwarf_in_stream); + const attr_id = try leb.readULEB128(u64, di.dwarf_in_stream); + const form_id = try leb.readULEB128(u64, di.dwarf_in_stream); if (attr_id == 0 and form_id == 0) break; try attrs.append(AbbrevAttr{ .attr_id = attr_id, @@ -1583,7 +1585,7 @@ fn getAbbrevTableEntry(abbrev_table: *const AbbrevTable, abbrev_code: u64) ?*con } fn parseDie1(di: *DwarfInfo, abbrev_table: *const AbbrevTable, is_64: bool) !?Die { - const abbrev_code = try readULeb128(di.dwarf_in_stream); + const abbrev_code = try leb.readULEB128(u64, di.dwarf_in_stream); if (abbrev_code == 0) return null; const table_entry = getAbbrevTableEntry(abbrev_table, abbrev_code) orelse return error.InvalidDebugInfo; @@ -1603,7 +1605,7 @@ fn parseDie1(di: *DwarfInfo, abbrev_table: *const AbbrevTable, is_64: bool) !?Di } fn parseDie(di: *DwarfInfo, abbrev_table: *const AbbrevTable, is_64: bool) !Die { - const abbrev_code = try readULeb128(di.dwarf_in_stream); + const abbrev_code = try leb.readULEB128(u64, di.dwarf_in_stream); const table_entry = getAbbrevTableEntry(abbrev_table, abbrev_code) orelse return error.InvalidDebugInfo; var result = Die{ @@ -1716,9 +1718,9 @@ fn getLineNumberInfoMacOs(di: *DebugInfo, symbol: MachoSymbol, target_address: u while (true) { const file_name = readStringMem(&ptr); if (file_name.len == 0) break; - const dir_index = try readULeb128Mem(&ptr); - const mtime = try readULeb128Mem(&ptr); - const len_bytes = try readULeb128Mem(&ptr); + const dir_index = try leb.readULEB128Mem(u64, &ptr); + const mtime = try leb.readULEB128Mem(u64, &ptr); + const len_bytes = try leb.readULEB128Mem(u64, &ptr); try file_entries.append(FileEntry{ .file_name = file_name, .dir_index = dir_index, @@ -1732,7 +1734,7 @@ fn getLineNumberInfoMacOs(di: *DebugInfo, symbol: 
MachoSymbol, target_address: u const opcode = readByteMem(&ptr); if (opcode == DW.LNS_extended_op) { - const op_size = try readULeb128Mem(&ptr); + const op_size = try leb.readULEB128Mem(u64, &ptr); if (op_size < 1) return error.InvalidDebugInfo; var sub_op = readByteMem(&ptr); switch (sub_op) { @@ -1747,9 +1749,9 @@ fn getLineNumberInfoMacOs(di: *DebugInfo, symbol: MachoSymbol, target_address: u }, DW.LNE_define_file => { const file_name = readStringMem(&ptr); - const dir_index = try readULeb128Mem(&ptr); - const mtime = try readULeb128Mem(&ptr); - const len_bytes = try readULeb128Mem(&ptr); + const dir_index = try leb.readULEB128Mem(u64, &ptr); + const mtime = try leb.readULEB128Mem(u64, &ptr); + const len_bytes = try leb.readULEB128Mem(u64, &ptr); try file_entries.append(FileEntry{ .file_name = file_name, .dir_index = dir_index, @@ -1777,19 +1779,19 @@ fn getLineNumberInfoMacOs(di: *DebugInfo, symbol: MachoSymbol, target_address: u prog.basic_block = false; }, DW.LNS_advance_pc => { - const arg = try readULeb128Mem(&ptr); + const arg = try leb.readULEB128Mem(u64, &ptr); prog.address += arg * minimum_instruction_length; }, DW.LNS_advance_line => { - const arg = try readILeb128Mem(&ptr); + const arg = try leb.readILEB128Mem(i64, &ptr); prog.line += arg; }, DW.LNS_set_file => { - const arg = try readULeb128Mem(&ptr); + const arg = try leb.readULEB128Mem(u64, &ptr); prog.file = arg; }, DW.LNS_set_column => { - const arg = try readULeb128Mem(&ptr); + const arg = try leb.readULEB128Mem(u64, &ptr); prog.column = arg; }, DW.LNS_negate_stmt => { @@ -1880,9 +1882,9 @@ fn getLineNumberInfoDwarf(di: *DwarfInfo, compile_unit: CompileUnit, target_addr while (true) { const file_name = try di.readString(); if (file_name.len == 0) break; - const dir_index = try readULeb128(di.dwarf_in_stream); - const mtime = try readULeb128(di.dwarf_in_stream); - const len_bytes = try readULeb128(di.dwarf_in_stream); + const dir_index = try leb.readULEB128(u64, di.dwarf_in_stream); + const mtime 
= try leb.readULEB128(u64, di.dwarf_in_stream); + const len_bytes = try leb.readULEB128(u64, di.dwarf_in_stream); try file_entries.append(FileEntry{ .file_name = file_name, .dir_index = dir_index, @@ -1897,7 +1899,7 @@ fn getLineNumberInfoDwarf(di: *DwarfInfo, compile_unit: CompileUnit, target_addr const opcode = try di.dwarf_in_stream.readByte(); if (opcode == DW.LNS_extended_op) { - const op_size = try readULeb128(di.dwarf_in_stream); + const op_size = try leb.readULEB128(u64, di.dwarf_in_stream); if (op_size < 1) return error.InvalidDebugInfo; var sub_op = try di.dwarf_in_stream.readByte(); switch (sub_op) { @@ -1912,9 +1914,9 @@ fn getLineNumberInfoDwarf(di: *DwarfInfo, compile_unit: CompileUnit, target_addr }, DW.LNE_define_file => { const file_name = try di.readString(); - const dir_index = try readULeb128(di.dwarf_in_stream); - const mtime = try readULeb128(di.dwarf_in_stream); - const len_bytes = try readULeb128(di.dwarf_in_stream); + const dir_index = try leb.readULEB128(u64, di.dwarf_in_stream); + const mtime = try leb.readULEB128(u64, di.dwarf_in_stream); + const len_bytes = try leb.readULEB128(u64, di.dwarf_in_stream); try file_entries.append(FileEntry{ .file_name = file_name, .dir_index = dir_index, @@ -1943,19 +1945,19 @@ fn getLineNumberInfoDwarf(di: *DwarfInfo, compile_unit: CompileUnit, target_addr prog.basic_block = false; }, DW.LNS_advance_pc => { - const arg = try readULeb128(di.dwarf_in_stream); + const arg = try leb.readULEB128(u64, di.dwarf_in_stream); prog.address += arg * minimum_instruction_length; }, DW.LNS_advance_line => { - const arg = try readILeb128(di.dwarf_in_stream); + const arg = try leb.readILEB128(i64, di.dwarf_in_stream); prog.line += arg; }, DW.LNS_set_file => { - const arg = try readULeb128(di.dwarf_in_stream); + const arg = try leb.readULEB128(u64, di.dwarf_in_stream); prog.file = arg; }, DW.LNS_set_column => { - const arg = try readULeb128(di.dwarf_in_stream); + const arg = try leb.readULEB128(u64, di.dwarf_in_stream); 
prog.column = arg; }, DW.LNS_negate_stmt => { @@ -2240,53 +2242,6 @@ fn readStringMem(ptr: *[*]const u8) []const u8 { return result; } -fn readULeb128Mem(ptr: *[*]const u8) !u64 { - var result: u64 = 0; - var shift: usize = 0; - var i: usize = 0; - - while (true) { - const byte = ptr.*[i]; - i += 1; - - var operand: u64 = undefined; - - if (@shlWithOverflow(u64, byte & 0b01111111, @intCast(u6, shift), &operand)) return error.InvalidDebugInfo; - - result |= operand; - - if ((byte & 0b10000000) == 0) { - ptr.* += i; - return result; - } - - shift += 7; - } -} -fn readILeb128Mem(ptr: *[*]const u8) !i64 { - var result: i64 = 0; - var shift: usize = 0; - var i: usize = 0; - - while (true) { - const byte = ptr.*[i]; - i += 1; - - if (shift > @sizeOf(i64) * 8) return error.InvalidDebugInfo; - - result |= i64(byte & 0b01111111) << @intCast(u6, shift); - shift += 7; - - if ((byte & 0b10000000) == 0) { - if (shift < @sizeOf(i64) * 8 and (byte & 0b01000000) != 0) { - result |= -(i64(1) << @intCast(u6, shift)); - } - ptr.* += i; - return result; - } - } -} - fn readInitialLength(comptime E: type, in_stream: *io.InStream(E), is_64: *bool) !u64 { const first_32_bits = try in_stream.readIntLittle(u32); is_64.* = (first_32_bits == 0xffffffff); @@ -2298,46 +2253,6 @@ fn readInitialLength(comptime E: type, in_stream: *io.InStream(E), is_64: *bool) } } -fn readULeb128(in_stream: var) !u64 { - var result: u64 = 0; - var shift: usize = 0; - - while (true) { - const byte = try in_stream.readByte(); - - var operand: u64 = undefined; - - if (@shlWithOverflow(u64, byte & 0b01111111, @intCast(u6, shift), &operand)) return error.InvalidDebugInfo; - - result |= operand; - - if ((byte & 0b10000000) == 0) return result; - - shift += 7; - } -} - -fn readILeb128(in_stream: var) !i64 { - var result: i64 = 0; - var shift: usize = 0; - - while (true) { - const byte = try in_stream.readByte(); - - if (shift > @sizeOf(i64) * 8) return error.InvalidDebugInfo; - - result |= i64(byte & 0b01111111) << 
@intCast(u6, shift); - shift += 7; - - if ((byte & 0b10000000) == 0) { - if (shift < @sizeOf(i64) * 8 and (byte & 0b01000000) != 0) { - result |= -(i64(1) << @intCast(u6, shift)); - } - return result; - } - } -} - /// This should only be used in temporary test programs. pub const global_allocator = &global_fixed_allocator.allocator; var global_fixed_allocator = std.heap.ThreadSafeFixedBufferAllocator.init(global_allocator_mem[0..]);
diff --git a/std/debug/leb128.zig b/std/debug/leb128.zig
new file mode 100644
index 000000000..7bdb30a26
--- /dev/null
+++ b/std/debug/leb128.zig
@@ -0,0 +1,292 @@
+const std = @import("std");
+const testing = std.testing;
+
+/// Reads an unsigned LEB128 value of type `T` from `in_stream`, one byte at a
+/// time via `readByte`.
+/// Returns `error.Overflow` when the encoded value does not fit in `T`, or
+/// when the encoding continues past the last 7-bit group `T` can hold.
+/// Errors returned by `in_stream.readByte()` are passed through.
+pub fn readULEB128(comptime T: type, in_stream: var) !T {
+    const ShiftT = @IntType(false, std.math.log2(T.bit_count));
+
+    var result: T = 0;
+    var shift: ShiftT = 0;
+
+    while (true) {
+        const byte = try in_stream.readByte();
+
+        // Fails when a set payload bit would be shifted out of `T`.
+        var operand: T = undefined;
+        if (@shlWithOverflow(T, byte & 0x7f, shift, &operand))
+            return error.Overflow;
+
+        result |= operand;
+
+        // A clear high bit marks the final byte. Check it before advancing
+        // `shift`: the last group of a full-width value sits at the largest
+        // offset `ShiftT` can hold, so advancing first would reject it.
+        if ((byte & 0x80) == 0)
+            return result;
+
+        // Another byte follows, so its group must still start inside `T`.
+        if (@addWithOverflow(ShiftT, shift, 7, &shift))
+            return error.Overflow;
+    }
+}
+
+/// Same as `readULEB128`, but reads the encoded bytes through `ptr`.
+/// On success `ptr` is advanced past the consumed bytes; on error it is left
+/// unchanged.
+/// No bounds are checked: reading only stops at a byte with the high bit clear
+/// or when `error.Overflow` is returned.
+pub fn readULEB128Mem(comptime T: type, ptr: *[*]const u8) !T {
+    const ShiftT = @IntType(false, std.math.log2(T.bit_count));
+
+    var result: T = 0;
+    var shift: ShiftT = 0;
+    var i: usize = 0;
+
+    while (true) {
+        const byte = ptr.*[i];
+        i += 1;
+
+        var operand: T = undefined;
+        if (@shlWithOverflow(T, byte & 0x7f, shift, &operand))
+            return error.Overflow;
+
+        result |= operand;
+
+        // Check for the final byte before advancing `shift` so that a group
+        // at the largest offset `ShiftT` can hold is still accepted.
+        if ((byte & 0x80) == 0) {
+            ptr.* += i;
+            return result;
+        }
+
+        if (@addWithOverflow(ShiftT, shift, 7, &shift))
+            return error.Overflow;
+    }
+}
+
+/// Returns whether the 7-bit payload of `byte`, placed `shift` bits into a
+/// signed integer of type `T`, is representable: every payload bit that does
+/// not fit in `T` has to be a copy of the bit that lands on the sign bit.
+/// `shift` must be smaller than `T.bit_count`.
+fn signedGroupFits(comptime T: type, byte: u8, shift: usize) bool {
+    const bits_left = usize(T.bit_count) - shift;
+    if (bits_left >= 7)
+        return true;
+
+    // Keep the bit that lands on the sign bit of `T` and everything above it;
+    // those bits must be all zeros or all ones.
+    const drop = @intCast(u3, bits_left - 1);
+    const sign_and_excess = (byte & 0x7f) >> drop;
+    return sign_and_excess == 0 or sign_and_excess == (u8(0x7f) >> drop);
+}
+
+/// Reads a signed LEB128 value of type `T` from `in_stream`, one byte at a
+/// time via `readByte`.
+/// Returns `error.Overflow` when the encoded value does not fit in `T`, or
+/// when the encoding continues past the last 7-bit group `T` can hold.
+/// Errors returned by `in_stream.readByte()` are passed through.
+pub fn readILEB128(comptime T: type, in_stream: var) !T {
+    const UT = @IntType(false, T.bit_count);
+    const ShiftT = @IntType(false, std.math.log2(T.bit_count));
+
+    // Accumulate the raw bit pattern as unsigned so that a group reaching the
+    // sign bit of `T` is not mistaken for an overflow.
+    var result: UT = 0;
+    var shift: ShiftT = 0;
+
+    while (true) {
+        const byte = u8(try in_stream.readByte());
+
+        if (!signedGroupFits(T, byte, shift))
+            return error.Overflow;
+
+        // Bits shifted out here were validated above, dropping them is fine.
+        result |= UT(byte & 0x7f) << shift;
+
+        if ((byte & 0x80) == 0) {
+            // Bit 6 of the final byte is the sign: replicate it into the
+            // bits above this group, if there are any.
+            if ((byte & 0x40) != 0 and usize(shift) + 7 < T.bit_count) {
+                result |= UT(std.math.maxInt(UT)) << @intCast(ShiftT, usize(shift) + 7);
+            }
+            return @bitCast(T, result);
+        }
+
+        if (@addWithOverflow(ShiftT, shift, 7, &shift))
+            return error.Overflow;
+    }
+}
+
+/// Same as `readILEB128`, but reads the encoded bytes through `ptr`.
+/// On success `ptr` is advanced past the consumed bytes; on error it is left
+/// unchanged.
+/// No bounds are checked: reading only stops at a byte with the high bit clear
+/// or when `error.Overflow` is returned.
+pub fn readILEB128Mem(comptime T: type, ptr: *[*]const u8) !T {
+    const UT = @IntType(false, T.bit_count);
+    const ShiftT = @IntType(false, std.math.log2(T.bit_count));
+
+    var result: UT = 0;
+    var shift: ShiftT = 0;
+    var i: usize = 0;
+
+    while (true) {
+        const byte = ptr.*[i];
+        i += 1;
+
+        if (!signedGroupFits(T, byte, shift))
+            return error.Overflow;
+
+        result |= UT(byte & 0x7f) << shift;
+
+        if ((byte & 0x80) == 0) {
+            // Sign-extend from bit 6 of the final byte when bits remain above.
+            if ((byte & 0x40) != 0 and usize(shift) + 7 < T.bit_count) {
+                result |= UT(std.math.maxInt(UT)) << @intCast(ShiftT, usize(shift) + 7);
+            }
+            ptr.* += i;
+            return @bitCast(T, result);
+        }
+
+        if (@addWithOverflow(ShiftT, shift, 7, &shift))
+            return error.Overflow;
+    }
+}
+
+/// Stream over `str` used by the tests below; its `readFn` hands out at most
+/// one byte per call.
+const OneByteReadInStream = struct {
+    const Error = error{NoError};
+    const Stream = std.io.InStream(Error);
+
+    stream: Stream,
+    str: []const u8,
+    curr: usize,
+
+    fn init(str: []const u8) @This() {
+        return @This(){
+            .stream = Stream{ .readFn = readFn },
+            .str = str,
+            .curr = 0,
+        };
+    }
+
+    fn readFn(in_stream: *Stream, dest: []u8) Error!usize {
+        const self = @fieldParentPtr(@This(), "stream", in_stream);
+        if (self.str.len <= self.curr or dest.len == 0)
+            return 0;
+
+        dest[0] = self.str[self.curr];
+        self.curr += 1;
+        return 1;
+    }
+};
+
+/// Decodes `encoded` with both `readILEB128` and `readILEB128Mem` and checks
+/// that the two decoders agree.
+fn test_read_ileb128(comptime T: type, encoded: []const u8) !T {
+    var in_stream = OneByteReadInStream.init(encoded);
+    const v1 = try readILEB128(T, &in_stream.stream);
+    var in_ptr = encoded.ptr;
+    const v2 = try readILEB128Mem(T, &in_ptr);
+    testing.expectEqual(v1, v2);
+    return v2;
+}
+
+/// Decodes `encoded` with both `readULEB128` and `readULEB128Mem` and checks
+/// that the two decoders agree.
+fn test_read_uleb128(comptime T: type, encoded: []const u8) !T {
+    var in_stream = OneByteReadInStream.init(encoded);
+    const v1 = try readULEB128(T, &in_stream.stream);
+    var in_ptr = encoded.ptr;
+    const v2 = try readULEB128Mem(T, &in_ptr);
+    testing.expectEqual(v1, v2);
+    return v2;
+}
+
+test "deserialize signed LEB128" {
+    // Truncated
+    testing.expectError(error.EndOfStream, test_read_ileb128(i64, "\x80"));
+
+    // Overflow
+    testing.expectError(error.Overflow, test_read_ileb128(i8, "\x80\x80\x40"));
+    testing.expectError(error.Overflow, test_read_ileb128(i16, "\x80\x80\x80\x40"));
+    testing.expectError(error.Overflow, test_read_ileb128(i32, "\x80\x80\x80\x80\x40"));
+    testing.expectError(error.Overflow, test_read_ileb128(i64, "\x80\x80\x80\x80\x80\x80\x80\x80\x80\x40"));
+    testing.expectError(error.Overflow, test_read_ileb128(i8, "\x80\x01"));
+    testing.expectError(error.Overflow, test_read_ileb128(i32, "\x80\x80\x80\x80\x08"));
+
+    // Decode SLEB128
+    testing.expect((try test_read_ileb128(i64, "\x00")) == 0);
+    testing.expect((try test_read_ileb128(i64, "\x01")) == 1);
+    testing.expect((try test_read_ileb128(i64, "\x3f")) == 63);
+    testing.expect((try test_read_ileb128(i64, "\x40")) == -64);
+    testing.expect((try test_read_ileb128(i64, "\x41")) == -63);
+    testing.expect((try test_read_ileb128(i64, "\x7f")) == -1);
+    testing.expect((try test_read_ileb128(i64, "\x80\x01")) == 128);
+    testing.expect((try test_read_ileb128(i64, "\x81\x01")) == 129);
+    testing.expect((try test_read_ileb128(i64, "\xff\x7e")) == -129);
+    testing.expect((try test_read_ileb128(i64, "\x80\x7f")) == -128);
+    testing.expect((try test_read_ileb128(i64, "\x81\x7f")) == -127);
+    testing.expect((try test_read_ileb128(i64, "\xc0\x00")) == 64);
+    testing.expect((try test_read_ileb128(i64, "\xc7\x9f\x7f")) == -12345);
+
+    // Decode the smallest and largest values of a type
+    testing.expect((try test_read_ileb128(i8, "\x80\x7f")) == -128);
+    testing.expect((try test_read_ileb128(i8, "\xff\x00")) == 127);
+    testing.expect((try test_read_ileb128(i32, "\x80\x80\x80\x80\x78")) == std.math.minInt(i32));
+    testing.expect((try test_read_ileb128(i32, "\xff\xff\xff\xff\x07")) == std.math.maxInt(i32));
+    testing.expect((try test_read_ileb128(i64, "\x80\x80\x80\x80\x80\x80\x80\x80\x80\x7f")) == std.math.minInt(i64));
+    testing.expect((try test_read_ileb128(i64, "\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00")) == std.math.maxInt(i64));
+
+    // Decode unnormalized SLEB128 with extra padding bytes.
+    testing.expect((try test_read_ileb128(i64, "\x80\x00")) == 0);
+    testing.expect((try test_read_ileb128(i64, "\x80\x80\x00")) == 0);
+    testing.expect((try test_read_ileb128(i64, "\xff\x00")) == 0x7f);
+    testing.expect((try test_read_ileb128(i64, "\xff\x80\x00")) == 0x7f);
+    testing.expect((try test_read_ileb128(i64, "\x80\x81\x00")) == 0x80);
+    testing.expect((try test_read_ileb128(i64, "\x80\x81\x80\x00")) == 0x80);
+}
+
+test "deserialize unsigned LEB128" {
+    // Truncated
+    testing.expectError(error.EndOfStream, test_read_uleb128(u64, "\x80"));
+
+    // Overflow
+    testing.expectError(error.Overflow, test_read_uleb128(u8, "\x80\x80\x40"));
+    testing.expectError(error.Overflow, test_read_uleb128(u16, "\x80\x80\x80\x40"));
+    testing.expectError(error.Overflow, test_read_uleb128(u32, "\x80\x80\x80\x80\x40"));
+    testing.expectError(error.Overflow, test_read_uleb128(u64, "\x80\x80\x80\x80\x80\x80\x80\x80\x80\x40"));
+    testing.expectError(error.Overflow, test_read_uleb128(u8, "\x80\x02"));
+
+    // Decode ULEB128
+    testing.expect((try test_read_uleb128(u64, "\x00")) == 0);
+    testing.expect((try test_read_uleb128(u64, "\x01")) == 1);
+    testing.expect((try test_read_uleb128(u64, "\x3f")) == 63);
+    testing.expect((try test_read_uleb128(u64, "\x40")) == 64);
+    testing.expect((try test_read_uleb128(u64, "\x7f")) == 0x7f);
+    testing.expect((try test_read_uleb128(u64, "\x80\x01")) == 0x80);
+    testing.expect((try test_read_uleb128(u64, "\x81\x01")) == 0x81);
+    testing.expect((try test_read_uleb128(u64, "\x90\x01")) == 0x90);
+    testing.expect((try test_read_uleb128(u64, "\xff\x01")) == 0xff);
+    testing.expect((try test_read_uleb128(u64, "\x80\x02")) == 0x100);
+    testing.expect((try test_read_uleb128(u64, "\x81\x02")) == 0x101);
+    testing.expect((try test_read_uleb128(u64, "\x80\xc1\x80\x80\x10")) == 4294975616);
+
+    // Decode the largest value of a type
+    testing.expect((try test_read_uleb128(u8, "\xff\x01")) == 0xff);
+    testing.expect((try test_read_uleb128(u32, "\xff\xff\xff\xff\x0f")) == std.math.maxInt(u32));
+    testing.expect((try test_read_uleb128(u64, "\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01")) == std.math.maxInt(u64));
+
+    // Decode ULEB128 with extra padding bytes
+    testing.expect((try test_read_uleb128(u64, "\x80\x00")) == 0);
+    testing.expect((try test_read_uleb128(u64, "\x80\x80\x00")) == 0);
+    testing.expect((try test_read_uleb128(u64, "\xff\x00")) == 0x7f);
+    testing.expect((try test_read_uleb128(u64, "\xff\x80\x00")) == 0x7f);
+    testing.expect((try test_read_uleb128(u64, "\x80\x81\x00")) == 0x80);
+    testing.expect((try test_read_uleb128(u64, "\x80\x81\x80\x00")) == 0x80);
+}