Merge pull request #6654 from joachimschmidt557/stage2-arm

stage2 ARM: more stuff
Andrew Kelley 2020-10-29 18:29:24 -04:00 committed by GitHub
commit f4bb8be9fc
2 changed files with 360 additions and 49 deletions


@@ -573,25 +573,54 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// sub sp, sp, #reloc
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.push(.al, .{ .fp, .lr }).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, .fp, Instruction.Operand.reg(.sp, Instruction.Operand.Shift.none)).toU32());
// TODO: prepare stack for local variables
// const backpatch_reloc = try self.code.addManyAsArray(4);
const backpatch_reloc = self.code.items.len;
try self.code.resize(backpatch_reloc + 4);
try self.dbgSetPrologueEnd();
try self.genBody(self.mod_fn.analysis.success);
// Backpatch stack offset
// const stack_end = self.max_end_stack;
// const aligned_stack_end = mem.alignForward(stack_end, self.stack_align);
// mem.writeIntLittle(u32, backpatch_reloc, Instruction.sub(.al, .sp, .sp, Instruction.Operand.imm()));
const stack_end = self.max_end_stack;
const aligned_stack_end = mem.alignForward(stack_end, self.stack_align);
if (Instruction.Operand.fromU32(@intCast(u32, aligned_stack_end))) |op| {
mem.writeIntLittle(u32, self.code.items[backpatch_reloc..][0..4], Instruction.sub(.al, .sp, .sp, op).toU32());
} else {
return self.fail(self.src, "TODO ARM: allow larger stacks", .{});
}
try self.dbgSetEpilogueBegin();
// exitlude jumps
if (self.exitlude_jump_relocs.items.len == 1) {
// There is only one relocation. Hence,
// this relocation must be at the end of
// the code. Therefore, we can just delete
// the space initially reserved for the
// jump
self.code.items.len -= 4;
} else for (self.exitlude_jump_relocs.items) |jmp_reloc| {
const amt = self.code.items.len - (jmp_reloc + 4);
if (amt == 0) {
// This return is at the end of the
// code block. We can't just delete
// the space because there may be
// other jumps we already relocated to
// the address. Instead, insert a nop
mem.writeIntLittle(u32, self.code.items[jmp_reloc..][0..4], Instruction.nop().toU32());
} else {
if (math.cast(i26, amt)) |offset| {
mem.writeIntLittle(u32, self.code.items[jmp_reloc..][0..4], Instruction.b(.al, offset).toU32());
} else |err| {
return self.fail(self.src, "exitlude jump is too large", .{});
}
}
}
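// Worked example with hypothetical sizes: if code.items.len is 0x40 and
// the lone reloc sits at 0x3c, the reserved word is the final instruction,
// so shrinking the buffer by 4 simply drops a branch that would only fall
// through to the epilogue anyway.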
// mov sp, fp
// pop {fp, pc}
// TODO: return by jumping to this code, use relocations
// mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, .sp, Instruction.Operand.reg(.fp, Instruction.Operand.Shift.none)).toU32());
// mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.pop(.al, .{ .fp, .pc }).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, .sp, Instruction.Operand.reg(.fp, Instruction.Operand.Shift.none)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.pop(.al, .{ .fp, .pc }).toU32());
} else {
try self.dbgSetPrologueEnd();
try self.genBody(self.mod_fn.analysis.success);
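The prologue above follows a reserve-then-backpatch pattern: four bytes are reserved for `sub sp, sp, #imm` before the body is generated, because the final stack size is only known once `genBody` has run. A minimal standalone sketch of that pattern, mirroring the 2020-era std API the diff itself uses (the patched value is a stand-in, not a real instruction encoding):

const std = @import("std");
const mem = std.mem;

test "reserve a slot, then backpatch it" {
    var code = std.ArrayList(u8).init(std.testing.allocator);
    defer code.deinit();

    // Reserve 4 bytes whose final value is not yet known.
    const reloc = code.items.len;
    try code.resize(reloc + 4);

    // ... emit the body; only afterwards is the operand known ...
    const aligned_stack_end: u32 = 16; // stand-in for mem.alignForward(max_end_stack, stack_align)

    // Patch the reserved slot in place.
    mem.writeIntLittle(u32, code.items[reloc..][0..4], aligned_stack_end);
    std.testing.expectEqual(@as(u32, 16), mem.readIntLittle(u32, code.items[reloc..][0..4]));
}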
@@ -1661,12 +1690,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.jalr(.zero, 0, .ra).toU32());
},
.arm => {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, .sp, Instruction.Operand.reg(.fp, Instruction.Operand.Shift.none)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.pop(.al, .{ .fp, .pc }).toU32());
// TODO: jump to the end with relocation
// // Just add space for an instruction, patch this later
// try self.code.resize(self.code.items.len + 4);
// try self.exitlude_jump_relocs.append(self.gpa, self.code.items.len - 4);
// Just add space for an instruction, patch this later
try self.code.resize(self.code.items.len + 4);
try self.exitlude_jump_relocs.append(self.gpa, self.code.items.len - 4);
},
else => return self.fail(src, "TODO implement return for {}", .{self.target.cpu.arch}),
}
@@ -1932,6 +1958,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
mem.writeIntLittle(i32, self.code.addManyAsArrayAssumeCapacity(4), delta);
}
},
.arm => {
if (math.cast(i26, @intCast(i32, index) - @intCast(i32, self.code.items.len))) |delta| {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.b(.al, delta).toU32());
} else |err| {
return self.fail(src, "TODO: enable larger branch offset", .{});
}
},
else => return self.fail(src, "TODO implement jump for {}", .{self.target.cpu.arch}),
}
}
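For intuition, a worked example with hypothetical offsets: branching from the current end of code at 0x20 back to index 0x10 gives a byte delta of -16, which `Instruction.b` stores as the 24-bit word offset -4 (see the `branch` change in the second file):

const delta: i26 = -16; // @intCast(i32, index) - @intCast(i32, self.code.items.len)
const word = Instruction.b(.al, delta).toU32(); // offset field holds -16 >> 2 == -4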
@@ -2167,6 +2200,58 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
fn genSetStack(self: *Self, src: usize, ty: Type, stack_offset: u32, mcv: MCValue) InnerError!void {
switch (arch) {
.arm => switch (mcv) {
.dead => unreachable,
.ptr_stack_offset => unreachable,
.ptr_embedded_in_code => unreachable,
.unreach, .none => return, // Nothing to do.
.undef => {
if (!self.wantSafety())
return; // The already existing value will do just fine.
// TODO Upgrade this to a memset call when we have that available.
switch (ty.abiSize(self.target.*)) {
1 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaa }),
2 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaa }),
4 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaa }),
8 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaaaaaaaaaa }),
else => return self.fail(src, "TODO implement memset", .{}),
}
},
.compare_flags_unsigned => |op| {
return self.fail(src, "TODO implement set stack variable with compare flags value (unsigned)", .{});
},
.compare_flags_signed => |op| {
return self.fail(src, "TODO implement set stack variable with compare flags value (signed)", .{});
},
.immediate => {
const reg = try self.copyToTmpRegister(src, mcv);
return self.genSetStack(src, ty, stack_offset, MCValue{ .register = reg });
},
.embedded_in_code => |code_offset| {
return self.fail(src, "TODO implement set stack variable from embedded_in_code", .{});
},
.register => |reg| {
// TODO: strb, strh
if (stack_offset <= math.maxInt(u12)) {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.str(.al, reg, .fp, .{
.offset = Instruction.Offset.imm(@intCast(u12, stack_offset)),
.positive = false,
}).toU32());
} else {
return self.fail(src, "TODO genSetStack with larger offsets", .{});
}
},
.memory => |vaddr| {
return self.fail(src, "TODO implement set stack variable from memory vaddr", .{});
},
.stack_offset => |off| {
if (stack_offset == off)
return; // Copy stack variable to itself; nothing to do.
const reg = try self.copyToTmpRegister(src, mcv);
return self.genSetStack(src, ty, stack_offset, MCValue{ .register = reg });
},
},
.x86_64 => switch (mcv) {
.dead => unreachable,
.ptr_stack_offset => unreachable,
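Usage sketch for the new ARM `.register` store path above, using only the `Instruction` API from the second changed file (register choice hypothetical): spilling r0 to stack offset 4 emits a negative fp-relative store, str r0, [fp, #-4]:

const word = Instruction.str(.al, .r0, .fp, .{
    .offset = Instruction.Offset.imm(4),
    .positive = false, // negative offset: [fp, #-4]
}).toU32();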
@@ -2274,35 +2359,39 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
return self.genSetReg(src, reg, .{ .immediate = 0xaaaaaaaa });
},
.immediate => |x| {
// TODO better analysis of x to determine the
// least amount of necessary instructions (use
// more intelligent rotating)
if (x <= math.maxInt(u8)) {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
return;
} else if (x <= math.maxInt(u16)) {
// TODO Use movw Note: Not supported on
// all ARM targets!
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 8), 12)).toU32());
} else if (x <= math.maxInt(u32)) {
// TODO Use movw and movt Note: Not
// supported on all ARM targets! Also TODO
// write constant to code and load
// relative to pc
if (x > math.maxInt(u32)) return self.fail(src, "ARM registers are 32-bit wide", .{});
// immediate: 0xaabbccdd
// mov reg, #0xaa
// orr reg, reg, #0xbb, 24
// orr reg, reg, #0xcc, 16
// orr reg, reg, #0xdd, 8
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 8), 12)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 16), 8)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 24), 4)).toU32());
return;
if (Instruction.Operand.fromU32(@intCast(u32, x))) |op| {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, op).toU32());
} else if (Instruction.Operand.fromU32(~@intCast(u32, x))) |op| {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mvn(.al, reg, op).toU32());
} else if (x <= math.maxInt(u16)) {
if (Target.arm.featureSetHas(self.target.cpu.features, .has_v7)) {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.movw(.al, reg, @intCast(u16, x)).toU32());
} else {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 8), 12)).toU32());
}
} else {
return self.fail(src, "ARM registers are 32-bit wide", .{});
// TODO write constant to code and load
// relative to pc
if (Target.arm.featureSetHas(self.target.cpu.features, .has_v7)) {
// immediate: 0xaaaabbbb
// movw reg, #0xbbbb
// movt reg, #0xaaaa
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.movw(.al, reg, @truncate(u16, x)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.movt(.al, reg, @truncate(u16, x >> 16)).toU32());
} else {
// immediate: 0xaabbccdd
// mov reg, #0xaa
// orr reg, reg, #0xbb, 24
// orr reg, reg, #0xcc, 16
// orr reg, reg, #0xdd, 8
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 8), 12)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 16), 8)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 24), 4)).toU32());
}
}
},
.register => |src_reg| {
@@ -2319,6 +2408,18 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
try self.genSetReg(src, reg, .{ .immediate = addr });
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ldr(.al, reg, reg, .{ .offset = Instruction.Offset.none }).toU32());
},
.stack_offset => |unadjusted_off| {
// TODO: ldrb, ldrh
// TODO: maybe addressing from sp instead of fp
if (unadjusted_off <= math.maxInt(u12)) {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ldr(.al, reg, .fp, .{
.offset = Instruction.Offset.imm(@intCast(u12, unadjusted_off)),
.positive = false,
}).toU32());
} else {
return self.fail(src, "TODO genSetReg with larger stack offset", .{});
}
},
else => return self.fail(src, "TODO implement genSetReg for arm {}", .{mcv}),
},
.riscv64 => switch (mcv) {

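The rewritten `.immediate` case above tries progressively costlier encodings: a single rotated-immediate mov, then mvn of the bitwise complement, then movw (or a mov/orr pair before v7) for 16-bit values, and finally movw+movt or a four-instruction mov/orr chain. A worked example of the mvn shortcut (hypothetical value and register): 0xffffff00 has no rotated 8-bit form, but its complement 0xff does, so one instruction suffices. The `Instruction` API used here is defined in the second changed file, whose hunks follow.

const x: u32 = 0xffff_ff00;
if (Instruction.Operand.fromU32(~x)) |op| {
    // mvn r0, #0xff  =>  r0 = 0xffffff00
    const word = Instruction.mvn(.al, .r0, op).toU32();
}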

@@ -138,6 +138,29 @@ pub const Instruction = union(enum) {
fixed: u2 = 0b00,
cond: u4,
},
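// Note: packed struct fields are laid out least-significant-first, so in the
// layouts below rn occupies bits 0..3 and cond occupies bits 28..31.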
Multiply: packed struct {
rn: u4,
fixed_1: u4 = 0b1001,
rm: u4,
ra: u4,
rd: u4,
set_cond: u1,
accumulate: u1,
fixed_2: u6 = 0b000000,
cond: u4,
},
MultiplyLong: packed struct {
rn: u4,
fixed_1: u4 = 0b1001,
rm: u4,
rdlo: u4,
rdhi: u4,
set_cond: u1,
accumulate: u1,
unsigned: u1,
fixed_2: u5 = 0b00001,
cond: u4,
},
SingleDataTransfer: packed struct {
offset: u12,
rd: u4,
@@ -317,6 +340,29 @@
},
};
}
/// Tries to convert an unsigned 32-bit integer into an
/// immediate operand: an 8-bit value rotated right by an
/// even amount. Returns null when no such encoding exists.
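/// (e.g. 0xab0 converts to imm=0xab with rotate=14, since 0xab ror 28 == 0xab0)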
pub fn fromU32(x: u32) ?Operand {
const masks = comptime blk: {
const base_mask: u32 = std.math.maxInt(u8);
var result = [_]u32{0} ** 16;
for (result) |*mask, i| mask.* = std.math.rotr(u32, base_mask, 2 * i);
break :blk result;
};
return for (masks) |mask, i| {
if (x & mask == x) {
break Operand{
.Immediate = .{
.imm = @intCast(u8, std.math.rotl(u32, x, 2 * i)),
.rotate = @intCast(u4, i),
},
};
}
} else null;
}
};
/// Represents the offset operand of a load or store
@@ -349,7 +395,7 @@
};
}
pub fn imm(immediate: u8) Offset {
pub fn imm(immediate: u12) Offset {
return Offset{
.Immediate = immediate,
};
@@ -380,6 +426,8 @@
pub fn toU32(self: Instruction) u32 {
return switch (self) {
.DataProcessing => |v| @bitCast(u32, v),
.Multiply => |v| @bitCast(u32, v),
.MultiplyLong => |v| @bitCast(u32, v),
.SingleDataTransfer => |v| @bitCast(u32, v),
.BlockDataTransfer => |v| @bitCast(u32, v),
.Branch => |v| @bitCast(u32, v),
@@ -412,6 +460,70 @@
};
}
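/// Encodes MOVW (top == false) or MOVT (top == true): the 16-bit immediate
/// is split into imm4:imm12, with the high 4 bits carried in the rn slot of
/// the data-processing layout.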
fn specialMov(
cond: Condition,
rd: Register,
imm: u16,
top: bool,
) Instruction {
return Instruction{
.DataProcessing = .{
.cond = @enumToInt(cond),
.i = 1,
.opcode = if (top) 0b1010 else 0b1000,
.s = 0,
.rn = @truncate(u4, imm >> 12),
.rd = rd.id(),
.op2 = @truncate(u12, imm),
},
};
}
fn multiply(
cond: Condition,
set_cond: u1,
rd: Register,
rn: Register,
rm: Register,
ra: ?Register,
) Instruction {
return Instruction{
.Multiply = .{
.cond = @enumToInt(cond),
.accumulate = @boolToInt(ra != null),
.set_cond = set_cond,
.rd = rd.id(),
.rn = rn.id(),
.ra = if (ra) |reg| reg.id() else 0b0000,
.rm = rm.id(),
},
};
}
fn multiplyLong(
cond: Condition,
signed: u1,
accumulate: u1,
set_cond: u1,
rdhi: Register,
rdlo: Register,
rm: Register,
rn: Register,
) Instruction {
return Instruction{
.MultiplyLong = .{
.cond = @enumToInt(cond),
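// Encoding bit 22 selects signedness: 0 for UMULL/UMLAL, 1 for SMULL/SMLAL.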
.unsigned = signed,
.accumulate = accumulate,
.set_cond = set_cond,
.rdlo = rdlo.id(),
.rdhi = rdhi.id(),
.rn = rn.id(),
.rm = rm.id(),
},
};
}
fn singleDataTransfer(
cond: Condition,
rd: Register,
@@ -463,12 +575,12 @@
};
}
fn branch(cond: Condition, offset: i24, link: u1) Instruction {
fn branch(cond: Condition, offset: i26, link: u1) Instruction {
return Instruction{
.Branch = .{
.cond = @enumToInt(cond),
.link = link,
.offset = @bitCast(u24, offset),
.offset = @bitCast(u24, @intCast(i24, offset >> 2)),
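// The i26 byte offset is word-aligned; storing offset >> 2 fits it into
// 24 bits, e.g. a byte offset of 12 encodes as 3 (see the updated tests below).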
},
};
}
@@ -618,10 +730,96 @@
return dataProcessing(cond, .mvn, 1, rd, .r0, op2);
}
// movw and movt
pub fn movw(cond: Condition, rd: Register, imm: u16) Instruction {
return specialMov(cond, rd, imm, false);
}
pub fn movt(cond: Condition, rd: Register, imm: u16) Instruction {
return specialMov(cond, rd, imm, true);
}
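// Usage sketch: materializing 0xdeadbeef on ARMv7 takes two instructions:
//   movw r0, #0xbeef  =>  Instruction.movw(.al, .r0, 0xbeef)
//   movt r0, #0xdead  =>  Instruction.movt(.al, .r0, 0xdead)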
// PSR transfer
pub fn mrs(cond: Condition, rd: Register, psr: Psr) Instruction {
return dataProcessing(cond, if (psr == .cpsr) .tst else .cmp, 0, rd, .r15, Operand.reg(.r0, Operand.Shift.none));
return Instruction{
.DataProcessing = .{
.cond = @enumToInt(cond),
.i = 0,
.opcode = if (psr == .spsr) 0b1010 else 0b1000,
.s = 0,
.rn = 0b1111,
.rd = rd.id(),
.op2 = 0b0000_0000_0000,
},
};
}
pub fn msr(cond: Condition, psr: Psr, op: Operand) Instruction {
return Instruction{
.DataProcessing = .{
.cond = @enumToInt(cond),
.i = 0,
.opcode = if (psr == .spsr) 0b1011 else 0b1001,
.s = 0,
.rn = 0b1111,
.rd = 0b1111,
.op2 = op.toU12(),
},
};
}
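// For both MRS and MSR, bit 1 of the opcode field selects SPSR over CPSR.
// With s = 0 these encodings cannot be confused with TST/TEQ/CMP/CMN,
// which always set condition flags (s = 1).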
// Multiply
pub fn mul(cond: Condition, rd: Register, rn: Register, rm: Register) Instruction {
return multiply(cond, 0, rd, rn, rm, null);
}
pub fn muls(cond: Condition, rd: Register, rn: Register, rm: Register) Instruction {
return multiply(cond, 1, rd, rn, rm, null);
}
pub fn mla(cond: Condition, rd: Register, rn: Register, rm: Register, ra: Register) Instruction {
return multiply(cond, 0, rd, rn, rm, ra);
}
pub fn mlas(cond: Condition, rd: Register, rn: Register, rm: Register, ra: Register) Instruction {
return multiply(cond, 1, rd, rn, rm, ra);
}
// Multiply long
pub fn umull(cond: Condition, rdlo: Register, rdhi: Register, rn: Register, rm: Register) Instruction {
return multiplyLong(cond, 0, 0, 0, rdhi, rdlo, rm, rn);
}
pub fn umulls(cond: Condition, rdlo: Register, rdhi: Register, rn: Register, rm: Register) Instruction {
return multiplyLong(cond, 0, 0, 1, rdhi, rdlo, rm, rn);
}
pub fn umlal(cond: Condition, rdlo: Register, rdhi: Register, rn: Register, rm: Register) Instruction {
return multiplyLong(cond, 0, 1, 0, rdhi, rdlo, rm, rn);
}
pub fn umlals(cond: Condition, rdlo: Register, rdhi: Register, rn: Register, rm: Register) Instruction {
return multiplyLong(cond, 0, 1, 1, rdhi, rdlo, rm, rn);
}
pub fn smull(cond: Condition, rdlo: Register, rdhi: Register, rn: Register, rm: Register) Instruction {
return multiplyLong(cond, 1, 0, 0, rdhi, rdlo, rm, rn);
}
pub fn smulls(cond: Condition, rdlo: Register, rdhi: Register, rn: Register, rm: Register) Instruction {
return multiplyLong(cond, 1, 0, 1, rdhi, rdlo, rm, rn);
}
pub fn smlal(cond: Condition, rdlo: Register, rdhi: Register, rn: Register, rm: Register) Instruction {
return multiplyLong(cond, 1, 1, 0, rdhi, rdlo, rm, rn);
}
pub fn smlals(cond: Condition, rdlo: Register, rdhi: Register, rn: Register, rm: Register) Instruction {
return multiplyLong(cond, 1, 1, 1, rdhi, rdlo, rm, rn);
}
// Single data transfer
@@ -697,11 +895,11 @@
// Branch
pub fn b(cond: Condition, offset: i24) Instruction {
pub fn b(cond: Condition, offset: i26) Instruction {
return branch(cond, offset, 0);
}
pub fn bl(cond: Condition, offset: i24) Instruction {
pub fn bl(cond: Condition, offset: i26) Instruction {
return branch(cond, offset, 1);
}
@@ -731,6 +929,10 @@
// Aliases
pub fn nop() Instruction {
return mov(.al, .r0, Instruction.Operand.reg(.r0, Instruction.Operand.Shift.none));
}
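// mov r0, r0 is the traditional ARM nop; a dedicated NOP hint only exists
// on ARMv6K and later.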
pub fn pop(cond: Condition, args: anytype) Instruction {
if (@typeInfo(@TypeOf(args)) != .Struct) {
@compileError("Expected tuple or struct argument, found " ++ @typeName(@TypeOf(args)));
@@ -805,6 +1007,14 @@
.inst = Instruction.mrs(.al, .r5, .cpsr),
.expected = 0b1110_00010_0_001111_0101_000000000000,
},
.{ // mul r0, r1, r2
.inst = Instruction.mul(.al, .r0, .r1, .r2),
.expected = 0b1110_000000_0_0_0000_0000_0010_1001_0001,
},
.{ // umlal r0, r1, r5, r6
.inst = Instruction.umlal(.al, .r0, .r1, .r5, .r6),
.expected = 0b1110_00001_0_1_0_0001_0000_0110_1001_0101,
},
.{ // ldr r0, [r2, #42]
.inst = Instruction.ldr(.al, .r0, .r2, .{
.offset = Instruction.Offset.imm(42),
@@ -819,11 +1029,11 @@
},
.{ // b #12
.inst = Instruction.b(.al, 12),
.expected = 0b1110_101_0_0000_0000_0000_0000_0000_1100,
.expected = 0b1110_101_0_0000_0000_0000_0000_0000_0011,
},
.{ // bl #-4
.inst = Instruction.bl(.al, -4),
.expected = 0b1110_101_1_1111_1111_1111_1111_1111_1100,
.expected = 0b1110_101_1_1111_1111_1111_1111_1111_1111,
},
.{ // bx lr
.inst = Instruction.bx(.al, .lr),