Merge pull request #6654 from joachimschmidt557/stage2-arm

stage2 ARM: more stuff
Andrew Kelley 2020-10-29 18:29:24 -04:00 committed by GitHub
commit f4bb8be9fc
2 changed files with 360 additions and 49 deletions


@@ -573,25 +573,54 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// sub sp, sp, #reloc
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.push(.al, .{ .fp, .lr }).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, .fp, Instruction.Operand.reg(.sp, Instruction.Operand.Shift.none)).toU32());
// TODO: prepare stack for local variables
// const backpatch_reloc = try self.code.addManyAsArray(4);
const backpatch_reloc = self.code.items.len;
try self.code.resize(backpatch_reloc + 4);
try self.dbgSetPrologueEnd();
try self.genBody(self.mod_fn.analysis.success);
// Backpatch stack offset
// const stack_end = self.max_end_stack;
// const aligned_stack_end = mem.alignForward(stack_end, self.stack_align);
// mem.writeIntLittle(u32, backpatch_reloc, Instruction.sub(.al, .sp, .sp, Instruction.Operand.imm()));
const stack_end = self.max_end_stack;
const aligned_stack_end = mem.alignForward(stack_end, self.stack_align);
if (Instruction.Operand.fromU32(@intCast(u32, aligned_stack_end))) |op| {
mem.writeIntLittle(u32, self.code.items[backpatch_reloc..][0..4], Instruction.sub(.al, .sp, .sp, op).toU32());
} else {
return self.fail(self.src, "TODO ARM: allow larger stacks", .{});
}
try self.dbgSetEpilogueBegin();
// exitlude jumps
if (self.exitlude_jump_relocs.items.len == 1) {
// There is only one relocation. Hence,
// this relocation must be at the end of
// the code. Therefore, we can just delete
// the space initially reserved for the
// jump
self.code.items.len -= 4;
} else for (self.exitlude_jump_relocs.items) |jmp_reloc| {
const amt = self.code.items.len - (jmp_reloc + 4);
if (amt == 0) {
// This return is at the end of the
// code block. We can't just delete
// the space because there may be
// other jumps we already relocated to
// the address. Instead, insert a nop
mem.writeIntLittle(u32, self.code.items[jmp_reloc..][0..4], Instruction.nop().toU32());
} else {
if (math.cast(i26, amt)) |offset| {
mem.writeIntLittle(u32, self.code.items[jmp_reloc..][0..4], Instruction.b(.al, offset).toU32());
} else |err| {
return self.fail(self.src, "exitlude jump is too large", .{});
}
}
}
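// Worked example with hypothetical sizes: if code.items.len is 0x40 and
// the lone reloc sits at 0x3c, the reserved word is the final instruction,
// so shrinking the buffer by 4 simply drops a branch that would only fall
// through to the epilogue anyway.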
// mov sp, fp
// pop {fp, pc}
// TODO: return by jumping to this code, use relocations
// mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, .sp, Instruction.Operand.reg(.fp, Instruction.Operand.Shift.none)).toU32());
// mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.pop(.al, .{ .fp, .pc }).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, .sp, Instruction.Operand.reg(.fp, Instruction.Operand.Shift.none)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.pop(.al, .{ .fp, .pc }).toU32());
} else {
try self.dbgSetPrologueEnd();
try self.genBody(self.mod_fn.analysis.success);
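The prologue above follows a reserve-then-backpatch pattern: four bytes are reserved for `sub sp, sp, #imm` before the body is generated, because the final stack size is only known once `genBody` has run. A minimal standalone sketch of that pattern, mirroring the 2020-era std API the diff itself uses (the patched value is a stand-in, not a real instruction encoding):

const std = @import("std");
const mem = std.mem;

test "reserve a slot, then backpatch it" {
    var code = std.ArrayList(u8).init(std.testing.allocator);
    defer code.deinit();

    // Reserve 4 bytes whose final value is not yet known.
    const reloc = code.items.len;
    try code.resize(reloc + 4);

    // ... emit the body; only afterwards is the operand known ...
    const aligned_stack_end: u32 = 16; // stand-in for mem.alignForward(max_end_stack, stack_align)

    // Patch the reserved slot in place.
    mem.writeIntLittle(u32, code.items[reloc..][0..4], aligned_stack_end);
    std.testing.expectEqual(@as(u32, 16), mem.readIntLittle(u32, code.items[reloc..][0..4]));
}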
@@ -1661,12 +1690,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.jalr(.zero, 0, .ra).toU32());
},
.arm => {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, .sp, Instruction.Operand.reg(.fp, Instruction.Operand.Shift.none)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.pop(.al, .{ .fp, .pc }).toU32());
// TODO: jump to the end with relocation
// // Just add space for an instruction, patch this later
// try self.code.resize(self.code.items.len + 4);
// try self.exitlude_jump_relocs.append(self.gpa, self.code.items.len - 4);
// Just add space for an instruction, patch this later
try self.code.resize(self.code.items.len + 4);
try self.exitlude_jump_relocs.append(self.gpa, self.code.items.len - 4);
},
else => return self.fail(src, "TODO implement return for {}", .{self.target.cpu.arch}),
}
@@ -1932,6 +1958,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
mem.writeIntLittle(i32, self.code.addManyAsArrayAssumeCapacity(4), delta);
}
},
.arm => {
if (math.cast(i26, @intCast(i32, index) - @intCast(i32, self.code.items.len))) |delta| {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.b(.al, delta).toU32());
} else |err| {
return self.fail(src, "TODO: enable larger branch offset", .{});
}
},
else => return self.fail(src, "TODO implement jump for {}", .{self.target.cpu.arch}),
}
}
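For intuition, a worked example with hypothetical offsets: branching from the current end of code at 0x20 back to index 0x10 gives a byte delta of -16, which `Instruction.b` stores as the 24-bit word offset -4 (see the `branch` change in the second file):

const delta: i26 = -16; // @intCast(i32, index) - @intCast(i32, self.code.items.len)
const word = Instruction.b(.al, delta).toU32(); // offset field holds -16 >> 2 == -4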
@@ -2167,6 +2200,58 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
fn genSetStack(self: *Self, src: usize, ty: Type, stack_offset: u32, mcv: MCValue) InnerError!void {
switch (arch) {
.arm => switch (mcv) {
.dead => unreachable,
.ptr_stack_offset => unreachable,
.ptr_embedded_in_code => unreachable,
.unreach, .none => return, // Nothing to do.
.undef => {
if (!self.wantSafety())
return; // The already existing value will do just fine.
// TODO Upgrade this to a memset call when we have that available.
switch (ty.abiSize(self.target.*)) {
1 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaa }),
2 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaa }),
4 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaa }),
8 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaaaaaaaaaa }),
else => return self.fail(src, "TODO implement memset", .{}),
}
},
.compare_flags_unsigned => |op| {
return self.fail(src, "TODO implement set stack variable with compare flags value (unsigned)", .{});
},
.compare_flags_signed => |op| {
return self.fail(src, "TODO implement set stack variable with compare flags value (signed)", .{});
},
.immediate => {
const reg = try self.copyToTmpRegister(src, mcv);
return self.genSetStack(src, ty, stack_offset, MCValue{ .register = reg });
},
.embedded_in_code => |code_offset| {
return self.fail(src, "TODO implement set stack variable from embedded_in_code", .{});
},
.register => |reg| {
// TODO: strb, strh
if (stack_offset <= math.maxInt(u12)) {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.str(.al, reg, .fp, .{
.offset = Instruction.Offset.imm(@intCast(u12, stack_offset)),
.positive = false,
}).toU32());
} else {
return self.fail(src, "TODO genSetStack with larger offsets", .{});
}
},
.memory => |vaddr| {
return self.fail(src, "TODO implement set stack variable from memory vaddr", .{});
},
.stack_offset => |off| {
if (stack_offset == off)
return; // Copy stack variable to itself; nothing to do.
const reg = try self.copyToTmpRegister(src, mcv);
return self.genSetStack(src, ty, stack_offset, MCValue{ .register = reg });
},
},
.x86_64 => switch (mcv) {
.dead => unreachable,
.ptr_stack_offset => unreachable,
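Usage sketch for the new ARM `.register` store path above, using only the `Instruction` API from the second changed file (register choice hypothetical): spilling r0 to stack offset 4 emits a negative fp-relative store, str r0, [fp, #-4]:

const word = Instruction.str(.al, .r0, .fp, .{
    .offset = Instruction.Offset.imm(4),
    .positive = false, // negative offset: [fp, #-4]
}).toU32();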
@@ -2274,35 +2359,39 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
return self.genSetReg(src, reg, .{ .immediate = 0xaaaaaaaa });
},
.immediate => |x| {
// TODO better analysis of x to determine the
// least amount of necessary instructions (use
// more intelligent rotating)
if (x <= math.maxInt(u8)) {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
return;
} else if (x <= math.maxInt(u16)) {
// TODO Use movw Note: Not supported on
// all ARM targets!
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 8), 12)).toU32());
} else if (x <= math.maxInt(u32)) {
// TODO Use movw and movt Note: Not
// supported on all ARM targets! Also TODO
// write constant to code and load
// relative to pc
if (x > math.maxInt(u32)) return self.fail(src, "ARM registers are 32-bit wide", .{});
// immediate: 0xaabbccdd
// mov reg, #0xaa
// orr reg, reg, #0xbb, 24
// orr reg, reg, #0xcc, 16
// orr reg, reg, #0xdd, 8
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 8), 12)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 16), 8)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 24), 4)).toU32());
return;
if (Instruction.Operand.fromU32(@intCast(u32, x))) |op| {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, op).toU32());
} else if (Instruction.Operand.fromU32(~@intCast(u32, x))) |op| {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mvn(.al, reg, op).toU32());
} else if (x <= math.maxInt(u16)) {
if (Target.arm.featureSetHas(self.target.cpu.features, .has_v7)) {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.movw(.al, reg, @intCast(u16, x)).toU32());
} else {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 8), 12)).toU32());
}
} else {
return self.fail(src, "ARM registers are 32-bit wide", .{});
// TODO write constant to code and load
// relative to pc
if (Target.arm.featureSetHas(self.target.cpu.features, .has_v7)) {
// immediate: 0xaaaabbbb
// movw reg, #0xbbbb
// movt reg, #0xaaaa
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.movw(.al, reg, @truncate(u16, x)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.movt(.al, reg, @truncate(u16, x >> 16)).toU32());
} else {
// immediate: 0xaabbccdd
// mov reg, #0xaa
// orr reg, reg, #0xbb, 24
// orr reg, reg, #0xcc, 16
// orr reg, reg, #0xdd, 8
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 8), 12)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 16), 8)).toU32());
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 24), 4)).toU32());
}
}
},
.register => |src_reg| {
@@ -2319,6 +2408,18 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
try self.genSetReg(src, reg, .{ .immediate = addr });
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ldr(.al, reg, reg, .{ .offset = Instruction.Offset.none }).toU32());
},
.stack_offset => |unadjusted_off| {
// TODO: ldrb, ldrh
// TODO: maybe addressing from sp instead of fp
if (unadjusted_off <= math.maxInt(u12)) {
mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ldr(.al, reg, .fp, .{
.offset = Instruction.Offset.imm(@intCast(u12, unadjusted_off)),
.positive = false,
}).toU32());
} else {
return self.fail(src, "TODO genSetReg with larger stack offset", .{});
}
},
else => return self.fail(src, "TODO implement genSetReg for arm {}", .{mcv}),
},
.riscv64 => switch (mcv) {

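The rewritten `.immediate` case above tries progressively costlier encodings: a single rotated-immediate mov, then mvn of the bitwise complement, then movw (or a mov/orr pair before v7) for 16-bit values, and finally movw+movt or a four-instruction mov/orr chain. A worked example of the mvn shortcut (hypothetical value and register): 0xffffff00 has no rotated 8-bit form, but its complement 0xff does, so one instruction suffices. The `Instruction` API used here is defined in the second changed file, whose hunks follow.

const x: u32 = 0xffff_ff00;
if (Instruction.Operand.fromU32(~x)) |op| {
    // mvn r0, #0xff  =>  r0 = 0xffffff00
    const word = Instruction.mvn(.al, .r0, op).toU32();
}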

@@ -138,6 +138,29 @@ pub const Instruction = union(enum) {
fixed: u2 = 0b00,
cond: u4,
},
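// Note: packed struct fields are laid out least-significant-first, so in the
// layouts below rn occupies bits 0..3 and cond occupies bits 28..31.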
Multiply: packed struct {
rn: u4,
fixed_1: u4 = 0b1001,
rm: u4,
ra: u4,
rd: u4,
set_cond: u1,
accumulate: u1,
fixed_2: u6 = 0b000000,
cond: u4,
},
MultiplyLong: packed struct {
rn: u4,
fixed_1: u4 = 0b1001,
rm: u4,
rdlo: u4,
rdhi: u4,
set_cond: u1,
accumulate: u1,
unsigned: u1,
fixed_2: u5 = 0b00001,
cond: u4,
},
SingleDataTransfer: packed struct {
offset: u12,
rd: u4,
@@ -317,6 +340,29 @@
},
};
}
/// Tries to convert an unsigned 32-bit integer into an
/// immediate operand: an 8-bit value rotated right by an
/// even amount. Returns null when no such encoding exists.
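/// (e.g. 0xab0 converts to imm=0xab with rotate=14, since 0xab ror 28 == 0xab0)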
pub fn fromU32(x: u32) ?Operand {
const masks = comptime blk: {
const base_mask: u32 = std.math.maxInt(u8);
var result = [_]u32{0} ** 16;
for (result) |*mask, i| mask.* = std.math.rotr(u32, base_mask, 2 * i);
break :blk result;
};
return for (masks) |mask, i| {
if (x & mask == x) {
break Operand{
.Immediate = .{
.imm = @intCast(u8, std.math.rotl(u32, x, 2 * i)),
.rotate = @intCast(u4, i),
},
};
}
} else null;
}
};
/// Represents the offset operand of a load or store
@@ -349,7 +395,7 @@
};
}
pub fn imm(immediate: u8) Offset {
pub fn imm(immediate: u12) Offset {
return Offset{
.Immediate = immediate,
};
@@ -380,6 +426,8 @@
pub fn toU32(self: Instruction) u32 {
return switch (self) {
.DataProcessing => |v| @bitCast(u32, v),
.Multiply => |v| @bitCast(u32, v),
.MultiplyLong => |v| @bitCast(u32, v),
.SingleDataTransfer => |v| @bitCast(u32, v),
.BlockDataTransfer => |v| @bitCast(u32, v),
.Branch => |v| @bitCast(u32, v),
@@ -412,6 +460,70 @@
};
}
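/// Encodes MOVW (top == false) or MOVT (top == true): the 16-bit immediate
/// is split into imm4:imm12, with the high 4 bits carried in the rn slot of
/// the data-processing layout.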
fn specialMov(
cond: Condition,
rd: Register,
imm: u16,
top: bool,
) Instruction {
return Instruction{
.DataProcessing = .{
.cond = @enumToInt(cond),
.i = 1,
.opcode = if (top) 0b1010 else 0b1000,
.s = 0,
.rn = @truncate(u4, imm >> 12),
.rd = rd.id(),
.op2 = @truncate(u12, imm),
},
};
}
fn multiply(
cond: Condition,
set_cond: u1,
rd: Register,
rn: Register,
rm: Register,
ra: ?Register,
) Instruction {
return Instruction{
.Multiply = .{
.cond = @enumToInt(cond),
.accumulate = @boolToInt(ra != null),
.set_cond = set_cond,
.rd = rd.id(),
.rn = rn.id(),
.ra = if (ra) |reg| reg.id() else 0b0000,
.rm = rm.id(),
},
};
}
fn multiplyLong(
cond: Condition,
signed: u1,
accumulate: u1,
set_cond: u1,
rdhi: Register,
rdlo: Register,
rm: Register,
rn: Register,
) Instruction {
return Instruction{
.MultiplyLong = .{
.cond = @enumToInt(cond),
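// Encoding bit 22 selects signedness: 0 for UMULL/UMLAL, 1 for SMULL/SMLAL.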
.unsigned = signed,
.accumulate = accumulate,
.set_cond = set_cond,
.rdlo = rdlo.id(),
.rdhi = rdhi.id(),
.rn = rn.id(),
.rm = rm.id(),
},
};
}
fn singleDataTransfer(
cond: Condition,
rd: Register,
@@ -463,12 +575,12 @@
};
}
fn branch(cond: Condition, offset: i24, link: u1) Instruction {
fn branch(cond: Condition, offset: i26, link: u1) Instruction {
return Instruction{
.Branch = .{
.cond = @enumToInt(cond),
.link = link,
.offset = @bitCast(u24, offset),
.offset = @bitCast(u24, @intCast(i24, offset >> 2)),
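// The i26 byte offset is word-aligned; storing offset >> 2 fits it into
// 24 bits, e.g. a byte offset of 12 encodes as 3 (see the updated tests below).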
},
};
}
@@ -618,10 +730,96 @@
return dataProcessing(cond, .mvn, 1, rd, .r0, op2);
}
// movw and movt
pub fn movw(cond: Condition, rd: Register, imm: u16) Instruction {
return specialMov(cond, rd, imm, false);
}
pub fn movt(cond: Condition, rd: Register, imm: u16) Instruction {
return specialMov(cond, rd, imm, true);
}
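// Usage sketch: materializing 0xdeadbeef on ARMv7 takes two instructions:
//   movw r0, #0xbeef  =>  Instruction.movw(.al, .r0, 0xbeef)
//   movt r0, #0xdead  =>  Instruction.movt(.al, .r0, 0xdead)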
// PSR transfer
pub fn mrs(cond: Condition, rd: Register, psr: Psr) Instruction {
return dataProcessing(cond, if (psr == .cpsr) .tst else .cmp, 0, rd, .r15, Operand.reg(.r0, Operand.Shift.none));
return Instruction{
.DataProcessing = .{
.cond = @enumToInt(cond),
.i = 0,
.opcode = if (psr == .spsr) 0b1010 else 0b1000,
.s = 0,
.rn = 0b1111,
.rd = rd.id(),
.op2 = 0b0000_0000_0000,
},
};
}
pub fn msr(cond: Condition, psr: Psr, op: Operand) Instruction {
return Instruction{
.DataProcessing = .{
.cond = @enumToInt(cond),
.i = 0,
.opcode = if (psr == .spsr) 0b1011 else 0b1001,
.s = 0,
.rn = 0b1111,
.rd = 0b1111,
.op2 = op.toU12(),
},
};
}
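// For both MRS and MSR, bit 1 of the opcode field selects SPSR over CPSR.
// With s = 0 these encodings cannot be confused with TST/TEQ/CMP/CMN,
// which always set condition flags (s = 1).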
// Multiply
pub fn mul(cond: Condition, rd: Register, rn: Register, rm: Register) Instruction {
return multiply(cond, 0, rd, rn, rm, null);
}
pub fn muls(cond: Condition, rd: Register, rn: Register, rm: Register) Instruction {
return multiply(cond, 1, rd, rn, rm, null);
}
pub fn mla(cond: Condition, rd: Register, rn: Register, rm: Register, ra: Register) Instruction {
return multiply(cond, 0, rd, rn, rm, ra);
}
pub fn mlas(cond: Condition, rd: Register, rn: Register, rm: Register, ra: Register) Instruction {
return multiply(cond, 1, rd, rn, rm, ra);
}
// Multiply long
pub fn umull(cond: Condition, rdlo: Register, rdhi: Register, rn: Register, rm: Register) Instruction {
return multiplyLong(cond, 0, 0, 0, rdhi, rdlo, rm, rn);
}
pub fn umulls(cond: Condition, rdlo: Register, rdhi: Register, rn: Register, rm: Register) Instruction {
return multiplyLong(cond, 0, 0, 1, rdhi, rdlo, rm, rn);
}
pub fn umlal(cond: Condition, rdlo: Register, rdhi: Register, rn: Register, rm: Register) Instruction {
return multiplyLong(cond, 0, 1, 0, rdhi, rdlo, rm, rn);
}
pub fn umlals(cond: Condition, rdlo: Register, rdhi: Register, rn: Register, rm: Register) Instruction {
return multiplyLong(cond, 0, 1, 1, rdhi, rdlo, rm, rn);
}
pub fn smull(cond: Condition, rdlo: Register, rdhi: Register, rn: Register, rm: Register) Instruction {
return multiplyLong(cond, 1, 0, 0, rdhi, rdlo, rm, rn);
}
pub fn smulls(cond: Condition, rdlo: Register, rdhi: Register, rn: Register, rm: Register) Instruction {
return multiplyLong(cond, 1, 0, 1, rdhi, rdlo, rm, rn);
}
pub fn smlal(cond: Condition, rdlo: Register, rdhi: Register, rn: Register, rm: Register) Instruction {
return multiplyLong(cond, 1, 1, 0, rdhi, rdlo, rm, rn);
}
pub fn smlals(cond: Condition, rdlo: Register, rdhi: Register, rn: Register, rm: Register) Instruction {
return multiplyLong(cond, 1, 1, 1, rdhi, rdlo, rm, rn);
}
// Single data transfer
@@ -697,11 +895,11 @@
// Branch
pub fn b(cond: Condition, offset: i24) Instruction {
pub fn b(cond: Condition, offset: i26) Instruction {
return branch(cond, offset, 0);
}
pub fn bl(cond: Condition, offset: i24) Instruction {
pub fn bl(cond: Condition, offset: i26) Instruction {
return branch(cond, offset, 1);
}
@@ -731,6 +929,10 @@
// Aliases
pub fn nop() Instruction {
return mov(.al, .r0, Instruction.Operand.reg(.r0, Instruction.Operand.Shift.none));
}
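// mov r0, r0 is the traditional ARM nop; a dedicated NOP hint only exists
// on ARMv6K and later.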
pub fn pop(cond: Condition, args: anytype) Instruction {
if (@typeInfo(@TypeOf(args)) != .Struct) {
@compileError("Expected tuple or struct argument, found " ++ @typeName(@TypeOf(args)));
@@ -805,6 +1007,14 @@
.inst = Instruction.mrs(.al, .r5, .cpsr),
.expected = 0b1110_00010_0_001111_0101_000000000000,
},
.{ // mul r0, r1, r2
.inst = Instruction.mul(.al, .r0, .r1, .r2),
.expected = 0b1110_000000_0_0_0000_0000_0010_1001_0001,
},
.{ // umlal r0, r1, r5, r6
.inst = Instruction.umlal(.al, .r0, .r1, .r5, .r6),
.expected = 0b1110_00001_0_1_0_0001_0000_0110_1001_0101,
},
.{ // ldr r0, [r2, #42]
.inst = Instruction.ldr(.al, .r0, .r2, .{
.offset = Instruction.Offset.imm(42),
@@ -819,11 +1029,11 @@
},
.{ // b #12
.inst = Instruction.b(.al, 12),
.expected = 0b1110_101_0_0000_0000_0000_0000_0000_1100,
.expected = 0b1110_101_0_0000_0000_0000_0000_0000_0011,
},
.{ // bl #-4
.inst = Instruction.bl(.al, -4),
.expected = 0b1110_101_1_1111_1111_1111_1111_1111_1100,
.expected = 0b1110_101_1_1111_1111_1111_1111_1111_1111,
},
.{ // bx lr
.inst = Instruction.bx(.al, .lr),