From d9be6e5dc693fcbcb5f4c343a3d2b0b9fc786e25 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C3=ABl=20Larouche?=
Date: Sat, 11 Jan 2020 17:10:00 -0500
Subject: [PATCH] Port clzsi2 from compiler_rt, required for using std.fmt.format on some ARM architecture.

---
 lib/std/special/compiler_rt.zig             |   6 +-
 lib/std/special/compiler_rt/clzsi2.zig      | 128 ++++++++
 lib/std/special/compiler_rt/clzsi2_test.zig | 296 ++++++++++++++++++++
 3 files changed, 429 insertions(+), 1 deletion(-)
 create mode 100644 lib/std/special/compiler_rt/clzsi2.zig
 create mode 100644 lib/std/special/compiler_rt/clzsi2_test.zig

diff --git a/lib/std/special/compiler_rt.zig b/lib/std/special/compiler_rt.zig
index cf255804a..9b225dbad 100644
--- a/lib/std/special/compiler_rt.zig
+++ b/lib/std/special/compiler_rt.zig
@@ -146,6 +146,8 @@ comptime {
     @export(@import("compiler_rt/negXf2.zig").__negsf2, .{ .name = "__negsf2", .linkage = linkage });
     @export(@import("compiler_rt/negXf2.zig").__negdf2, .{ .name = "__negdf2", .linkage = linkage });
 
+    @export(@import("compiler_rt/clzsi2.zig").__clzsi2, .{ .name = "__clzsi2", .linkage = linkage });
+
     if (is_arm_arch and !is_arm_64 and !is_test) {
         @export(@import("compiler_rt/arm.zig").__aeabi_unwind_cpp_pr0, .{ .name = "__aeabi_unwind_cpp_pr0", .linkage = linkage });
         @export(@import("compiler_rt/arm.zig").__aeabi_unwind_cpp_pr1, .{ .name = "__aeabi_unwind_cpp_pr1", .linkage = linkage });
@@ -177,7 +179,9 @@ comptime {
         @export(@import("compiler_rt/arm.zig").__aeabi_memclr, .{ .name = "__aeabi_memclr4", .linkage = linkage });
         @export(@import("compiler_rt/arm.zig").__aeabi_memclr, .{ .name = "__aeabi_memclr8", .linkage = linkage });
 
-        @export(@import("compiler_rt/arm.zig").__aeabi_read_tp, .{ .name = "__aeabi_read_tp", .linkage = linkage });
+        if (builtin.os == .linux) {
+            @export(@import("compiler_rt/arm.zig").__aeabi_read_tp, .{ .name = "__aeabi_read_tp", .linkage = linkage });
+        }
 
         @export(@import("compiler_rt/extendXfYf2.zig").__aeabi_f2d, .{ .name = "__aeabi_f2d", .linkage = linkage });
         @export(@import("compiler_rt/floatsiXf.zig").__aeabi_i2d, .{ .name = "__aeabi_i2d", .linkage = linkage });
diff --git a/lib/std/special/compiler_rt/clzsi2.zig b/lib/std/special/compiler_rt/clzsi2.zig
new file mode 100644
index 000000000..0cbfdb8db
--- /dev/null
+++ b/lib/std/special/compiler_rt/clzsi2.zig
@@ -0,0 +1,128 @@
+// Ported from:
+//
+// https://github.com/llvm-mirror/compiler-rt/blob/f0745e8476f069296a7c71accedd061dce4cdf79/lib/builtins/clzsi2.c
+// https://github.com/llvm-mirror/compiler-rt/blob/f0745e8476f069296a7c71accedd061dce4cdf79/lib/builtins/arm/clzsi2.S
+const builtin = @import("builtin");
+
+// Portable implementation: counts the leading zero bits of `a`, viewed as
+// an unsigned 32-bit value.
+// Precondition: a != 0
+fn __clzsi2_generic(a: i32) callconv(.C) i32 {
+    @setRuntimeSafety(builtin.is_test);
+
+    var x = @bitCast(u32, a);
+    var n: i32 = 32;
+
+    // Count first bit set using binary search, from Hacker's Delight
+    var y: u32 = 0;
+    inline for ([_]i32{ 16, 8, 4, 2, 1 }) |shift| {
+        y = x >> shift;
+        if (y != 0) {
+            n = n - shift;
+            x = y;
+        }
+    }
+
+    return n - @bitCast(i32, x);
+}
+
+// Implementation for targets with the CLZ instruction: replaces r0 with its
+// leading-zero count and returns with `bx lr`.
+fn __clzsi2_arm_clz(a: i32) callconv(.Naked) noreturn {
+    asm volatile (
+        \\ clz r0,r0
+        \\ bx lr
+    );
+    unreachable;
+}
+
+// ARM-mode implementation without CLZ: a binary search using conditionally
+// executed instructions. Argument and result are both in r0.
+fn __clzsi2_arm32(a: i32) callconv(.Naked) noreturn {
+    asm volatile (
+        \\ // Assumption: n != 0
+        \\ // r0: n
+        \\ // r1: count of leading zeros in n + 1
+        \\ // r2: scratch register for shifted r0
+        \\ mov r1, #1
+        \\
+        \\ // Basic block:
+        \\ // if ((r0 >> SHIFT) == 0)
+        \\ //   r1 += SHIFT;
+        \\ // else
+        \\ //   r0 >>= SHIFT;
+        \\ // for descending powers of two as SHIFT.
+        \\ lsrs r2, r0, #16
+        \\ movne r0, r2
+        \\ addeq r1, #16
+        \\
+        \\ lsrs r2, r0, #8
+        \\ movne r0, r2
+        \\ addeq r1, #8
+        \\
+        \\ lsrs r2, r0, #4
+        \\ movne r0, r2
+        \\ addeq r1, #4
+        \\
+        \\ lsrs r2, r0, #2
+        \\ movne r0, r2
+        \\ addeq r1, #2
+        \\
+        \\ // The basic block invariants at this point are (r0 >> 2) == 0 and
+        \\ // r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
+        \\ //
+        \\ // r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)
+        \\ // ---+----------------+----------------+------------+--------------
+        \\ //  1 |        1       |        0       |      0     |       1
+        \\ //  2 |        0       |        1       |     -1     |       0
+        \\ //  3 |        0       |        1       |     -1     |       0
+        \\ //
+        \\ // The r1's initial value of 1 compensates for the 1 here.
+        \\ sub r0, r1, r0, lsr #1
+        \\ bx lr
+    );
+    unreachable;
+}
+
+// Whether the target sub-architecture is treated as providing the CLZ instruction.
+const can_use_arm_clz = switch (builtin.arch) {
+    .arm, .armeb => |sub_arch| switch (sub_arch) {
+        .v4t => false,
+        .v6m => false,
+        else => true,
+    },
+    .thumb, .thumbeb => |sub_arch| switch (sub_arch) {
+        .v6,
+        .v6k,
+        .v5,
+        .v5te,
+        .v4t,
+        => false,
+        else => true,
+    },
+    else => false,
+};
+
+// True for the `arm` and `armeb` architectures, false for everything else.
+const is_arm32_no_thumb = switch (builtin.arch) {
+    builtin.Arch.arm,
+    builtin.Arch.armeb,
+    => true,
+    else => false,
+};
+
+// Implementation selected at compile time for the target: CLZ when
+// available, otherwise the ARM assembly fallback, otherwise the generic code.
+pub const __clzsi2 = blk: {
+    if (comptime can_use_arm_clz) {
+        break :blk __clzsi2_arm_clz;
+    } else if (comptime is_arm32_no_thumb) {
+        break :blk __clzsi2_arm32;
+    } else {
+        break :blk __clzsi2_generic;
+    }
+};
+
+test "test clzsi2" {
+    _ = @import("clzsi2_test.zig");
+}
diff --git a/lib/std/special/compiler_rt/clzsi2_test.zig b/lib/std/special/compiler_rt/clzsi2_test.zig
new file mode 100644
index 000000000..ff9445584
--- /dev/null
+++ b/lib/std/special/compiler_rt/clzsi2_test.zig
@@ -0,0 +1,296 @@
+const clzsi2 = @import("clzsi2.zig");
+const testing = @import("std").testing;
+
+// Calls the selected __clzsi2 implementation through a C calling convention
+// function pointer and checks that it returns `expected` for the bit pattern `a`.
+fn test__clzsi2(a: u32, expected: i32) void {
+    var nakedClzsi2 = clzsi2.__clzsi2;
+    var actualClzsi2 = @ptrCast(fn (a: i32) callconv(.C) i32, nakedClzsi2);
+    // Reinterpret the bits instead of using @intCast: inputs with the top bit
+    // set (0x80000000 and above) do not fit in an i32.
+    var x = @bitCast(i32, a);
+    var result = actualClzsi2(x);
+    testing.expectEqual(expected, result);
+}
+
+test "clzsi2" {
+    test__clzsi2(0x00800000, 8);
+    test__clzsi2(0x01000000, 7);
+    test__clzsi2(0x02000000, 6);
+    test__clzsi2(0x03000000, 6);
+    test__clzsi2(0x04000000, 5);
+    test__clzsi2(0x05000000, 5);
+    test__clzsi2(0x06000000, 5);
+    test__clzsi2(0x07000000, 5);
+    test__clzsi2(0x08000000, 4);
+    test__clzsi2(0x09000000, 4);
+    test__clzsi2(0x0A000000, 4);
+    test__clzsi2(0x0B000000, 4);
+    test__clzsi2(0x0C000000, 4);
+    test__clzsi2(0x0D000000, 4);
+    test__clzsi2(0x0E000000, 4);
+    test__clzsi2(0x0F000000, 4);
+    test__clzsi2(0x10000000, 3);
+    test__clzsi2(0x11000000, 3);
+    test__clzsi2(0x12000000, 3);
+    test__clzsi2(0x13000000, 3);
+    test__clzsi2(0x14000000, 3);
+    test__clzsi2(0x15000000, 3);
+    test__clzsi2(0x16000000, 3);
+    test__clzsi2(0x17000000, 3);
+    test__clzsi2(0x18000000, 3);
+    test__clzsi2(0x19000000, 3);
+    test__clzsi2(0x1A000000, 3);
+    test__clzsi2(0x1B000000, 3);
+    test__clzsi2(0x1C000000, 3);
+    test__clzsi2(0x1D000000, 3);
+    test__clzsi2(0x1E000000, 3);
+    test__clzsi2(0x1F000000, 3);
+    test__clzsi2(0x20000000, 2);
+    test__clzsi2(0x21000000, 2);
+    test__clzsi2(0x22000000, 2);
+    test__clzsi2(0x23000000, 2);
+    test__clzsi2(0x24000000, 2);
+    test__clzsi2(0x25000000, 2);
+    test__clzsi2(0x26000000, 2);
+    test__clzsi2(0x27000000, 2);
+    test__clzsi2(0x28000000, 2);
+    test__clzsi2(0x29000000, 2);
+    test__clzsi2(0x2A000000, 2);
+    test__clzsi2(0x2B000000, 2);
+    test__clzsi2(0x2C000000, 2);
+    test__clzsi2(0x2D000000, 2);
+    test__clzsi2(0x2E000000, 2);
+    test__clzsi2(0x2F000000, 2);
+    test__clzsi2(0x30000000, 2);
+    test__clzsi2(0x31000000, 2);
+    test__clzsi2(0x32000000, 2);
+    test__clzsi2(0x33000000, 2);
+    test__clzsi2(0x34000000, 2);
+    test__clzsi2(0x35000000, 2);
+    test__clzsi2(0x36000000, 2);
+    test__clzsi2(0x37000000, 2);
+    test__clzsi2(0x38000000, 2);
+    test__clzsi2(0x39000000, 2);
+    test__clzsi2(0x3A000000, 2);
+    test__clzsi2(0x3B000000, 2);
+    test__clzsi2(0x3C000000, 2);
+    test__clzsi2(0x3D000000, 2);
+    test__clzsi2(0x3E000000, 2);
+    test__clzsi2(0x3F000000, 2);
+    test__clzsi2(0x40000000, 1);
+    test__clzsi2(0x41000000, 1);
+    test__clzsi2(0x42000000, 1);
+    test__clzsi2(0x43000000, 1);
+    test__clzsi2(0x44000000, 1);
+    test__clzsi2(0x45000000, 1);
+    test__clzsi2(0x46000000, 1);
+    test__clzsi2(0x47000000, 1);
+    test__clzsi2(0x48000000, 1);
+    test__clzsi2(0x49000000, 1);
+    test__clzsi2(0x4A000000, 1);
+    test__clzsi2(0x4B000000, 1);
+    test__clzsi2(0x4C000000, 1);
+    test__clzsi2(0x4D000000, 1);
+    test__clzsi2(0x4E000000, 1);
+    test__clzsi2(0x4F000000, 1);
+    test__clzsi2(0x50000000, 1);
+    test__clzsi2(0x51000000, 1);
+    test__clzsi2(0x52000000, 1);
+    test__clzsi2(0x53000000, 1);
+    test__clzsi2(0x54000000, 1);
+    test__clzsi2(0x55000000, 1);
+    test__clzsi2(0x56000000, 1);
+    test__clzsi2(0x57000000, 1);
+    test__clzsi2(0x58000000, 1);
+    test__clzsi2(0x59000000, 1);
+    test__clzsi2(0x5A000000, 1);
+    test__clzsi2(0x5B000000, 1);
+    test__clzsi2(0x5C000000, 1);
+    test__clzsi2(0x5D000000, 1);
+    test__clzsi2(0x5E000000, 1);
+    test__clzsi2(0x5F000000, 1);
+    test__clzsi2(0x60000000, 1);
+    test__clzsi2(0x61000000, 1);
+    test__clzsi2(0x62000000, 1);
+    test__clzsi2(0x63000000, 1);
+    test__clzsi2(0x64000000, 1);
+    test__clzsi2(0x65000000, 1);
+    test__clzsi2(0x66000000, 1);
+    test__clzsi2(0x67000000, 1);
+    test__clzsi2(0x68000000, 1);
+    test__clzsi2(0x69000000, 1);
+    test__clzsi2(0x6A000000, 1);
+    test__clzsi2(0x6B000000, 1);
+    test__clzsi2(0x6C000000, 1);
+    test__clzsi2(0x6D000000, 1);
+    test__clzsi2(0x6E000000, 1);
+    test__clzsi2(0x6F000000, 1);
+    test__clzsi2(0x70000000, 1);
+    test__clzsi2(0x71000000, 1);
+    test__clzsi2(0x72000000, 1);
+    test__clzsi2(0x73000000, 1);
+    test__clzsi2(0x74000000, 1);
+    test__clzsi2(0x75000000, 1);
+    test__clzsi2(0x76000000, 1);
+    test__clzsi2(0x77000000, 1);
+    test__clzsi2(0x78000000, 1);
+    test__clzsi2(0x79000000, 1);
+    test__clzsi2(0x7A000000, 1);
+    test__clzsi2(0x7B000000, 1);
+    test__clzsi2(0x7C000000, 1);
+    test__clzsi2(0x7D000000, 1);
+    test__clzsi2(0x7E000000, 1);
+    test__clzsi2(0x7F000000, 1);
+    test__clzsi2(0x80000000, 0);
+    test__clzsi2(0x81000000, 0);
+    test__clzsi2(0x82000000, 0);
+    test__clzsi2(0x83000000, 0);
+    test__clzsi2(0x84000000, 0);
+    test__clzsi2(0x85000000, 0);
+    test__clzsi2(0x86000000, 0);
+    test__clzsi2(0x87000000, 0);
+    test__clzsi2(0x88000000, 0);
+    test__clzsi2(0x89000000, 0);
+    test__clzsi2(0x8A000000, 0);
+    test__clzsi2(0x8B000000, 0);
+    test__clzsi2(0x8C000000, 0);
+    test__clzsi2(0x8D000000, 0);
+    test__clzsi2(0x8E000000, 0);
+    test__clzsi2(0x8F000000, 0);
+    test__clzsi2(0x90000000, 0);
+    test__clzsi2(0x91000000, 0);
+    test__clzsi2(0x92000000, 0);
+    test__clzsi2(0x93000000, 0);
+    test__clzsi2(0x94000000, 0);
+    test__clzsi2(0x95000000, 0);
+    test__clzsi2(0x96000000, 0);
+    test__clzsi2(0x97000000, 0);
+    test__clzsi2(0x98000000, 0);
+    test__clzsi2(0x99000000, 0);
+    test__clzsi2(0x9A000000, 0);
+    test__clzsi2(0x9B000000, 0);
+    test__clzsi2(0x9C000000, 0);
+    test__clzsi2(0x9D000000, 0);
+    test__clzsi2(0x9E000000, 0);
+    test__clzsi2(0x9F000000, 0);
+    test__clzsi2(0xA0000000, 0);
+    test__clzsi2(0xA1000000, 0);
+    test__clzsi2(0xA2000000, 0);
+    test__clzsi2(0xA3000000, 0);
+    test__clzsi2(0xA4000000, 0);
+    test__clzsi2(0xA5000000, 0);
+    test__clzsi2(0xA6000000, 0);
+    test__clzsi2(0xA7000000, 0);
+    test__clzsi2(0xA8000000, 0);
+    test__clzsi2(0xA9000000, 0);
+    test__clzsi2(0xAA000000, 0);
+    test__clzsi2(0xAB000000, 0);
+    test__clzsi2(0xAC000000, 0);
+    test__clzsi2(0xAD000000, 0);
+    test__clzsi2(0xAE000000, 0);
+    test__clzsi2(0xAF000000, 0);
+    test__clzsi2(0xB0000000, 0);
+    test__clzsi2(0xB1000000, 0);
+    test__clzsi2(0xB2000000, 0);
+    test__clzsi2(0xB3000000, 0);
+    test__clzsi2(0xB4000000, 0);
+    test__clzsi2(0xB5000000, 0);
+    test__clzsi2(0xB6000000, 0);
+    test__clzsi2(0xB7000000, 0);
+    test__clzsi2(0xB8000000, 0);
+    test__clzsi2(0xB9000000, 0);
+    test__clzsi2(0xBA000000, 0);
+    test__clzsi2(0xBB000000, 0);
+    test__clzsi2(0xBC000000, 0);
+    test__clzsi2(0xBD000000, 0);
+    test__clzsi2(0xBE000000, 0);
+    test__clzsi2(0xBF000000, 0);
+    test__clzsi2(0xC0000000, 0);
+    test__clzsi2(0xC1000000, 0);
+    test__clzsi2(0xC2000000, 0);
+    test__clzsi2(0xC3000000, 0);
+    test__clzsi2(0xC4000000, 0);
+    test__clzsi2(0xC5000000, 0);
+    test__clzsi2(0xC6000000, 0);
+    test__clzsi2(0xC7000000, 0);
+    test__clzsi2(0xC8000000, 0);
+    test__clzsi2(0xC9000000, 0);
+    test__clzsi2(0xCA000000, 0);
+    test__clzsi2(0xCB000000, 0);
+    test__clzsi2(0xCC000000, 0);
+    test__clzsi2(0xCD000000, 0);
+    test__clzsi2(0xCE000000, 0);
+    test__clzsi2(0xCF000000, 0);
+    test__clzsi2(0xD0000000, 0);
+    test__clzsi2(0xD1000000, 0);
+    test__clzsi2(0xD2000000, 0);
+    test__clzsi2(0xD3000000, 0);
+    test__clzsi2(0xD4000000, 0);
+    test__clzsi2(0xD5000000, 0);
+    test__clzsi2(0xD6000000, 0);
+    test__clzsi2(0xD7000000, 0);
+    test__clzsi2(0xD8000000, 0);
+    test__clzsi2(0xD9000000, 0);
+    test__clzsi2(0xDA000000, 0);
+    test__clzsi2(0xDB000000, 0);
+    test__clzsi2(0xDC000000, 0);
+    test__clzsi2(0xDD000000, 0);
+    test__clzsi2(0xDE000000, 0);
+    test__clzsi2(0xDF000000, 0);
+    test__clzsi2(0xE0000000, 0);
+    test__clzsi2(0xE1000000, 0);
+    test__clzsi2(0xE2000000, 0);
+    test__clzsi2(0xE3000000, 0);
+    test__clzsi2(0xE4000000, 0);
+    test__clzsi2(0xE5000000, 0);
+    test__clzsi2(0xE6000000, 0);
+    test__clzsi2(0xE7000000, 0);
+    test__clzsi2(0xE8000000, 0);
+    test__clzsi2(0xE9000000, 0);
+    test__clzsi2(0xEA000000, 0);
+    test__clzsi2(0xEB000000, 0);
+    test__clzsi2(0xEC000000, 0);
+    test__clzsi2(0xED000000, 0);
+    test__clzsi2(0xEE000000, 0);
+    test__clzsi2(0xEF000000, 0);
+    test__clzsi2(0xF0000000, 0);
+    test__clzsi2(0xF1000000, 0);
+    test__clzsi2(0xF2000000, 0);
+    test__clzsi2(0xF3000000, 0);
+    test__clzsi2(0xF4000000, 0);
+    test__clzsi2(0xF5000000, 0);
+    test__clzsi2(0xF6000000, 0);
+    test__clzsi2(0xF7000000, 0);
+    test__clzsi2(0xF8000000, 0);
+    test__clzsi2(0xF9000000, 0);
+    test__clzsi2(0xFA000000, 0);
+    test__clzsi2(0xFB000000, 0);
+    test__clzsi2(0xFC000000, 0);
+    test__clzsi2(0xFD000000, 0);
+    test__clzsi2(0xFE000000, 0);
+    test__clzsi2(0xFF000000, 0);
+    test__clzsi2(0x00000001, 31);
+    test__clzsi2(0x00000002, 30);
+    test__clzsi2(0x00000004, 29);
+    test__clzsi2(0x00000008, 28);
+    test__clzsi2(0x00000010, 27);
+    test__clzsi2(0x00000020, 26);
+    test__clzsi2(0x00000040, 25);
+    test__clzsi2(0x00000080, 24);
+    test__clzsi2(0x00000100, 23);
+    test__clzsi2(0x00000200, 22);
+    test__clzsi2(0x00000400, 21);
+    test__clzsi2(0x00000800, 20);
+    test__clzsi2(0x00001000, 19);
+    test__clzsi2(0x00002000, 18);
+    test__clzsi2(0x00004000, 17);
+    test__clzsi2(0x00008000, 16);
+    test__clzsi2(0x00010000, 15);
+    test__clzsi2(0x00020000, 14);
+    test__clzsi2(0x00040000, 13);
+    test__clzsi2(0x00080000, 12);
+    test__clzsi2(0x00100000, 11);
+    test__clzsi2(0x00200000, 10);
+    test__clzsi2(0x00400000, 9);
+}