From b1a61a6d51919cb8d5d87c53d495406491c53ebf Mon Sep 17 00:00:00 2001
From: LemonBoy
Date: Mon, 29 Apr 2019 19:11:37 +0200
Subject: [PATCH] compiler-rt: Add __mulodi4

---
 CMakeLists.txt                           |  1 +
 std/special/compiler_rt.zig              |  1 +
 std/special/compiler_rt/mulodi4.zig      | 51 +++++++++++++
 std/special/compiler_rt/mulodi4_test.zig | 94 ++++++++++++++++++++++++
 4 files changed, 147 insertions(+)
 create mode 100644 std/special/compiler_rt/mulodi4.zig
 create mode 100644 std/special/compiler_rt/mulodi4_test.zig

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 39892e6c2..1b017a881 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -676,6 +676,7 @@ set(ZIG_STD_FILES
     "special/compiler_rt/modti3.zig"
     "special/compiler_rt/mulXf3.zig"
     "special/compiler_rt/muloti4.zig"
+    "special/compiler_rt/mulodi4.zig"
     "special/compiler_rt/multi3.zig"
     "special/compiler_rt/negXf2.zig"
     "special/compiler_rt/popcountdi2.zig"
diff --git a/std/special/compiler_rt.zig b/std/special/compiler_rt.zig
index d6af9535d..76aebc24d 100644
--- a/std/special/compiler_rt.zig
+++ b/std/special/compiler_rt.zig
@@ -221,6 +221,7 @@ comptime {
         @export("__umodti3", @import("compiler_rt/umodti3.zig").__umodti3, linkage);
     }
     @export("__muloti4", @import("compiler_rt/muloti4.zig").__muloti4, linkage);
+    @export("__mulodi4", @import("compiler_rt/mulodi4.zig").__mulodi4, linkage);
 }
 
 const std = @import("std");
diff --git a/std/special/compiler_rt/mulodi4.zig b/std/special/compiler_rt/mulodi4.zig
new file mode 100644
index 000000000..82e9ef325
--- /dev/null
+++ b/std/special/compiler_rt/mulodi4.zig
@@ -0,0 +1,51 @@
+const builtin = @import("builtin");
+const compiler_rt = @import("../compiler_rt.zig");
+const maxInt = @import("std").math.maxInt;
+const minInt = @import("std").math.minInt;
+
+/// Multiplies `a` by `b` with wrapping semantics and returns the truncated
+/// 64-bit product. `overflow.*` is set to 1 when the exact product does not
+/// fit in an i64 and to 0 otherwise.
+pub extern fn __mulodi4(a: i64, b: i64, overflow: *c_int) i64 {
+    @setRuntimeSafety(builtin.is_test);
+
+    const min = @bitCast(i64, u64(1 << (i64.bit_count - 1)));
+    const max = ~min;
+
+    overflow.* = 0;
+    const result = a *% b;
+
+    // Edge cases: `min` has no positive counterpart, so it only survives a
+    // multiplication by 0 or 1.
+    if (a == min) {
+        if (b != 0 and b != 1) overflow.* = 1;
+        return result;
+    }
+    if (b == min) {
+        if (a != 0 and a != 1) overflow.* = 1;
+        return result;
+    }
+
+    // Take absolute value of a and b via abs(x) = (x^(x >> 63)) - (x >> 63).
+    const abs_a = (a ^ (a >> 63)) -% (a >> 63);
+    const abs_b = (b ^ (b >> 63)) -% (b >> 63);
+
+    // Unitary magnitude, cannot have overflow
+    if (abs_a < 2 or abs_b < 2) return result;
+
+    // Compare the signs of the operands
+    if ((a ^ b) >> 63 == 0) {
+        // Same sign: the product is positive and must not exceed `max`.
+        if (abs_a > @divTrunc(max, abs_b)) overflow.* = 1;
+    } else {
+        // Opposite signs: the product is negative and may reach `min`, whose
+        // magnitude is one larger than `max`.
+        if (abs_a > @divTrunc(min, -abs_b)) overflow.* = 1;
+    }
+
+    return result;
+}
+
+test "import mulodi4" {
+    _ = @import("mulodi4_test.zig");
+}
diff --git a/std/special/compiler_rt/mulodi4_test.zig b/std/special/compiler_rt/mulodi4_test.zig
new file mode 100644
index 000000000..7575c7704
--- /dev/null
+++ b/std/special/compiler_rt/mulodi4_test.zig
@@ -0,0 +1,94 @@
+const __mulodi4 = @import("mulodi4.zig").__mulodi4;
+const testing = @import("std").testing;
+
+// Checks the overflow flag and, when no overflow is expected, the product.
+fn test__mulodi4(a: i64, b: i64, expected: i64, expected_overflow: c_int) void {
+    var overflow: c_int = undefined;
+    const x = __mulodi4(a, b, &overflow);
+    testing.expect(overflow == expected_overflow and (expected_overflow != 0 or x == expected));
+}
+
+test "mulodi4" {
+    test__mulodi4(0, 0, 0, 0);
+    test__mulodi4(0, 1, 0, 0);
+    test__mulodi4(1, 0, 0, 0);
+    test__mulodi4(0, 10, 0, 0);
+    test__mulodi4(10, 0, 0, 0);
+    test__mulodi4(0, 81985529216486895, 0, 0);
+    test__mulodi4(81985529216486895, 0, 0, 0);
+
+    test__mulodi4(0, -1, 0, 0);
+    test__mulodi4(-1, 0, 0, 0);
+    test__mulodi4(0, -10, 0, 0);
+    test__mulodi4(-10, 0, 0, 0);
+    test__mulodi4(0, -81985529216486895, 0, 0);
+    test__mulodi4(-81985529216486895, 0, 0, 0);
+
+    test__mulodi4(1, 1, 1, 0);
+    test__mulodi4(1, 10, 10, 0);
+    test__mulodi4(10, 1, 10, 0);
+    test__mulodi4(1, 81985529216486895, 81985529216486895, 0);
+    test__mulodi4(81985529216486895, 1, 81985529216486895, 0);
+
+    test__mulodi4(1, -1, -1, 0);
+    test__mulodi4(1, -10, -10, 0);
+    test__mulodi4(-10, 1, -10, 0);
+    test__mulodi4(1, -81985529216486895, -81985529216486895, 0);
+    test__mulodi4(-81985529216486895, 1, -81985529216486895, 0);
+
+    test__mulodi4(3037000499, 3037000499, 9223372030926249001, 0);
+    test__mulodi4(-3037000499, 3037000499, -9223372030926249001, 0);
+    test__mulodi4(3037000499, -3037000499, -9223372030926249001, 0);
+    test__mulodi4(-3037000499, -3037000499, 9223372030926249001, 0);
+
+    test__mulodi4(4398046511103, 2097152, 9223372036852678656, 0);
+    test__mulodi4(-4398046511103, 2097152, -9223372036852678656, 0);
+    test__mulodi4(4398046511103, -2097152, -9223372036852678656, 0);
+    test__mulodi4(-4398046511103, -2097152, 9223372036852678656, 0);
+
+    test__mulodi4(2097152, 4398046511103, 9223372036852678656, 0);
+    test__mulodi4(-2097152, 4398046511103, -9223372036852678656, 0);
+    test__mulodi4(2097152, -4398046511103, -9223372036852678656, 0);
+    test__mulodi4(-2097152, -4398046511103, 9223372036852678656, 0);
+
+    test__mulodi4(0x7FFFFFFFFFFFFFFF, -2, 2, 1);
+    test__mulodi4(-2, 0x7FFFFFFFFFFFFFFF, 2, 1);
+    test__mulodi4(0x7FFFFFFFFFFFFFFF, -1, @bitCast(i64, u64(0x8000000000000001)), 0);
+    test__mulodi4(-1, 0x7FFFFFFFFFFFFFFF, @bitCast(i64, u64(0x8000000000000001)), 0);
+    test__mulodi4(0x7FFFFFFFFFFFFFFF, 0, 0, 0);
+    test__mulodi4(0, 0x7FFFFFFFFFFFFFFF, 0, 0);
+    test__mulodi4(0x7FFFFFFFFFFFFFFF, 1, 0x7FFFFFFFFFFFFFFF, 0);
+    test__mulodi4(1, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0);
+    test__mulodi4(0x7FFFFFFFFFFFFFFF, 2, @bitCast(i64, u64(0x8000000000000001)), 1);
+    test__mulodi4(2, 0x7FFFFFFFFFFFFFFF, @bitCast(i64, u64(0x8000000000000001)), 1);
+
+    test__mulodi4(@bitCast(i64, u64(0x8000000000000000)), -2, @bitCast(i64, u64(0x8000000000000000)), 1);
+    test__mulodi4(-2, @bitCast(i64, u64(0x8000000000000000)), @bitCast(i64, u64(0x8000000000000000)), 1);
+    test__mulodi4(@bitCast(i64, u64(0x8000000000000000)), -1, @bitCast(i64, u64(0x8000000000000000)), 1);
+    test__mulodi4(-1, @bitCast(i64, u64(0x8000000000000000)), @bitCast(i64, u64(0x8000000000000000)), 1);
+    test__mulodi4(@bitCast(i64, u64(0x8000000000000000)), 0, 0, 0);
+    test__mulodi4(0, @bitCast(i64, u64(0x8000000000000000)), 0, 0);
+    test__mulodi4(@bitCast(i64, u64(0x8000000000000000)), 1, @bitCast(i64, u64(0x8000000000000000)), 0);
+    test__mulodi4(1, @bitCast(i64, u64(0x8000000000000000)), @bitCast(i64, u64(0x8000000000000000)), 0);
+    test__mulodi4(@bitCast(i64, u64(0x8000000000000000)), 2, @bitCast(i64, u64(0x8000000000000000)), 1);
+    test__mulodi4(2, @bitCast(i64, u64(0x8000000000000000)), @bitCast(i64, u64(0x8000000000000000)), 1);
+
+    test__mulodi4(@bitCast(i64, u64(0x8000000000000001)), -2, @bitCast(i64, u64(0x8000000000000001)), 1);
+    test__mulodi4(-2, @bitCast(i64, u64(0x8000000000000001)), @bitCast(i64, u64(0x8000000000000001)), 1);
+    test__mulodi4(@bitCast(i64, u64(0x8000000000000001)), -1, 0x7FFFFFFFFFFFFFFF, 0);
+    test__mulodi4(-1, @bitCast(i64, u64(0x8000000000000001)), 0x7FFFFFFFFFFFFFFF, 0);
+    test__mulodi4(@bitCast(i64, u64(0x8000000000000001)), 0, 0, 0);
+    test__mulodi4(0, @bitCast(i64, u64(0x8000000000000001)), 0, 0);
+    test__mulodi4(@bitCast(i64, u64(0x8000000000000001)), 1, @bitCast(i64, u64(0x8000000000000001)), 0);
+    test__mulodi4(1, @bitCast(i64, u64(0x8000000000000001)), @bitCast(i64, u64(0x8000000000000001)), 0);
+    test__mulodi4(@bitCast(i64, u64(0x8000000000000001)), 2, @bitCast(i64, u64(0x8000000000000000)), 1);
+    test__mulodi4(2, @bitCast(i64, u64(0x8000000000000001)), @bitCast(i64, u64(0x8000000000000000)), 1);
+
+    // Products whose magnitude is exactly 2^63: -2^63 fits in an i64, +2^63 does not.
+    test__mulodi4(4611686018427387904, -2, @bitCast(i64, u64(0x8000000000000000)), 0);
+    test__mulodi4(-2, 4611686018427387904, @bitCast(i64, u64(0x8000000000000000)), 0);
+    test__mulodi4(4294967296, -2147483648, @bitCast(i64, u64(0x8000000000000000)), 0);
+    test__mulodi4(4611686018427387904, 2, @bitCast(i64, u64(0x8000000000000000)), 1);
+    test__mulodi4(-4611686018427387904, -2, @bitCast(i64, u64(0x8000000000000000)), 1);
+    test__mulodi4(4294967296, 2147483648, @bitCast(i64, u64(0x8000000000000000)), 1);
+}