make udivmod generic and add tests

master
Andrew Kelley 2017-08-18 17:20:03 -04:00
parent 51bde26842
commit 33c592e981
6 changed files with 20969 additions and 569 deletions

View File

@ -325,6 +325,8 @@ install(FILES "${CMAKE_SOURCE_DIR}/std/special/compiler_rt/fixunstfsi.zig" DESTI
install(FILES "${CMAKE_SOURCE_DIR}/std/special/compiler_rt/fixunstfti.zig" DESTINATION "${ZIG_STD_DEST}/special/compiler_rt")
install(FILES "${CMAKE_SOURCE_DIR}/std/special/compiler_rt/index.zig" DESTINATION "${ZIG_STD_DEST}/special/compiler_rt")
install(FILES "${CMAKE_SOURCE_DIR}/std/special/compiler_rt/udivti3.zig" DESTINATION "${ZIG_STD_DEST}/special/compiler_rt")
install(FILES "${CMAKE_SOURCE_DIR}/std/special/compiler_rt/udivmod.zig" DESTINATION "${ZIG_STD_DEST}/special/compiler_rt")
install(FILES "${CMAKE_SOURCE_DIR}/std/special/compiler_rt/udivmoddi4.zig" DESTINATION "${ZIG_STD_DEST}/special/compiler_rt")
install(FILES "${CMAKE_SOURCE_DIR}/std/special/compiler_rt/udivmodti4.zig" DESTINATION "${ZIG_STD_DEST}/special/compiler_rt")
install(FILES "${CMAKE_SOURCE_DIR}/std/special/compiler_rt/umodti3.zig" DESTINATION "${ZIG_STD_DEST}/special/compiler_rt")
install(FILES "${CMAKE_SOURCE_DIR}/std/special/test_runner.zig" DESTINATION "${ZIG_STD_DEST}/special")

View File

@ -1,4 +1,3 @@
// Find all the exported functions.
comptime {
_ = @import("comparetf2.zig");
_ = @import("fixunsdfdi.zig");
@ -10,223 +9,26 @@ comptime {
_ = @import("fixunstfdi.zig");
_ = @import("fixunstfsi.zig");
_ = @import("fixunstfti.zig");
_ = @import("udivti3.zig");
_ = @import("udivmoddi4.zig");
_ = @import("udivmodti4.zig");
_ = @import("udivti3.zig");
_ = @import("umodti3.zig");
}
const builtin = @import("builtin");
const is_test = builtin.is_test;
const du_int = u64;
const di_int = i64;
const si_int = c_int;
const su_int = c_uint;
const __udivmoddi4 = @import("udivmoddi4.zig").__udivmoddi4;
const udwords = [2]su_int;
const low = if (builtin.is_big_endian) 1 else 0;
const high = 1 - low;
export fn __udivdi3(a: du_int, b: du_int) -> du_int {
export fn __udivdi3(a: u64, b: u64) -> u64 {
@setDebugSafety(this, is_test);
return __udivmoddi4(a, b, null);
}
fn du_int_to_udwords(x: du_int) -> udwords {
@setDebugSafety(this, is_test);
return *@ptrCast(&udwords, &x);
}
export fn __udivmoddi4(a: du_int, b: du_int, maybe_rem: ?&du_int) -> du_int {
export fn __umoddi3(a: u64, b: u64) -> u64 {
@setDebugSafety(this, is_test);
const n_uword_bits = su_int.bit_count;
const n_udword_bits = du_int.bit_count;
var n = du_int_to_udwords(a);
var d = du_int_to_udwords(b);
var q: udwords = undefined;
var r: udwords = undefined;
var sr: c_uint = undefined;
// special cases, X is unknown, K != 0
if (n[high] == 0) {
if (d[high] == 0) {
// 0 X
// ---
// 0 X
if (maybe_rem) |rem| {
*rem = n[low] % d[low];
}
return n[low] / d[low];
}
// 0 X
// ---
// K X
if (maybe_rem) |rem| {
*rem = n[low];
}
return 0;
}
// n[high] != 0
if (d[low] == 0) {
if (d[high] == 0) {
// K X
// ---
// 0 0
if (maybe_rem) |rem| {
*rem = n[high] % d[low];
}
return n[high] / d[low];
}
// d[high] != 0
if (n[low] == 0) {
// K 0
// ---
// K 0
if (maybe_rem) |rem| {
r[high] = n[high] % d[high];
r[low] = 0;
*rem = *@ptrCast(&du_int, &r[0]);
}
return n[high] / d[high];
}
// K K
// ---
// K 0
// if d is a power of 2
if ((d[high] & (d[high] - 1)) == 0) {
if (maybe_rem) |rem| {
r[low] = n[low];
r[high] = n[high] & (d[high] - 1);
*rem = *@ptrCast(&du_int, &r[0]);
}
return n[high] >> @ctz(d[high]);
}
// K K
// ---
// K 0
sr = @clz(su_int(d[high])) - @clz(su_int(n[high]));
// 0 <= sr <= n_uword_bits - 2 or sr large
if (sr > n_uword_bits - 2) {
if (maybe_rem) |rem| {
*rem = *@ptrCast(&du_int, &n[0]);
}
return 0;
}
sr += 1;
// 1 <= sr <= n_uword_bits - 1
// q.all = n.all << (n_udword_bits - sr);
q[low] = 0;
q[high] = n[low] << (n_uword_bits - sr);
// r.all = n.all >> sr;
r[high] = n[high] >> sr;
r[low] = (n[high] << (n_uword_bits - sr)) | (n[low] >> sr);
} else {
// d[low] != 0
if (d[high] == 0) {
// K X
// ---
// 0 K
// if d is a power of 2
if ((d[low] & (d[low] - 1)) == 0) {
if (maybe_rem) |rem| {
*rem = n[low] & (d[low] - 1);
}
if (d[low] == 1) {
return *@ptrCast(&du_int, &n[0]);
}
sr = @ctz(d[low]);
q[high] = n[high] >> sr;
q[low] = (n[high] << (n_uword_bits - sr)) | (n[low] >> sr);
return *@ptrCast(&du_int, &q[0]);
}
// K X
// ---
// 0 K
sr = 1 + n_uword_bits + c_uint(@clz(su_int(d[low]))) - c_uint(@clz(su_int(n[high])));
// 2 <= sr <= n_udword_bits - 1
// q.all = n.all << (n_udword_bits - sr);
// r.all = n.all >> sr;
if (sr == n_uword_bits) {
q[low] = 0;
q[high] = n[low];
r[high] = 0;
r[low] = n[high];
} else if (sr < n_uword_bits) {
// 2 <= sr <= n_uword_bits - 1
q[low] = 0;
q[high] = n[low] << (n_uword_bits - sr);
r[high] = n[high] >> sr;
r[low] = (n[high] << (n_uword_bits - sr)) | (n[low] >> sr);
} else {
// n_uword_bits + 1 <= sr <= n_udword_bits - 1
q[low] = n[low] << (n_udword_bits - sr);
q[high] = (n[high] << (n_udword_bits - sr)) |
(n[low] >> (sr - n_uword_bits));
r[high] = 0;
r[low] = n[high] >> (sr - n_uword_bits);
}
} else {
// K X
// ---
// K K
sr = @bitCast(c_uint, c_int(@clz(su_int(d[high]))) - c_int(@clz(su_int(n[high]))));
// 0 <= sr <= n_uword_bits - 1 or sr large
if (sr > n_uword_bits - 1) {
if (maybe_rem) |rem| {
*rem = *@ptrCast(&du_int, &n[0]);
}
return 0;
}
sr += 1;
// 1 <= sr <= n_uword_bits
// q.all = n.all << (n_udword_bits - sr);
q[low] = 0;
if (sr == n_uword_bits) {
q[high] = n[low];
r[high] = 0;
r[low] = n[high];
} else {
q[high] = n[low] << (n_uword_bits - sr);
r[high] = n[high] >> sr;
r[low] = (n[high] << (n_uword_bits - sr)) | (n[low] >> sr);
}
}
}
// Not a special case
// q and r are initialized with:
// q.all = n.all << (n_udword_bits - sr);
// r.all = n.all >> sr;
// 1 <= sr <= n_udword_bits - 1
var carry: su_int = 0;
while (sr > 0) {
// r:q = ((r:q) << 1) | carry
r[high] = (r[high] << 1) | (r[low] >> (n_uword_bits - 1));
r[low] = (r[low] << 1) | (q[high] >> (n_uword_bits - 1));
q[high] = (q[high] << 1) | (q[low] >> (n_uword_bits - 1));
q[low] = (q[low] << 1) | carry;
// carry = 0;
// if (r.all >= d.all)
// {
// r.all -= d.all;
// carry = 1;
// }
const s: di_int = (di_int)(*@ptrCast(&du_int, &d[0]) -% *@ptrCast(&du_int, &r[0]) -% 1) >> (n_udword_bits - 1);
carry = su_int(s & 1);
*@ptrCast(&du_int, &r[0]) -= *@ptrCast(&du_int, &d[0]) & @bitCast(u64, s);
sr -= 1;
}
*@ptrCast(&du_int, &q[0]) = (*@ptrCast(&du_int, &q[0]) << 1) | u64(carry);
if (maybe_rem) |rem| {
*rem = *@ptrCast(&du_int, &r[0]);
}
return *@ptrCast(&du_int, &q[0]);
}
export fn __umoddi3(a: du_int, b: du_int) -> du_int {
@setDebugSafety(this, is_test);
var r: du_int = undefined;
var r: u64 = undefined;
_ = __udivmoddi4(a, b, &r);
return r;
}
@ -275,11 +77,11 @@ export nakedcc fn __aeabi_uidivmod() {
@setGlobalLinkage(__aeabi_uidivmod, builtin.GlobalLinkage.Internal);
}
export fn __udivmodsi4(a: su_int, b: su_int, rem: &su_int) -> su_int {
export fn __udivmodsi4(a: u32, b: u32, rem: &u32) -> u32 {
@setDebugSafety(this, is_test);
const d = __udivsi3(a, b);
*rem = su_int(si_int(a) -% (si_int(d) * si_int(b)));
*rem = u32(i32(a) -% (i32(d) * i32(b)));
return d;
}
@ -287,16 +89,16 @@ export fn __udivmodsi4(a: su_int, b: su_int, rem: &su_int) -> su_int {
// TODO make this an alias instead of an extra function call
// https://github.com/andrewrk/zig/issues/256
export fn __aeabi_uidiv(n: su_int, d: su_int) -> su_int {
export fn __aeabi_uidiv(n: u32, d: u32) -> u32 {
@setDebugSafety(this, is_test);
return __udivsi3(n, d);
}
export fn __udivsi3(n: su_int, d: su_int) -> su_int {
export fn __udivsi3(n: u32, d: u32) -> u32 {
@setDebugSafety(this, is_test);
const n_uword_bits: c_uint = su_int.bit_count;
const n_uword_bits: c_uint = u32.bit_count;
// special cases
if (d == 0)
return 0; // ?!
@ -311,9 +113,9 @@ export fn __udivsi3(n: su_int, d: su_int) -> su_int {
sr += 1;
// 1 <= sr <= n_uword_bits - 1
// Not a special case
var q: su_int = n << (n_uword_bits - sr);
var r: su_int = n >> sr;
var carry: su_int = 0;
var q: u32 = n << (n_uword_bits - sr);
var r: u32 = n >> sr;
var carry: u32 = 0;
while (sr > 0) : (sr -= 1) {
// r:q = ((r:q) << 1) | carry
r = (r << 1) | (q >> (n_uword_bits - 1));
@ -324,9 +126,9 @@ export fn __udivsi3(n: su_int, d: su_int) -> su_int {
// r.all -= d.all;
// carry = 1;
// }
const s = si_int(d -% r -% 1) >> si_int(n_uword_bits - 1);
carry = su_int(s & 1);
r -= d & @bitCast(su_int, s);
const s = i32(d -% r -% 1) >> i32(n_uword_bits - 1);
carry = u32(s & 1);
r -= d & @bitCast(u32, s);
}
q = (q << 1) | carry;
return q;
@ -340,173 +142,145 @@ test "test_umoddi3" {
test_one_umoddi3(0xFFFFFFFFFFFFFFFF, 2, 0x1);
}
fn test_one_umoddi3(a: du_int, b: du_int, expected_r: du_int) {
fn test_one_umoddi3(a: u64, b: u64, expected_r: u64) {
const r = __umoddi3(a, b);
assert(r == expected_r);
}
test "test_udivmoddi4" {
const cases = [][4]du_int {
[]du_int{0x0000000000000000, 0x0000000000000001, 0x0000000000000000, 0x0000000000000000},
[]du_int{0x0000000080000000, 0x0000000100000001, 0x0000000000000000, 0x0000000080000000},
[]du_int{0x7FFFFFFF00000001, 0x0000000000000001, 0x7FFFFFFF00000001, 0x0000000000000000},
[]du_int{0x7FFFFFFF7FFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x0000000000000000, 0x7FFFFFFF7FFFFFFF},
[]du_int{0x8000000000000002, 0xFFFFFFFFFFFFFFFE, 0x0000000000000000, 0x8000000000000002},
[]du_int{0x80000000FFFFFFFD, 0xFFFFFFFFFFFFFFFD, 0x0000000000000000, 0x80000000FFFFFFFD},
[]du_int{0xFFFFFFFD00000010, 0xFFFFFFFF80000000, 0x0000000000000000, 0xFFFFFFFD00000010},
[]du_int{0xFFFFFFFDFFFFFFFF, 0xFFFFFFFF7FFFFFFF, 0x0000000000000000, 0xFFFFFFFDFFFFFFFF},
[]du_int{0xFFFFFFFE0747AE14, 0xFFFFFFFF0747AE14, 0x0000000000000000, 0xFFFFFFFE0747AE14},
[]du_int{0xFFFFFFFF00000001, 0xFFFFFFFF078644FA, 0x0000000000000000, 0xFFFFFFFF00000001},
[]du_int{0xFFFFFFFF80000000, 0xFFFFFFFF00000010, 0x0000000000000001, 0x000000007FFFFFF0},
[]du_int{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x0000000000000001, 0x0000000000000000},
};
for (cases) |case| {
test_one_udivmoddi4(case[0], case[1], case[2], case[3]);
}
}
fn test_one_udivmoddi4(a: du_int, b: du_int, expected_q: du_int, expected_r: du_int) {
var r: du_int = undefined;
const q = __udivmoddi4(a, b, &r);
assert(q == expected_q);
assert(r == expected_r);
}
test "test_udivsi3" {
const cases = [][3]su_int {
[]su_int{0x00000000, 0x00000001, 0x00000000},
[]su_int{0x00000000, 0x00000002, 0x00000000},
[]su_int{0x00000000, 0x00000003, 0x00000000},
[]su_int{0x00000000, 0x00000010, 0x00000000},
[]su_int{0x00000000, 0x078644FA, 0x00000000},
[]su_int{0x00000000, 0x0747AE14, 0x00000000},
[]su_int{0x00000000, 0x7FFFFFFF, 0x00000000},
[]su_int{0x00000000, 0x80000000, 0x00000000},
[]su_int{0x00000000, 0xFFFFFFFD, 0x00000000},
[]su_int{0x00000000, 0xFFFFFFFE, 0x00000000},
[]su_int{0x00000000, 0xFFFFFFFF, 0x00000000},
[]su_int{0x00000001, 0x00000001, 0x00000001},
[]su_int{0x00000001, 0x00000002, 0x00000000},
[]su_int{0x00000001, 0x00000003, 0x00000000},
[]su_int{0x00000001, 0x00000010, 0x00000000},
[]su_int{0x00000001, 0x078644FA, 0x00000000},
[]su_int{0x00000001, 0x0747AE14, 0x00000000},
[]su_int{0x00000001, 0x7FFFFFFF, 0x00000000},
[]su_int{0x00000001, 0x80000000, 0x00000000},
[]su_int{0x00000001, 0xFFFFFFFD, 0x00000000},
[]su_int{0x00000001, 0xFFFFFFFE, 0x00000000},
[]su_int{0x00000001, 0xFFFFFFFF, 0x00000000},
[]su_int{0x00000002, 0x00000001, 0x00000002},
[]su_int{0x00000002, 0x00000002, 0x00000001},
[]su_int{0x00000002, 0x00000003, 0x00000000},
[]su_int{0x00000002, 0x00000010, 0x00000000},
[]su_int{0x00000002, 0x078644FA, 0x00000000},
[]su_int{0x00000002, 0x0747AE14, 0x00000000},
[]su_int{0x00000002, 0x7FFFFFFF, 0x00000000},
[]su_int{0x00000002, 0x80000000, 0x00000000},
[]su_int{0x00000002, 0xFFFFFFFD, 0x00000000},
[]su_int{0x00000002, 0xFFFFFFFE, 0x00000000},
[]su_int{0x00000002, 0xFFFFFFFF, 0x00000000},
[]su_int{0x00000003, 0x00000001, 0x00000003},
[]su_int{0x00000003, 0x00000002, 0x00000001},
[]su_int{0x00000003, 0x00000003, 0x00000001},
[]su_int{0x00000003, 0x00000010, 0x00000000},
[]su_int{0x00000003, 0x078644FA, 0x00000000},
[]su_int{0x00000003, 0x0747AE14, 0x00000000},
[]su_int{0x00000003, 0x7FFFFFFF, 0x00000000},
[]su_int{0x00000003, 0x80000000, 0x00000000},
[]su_int{0x00000003, 0xFFFFFFFD, 0x00000000},
[]su_int{0x00000003, 0xFFFFFFFE, 0x00000000},
[]su_int{0x00000003, 0xFFFFFFFF, 0x00000000},
[]su_int{0x00000010, 0x00000001, 0x00000010},
[]su_int{0x00000010, 0x00000002, 0x00000008},
[]su_int{0x00000010, 0x00000003, 0x00000005},
[]su_int{0x00000010, 0x00000010, 0x00000001},
[]su_int{0x00000010, 0x078644FA, 0x00000000},
[]su_int{0x00000010, 0x0747AE14, 0x00000000},
[]su_int{0x00000010, 0x7FFFFFFF, 0x00000000},
[]su_int{0x00000010, 0x80000000, 0x00000000},
[]su_int{0x00000010, 0xFFFFFFFD, 0x00000000},
[]su_int{0x00000010, 0xFFFFFFFE, 0x00000000},
[]su_int{0x00000010, 0xFFFFFFFF, 0x00000000},
[]su_int{0x078644FA, 0x00000001, 0x078644FA},
[]su_int{0x078644FA, 0x00000002, 0x03C3227D},
[]su_int{0x078644FA, 0x00000003, 0x028216FE},
[]su_int{0x078644FA, 0x00000010, 0x0078644F},
[]su_int{0x078644FA, 0x078644FA, 0x00000001},
[]su_int{0x078644FA, 0x0747AE14, 0x00000001},
[]su_int{0x078644FA, 0x7FFFFFFF, 0x00000000},
[]su_int{0x078644FA, 0x80000000, 0x00000000},
[]su_int{0x078644FA, 0xFFFFFFFD, 0x00000000},
[]su_int{0x078644FA, 0xFFFFFFFE, 0x00000000},
[]su_int{0x078644FA, 0xFFFFFFFF, 0x00000000},
[]su_int{0x0747AE14, 0x00000001, 0x0747AE14},
[]su_int{0x0747AE14, 0x00000002, 0x03A3D70A},
[]su_int{0x0747AE14, 0x00000003, 0x026D3A06},
[]su_int{0x0747AE14, 0x00000010, 0x00747AE1},
[]su_int{0x0747AE14, 0x078644FA, 0x00000000},
[]su_int{0x0747AE14, 0x0747AE14, 0x00000001},
[]su_int{0x0747AE14, 0x7FFFFFFF, 0x00000000},
[]su_int{0x0747AE14, 0x80000000, 0x00000000},
[]su_int{0x0747AE14, 0xFFFFFFFD, 0x00000000},
[]su_int{0x0747AE14, 0xFFFFFFFE, 0x00000000},
[]su_int{0x0747AE14, 0xFFFFFFFF, 0x00000000},
[]su_int{0x7FFFFFFF, 0x00000001, 0x7FFFFFFF},
[]su_int{0x7FFFFFFF, 0x00000002, 0x3FFFFFFF},
[]su_int{0x7FFFFFFF, 0x00000003, 0x2AAAAAAA},
[]su_int{0x7FFFFFFF, 0x00000010, 0x07FFFFFF},
[]su_int{0x7FFFFFFF, 0x078644FA, 0x00000011},
[]su_int{0x7FFFFFFF, 0x0747AE14, 0x00000011},
[]su_int{0x7FFFFFFF, 0x7FFFFFFF, 0x00000001},
[]su_int{0x7FFFFFFF, 0x80000000, 0x00000000},
[]su_int{0x7FFFFFFF, 0xFFFFFFFD, 0x00000000},
[]su_int{0x7FFFFFFF, 0xFFFFFFFE, 0x00000000},
[]su_int{0x7FFFFFFF, 0xFFFFFFFF, 0x00000000},
[]su_int{0x80000000, 0x00000001, 0x80000000},
[]su_int{0x80000000, 0x00000002, 0x40000000},
[]su_int{0x80000000, 0x00000003, 0x2AAAAAAA},
[]su_int{0x80000000, 0x00000010, 0x08000000},
[]su_int{0x80000000, 0x078644FA, 0x00000011},
[]su_int{0x80000000, 0x0747AE14, 0x00000011},
[]su_int{0x80000000, 0x7FFFFFFF, 0x00000001},
[]su_int{0x80000000, 0x80000000, 0x00000001},
[]su_int{0x80000000, 0xFFFFFFFD, 0x00000000},
[]su_int{0x80000000, 0xFFFFFFFE, 0x00000000},
[]su_int{0x80000000, 0xFFFFFFFF, 0x00000000},
[]su_int{0xFFFFFFFD, 0x00000001, 0xFFFFFFFD},
[]su_int{0xFFFFFFFD, 0x00000002, 0x7FFFFFFE},
[]su_int{0xFFFFFFFD, 0x00000003, 0x55555554},
[]su_int{0xFFFFFFFD, 0x00000010, 0x0FFFFFFF},
[]su_int{0xFFFFFFFD, 0x078644FA, 0x00000022},
[]su_int{0xFFFFFFFD, 0x0747AE14, 0x00000023},
[]su_int{0xFFFFFFFD, 0x7FFFFFFF, 0x00000001},
[]su_int{0xFFFFFFFD, 0x80000000, 0x00000001},
[]su_int{0xFFFFFFFD, 0xFFFFFFFD, 0x00000001},
[]su_int{0xFFFFFFFD, 0xFFFFFFFE, 0x00000000},
[]su_int{0xFFFFFFFD, 0xFFFFFFFF, 0x00000000},
[]su_int{0xFFFFFFFE, 0x00000001, 0xFFFFFFFE},
[]su_int{0xFFFFFFFE, 0x00000002, 0x7FFFFFFF},
[]su_int{0xFFFFFFFE, 0x00000003, 0x55555554},
[]su_int{0xFFFFFFFE, 0x00000010, 0x0FFFFFFF},
[]su_int{0xFFFFFFFE, 0x078644FA, 0x00000022},
[]su_int{0xFFFFFFFE, 0x0747AE14, 0x00000023},
[]su_int{0xFFFFFFFE, 0x7FFFFFFF, 0x00000002},
[]su_int{0xFFFFFFFE, 0x80000000, 0x00000001},
[]su_int{0xFFFFFFFE, 0xFFFFFFFD, 0x00000001},
[]su_int{0xFFFFFFFE, 0xFFFFFFFE, 0x00000001},
[]su_int{0xFFFFFFFE, 0xFFFFFFFF, 0x00000000},
[]su_int{0xFFFFFFFF, 0x00000001, 0xFFFFFFFF},
[]su_int{0xFFFFFFFF, 0x00000002, 0x7FFFFFFF},
[]su_int{0xFFFFFFFF, 0x00000003, 0x55555555},
[]su_int{0xFFFFFFFF, 0x00000010, 0x0FFFFFFF},
[]su_int{0xFFFFFFFF, 0x078644FA, 0x00000022},
[]su_int{0xFFFFFFFF, 0x0747AE14, 0x00000023},
[]su_int{0xFFFFFFFF, 0x7FFFFFFF, 0x00000002},
[]su_int{0xFFFFFFFF, 0x80000000, 0x00000001},
[]su_int{0xFFFFFFFF, 0xFFFFFFFD, 0x00000001},
[]su_int{0xFFFFFFFF, 0xFFFFFFFE, 0x00000001},
[]su_int{0xFFFFFFFF, 0xFFFFFFFF, 0x00000001},
const cases = [][3]u32 {
[]u32{0x00000000, 0x00000001, 0x00000000},
[]u32{0x00000000, 0x00000002, 0x00000000},
[]u32{0x00000000, 0x00000003, 0x00000000},
[]u32{0x00000000, 0x00000010, 0x00000000},
[]u32{0x00000000, 0x078644FA, 0x00000000},
[]u32{0x00000000, 0x0747AE14, 0x00000000},
[]u32{0x00000000, 0x7FFFFFFF, 0x00000000},
[]u32{0x00000000, 0x80000000, 0x00000000},
[]u32{0x00000000, 0xFFFFFFFD, 0x00000000},
[]u32{0x00000000, 0xFFFFFFFE, 0x00000000},
[]u32{0x00000000, 0xFFFFFFFF, 0x00000000},
[]u32{0x00000001, 0x00000001, 0x00000001},
[]u32{0x00000001, 0x00000002, 0x00000000},
[]u32{0x00000001, 0x00000003, 0x00000000},
[]u32{0x00000001, 0x00000010, 0x00000000},
[]u32{0x00000001, 0x078644FA, 0x00000000},
[]u32{0x00000001, 0x0747AE14, 0x00000000},
[]u32{0x00000001, 0x7FFFFFFF, 0x00000000},
[]u32{0x00000001, 0x80000000, 0x00000000},
[]u32{0x00000001, 0xFFFFFFFD, 0x00000000},
[]u32{0x00000001, 0xFFFFFFFE, 0x00000000},
[]u32{0x00000001, 0xFFFFFFFF, 0x00000000},
[]u32{0x00000002, 0x00000001, 0x00000002},
[]u32{0x00000002, 0x00000002, 0x00000001},
[]u32{0x00000002, 0x00000003, 0x00000000},
[]u32{0x00000002, 0x00000010, 0x00000000},
[]u32{0x00000002, 0x078644FA, 0x00000000},
[]u32{0x00000002, 0x0747AE14, 0x00000000},
[]u32{0x00000002, 0x7FFFFFFF, 0x00000000},
[]u32{0x00000002, 0x80000000, 0x00000000},
[]u32{0x00000002, 0xFFFFFFFD, 0x00000000},
[]u32{0x00000002, 0xFFFFFFFE, 0x00000000},
[]u32{0x00000002, 0xFFFFFFFF, 0x00000000},
[]u32{0x00000003, 0x00000001, 0x00000003},
[]u32{0x00000003, 0x00000002, 0x00000001},
[]u32{0x00000003, 0x00000003, 0x00000001},
[]u32{0x00000003, 0x00000010, 0x00000000},
[]u32{0x00000003, 0x078644FA, 0x00000000},
[]u32{0x00000003, 0x0747AE14, 0x00000000},
[]u32{0x00000003, 0x7FFFFFFF, 0x00000000},
[]u32{0x00000003, 0x80000000, 0x00000000},
[]u32{0x00000003, 0xFFFFFFFD, 0x00000000},
[]u32{0x00000003, 0xFFFFFFFE, 0x00000000},
[]u32{0x00000003, 0xFFFFFFFF, 0x00000000},
[]u32{0x00000010, 0x00000001, 0x00000010},
[]u32{0x00000010, 0x00000002, 0x00000008},
[]u32{0x00000010, 0x00000003, 0x00000005},
[]u32{0x00000010, 0x00000010, 0x00000001},
[]u32{0x00000010, 0x078644FA, 0x00000000},
[]u32{0x00000010, 0x0747AE14, 0x00000000},
[]u32{0x00000010, 0x7FFFFFFF, 0x00000000},
[]u32{0x00000010, 0x80000000, 0x00000000},
[]u32{0x00000010, 0xFFFFFFFD, 0x00000000},
[]u32{0x00000010, 0xFFFFFFFE, 0x00000000},
[]u32{0x00000010, 0xFFFFFFFF, 0x00000000},
[]u32{0x078644FA, 0x00000001, 0x078644FA},
[]u32{0x078644FA, 0x00000002, 0x03C3227D},
[]u32{0x078644FA, 0x00000003, 0x028216FE},
[]u32{0x078644FA, 0x00000010, 0x0078644F},
[]u32{0x078644FA, 0x078644FA, 0x00000001},
[]u32{0x078644FA, 0x0747AE14, 0x00000001},
[]u32{0x078644FA, 0x7FFFFFFF, 0x00000000},
[]u32{0x078644FA, 0x80000000, 0x00000000},
[]u32{0x078644FA, 0xFFFFFFFD, 0x00000000},
[]u32{0x078644FA, 0xFFFFFFFE, 0x00000000},
[]u32{0x078644FA, 0xFFFFFFFF, 0x00000000},
[]u32{0x0747AE14, 0x00000001, 0x0747AE14},
[]u32{0x0747AE14, 0x00000002, 0x03A3D70A},
[]u32{0x0747AE14, 0x00000003, 0x026D3A06},
[]u32{0x0747AE14, 0x00000010, 0x00747AE1},
[]u32{0x0747AE14, 0x078644FA, 0x00000000},
[]u32{0x0747AE14, 0x0747AE14, 0x00000001},
[]u32{0x0747AE14, 0x7FFFFFFF, 0x00000000},
[]u32{0x0747AE14, 0x80000000, 0x00000000},
[]u32{0x0747AE14, 0xFFFFFFFD, 0x00000000},
[]u32{0x0747AE14, 0xFFFFFFFE, 0x00000000},
[]u32{0x0747AE14, 0xFFFFFFFF, 0x00000000},
[]u32{0x7FFFFFFF, 0x00000001, 0x7FFFFFFF},
[]u32{0x7FFFFFFF, 0x00000002, 0x3FFFFFFF},
[]u32{0x7FFFFFFF, 0x00000003, 0x2AAAAAAA},
[]u32{0x7FFFFFFF, 0x00000010, 0x07FFFFFF},
[]u32{0x7FFFFFFF, 0x078644FA, 0x00000011},
[]u32{0x7FFFFFFF, 0x0747AE14, 0x00000011},
[]u32{0x7FFFFFFF, 0x7FFFFFFF, 0x00000001},
[]u32{0x7FFFFFFF, 0x80000000, 0x00000000},
[]u32{0x7FFFFFFF, 0xFFFFFFFD, 0x00000000},
[]u32{0x7FFFFFFF, 0xFFFFFFFE, 0x00000000},
[]u32{0x7FFFFFFF, 0xFFFFFFFF, 0x00000000},
[]u32{0x80000000, 0x00000001, 0x80000000},
[]u32{0x80000000, 0x00000002, 0x40000000},
[]u32{0x80000000, 0x00000003, 0x2AAAAAAA},
[]u32{0x80000000, 0x00000010, 0x08000000},
[]u32{0x80000000, 0x078644FA, 0x00000011},
[]u32{0x80000000, 0x0747AE14, 0x00000011},
[]u32{0x80000000, 0x7FFFFFFF, 0x00000001},
[]u32{0x80000000, 0x80000000, 0x00000001},
[]u32{0x80000000, 0xFFFFFFFD, 0x00000000},
[]u32{0x80000000, 0xFFFFFFFE, 0x00000000},
[]u32{0x80000000, 0xFFFFFFFF, 0x00000000},
[]u32{0xFFFFFFFD, 0x00000001, 0xFFFFFFFD},
[]u32{0xFFFFFFFD, 0x00000002, 0x7FFFFFFE},
[]u32{0xFFFFFFFD, 0x00000003, 0x55555554},
[]u32{0xFFFFFFFD, 0x00000010, 0x0FFFFFFF},
[]u32{0xFFFFFFFD, 0x078644FA, 0x00000022},
[]u32{0xFFFFFFFD, 0x0747AE14, 0x00000023},
[]u32{0xFFFFFFFD, 0x7FFFFFFF, 0x00000001},
[]u32{0xFFFFFFFD, 0x80000000, 0x00000001},
[]u32{0xFFFFFFFD, 0xFFFFFFFD, 0x00000001},
[]u32{0xFFFFFFFD, 0xFFFFFFFE, 0x00000000},
[]u32{0xFFFFFFFD, 0xFFFFFFFF, 0x00000000},
[]u32{0xFFFFFFFE, 0x00000001, 0xFFFFFFFE},
[]u32{0xFFFFFFFE, 0x00000002, 0x7FFFFFFF},
[]u32{0xFFFFFFFE, 0x00000003, 0x55555554},
[]u32{0xFFFFFFFE, 0x00000010, 0x0FFFFFFF},
[]u32{0xFFFFFFFE, 0x078644FA, 0x00000022},
[]u32{0xFFFFFFFE, 0x0747AE14, 0x00000023},
[]u32{0xFFFFFFFE, 0x7FFFFFFF, 0x00000002},
[]u32{0xFFFFFFFE, 0x80000000, 0x00000001},
[]u32{0xFFFFFFFE, 0xFFFFFFFD, 0x00000001},
[]u32{0xFFFFFFFE, 0xFFFFFFFE, 0x00000001},
[]u32{0xFFFFFFFE, 0xFFFFFFFF, 0x00000000},
[]u32{0xFFFFFFFF, 0x00000001, 0xFFFFFFFF},
[]u32{0xFFFFFFFF, 0x00000002, 0x7FFFFFFF},
[]u32{0xFFFFFFFF, 0x00000003, 0x55555555},
[]u32{0xFFFFFFFF, 0x00000010, 0x0FFFFFFF},
[]u32{0xFFFFFFFF, 0x078644FA, 0x00000022},
[]u32{0xFFFFFFFF, 0x0747AE14, 0x00000023},
[]u32{0xFFFFFFFF, 0x7FFFFFFF, 0x00000002},
[]u32{0xFFFFFFFF, 0x80000000, 0x00000001},
[]u32{0xFFFFFFFF, 0xFFFFFFFD, 0x00000001},
[]u32{0xFFFFFFFF, 0xFFFFFFFE, 0x00000001},
[]u32{0xFFFFFFFF, 0xFFFFFFFF, 0x00000001},
};
for (cases) |case| {
@ -514,8 +288,8 @@ test "test_udivsi3" {
}
}
fn test_one_udivsi3(a: su_int, b: su_int, expected_q: su_int) {
const q: su_int = __udivsi3(a, b);
fn test_one_udivsi3(a: u32, b: u32, expected_q: u32) {
const q: u32 = __udivsi3(a, b);
assert(q == expected_q);
}

View File

@ -0,0 +1,193 @@
const builtin = @import("builtin");
const is_test = builtin.is_test;
const low = if (builtin.is_big_endian) 1 else 0;
const high = 1 - low;
// Generic unsigned divide-with-remainder for a "double word" integer type.
//
// DoubleInt  - the integer type to operate on; it is split into two halves of
//              DoubleInt.bit_count / 2 bits each (SingleInt below).
// a, b       - dividend and divisor.
// maybe_rem  - optional out-pointer; when non-null the remainder is stored
//              through it on every return path.
// Returns the quotient.
//
// The function first handles a series of special cases keyed on which halves
// of a and b are zero (the "K X / 0 K" diagrams: K is non-zero, X is unknown,
// high half on the left). Anything that is not a special case falls through
// to the bit-at-a-time shift/subtract loop at the bottom, with q, r and sr
// pre-initialized by the branch that was taken.
pub fn udivmod(comptime DoubleInt: type, a: DoubleInt, b: DoubleInt, maybe_rem: ?&DoubleInt) -> DoubleInt {
// NOTE(review): presumably toggles runtime safety checks for this function
// so they are only active in test builds - confirm against @setDebugSafety docs.
@setDebugSafety(this, is_test);
// Half-width unsigned type and same-width signed type derived from DoubleInt.
const SingleInt = @IntType(false, @divExact(DoubleInt.bit_count, 2));
const SignedDoubleInt = @IntType(true, DoubleInt.bit_count);
// View a and b as two SingleInt halves; the file-level `low` / `high`
// constants pick the index of each half based on target endianness.
const n = *@ptrCast(&[2]SingleInt, &a); // TODO issue #421
const d = *@ptrCast(&[2]SingleInt, &b); // TODO issue #421
// Quotient and remainder, also held as two halves.
var q: [2]SingleInt = undefined;
var r: [2]SingleInt = undefined;
// Shift amount; doubles as the iteration count of the main loop.
var sr: c_uint = undefined;
// special cases, X is unknown, K != 0
if (n[high] == 0) {
if (d[high] == 0) {
// 0 X
// ---
// 0 X
// Both operands fit in one half: use the single-width operators directly.
if (maybe_rem) |rem| {
*rem = n[low] % d[low];
}
return n[low] / d[low];
}
// 0 X
// ---
// K X
// Divisor has a non-zero high half and the dividend does not, so the
// quotient is 0 and the remainder is the dividend.
if (maybe_rem) |rem| {
*rem = n[low];
}
return 0;
}
// n[high] != 0
if (d[low] == 0) {
if (d[high] == 0) {
// K X
// ---
// 0 0
// NOTE(review): d[low] is zero in this branch, so both operations below
// divide by zero; there is no explicit handling here. Presumably this is
// deliberate so that b == 0 faults like a native division - confirm.
if (maybe_rem) |rem| {
*rem = n[high] % d[low];
}
return n[high] / d[low];
}
// d[high] != 0
if (n[low] == 0) {
// K 0
// ---
// K 0
// Both low halves are zero: divide the high halves; the remainder keeps
// a zero low half.
if (maybe_rem) |rem| {
r[high] = n[high] % d[high];
r[low] = 0;
*rem = *@ptrCast(&DoubleInt, &r[0]); // TODO issue #421
}
return n[high] / d[high];
}
// K K
// ---
// K 0
if ((d[high] & (d[high] - 1)) == 0) {
// d is a power of 2
// Remainder is the dividend masked by (d - 1); quotient is a right shift
// of the high half by the position of the set bit in d[high].
if (maybe_rem) |rem| {
r[low] = n[low];
r[high] = n[high] & (d[high] - 1);
*rem = *@ptrCast(&DoubleInt, &r[0]); // TODO issue #421
}
return n[high] >> @ctz(d[high]);
}
// K K
// ---
// K 0
// Difference of leading-zero counts of the two high halves. The signed
// subtraction is bit-cast to unsigned, so a negative difference shows up
// as a very large sr ("sr large" below).
sr = @bitCast(c_uint, c_int(@clz(d[high])) - c_int(@clz(n[high])));
// 0 <= sr <= SingleInt.bit_count - 2 or sr large
if (sr > SingleInt.bit_count - 2) {
// Early out: quotient 0, remainder is the whole dividend.
if (maybe_rem) |rem| {
*rem = a;
}
return 0;
}
sr += 1;
// 1 <= sr <= SingleInt.bit_count - 1
// q.all = a << (DoubleInt.bit_count - sr);
q[low] = 0;
q[high] = n[low] << (SingleInt.bit_count - sr);
// r.all = a >> sr;
r[high] = n[high] >> sr;
r[low] = (n[high] << (SingleInt.bit_count - sr)) | (n[low] >> sr);
} else {
// d[low] != 0
if (d[high] == 0) {
// K X
// ---
// 0 K
if ((d[low] & (d[low] - 1)) == 0) {
// d is a power of 2
if (maybe_rem) |rem| {
*rem = n[low] & (d[low] - 1);
}
// Dividing by 1 returns the dividend unchanged; this also keeps sr from
// being 0 in the shifts below.
if (d[low] == 1) {
return a;
}
// Quotient is a >> sr, assembled from the two halves.
sr = @ctz(d[low]);
q[high] = n[high] >> sr;
q[low] = (n[high] << (SingleInt.bit_count - sr)) | (n[low] >> sr);
return *@ptrCast(&DoubleInt, &q[0]); // TODO issue #421
}
// K X
// ---
// 0 K
sr = 1 + SingleInt.bit_count + c_uint(@clz(d[low])) - c_uint(@clz(n[high]));
// 2 <= sr <= DoubleInt.bit_count - 1
// q.all = a << (DoubleInt.bit_count - sr);
// r.all = a >> sr;
// The three branches below perform the two double-width shifts above using
// only single-width shifts, split on how sr compares to SingleInt.bit_count.
if (sr == SingleInt.bit_count) {
q[low] = 0;
q[high] = n[low];
r[high] = 0;
r[low] = n[high];
} else if (sr < SingleInt.bit_count) {
// 2 <= sr <= SingleInt.bit_count - 1
q[low] = 0;
q[high] = n[low] << (SingleInt.bit_count - sr);
r[high] = n[high] >> sr;
r[low] = (n[high] << (SingleInt.bit_count - sr)) | (n[low] >> sr);
} else {
// SingleInt.bit_count + 1 <= sr <= DoubleInt.bit_count - 1
q[low] = n[low] << (DoubleInt.bit_count - sr);
q[high] = (n[high] << (DoubleInt.bit_count - sr)) | (n[low] >> (sr - SingleInt.bit_count));
r[high] = 0;
r[low] = n[high] >> (sr - SingleInt.bit_count);
}
} else {
// K X
// ---
// K K
// Same leading-zero difference as in the "K K / K 0" case above; a
// negative difference becomes a large unsigned sr.
sr = @bitCast(c_uint, c_int(@clz(d[high])) - c_int(@clz(n[high])));
// 0 <= sr <= SingleInt.bit_count - 1 or sr large
if (sr > SingleInt.bit_count - 1) {
// Early out: quotient 0, remainder is the whole dividend.
if (maybe_rem) |rem| {
*rem = a;
}
return 0;
}
sr += 1;
// 1 <= sr <= SingleInt.bit_count
// q.all = a << (DoubleInt.bit_count - sr);
// r.all = a >> sr;
q[low] = 0;
if (sr == SingleInt.bit_count) {
// Shift by exactly one half: move the halves instead of shifting.
q[high] = n[low];
r[high] = 0;
r[low] = n[high];
} else {
r[high] = n[high] >> sr;
r[low] = (n[high] << (SingleInt.bit_count - sr)) | (n[low] >> sr);
q[high] = n[low] << (SingleInt.bit_count - sr);
}
}
}
// Not a special case
// q and r are initialized with:
// q.all = a << (DoubleInt.bit_count - sr);
// r.all = a >> sr;
// 1 <= sr <= DoubleInt.bit_count - 1
// Main loop: sr iterations, each shifting the combined r:q value left by one
// bit (feeding in the previous carry) and then conditionally subtracting b
// from r.
var carry: u32 = 0;
// Double-width copy of r; assigned on every iteration, and sr >= 1 per the
// range comment above, so it is written before the read after the loop.
var r_all: DoubleInt = undefined;
while (sr > 0) : (sr -= 1) {
// r:q = ((r:q) << 1) | carry
r[high] = (r[high] << 1) | (r[low] >> (SingleInt.bit_count - 1));
r[low] = (r[low] << 1) | (q[high] >> (SingleInt.bit_count - 1));
q[high] = (q[high] << 1) | (q[low] >> (SingleInt.bit_count - 1));
q[low] = (q[low] << 1) | carry;
// carry = 0;
// if (r.all >= b)
// {
// r.all -= b;
// carry = 1;
// }
// The statements below implement the pseudo-code above without a branch:
// s is derived from the top bit of (b - r_all - 1) and is then used both as
// the carry bit (s & 1) and as a mask selecting whether b is subtracted.
r_all = *@ptrCast(&DoubleInt, &r[0]); // TODO issue #421
const s: SignedDoubleInt = SignedDoubleInt(b -% r_all -% 1) >> (DoubleInt.bit_count - 1);
carry = u32(s & 1);
r_all -= b & @bitCast(DoubleInt, s);
// Write the updated remainder back into its two-half form for the next pass.
r = *@ptrCast(&[2]SingleInt, &r_all); // TODO issue #421
}
// Shift in the carry produced by the final iteration to complete the quotient.
const q_all = ((*@ptrCast(&DoubleInt, &q[0])) << 1) | carry; // TODO issue #421
if (maybe_rem) |rem| {
*rem = r_all;
}
return q_all;
}

View File

@ -0,0 +1,9 @@
const udivmod = @import("udivmod.zig").udivmod;
// Exported 64-bit unsigned divide-with-remainder entry point.
// Forwards to the generic udivmod instantiated with u64: the quotient is
// returned and maybe_rem is handed through untouched, so the remainder is
// written only if the caller supplied a non-null pointer.
export fn __udivmoddi4(a: u64, b: u64, maybe_rem: ?&u64) -> u64 {
const quotient = udivmod(u64, a, b, maybe_rem);
return quotient;
}
// Test block whose only statement imports udivmoddi4_test.zig and discards
// the result. NOTE(review): presumably this exists so the test cases declared
// in that file are picked up when this file is tested - confirm.
test "import udivmoddi4" {
_ = @import("udivmoddi4_test.zig");
}

File diff suppressed because it is too large Load Diff

View File

@ -1,195 +1,7 @@
const builtin = @import("builtin");
const is_test = builtin.is_test;
const low = if (builtin.is_big_endian) 1 else 0;
const high = 1 - low;
const udivmod = @import("udivmod.zig").udivmod;
export fn __udivmodti4(a: u128, b: u128, maybe_rem: ?&u128) -> u128 {
@setDebugSafety(this, is_test);
const n_udword_bits = u64.bit_count;
const n_utword_bits = u128.bit_count;
const n = *@ptrCast(&[2]u64, &a); // TODO issue #421
const d = *@ptrCast(&[2]u64, &b); // TODO issue #421
var q: [2]u64 = undefined;
var r: [2]u64 = undefined;
var sr: c_uint = undefined;
// special cases, X is unknown, K != 0
if (n[high] == 0) {
if (d[high] == 0) {
// 0 X
// ---
// 0 X
if (maybe_rem) |rem| {
*rem = n[low] % d[low];
}
return n[low] / d[low];
}
// 0 X
// ---
// K X
if (maybe_rem) |rem| {
*rem = n[low];
}
return 0;
}
// n[high] != 0
if (d[low] == 0) {
if (d[high] == 0) {
// K X
// ---
// 0 0
if (maybe_rem) |rem| {
*rem = n[high] % d[low];
}
return n[high] / d[low];
}
// d[high] != 0 */
if (n[low] == 0) {
// K 0
// ---
// K 0
if (maybe_rem) |rem| {
r[high] = n[high] % d[high];
r[low] = 0;
*rem = *@ptrCast(&u128, &r[0]); // TODO issue #421
}
return n[high] / d[high];
}
// K K
// ---
// K 0
if ((d[high] & (d[high] - 1)) == 0) {
// d is a power of 2
if (maybe_rem) |rem| {
r[low] = n[low];
r[high] = n[high] & (d[high] - 1);
*rem = *@ptrCast(&u128, &r[0]); // TODO issue #421
}
return n[high] >> @ctz(d[high]);
}
// K K
// ---
// K 0
sr = @bitCast(c_uint, c_int(@clz(d[high])) - c_int(@clz(n[high])));
// 0 <= sr <= n_udword_bits - 2 or sr large
if (sr > n_udword_bits - 2) {
if (maybe_rem) |rem| {
*rem = a;
}
return 0;
}
sr += 1;
// 1 <= sr <= n_udword_bits - 1
// q.all = a << (n_utword_bits - sr);
q[low] = 0;
q[high] = n[low] << (n_udword_bits - sr);
// r.all = a >> sr;
r[high] = n[high] >> sr;
r[low] = (n[high] << (n_udword_bits - sr)) | (n[low] >> sr);
} else {
// d[low] != 0
if (d[high] == 0) {
// K X
// ---
// 0 K
if ((d[low] & (d[low] - 1)) == 0) {
// if d is a power of 2
if (maybe_rem) |rem| {
*rem = n[low] & (d[low] - 1);
}
if (d[low] == 1)
return a;
sr = @ctz(d[low]);
q[high] = n[high] >> sr;
q[low] = (n[high] << (n_udword_bits - sr)) | (n[low] >> sr);
return *@ptrCast(&u128, &q[0]); // TODO issue #421
}
// K X
// ---
// 0 K
sr = 1 + n_udword_bits + c_uint(@clz(d[low]))
- c_uint(@clz(n[high]));
// 2 <= sr <= n_utword_bits - 1
// q.all = a << (n_utword_bits - sr);
// r.all = a >> sr;
if (sr == n_udword_bits) {
q[low] = 0;
q[high] = n[low];
r[high] = 0;
r[low] = n[high];
} else if (sr < n_udword_bits) {
// 2 <= sr <= n_udword_bits - 1
q[low] = 0;
q[high] = n[low] << (n_udword_bits - sr);
r[high] = n[high] >> sr;
r[low] = (n[high] << (n_udword_bits - sr)) | (n[low] >> sr);
} else {
// n_udword_bits + 1 <= sr <= n_utword_bits - 1
q[low] = n[low] << (n_utword_bits - sr);
q[high] = (n[high] << (n_utword_bits - sr)) |
(n[low] >> (sr - n_udword_bits));
r[high] = 0;
r[low] = n[high] >> (sr - n_udword_bits);
}
} else {
// K X
// ---
// K K
sr = @bitCast(c_uint, c_int(@clz(d[high])) - c_int(@clz(n[high])));
// 0 <= sr <= n_udword_bits - 1 or sr large
if (sr > n_udword_bits - 1) {
if (maybe_rem) |rem| {
*rem = a;
}
return 0;
}
sr += 1;
// 1 <= sr <= n_udword_bits
// q.all = a << (n_utword_bits - sr);
// r.all = a >> sr;
q[low] = 0;
if (sr == n_udword_bits) {
q[high] = n[low];
r[high] = 0;
r[low] = n[high];
} else {
r[high] = n[high] >> sr;
r[low] = (n[high] << (n_udword_bits - sr)) | (n[low] >> sr);
q[high] = n[low] << (n_udword_bits - sr);
}
}
}
// Not a special case
// q and r are initialized with:
// q.all = a << (n_utword_bits - sr);
// r.all = a >> sr;
// 1 <= sr <= n_utword_bits - 1
var carry: u32 = 0;
var r_all: u128 = undefined;
while (sr > 0) : (sr -= 1) {
// r:q = ((r:q) << 1) | carry
r[high] = (r[high] << 1) | (r[low] >> (n_udword_bits - 1));
r[low] = (r[low] << 1) | (q[high] >> (n_udword_bits - 1));
q[high] = (q[high] << 1) | (q[low] >> (n_udword_bits - 1));
q[low] = (q[low] << 1) | carry;
// carry = 0;
// if (r.all >= b)
// {
// r.all -= b;
// carry = 1;
// }
r_all = *@ptrCast(&u128, &r[0]); // TODO issue #421
const s: i128 = i128(b -% r_all -% 1) >> (n_utword_bits - 1);
carry = u32(s & 1);
r_all -= b & @bitCast(u128, s);
r = *@ptrCast(&[2]u64, &r_all); // TODO issue #421
}
const q_all = ((*@ptrCast(&u128, &q[0])) << 1) | carry; // TODO issue #421
if (maybe_rem) |rem| {
*rem = r_all;
}
return q_all;
return udivmod(u128, a, b, maybe_rem);
}
test "import udivmodti4" {