implement bigint div and rem

See #405
This commit is contained in:
Andrew Kelley 2018-01-15 22:17:22 -05:00
parent 92fc5947fc
commit 84d8584c5b
3 changed files with 458 additions and 11 deletions

View File

@ -125,17 +125,20 @@ libc. Create demo games using Zig.
##### POSIX
* gcc >= 5.0.0 or clang >= 3.6.0
* cmake >= 2.8.5
* gcc >= 5.0.0 or clang >= 3.6.0
* LLVM, Clang, LLD libraries == 5.x, compiled with the same gcc or clang version above
##### Windows
* cmake >= 2.8.5
* Microsoft Visual Studio 2015
* LLVM, Clang, LLD libraries == 5.x, compiled with the same MSVC version above
#### Instructions
##### POSIX
If you have gcc or clang installed, you can find out what `ZIG_LIBC_LIB_DIR`,
`ZIG_LIBC_STATIC_LIB_DIR`, and `ZIG_LIBC_INCLUDE_DIR` should be set to
(example below).

View File

@ -12,6 +12,9 @@
#include "os.hpp"
#include "softfloat.hpp"
#include <limits>
#include <algorithm>
static void bigint_normalize(BigInt *dest) {
const uint64_t *digits = bigint_ptr(dest);
@ -539,7 +542,7 @@ void bigint_add(BigInt *dest, const BigInt *op1, const BigInt *op2) {
dest->data.digits[i] = x;
i += 1;
if (!found_digit)
if (!found_digit || i >= bigger_op->digit_count)
break;
}
assert(overflow == 0);
@ -670,19 +673,409 @@ void bigint_mul_wrap(BigInt *dest, const BigInt *op1, const BigInt *op2, size_t
bigint_truncate(dest, &unwrapped, bit_count, is_signed);
}
/// Selects what a bit-counting helper reports when its input is zero.
enum ZeroBehavior {
    /// \brief No guarantee is made about the returned value.
    ZB_Undefined = 0,
    /// \brief The helper returns numeric_limits<T>::max().
    ZB_Max = 1,
    /// \brief The helper returns numeric_limits<T>::digits.
    ZB_Width = 2
};
/// Portable leading-zero counter: used for every width that has no
/// intrinsic-backed specialization below. The ZeroBehavior argument is
/// ignored; a zero input always yields the full bit width of T.
template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
    static std::size_t count(T Val, ZeroBehavior) {
        if (Val == 0) {
            return std::numeric_limits<T>::digits;
        }

        // Binary search for the highest set bit: at each step look at the
        // upper half of what is left. If it is empty, that many bits are
        // leading zeros; otherwise keep searching inside the upper half.
        std::size_t zero_bits = 0;
        T half_width = std::numeric_limits<T>::digits >> 1;
        while (half_width != 0) {
            const T upper = Val >> half_width;
            if (upper != 0) {
                Val = upper;
            } else {
                zero_bits |= half_width;
            }
            half_width >>= 1;
        }
        return zero_bits;
    }
};

#if __GNUC__ >= 4 || defined(_MSC_VER)
/// 32-bit specialization backed by a compiler intrinsic.
template <typename T> struct LeadingZerosCounter<T, 4> {
    static std::size_t count(T Val, ZeroBehavior ZB) {
        // The intrinsics are not defined for 0, so answer that case here
        // unless the caller explicitly opted into an undefined result.
        if (Val == 0 && ZB != ZB_Undefined) {
            return 32;
        }

#if defined(_MSC_VER)
        // _BitScanReverse yields the index of the highest set bit;
        // XOR with 31 converts that index into a leading-zero count.
        unsigned long msb_index;
        _BitScanReverse(&msb_index, Val);
        return msb_index ^ 31;
#else
        return __builtin_clz(Val);
#endif
    }
};

#if !defined(_MSC_VER) || defined(_M_X64)
/// 64-bit specialization backed by a compiler intrinsic. Under MSVC it is
/// only provided when _M_X64 is defined; otherwise the generic template
/// above handles 8-byte types.
template <typename T> struct LeadingZerosCounter<T, 8> {
    static std::size_t count(T Val, ZeroBehavior ZB) {
        if (Val == 0 && ZB != ZB_Undefined) {
            return 64;
        }

#if defined(_MSC_VER)
        unsigned long msb_index;
        _BitScanReverse64(&msb_index, Val);
        return msb_index ^ 63;
#else
        return __builtin_clzll(Val);
#endif
    }
};
#endif
#endif
/// \brief Count number of 0's from the most significant bit to the least
/// stopping at the first 1.
///
/// Only unsigned integral types are allowed.
///
/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
/// valid arguments.
template <typename T>
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
static_assert(std::numeric_limits<T>::is_integer &&
!std::numeric_limits<T>::is_signed,
"Only unsigned integral types are allowed.");
return LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
}
/// Combine two 32-bit halves into one 64-bit value: \p High supplies
/// bits 63..32 and \p Low supplies bits 31..0.
constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
    return (static_cast<uint64_t>(High) << 32) | static_cast<uint64_t>(Low);
}
/// Extract bits 63..32 of a 64-bit value as a 32-bit integer.
constexpr inline uint32_t Hi_32(uint64_t Value) {
    return static_cast<uint32_t>((Value >> 32) & 0xFFFFFFFFu);
}
/// Extract bits 31..0 of a 64-bit value as a 32-bit integer.
constexpr inline uint32_t Lo_32(uint64_t Value) {
    return static_cast<uint32_t>(Value & 0xFFFFFFFFu);
}
/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
/// variables here have the same names as in the algorithm. Comments explain
/// the algorithm and any deviation from it.
///
/// All arrays hold 32-bit words with the least significant word at index 0.
///
/// \param u dividend. Must have room for m+n+1 words because index m+n is
///          written during normalization. Overwritten in place; after the
///          main loop its low n words hold the still-normalized remainder.
/// \param v divisor, n words. Shifted left in place during normalization
///          and not restored afterwards. v[n-1] is expected to be non-zero
///          (the caller in this file trims high zero words before calling).
/// \param q quotient output; q[j] is written for j = m down to 0.
/// \param r optional remainder output; when non-null, r[0..n-1] is written.
/// \param m number of words by which the dividend is longer than the divisor.
/// \param n number of words in the divisor; must be greater than 1.
static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
unsigned m, unsigned n)
{
assert(u && "Must provide dividend");
assert(v && "Must provide divisor");
assert(q && "Must provide quotient");
assert(u != v && u != q && v != q && "Must use different memory");
assert(n>1 && "n must be > 1");
// b denotes the base of the number system. In our case b is 2^32.
const uint64_t b = uint64_t(1) << 32;
// D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
// u and v by d. Note that we have taken Knuth's advice here to use a power
// of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
// 2 allows us to shift instead of multiply and it is easy to determine the
// shift amount from the leading zeros. We are basically normalizing the u
// and v so that its high bits are shifted to the top of v's range without
// overflow. Note that this can require an extra word in u so that u must
// be of length m+n+1.
unsigned shift = countLeadingZeros(v[n-1]);
uint32_t v_carry = 0;
uint32_t u_carry = 0;
// The shift == 0 case is skipped entirely: shifting a 32-bit word right by
// (32 - 0) bits would not be a valid shift.
if (shift) {
for (unsigned i = 0; i < m+n; ++i) {
uint32_t u_tmp = u[i] >> (32 - shift);
u[i] = (u[i] << shift) | u_carry;
u_carry = u_tmp;
}
for (unsigned i = 0; i < n; ++i) {
uint32_t v_tmp = v[i] >> (32 - shift);
v[i] = (v[i] << shift) | v_carry;
v_carry = v_tmp;
}
}
// The bits shifted out of the top dividend word land in the extra word.
u[m+n] = u_carry;
// D2. [Initialize j.] Set j to m. This is the loop counter over the places.
int j = m;
do {
// D3. [Calculate q'.]
// Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
// Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
// Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
// qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
// on v[n-2] determines at high speed most of the cases in which the trial
// value qp is one too large, and it eliminates all cases where qp is two
// too large.
uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
uint64_t qp = dividend / v[n-1];
uint64_t rp = dividend % v[n-1];
if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
qp--;
rp += v[n-1];
// The second correction is only attempted while rp still fits in one word.
if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
qp--;
}
// D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
// (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
// consists of a simple multiplication by a one-place number, combined with
// a subtraction.
// The digits (u[j+n]...u[j]) should be kept positive; if the result of
// this step is actually negative, (u[j+n]...u[j]) should be left as the
// true value plus b**(n+1), namely as the b's complement of
// the true value, and a "borrow" to the left should be remembered.
int64_t borrow = 0;
for (unsigned i = 0; i < n; ++i) {
uint64_t p = uint64_t(qp) * uint64_t(v[i]);
int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
u[j+i] = Lo_32(subres);
// Next borrow = high word of the product minus the high word of the
// signed partial difference.
borrow = Hi_32(p) - Hi_32(subres);
}
// The step went negative if the top word cannot absorb the final borrow.
bool isNeg = u[j+n] < borrow;
u[j+n] -= Lo_32(borrow);
// D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
// negative, go to step D6; otherwise go on to step D7.
q[j] = Lo_32(qp);
if (isNeg) {
// D6. [Add back]. The probability that this step is necessary is very
// small, on the order of only 2/b. Make sure that test data accounts for
// this possibility. Decrease q[j] by 1
q[j]--;
// and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
// A carry will occur to the left of u[j+n], and it should be ignored
// since it cancels with the borrow that occurred in D4.
bool carry = false;
for (unsigned i = 0; i < n; i++) {
// A 32-bit sum that wrapped is smaller than the smaller addend (or equal
// to it when a carry-in was also added); that is how carry-out is detected.
uint32_t limit = std::min(u[j+i],v[i]);
u[j+i] += v[i] + carry;
carry = u[j+i] < limit || (carry && u[j+i] == limit);
}
u[j+n] += carry;
}
// D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
} while (--j >= 0);
// D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
// remainder may be obtained by dividing u[...] by d. If r is non-null we
// compute the remainder (urem uses this).
if (r) {
// The value d is expressed by the "shift" value above since we avoided
// multiplication by d by using a shift left. So, all we have to do is
// shift right here.
if (shift) {
uint32_t carry = 0;
// Walk from the most significant word down so that the bits shifted out
// of each word can be carried into the word below it.
for (int i = n-1; i >= 0; i--) {
r[i] = (u[i] >> shift) | carry;
carry = u[i] << (32 - shift);
}
} else {
for (int i = n-1; i >= 0; i--) {
r[i] = u[i];
}
}
}
}
// Implementation ported from LLVM/lib/Support/APInt.cpp
//
// Divides op1 by op2 using only their digits and produces a quotient and/or
// a remainder whose is_negative flag is false. The callers in this file pass
// non-negative operands and fix up the sign afterwards.
//
//   op1, op2   dividend and divisor. op2 must be non-zero (asserted once the
//              general path is reached).
//   Quotient   receives op1 / op2, or may be nullptr when not wanted.
//   Remainder  receives op1 % op2, or may be nullptr when not wanted.
//
// On the general path the outputs are sized like their source operand
// (quotient: op1->digit_count digits, remainder: op2->digit_count digits) and
// may therefore carry zero high digits; callers in this file run
// bigint_normalize on them.
static void bigint_unsigned_division(const BigInt *op1, const BigInt *op2, BigInt *Quotient, BigInt *Remainder) {
    Cmp cmp = bigint_cmp(op1, op2);
    if (cmp == CmpLT) {
        // op1 < op2: quotient is 0 and the dividend is the remainder.
        if (Quotient != nullptr) {
            bigint_init_unsigned(Quotient, 0);
        }
        if (Remainder != nullptr) {
            bigint_init_bigint(Remainder, op1);
        }
        return;
    }
    if (cmp == CmpEQ) {
        // op1 == op2: quotient is 1 and nothing is left over.
        if (Quotient != nullptr) {
            bigint_init_unsigned(Quotient, 1);
        }
        if (Remainder != nullptr) {
            bigint_init_unsigned(Remainder, 0);
        }
        return;
    }

    const uint64_t *LHS = bigint_ptr(op1);
    const uint64_t *RHS = bigint_ptr(op2);
    unsigned lhsWords = op1->digit_count;
    unsigned rhsWords = op2->digit_count;

    // First, compose the values into an array of 32-bit words instead of
    // 64-bit words. This is a necessity of both the "short division" algorithm
    // and the Knuth "classical algorithm" which requires there to be native
    // operations for +, -, and * on an m bit value with an m*2 bit result. We
    // can't use 64-bit operands here because we don't have native results of
    // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
    // work on big-endian machines.
    unsigned n = rhsWords * 2;
    unsigned m = (lhsWords * 2) - n;

    // Allocate space for the temporary values we need either on the stack, if
    // it will fit, or on the heap if it won't.
    uint32_t SPACE[128];
    uint32_t *U = nullptr;
    uint32_t *V = nullptr;
    uint32_t *Q = nullptr;
    uint32_t *R = nullptr;
    // Remembered so the heap buffers can be released before returning; m and n
    // are adjusted further down and can no longer be used to re-derive this.
    const bool fits_on_stack = (Remainder ? 4 : 3) * n + 2 * m + 1 <= 128;
    if (fits_on_stack) {
        U = &SPACE[0];
        V = &SPACE[m+n+1];
        Q = &SPACE[(m+n+1) + n];
        if (Remainder)
            R = &SPACE[(m+n+1) + n + (m+n)];
    } else {
        U = new uint32_t[m + n + 1];
        V = new uint32_t[n];
        Q = new uint32_t[m+n];
        if (Remainder)
            R = new uint32_t[n];
    }

    // Initialize the dividend
    memset(U, 0, (m+n+1)*sizeof(uint32_t));
    for (unsigned i = 0; i < lhsWords; ++i) {
        uint64_t tmp = LHS[i];
        U[i * 2] = Lo_32(tmp);
        U[i * 2 + 1] = Hi_32(tmp);
    }
    U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.

    // Initialize the divisor
    memset(V, 0, (n)*sizeof(uint32_t));
    for (unsigned i = 0; i < rhsWords; ++i) {
        uint64_t tmp = RHS[i];
        V[i * 2] = Lo_32(tmp);
        V[i * 2 + 1] = Hi_32(tmp);
    }

    // initialize the quotient and remainder
    memset(Q, 0, (m+n) * sizeof(uint32_t));
    if (Remainder)
        memset(R, 0, n * sizeof(uint32_t));

    // Now, adjust m and n for the Knuth division. n is the number of words in
    // the divisor. m is the number of words by which the dividend exceeds the
    // divisor (i.e. m+n is the length of the dividend). These sizes must not
    // contain any zero words or the Knuth algorithm fails.
    for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
        n--;
        m++;
    }
    for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
        m--;

    // If we're left with only a single word for the divisor, Knuth doesn't work
    // so we implement the short division algorithm here. This is much simpler
    // and faster because we are certain that we can divide a 64-bit quantity
    // by a 32-bit quantity at hardware speed and short division is simply a
    // series of such operations. This is just like doing short division but we
    // are using base 2^32 instead of base 10.
    assert(n != 0 && "Divide by zero?");
    if (n == 1) {
        uint32_t divisor = V[0];
        uint32_t remainder = 0;
        for (int i = m; i >= 0; i--) {
            uint64_t partial_dividend = Make_64(remainder, U[i]);
            if (partial_dividend == 0) {
                Q[i] = 0;
                remainder = 0;
            } else if (partial_dividend < divisor) {
                Q[i] = 0;
                remainder = Lo_32(partial_dividend);
            } else if (partial_dividend == divisor) {
                Q[i] = 1;
                remainder = 0;
            } else {
                Q[i] = Lo_32(partial_dividend / divisor);
                remainder = Lo_32(partial_dividend - (Q[i] * divisor));
            }
        }
        if (R)
            R[0] = remainder;
    } else {
        // Now we're ready to invoke the Knuth classical divide algorithm. In this
        // case n > 1.
        KnuthDiv(U, V, Q, R, m, n);
    }

    // If the caller wants the quotient
    if (Quotient) {
        Quotient->is_negative = false;
        Quotient->digit_count = lhsWords;
        if (lhsWords == 1) {
            // One-digit values are kept inline in data.digit elsewhere in this
            // file, so do not hand back a heap pointer for them.
            Quotient->data.digit = Make_64(Q[1], Q[0]);
        } else {
            Quotient->data.digits = allocate<uint64_t>(lhsWords);
            for (size_t i = 0; i < lhsWords; i += 1) {
                Quotient->data.digits[i] = Make_64(Q[i*2+1], Q[i*2]);
            }
        }
    }

    // If the caller wants the remainder
    if (Remainder) {
        Remainder->is_negative = false;
        Remainder->digit_count = rhsWords;
        if (rhsWords == 1) {
            // Same inline representation as above; R was sized and zeroed for
            // two 32-bit words, so R[1] is valid here.
            Remainder->data.digit = Make_64(R[1], R[0]);
        } else {
            Remainder->data.digits = allocate<uint64_t>(rhsWords);
            for (size_t i = 0; i < rhsWords; i += 1) {
                Remainder->data.digits[i] = Make_64(R[i*2+1], R[i*2]);
            }
        }
    }

    // Release the scratch buffers if they did not fit in SPACE.
    // delete[] on a null R (no remainder requested) is a no-op.
    if (!fits_on_stack) {
        delete[] U;
        delete[] V;
        delete[] Q;
        delete[] R;
    }
}
// Truncating division: dest = op1 / op2, rounded toward zero.
//
// The magnitude is the quotient of the operand magnitudes, and the result is
// flagged negative exactly when the operands' sign flags differ.
// bigint_normalize is run on dest before returning.
//
//   dest  receives the quotient.
//   op1   dividend; a digit_count of 0 (zero) yields 0 immediately.
//   op2   divisor; must be non-zero (asserted).
void bigint_div_trunc(BigInt *dest, const BigInt *op1, const BigInt *op2) {
    assert(op2->digit_count != 0); // division by zero
    if (op1->digit_count == 0) {
        bigint_init_unsigned(dest, 0);
        return;
    }

    const uint64_t *op1_digits = bigint_ptr(op1);
    const uint64_t *op2_digits = bigint_ptr(op2);

    // Fast path: both operands fit in one 64-bit digit, so a single hardware
    // division is enough.
    if (op1->digit_count == 1 && op2->digit_count == 1) {
        dest->data.digit = op1_digits[0] / op2_digits[0];
        dest->digit_count = 1;
        dest->is_negative = op1->is_negative != op2->is_negative;
        bigint_normalize(dest);
        return;
    }

    if (op2->digit_count == 1 && op2_digits[0] == 1) {
        // X / 1 == X
        bigint_init_bigint(dest, op1);
        dest->is_negative = op1->is_negative != op2->is_negative;
        bigint_normalize(dest);
        return;
    }

    // General path: divide non-negative copies of the operands and apply the
    // sign afterwards.
    const BigInt *op1_positive;
    BigInt op1_positive_data;
    if (op1->is_negative) {
        bigint_negate(&op1_positive_data, op1);
        op1_positive = &op1_positive_data;
    } else {
        op1_positive = op1;
    }

    const BigInt *op2_positive;
    BigInt op2_positive_data;
    if (op2->is_negative) {
        bigint_negate(&op2_positive_data, op2);
        op2_positive = &op2_positive_data;
    } else {
        op2_positive = op2;
    }

    bigint_unsigned_division(op1_positive, op2_positive, dest, nullptr);
    dest->is_negative = op1->is_negative != op2->is_negative;
    bigint_normalize(dest);
}
@ -714,6 +1107,14 @@ void bigint_rem(BigInt *dest, const BigInt *op1, const BigInt *op2) {
}
const uint64_t *op1_digits = bigint_ptr(op1);
const uint64_t *op2_digits = bigint_ptr(op2);
if (op1->digit_count == 1 && op2->digit_count == 1) {
dest->data.digit = op1_digits[0] % op2_digits[0];
dest->digit_count = 1;
dest->is_negative = op1->is_negative;
bigint_normalize(dest);
return;
}
if (op2->digit_count == 2 && op2_digits[0] == 0 && op2_digits[1] == 1) {
// special case this divisor
bigint_init_unsigned(dest, op1_digits[0]);
@ -721,11 +1122,32 @@ void bigint_rem(BigInt *dest, const BigInt *op1, const BigInt *op2) {
bigint_normalize(dest);
return;
}
if (op1->digit_count != 1 || op2->digit_count != 1) {
zig_panic("TODO bigint rem with >1 digits");
if (op2->digit_count == 1 && op2_digits[0] == 1) {
// X % 1 == 0
bigint_init_unsigned(dest, 0);
return;
}
dest->data.digit = op1_digits[0] % op2_digits[0];
dest->digit_count = 1;
const BigInt *op1_positive;
BigInt op1_positive_data;
if (op1->is_negative) {
bigint_negate(&op1_positive_data, op1);
op1_positive = &op1_positive_data;
} else {
op1_positive = op1;
}
const BigInt *op2_positive;
BigInt op2_positive_data;
if (op2->is_negative) {
bigint_negate(&op2_positive_data, op2);
op2_positive = &op2_positive_data;
} else {
op2_positive = op2;
}
bigint_unsigned_division(op1_positive, op2_positive, nullptr, dest);
dest->is_negative = op1->is_negative;
bigint_normalize(dest);
}

View File

@ -26,6 +26,28 @@ fn testDivision() {
assert(divTrunc(i32, -5, 3) == -1);
assert(divTrunc(f32, 5.0, 3.0) == 1.0);
assert(divTrunc(f32, -5.0, 3.0) == -1.0);
comptime {
assert(
1194735857077236777412821811143690633098347576 %
508740759824825164163191790951174292733114988 ==
177254337427586449086438229241342047632117600);
assert(@rem(-1194735857077236777412821811143690633098347576,
508740759824825164163191790951174292733114988) ==
-177254337427586449086438229241342047632117600);
assert(1194735857077236777412821811143690633098347576 /
508740759824825164163191790951174292733114988 ==
2);
assert(@divTrunc(-1194735857077236777412821811143690633098347576,
508740759824825164163191790951174292733114988) ==
-2);
assert(@divTrunc(1194735857077236777412821811143690633098347576,
-508740759824825164163191790951174292733114988) ==
-2);
assert(@divTrunc(-1194735857077236777412821811143690633098347576,
-508740759824825164163191790951174292733114988) ==
2);
}
}
fn div(comptime T: type, a: T, b: T) -> T {
return a / b;