implement bigint div and rem

See #405
2018-01-15 22:17:22 -05:00 · 2018-01-15 22:17:22 -05:00 · 84d8584c5b
commit 84d8584c5b
parent 92fc5947fc
3 changed files with 458 additions and 11 deletions
--- a/README.md
+++ b/README.md
@ -125,17 +125,20 @@ libc. Create demo games using Zig.

 ##### POSIX

- * gcc >= 5.0.0 or clang >= 3.6.0
 * cmake >= 2.8.5
+ * gcc >= 5.0.0 or clang >= 3.6.0
 * LLVM, Clang, LLD libraries == 5.x, compiled with the same gcc or clang version above

 ##### Windows

+ * cmake >= 2.8.5
 * Microsoft Visual Studio 2015
 * LLVM, Clang, LLD libraries == 5.x, compiled with the same MSVC version above

 #### Instructions

+##### POSIX
+
 If you have gcc or clang installed, you can find out what `ZIG_LIBC_LIB_DIR`,
 `ZIG_LIBC_STATIC_LIB_DIR`, and `ZIG_LIBC_INCLUDE_DIR` should be set to
 (example below).
--- a/src/bigint.cpp
+++ b/src/bigint.cpp
@ -12,6 +12,9 @@
 #include "os.hpp"
 #include "softfloat.hpp"

+#include <limits>
+#include <algorithm>
+
 static void bigint_normalize(BigInt *dest) {
    const uint64_t *digits = bigint_ptr(dest);

@ -539,7 +542,7 @@ void bigint_add(BigInt *dest, const BigInt *op1, const BigInt *op2) {
        dest->data.digits[i] = x;
        i += 1;

-        if (!found_digit)
+        if (!found_digit || i >= bigger_op->digit_count)
            break;
    }
    assert(overflow == 0);
@ -670,19 +673,409 @@ void bigint_mul_wrap(BigInt *dest, const BigInt *op1, const BigInt *op2, size_t
    bigint_truncate(dest, &unwrapped, bit_count, is_signed);
 }

+enum ZeroBehavior {
+  /// \brief The returned value is undefined.
+  ZB_Undefined,
+  /// \brief The returned value is numeric_limits<T>::max()
+  ZB_Max,
+  /// \brief The returned value is numeric_limits<T>::digits
+  ZB_Width
+};
+
+template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
+  static std::size_t count(T Val, ZeroBehavior) {
+    if (!Val)
+      return std::numeric_limits<T>::digits;
+
+    // Bisection method.
+    std::size_t ZeroBits = 0;
+    for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
+      T Tmp = Val >> Shift;
+      if (Tmp)
+        Val = Tmp;
+      else
+        ZeroBits |= Shift;
+    }
+    return ZeroBits;
+  }
+};
+
+#if __GNUC__ >= 4 || defined(_MSC_VER)
+template <typename T> struct LeadingZerosCounter<T, 4> {
+  static std::size_t count(T Val, ZeroBehavior ZB) {
+    if (ZB != ZB_Undefined && Val == 0)
+      return 32;
+
+#if defined(_MSC_VER)
+    unsigned long Index;
+    _BitScanReverse(&Index, Val);
+    return Index ^ 31;
+#else
+    return __builtin_clz(Val);
+#endif
+  }
+};
+
+#if !defined(_MSC_VER) || defined(_M_X64)
+template <typename T> struct LeadingZerosCounter<T, 8> {
+  static std::size_t count(T Val, ZeroBehavior ZB) {
+    if (ZB != ZB_Undefined && Val == 0)
+      return 64;
+
+#if defined(_MSC_VER)
+    unsigned long Index;
+    _BitScanReverse64(&Index, Val);
+    return Index ^ 63;
+#else
+    return __builtin_clzll(Val);
+#endif
+  }
+};
+#endif
+#endif
+
+/// \brief Count number of 0's from the most significant bit to the least
+///   stopping at the first 1.
+///
+/// Only unsigned integral types are allowed.
+///
+/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
+///   valid arguments.
+template <typename T>
+std::size_t countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
+  static_assert(std::numeric_limits<T>::is_integer &&
+                    !std::numeric_limits<T>::is_signed,
+                "Only unsigned integral types are allowed.");
+  return LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
+}
+
+/// Make a 64-bit integer from a high / low pair of 32-bit integers.
+constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
+  return ((uint64_t)High << 32) | (uint64_t)Low;
+}
+
+/// Return the high 32 bits of a 64 bit value.
+constexpr inline uint32_t Hi_32(uint64_t Value) {
+  return static_cast<uint32_t>(Value >> 32);
+}
+
+/// Return the low 32 bits of a 64 bit value.
+constexpr inline uint32_t Lo_32(uint64_t Value) {
+  return static_cast<uint32_t>(Value);
+}
+
+/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
+/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
+/// variables here have the same names as in the algorithm. Comments explain
+/// the algorithm and any deviation from it.
+static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
+                     unsigned m, unsigned n)
+{
+    assert(u && "Must provide dividend");
+    assert(v && "Must provide divisor");
+    assert(q && "Must provide quotient");
+    assert(u != v && u != q && v != q && "Must use different memory");
+    assert(n>1 && "n must be > 1");
+
+    // b denotes the base of the number system. In our case b is 2^32.
+    const uint64_t b = uint64_t(1) << 32;
+
+    // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
+    // u and v by d. Note that we have taken Knuth's advice here to use a power
+    // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
+    // 2 allows us to shift instead of multiply and it is easy to determine the
+    // shift amount from the leading zeros.  We are basically normalizing the u
+    // and v so that its high bits are shifted to the top of v's range without
+    // overflow. Note that this can require an extra word in u so that u must
+    // be of length m+n+1.
+    unsigned shift = countLeadingZeros(v[n-1]);
+    uint32_t v_carry = 0;
+    uint32_t u_carry = 0;
+    if (shift) {
+        for (unsigned i = 0; i < m+n; ++i) {
+            uint32_t u_tmp = u[i] >> (32 - shift);
+            u[i] = (u[i] << shift) | u_carry;
+            u_carry = u_tmp;
+        }
+        for (unsigned i = 0; i < n; ++i) {
+            uint32_t v_tmp = v[i] >> (32 - shift);
+            v[i] = (v[i] << shift) | v_carry;
+            v_carry = v_tmp;
+        }
+    }
+    u[m+n] = u_carry;
+
+    // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
+    int j = m;
+    do {
+        // D3. [Calculate q'.].
+        //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
+        //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
+        // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
+        // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
+        // on v[n-2] determines at high speed most of the cases in which the trial
+        // value qp is one too large, and it eliminates all cases where qp is two
+        // too large.
+        uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
+        uint64_t qp = dividend / v[n-1];
+        uint64_t rp = dividend % v[n-1];
+        if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
+            qp--;
+            rp += v[n-1];
+            if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
+                qp--;
+        }
+
+        // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
+        // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
+        // consists of a simple multiplication by a one-place number, combined with
+        // a subtraction.
+        // The digits (u[j+n]...u[j]) should be kept positive; if the result of
+        // this step is actually negative, (u[j+n]...u[j]) should be left as the
+        // true value plus b**(n+1), namely as the b's complement of
+        // the true value, and a "borrow" to the left should be remembered.
+        int64_t borrow = 0;
+        for (unsigned i = 0; i < n; ++i) {
+            uint64_t p = uint64_t(qp) * uint64_t(v[i]);
+            int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
+            u[j+i] = Lo_32(subres);
+            borrow = Hi_32(p) - Hi_32(subres);
+        }
+        bool isNeg = u[j+n] < borrow;
+        u[j+n] -= Lo_32(borrow);
+
+        // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
+        // negative, go to step D6; otherwise go on to step D7.
+        q[j] = Lo_32(qp);
+        if (isNeg) {
+            // D6. [Add back]. The probability that this step is necessary is very
+            // small, on the order of only 2/b. Make sure that test data accounts for
+            // this possibility. Decrease q[j] by 1
+            q[j]--;
+            // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
+            // A carry will occur to the left of u[j+n], and it should be ignored
+            // since it cancels with the borrow that occurred in D4.
+            bool carry = false;
+            for (unsigned i = 0; i < n; i++) {
+                uint32_t limit = std::min(u[j+i],v[i]);
+                u[j+i] += v[i] + carry;
+                carry = u[j+i] < limit || (carry && u[j+i] == limit);
+            }
+            u[j+n] += carry;
+        }
+
+        // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
+    } while (--j >= 0);
+
+    // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
+    // remainder may be obtained by dividing u[...] by d. If r is non-null we
+    // compute the remainder (urem uses this).
+    if (r) {
+        // The value d is expressed by the "shift" value above since we avoided
+        // multiplication by d by using a shift left. So, all we have to do is
+        // shift right here.
+        if (shift) {
+            uint32_t carry = 0;
+            for (int i = n-1; i >= 0; i--) {
+                r[i] = (u[i] >> shift) | carry;
+                carry = u[i] << (32 - shift);
+            }
+        } else {
+            for (int i = n-1; i >= 0; i--) {
+                r[i] = u[i];
+            }
+        }
+    }
+}
+
+// Implementation ported from LLVM/lib/Support/APInt.cpp
+static void bigint_unsigned_division(const BigInt *op1, const BigInt *op2, BigInt *Quotient, BigInt *Remainder) {
+    Cmp cmp = bigint_cmp(op1, op2);
+    if (cmp == CmpLT) {
+        if (Quotient != nullptr) {
+            bigint_init_unsigned(Quotient, 0);
+        }
+        if (Remainder != nullptr) {
+            bigint_init_bigint(Remainder, op1);
+        }
+        return;
+    }
+    if (cmp == CmpEQ) {
+        if (Quotient != nullptr) {
+            bigint_init_unsigned(Quotient, 1);
+        }
+        if (Remainder != nullptr) {
+            bigint_init_unsigned(Remainder, 0);
+        }
+        return;
+    }
+
+    const uint64_t *LHS = bigint_ptr(op1);
+    const uint64_t *RHS = bigint_ptr(op2);
+    unsigned lhsWords = op1->digit_count;
+    unsigned rhsWords = op2->digit_count;
+
+    // First, compose the values into an array of 32-bit words instead of
+    // 64-bit words. This is a necessity of both the "short division" algorithm
+    // and the Knuth "classical algorithm" which requires there to be native
+    // operations for +, -, and * on an m bit value with an m*2 bit result. We
+    // can't use 64-bit operands here because we don't have native results of
+    // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
+    // work on large-endian machines.
+    unsigned n = rhsWords * 2;
+    unsigned m = (lhsWords * 2) - n;
+
+    // Allocate space for the temporary values we need either on the stack, if
+    // it will fit, or on the heap if it won't.
+    uint32_t SPACE[128];
+    uint32_t *U = nullptr;
+    uint32_t *V = nullptr;
+    uint32_t *Q = nullptr;
+    uint32_t *R = nullptr;
+    if ((Remainder?4:3)*n+2*m+1 <= 128) {
+        U = &SPACE[0];
+        V = &SPACE[m+n+1];
+        Q = &SPACE[(m+n+1) + n];
+        if (Remainder)
+            R = &SPACE[(m+n+1) + n + (m+n)];
+    } else {
+        U = new uint32_t[m + n + 1];
+        V = new uint32_t[n];
+        Q = new uint32_t[m+n];
+        if (Remainder)
+            R = new uint32_t[n];
+    }
+
+    // Initialize the dividend
+    memset(U, 0, (m+n+1)*sizeof(uint32_t));
+    for (unsigned i = 0; i < lhsWords; ++i) {
+        uint64_t tmp = LHS[i];
+        U[i * 2] = Lo_32(tmp);
+        U[i * 2 + 1] = Hi_32(tmp);
+    }
+    U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
+
+    // Initialize the divisor
+    memset(V, 0, (n)*sizeof(uint32_t));
+    for (unsigned i = 0; i < rhsWords; ++i) {
+        uint64_t tmp = RHS[i];
+        V[i * 2] = Lo_32(tmp);
+        V[i * 2 + 1] = Hi_32(tmp);
+    }
+
+    // initialize the quotient and remainder
+    memset(Q, 0, (m+n) * sizeof(uint32_t));
+    if (Remainder)
+        memset(R, 0, n * sizeof(uint32_t));
+
+    // Now, adjust m and n for the Knuth division. n is the number of words in
+    // the divisor. m is the number of words by which the dividend exceeds the
+    // divisor (i.e. m+n is the length of the dividend). These sizes must not
+    // contain any zero words or the Knuth algorithm fails.
+    for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
+        n--;
+        m++;
+    }
+    for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
+        m--;
+
+    // If we're left with only a single word for the divisor, Knuth doesn't work
+    // so we implement the short division algorithm here. This is much simpler
+    // and faster because we are certain that we can divide a 64-bit quantity
+    // by a 32-bit quantity at hardware speed and short division is simply a
+    // series of such operations. This is just like doing short division but we
+    // are using base 2^32 instead of base 10.
+    assert(n != 0 && "Divide by zero?");
+    if (n == 1) {
+        uint32_t divisor = V[0];
+        uint32_t remainder = 0;
+        for (int i = m; i >= 0; i--) {
+            uint64_t partial_dividend = Make_64(remainder, U[i]);
+            if (partial_dividend == 0) {
+                Q[i] = 0;
+                remainder = 0;
+            } else if (partial_dividend < divisor) {
+                Q[i] = 0;
+                remainder = Lo_32(partial_dividend);
+            } else if (partial_dividend == divisor) {
+                Q[i] = 1;
+                remainder = 0;
+            } else {
+                Q[i] = Lo_32(partial_dividend / divisor);
+                remainder = Lo_32(partial_dividend - (Q[i] * divisor));
+            }
+        }
+        if (R)
+            R[0] = remainder;
+    } else {
+        // Now we're ready to invoke the Knuth classical divide algorithm. In this
+        // case n > 1.
+        KnuthDiv(U, V, Q, R, m, n);
+    }
+
+    // If the caller wants the quotient
+    if (Quotient) {
+        Quotient->digit_count = lhsWords;
+        Quotient->data.digits = allocate<uint64_t>(lhsWords);
+        Quotient->is_negative = false;
+        for (size_t i = 0; i < lhsWords; i += 1) {
+            Quotient->data.digits[i] = Make_64(Q[i*2+1], Q[i*2]);
+        }
+    }
+
+    // If the caller wants the remainder
+    if (Remainder) {
+        Remainder->digit_count = rhsWords;
+        Remainder->data.digits = allocate<uint64_t>(rhsWords);
+        Remainder->is_negative = false;
+        for (size_t i = 0; i < rhsWords; i += 1) {
+            Remainder->data.digits[i] = Make_64(R[i*2+1], R[i*2]);
+        }
+    }
+}
+
 void bigint_div_trunc(BigInt *dest, const BigInt *op1, const BigInt *op2) {
    assert(op2->digit_count != 0); // division by zero
    if (op1->digit_count == 0) {
        bigint_init_unsigned(dest, 0);
        return;
    }
-    if (op1->digit_count != 1 || op2->digit_count != 1) {
-        zig_panic("TODO bigint div_trunc with >1 digits");
-    }
    const uint64_t *op1_digits = bigint_ptr(op1);
    const uint64_t *op2_digits = bigint_ptr(op2);
-    dest->data.digit = op1_digits[0] / op2_digits[0];
-    dest->digit_count = 1;
+    if (op1->digit_count == 1 && op2->digit_count == 1) {
+        dest->data.digit = op1_digits[0] / op2_digits[0];
+        dest->digit_count = 1;
+        dest->is_negative = op1->is_negative != op2->is_negative;
+        bigint_normalize(dest);
+        return;
+    }
+    if (op2->digit_count == 1 && op2_digits[0] == 1) {
+        // X / 1 == X
+        bigint_init_bigint(dest, op1);
+        dest->is_negative = op1->is_negative != op2->is_negative;
+        bigint_normalize(dest);
+        return;
+    }
+
+    const BigInt *op1_positive;
+    BigInt op1_positive_data;
+    if (op1->is_negative) {
+        bigint_negate(&op1_positive_data, op1);
+        op1_positive = &op1_positive_data;
+    } else {
+        op1_positive = op1;
+    }
+
+    const BigInt *op2_positive;
+    BigInt op2_positive_data;
+    if (op2->is_negative) {
+        bigint_negate(&op2_positive_data, op2);
+        op2_positive = &op2_positive_data;
+    } else {
+        op2_positive = op2;
+    }
+
+    bigint_unsigned_division(op1_positive, op2_positive, dest, nullptr);
    dest->is_negative = op1->is_negative != op2->is_negative;
    bigint_normalize(dest);
 }
@ -714,6 +1107,14 @@ void bigint_rem(BigInt *dest, const BigInt *op1, const BigInt *op2) {
    }
    const uint64_t *op1_digits = bigint_ptr(op1);
    const uint64_t *op2_digits = bigint_ptr(op2);
+
+    if (op1->digit_count == 1 && op2->digit_count == 1) {
+        dest->data.digit = op1_digits[0] % op2_digits[0];
+        dest->digit_count = 1;
+        dest->is_negative = op1->is_negative;
+        bigint_normalize(dest);
+        return;
+    }
    if (op2->digit_count == 2 && op2_digits[0] == 0 && op2_digits[1] == 1) {
        // special case this divisor
        bigint_init_unsigned(dest, op1_digits[0]);
@ -721,11 +1122,32 @@ void bigint_rem(BigInt *dest, const BigInt *op1, const BigInt *op2) {
        bigint_normalize(dest);
        return;
    }
-    if (op1->digit_count != 1 || op2->digit_count != 1) {
-        zig_panic("TODO bigint rem with >1 digits");
+
+    if (op2->digit_count == 1 && op2_digits[0] == 1) {
+        // X % 1 == 0
+        bigint_init_unsigned(dest, 0);
+        return;
    }
-    dest->data.digit = op1_digits[0] % op2_digits[0];
-    dest->digit_count = 1;
+
+    const BigInt *op1_positive;
+    BigInt op1_positive_data;
+    if (op1->is_negative) {
+        bigint_negate(&op1_positive_data, op1);
+        op1_positive = &op1_positive_data;
+    } else {
+        op1_positive = op1;
+    }
+
+    const BigInt *op2_positive;
+    BigInt op2_positive_data;
+    if (op2->is_negative) {
+        bigint_negate(&op2_positive_data, op2);
+        op2_positive = &op2_positive_data;
+    } else {
+        op2_positive = op2;
+    }
+
+    bigint_unsigned_division(op1_positive, op2_positive, nullptr, dest);
    dest->is_negative = op1->is_negative;
    bigint_normalize(dest);
 }
--- a/test/cases/math.zig
+++ b/test/cases/math.zig
@ -26,6 +26,28 @@ fn testDivision() {
    assert(divTrunc(i32, -5, 3) == -1);
    assert(divTrunc(f32, 5.0, 3.0) == 1.0);
    assert(divTrunc(f32, -5.0, 3.0) == -1.0);
+
+    comptime {
+        assert(
+            1194735857077236777412821811143690633098347576 %
+            508740759824825164163191790951174292733114988 ==
+            177254337427586449086438229241342047632117600);
+        assert(@rem(-1194735857077236777412821811143690633098347576,
+            508740759824825164163191790951174292733114988) ==
+            -177254337427586449086438229241342047632117600);
+        assert(1194735857077236777412821811143690633098347576 /
+            508740759824825164163191790951174292733114988 ==
+            2);
+        assert(@divTrunc(-1194735857077236777412821811143690633098347576,
+            508740759824825164163191790951174292733114988) ==
+            -2);
+        assert(@divTrunc(1194735857077236777412821811143690633098347576,
+            -508740759824825164163191790951174292733114988) ==
+            -2);
+        assert(@divTrunc(-1194735857077236777412821811143690633098347576,
+            -508740759824825164163191790951174292733114988) ==
+            2);
+    }
 }
 fn div(comptime T: type, a: T, b: T) -> T {
    return a / b;