From 379950f81debb1e4df4e69511fbebd61911013b4 Mon Sep 17 00:00:00 2001
From: Marc Tiehuis <marctiehuis@gmail.com>
Date: Thu, 28 Jun 2018 20:26:35 +1200
Subject: [PATCH] compiler_rt: Add trunc f128 narrowing functions

---
 std/special/compiler_rt/truncXfYf2.zig      |  16 ++-
 std/special/compiler_rt/truncXfYf2_test.zig | 138 +++++++++++++++-----
 2 files changed, 115 insertions(+), 39 deletions(-)

diff --git a/std/special/compiler_rt/truncXfYf2.zig b/std/special/compiler_rt/truncXfYf2.zig
index f08c6ae34..04b815e86 100644
--- a/std/special/compiler_rt/truncXfYf2.zig
+++ b/std/special/compiler_rt/truncXfYf2.zig
@@ -4,7 +4,13 @@ pub extern fn __truncsfhf2(a: f32) u16 {
     return @bitCast(u16, truncXfYf2(f16, f32, a));
 }
 
-const CHAR_BIT = 8;
+pub extern fn __trunctfsf2(a: f128) f32 { // narrowing conversion: f128 in, f32 out
+    return truncXfYf2(f32, f128, a); // delegate to the generic routine with dst_t = f32, src_t = f128
+}
+
+pub extern fn __trunctfdf2(a: f128) f64 { // narrowing conversion: f128 in, f64 out
+    return truncXfYf2(f64, f128, a); // delegate to the generic routine with dst_t = f64, src_t = f128
+}
 
 inline fn truncXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t {
     const src_rep_t = @IntType(false, @typeInfo(src_t).Float.bits);
@@ -16,7 +22,7 @@ inline fn truncXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t
 
     // Various constants whose values follow from the type parameters.
     // Any reasonable optimizer will fold and propagate all of these.
-    const srcBits = @sizeOf(src_t) * CHAR_BIT;
+    const srcBits = src_t.bit_count;
     const srcExpBits = srcBits - srcSigBits - 1;
     const srcInfExp = (1 << srcExpBits) - 1;
     const srcExpBias = srcInfExp >> 1;
@@ -31,7 +37,7 @@ inline fn truncXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t
     const srcQNaN = 1 << (srcSigBits - 1);
     const srcNaNCode = srcQNaN - 1;
 
-    const dstBits = @sizeOf(dst_t) * CHAR_BIT;
+    const dstBits = dst_t.bit_count;
     const dstExpBits = dstBits - dstSigBits - 1;
     const dstInfExp = (1 << dstExpBits) - 1;
     const dstExpBias = dstInfExp >> 1;
@@ -79,8 +85,8 @@ inline fn truncXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t
         // a underflows on conversion to the destination type or is an exact
         // zero.  The result may be a denormal or zero.  Extract the exponent
         // to get the shift amount for the denormalization.
-        const aExp: u32 = aAbs >> srcSigBits;
-        const shift: u32 = srcExpBias - dstExpBias - aExp + 1;
+        const aExp = aAbs >> srcSigBits;
+        const shift = srcExpBias - dstExpBias - aExp + 1;
 
         const significand: src_rep_t = (aRep & srcSignificandMask) | srcMinNormal;
 
diff --git a/std/special/compiler_rt/truncXfYf2_test.zig b/std/special/compiler_rt/truncXfYf2_test.zig
index e4dae7b5b..c4bf2db73 100644
--- a/std/special/compiler_rt/truncXfYf2_test.zig
+++ b/std/special/compiler_rt/truncXfYf2_test.zig
@@ -11,54 +11,124 @@ fn test__truncsfhf2(a: u32, expected: u16) void {
 }
 
 test "truncsfhf2" {
-    test__truncsfhf2(0x7fc00000, 0x7e00);  // qNaN
-    test__truncsfhf2(0x7fe00000, 0x7f00);  // sNaN
+    test__truncsfhf2(0x7fc00000, 0x7e00); // qNaN
+    test__truncsfhf2(0x7fe00000, 0x7f00); // sNaN
 
-    test__truncsfhf2(0, 0);  // 0
-    test__truncsfhf2(0x80000000, 0x8000);  // -0
+    test__truncsfhf2(0, 0); // 0
+    test__truncsfhf2(0x80000000, 0x8000); // -0
 
-    test__truncsfhf2(0x7f800000, 0x7c00);  // inf
-    test__truncsfhf2(0xff800000, 0xfc00);  // -inf
+    test__truncsfhf2(0x7f800000, 0x7c00); // inf
+    test__truncsfhf2(0xff800000, 0xfc00); // -inf
 
-    test__truncsfhf2(0x477ff000, 0x7c00);  // 65520 -> inf
-    test__truncsfhf2(0xc77ff000, 0xfc00);  // -65520 -> -inf
+    test__truncsfhf2(0x477ff000, 0x7c00); // 65520 -> inf
+    test__truncsfhf2(0xc77ff000, 0xfc00); // -65520 -> -inf
 
-    test__truncsfhf2(0x71cc3892, 0x7c00);  // 0x1.987124876876324p+100 -> inf
-    test__truncsfhf2(0xf1cc3892, 0xfc00);  // -0x1.987124876876324p+100 -> -inf
+    test__truncsfhf2(0x71cc3892, 0x7c00); // 0x1.987124876876324p+100 -> inf
+    test__truncsfhf2(0xf1cc3892, 0xfc00); // -0x1.987124876876324p+100 -> -inf
 
-    test__truncsfhf2(0x38800000, 0x0400);  // normal (min), 2**-14
-    test__truncsfhf2(0xb8800000, 0x8400);  // normal (min), -2**-14
+    test__truncsfhf2(0x38800000, 0x0400); // normal (min), 2**-14
+    test__truncsfhf2(0xb8800000, 0x8400); // normal (min), -2**-14
 
-    test__truncsfhf2(0x477fe000, 0x7bff);  // normal (max), 65504
-    test__truncsfhf2(0xc77fe000, 0xfbff);  // normal (max), -65504
+    test__truncsfhf2(0x477fe000, 0x7bff); // normal (max), 65504
+    test__truncsfhf2(0xc77fe000, 0xfbff); // normal (max), -65504
 
-    test__truncsfhf2(0x477fe100, 0x7bff);  // normal, 65505 -> 65504
-    test__truncsfhf2(0xc77fe100, 0xfbff);  // normal, -65505 -> -65504
+    test__truncsfhf2(0x477fe100, 0x7bff); // normal, 65505 -> 65504
+    test__truncsfhf2(0xc77fe100, 0xfbff); // normal, -65505 -> -65504
 
-    test__truncsfhf2(0x477fef00, 0x7bff);  // normal, 65519 -> 65504
-    test__truncsfhf2(0xc77fef00, 0xfbff);  // normal, -65519 -> -65504
+    test__truncsfhf2(0x477fef00, 0x7bff); // normal, 65519 -> 65504
+    test__truncsfhf2(0xc77fef00, 0xfbff); // normal, -65519 -> -65504
 
-    test__truncsfhf2(0x3f802000, 0x3c01);  // normal, 1 + 2**-10
-    test__truncsfhf2(0xbf802000, 0xbc01);  // normal, -1 - 2**-10
+    test__truncsfhf2(0x3f802000, 0x3c01); // normal, 1 + 2**-10
+    test__truncsfhf2(0xbf802000, 0xbc01); // normal, -1 - 2**-10
 
-    test__truncsfhf2(0x3eaaa000, 0x3555);  // normal, approx. 1/3
-    test__truncsfhf2(0xbeaaa000, 0xb555);  // normal, approx. -1/3
+    test__truncsfhf2(0x3eaaa000, 0x3555); // normal, approx. 1/3
+    test__truncsfhf2(0xbeaaa000, 0xb555); // normal, approx. -1/3
 
-    test__truncsfhf2(0x40490fdb, 0x4248);  // normal, 3.1415926535
-    test__truncsfhf2(0xc0490fdb, 0xc248);  // normal, -3.1415926535
+    test__truncsfhf2(0x40490fdb, 0x4248); // normal, 3.1415926535
+    test__truncsfhf2(0xc0490fdb, 0xc248); // normal, -3.1415926535
 
-    test__truncsfhf2(0x45cc3892, 0x6e62);  // normal, 0x1.987124876876324p+12
+    test__truncsfhf2(0x45cc3892, 0x6e62); // normal, 0x1.987124876876324p+12
 
-    test__truncsfhf2(0x3f800000, 0x3c00);  // normal, 1
-    test__truncsfhf2(0x38800000, 0x0400);  // normal, 0x1.0p-14
+    test__truncsfhf2(0x3f800000, 0x3c00); // normal, 1
+    test__truncsfhf2(0x38800000, 0x0400); // normal, 0x1.0p-14
 
-    test__truncsfhf2(0x33800000, 0x0001);  // denormal (min), 2**-24
-    test__truncsfhf2(0xb3800000, 0x8001);  // denormal (min), -2**-24
+    test__truncsfhf2(0x33800000, 0x0001); // denormal (min), 2**-24
+    test__truncsfhf2(0xb3800000, 0x8001); // denormal (min), -2**-24
 
-    test__truncsfhf2(0x387fc000, 0x03ff);  // denormal (max), 2**-14 - 2**-24
-    test__truncsfhf2(0xb87fc000, 0x83ff);  // denormal (max), -2**-14 + 2**-24
+    test__truncsfhf2(0x387fc000, 0x03ff); // denormal (max), 2**-14 - 2**-24
+    test__truncsfhf2(0xb87fc000, 0x83ff); // denormal (max), -2**-14 + 2**-24
 
-    test__truncsfhf2(0x35800000, 0x0010);  // denormal, 0x1.0p-20
-    test__truncsfhf2(0x33280000, 0x0001);  // denormal, 0x1.5p-25 -> 0x1.0p-24
-    test__truncsfhf2(0x33000000, 0x0000);  // 0x1.0p-25 -> zero
+    test__truncsfhf2(0x35800000, 0x0010); // denormal, 0x1.0p-20
+    test__truncsfhf2(0x33280000, 0x0001); // denormal, 0x1.5p-25 -> 0x1.0p-24
+    test__truncsfhf2(0x33000000, 0x0000); // 0x1.0p-25 -> zero
+}
+
+const __trunctfsf2 = @import("truncXfYf2.zig").__trunctfsf2;
+
+fn test__trunctfsf2(a: f128, expected: u32) void { // `expected` is the raw f32 bit pattern
+    const x = __trunctfsf2(a);
+
+    const rep = @bitCast(u32, x); // compare bit patterns, not float values
+    if (rep == expected) {
+        return;
+    }
+    // When the canonical quiet NaN is expected, accept any f32 NaN encoding.
+    else if (expected == 0x7fc00000) {
+        if ((rep & 0x7f800000) == 0x7f800000 and (rep & 0x7fffff) > 0) { // exponent all ones, significand non-zero
+            return;
+        }
+    }
+
+    @panic("__trunctfsf2 test failure");
+}
+
+test "trunctfsf2" {
+    // quiet NaN (exponent all ones, top significand bit set); the helper accepts any f32 NaN here
+    test__trunctfsf2(@bitCast(f128, u128(0x7fff800000000000 << 64)), 0x7fc00000);
+    // NaN carrying an extra payload bit; the expected pattern must match exactly
+    test__trunctfsf2(@bitCast(f128, u128((0x7fff000000000000 | (0x810000000000 & 0xffffffffffff)) << 64)), 0x7fc08000);
+    // +inf stays +inf
+    test__trunctfsf2(@bitCast(f128, u128(0x7fff000000000000 << 64)), 0x7f800000);
+    // +0.0 stays +0.0
+    test__trunctfsf2(0.0, 0x0);
+
+    test__trunctfsf2(0x1.23a2abb4a2ddee355f36789abcdep+5, 0x4211d156); // normal, discarded bits round up
+    test__trunctfsf2(0x1.e3d3c45bd3abfd98b76a54cc321fp-9, 0x3b71e9e2); // normal, discarded bits round down
+    test__trunctfsf2(0x1.234eebb5faa678f4488693abcdefp+4534, 0x7f800000); // too large for f32 -> +inf
+    test__trunctfsf2(0x1.edcba9bb8c76a5a43dd21f334634p-435, 0x0); // too small for f32 -> +0.0
+}
+
+const __trunctfdf2 = @import("truncXfYf2.zig").__trunctfdf2;
+
+fn test__trunctfdf2(a: f128, expected: u64) void { // `expected` is the raw f64 bit pattern
+    const x = __trunctfdf2(a);
+
+    const rep = @bitCast(u64, x); // compare bit patterns, not float values
+    if (rep == expected) {
+        return;
+    }
+    // When the canonical quiet NaN is expected, accept any f64 NaN encoding.
+    else if (expected == 0x7ff8000000000000) {
+        if ((rep & 0x7ff0000000000000) == 0x7ff0000000000000 and (rep & 0xfffffffffffff) > 0) { // exponent all ones, significand non-zero
+            return;
+        }
+    }
+
+    @panic("__trunctfdf2 test failure");
+}
+
+test "trunctfdf2" {
+    // quiet NaN (exponent all ones, top significand bit set); the helper accepts any f64 NaN here
+    test__trunctfdf2(@bitCast(f128, u128(0x7fff800000000000 << 64)), 0x7ff8000000000000);
+    // NaN carrying an extra payload bit; the expected pattern must match exactly
+    test__trunctfdf2(@bitCast(f128, u128((0x7fff000000000000 | (0x810000000000 & 0xffffffffffff)) << 64)), 0x7ff8100000000000);
+    // +inf stays +inf
+    test__trunctfdf2(@bitCast(f128, u128(0x7fff000000000000 << 64)), 0x7ff0000000000000);
+    // +0.0 stays +0.0
+    test__trunctfdf2(0.0, 0x0);
+
+    test__trunctfdf2(0x1.af23456789bbaaab347645365cdep+5, 0x404af23456789bbb); // normal, discarded bits round up
+    test__trunctfdf2(0x1.dedafcff354b6ae9758763545432p-9, 0x3f6dedafcff354b7); // normal, discarded bits round up
+    test__trunctfdf2(0x1.2f34dd5f437e849b4baab754cdefp+4534, 0x7ff0000000000000); // too large for f64 -> +inf
+    test__trunctfdf2(0x1.edcbff8ad76ab5bf46463233214fp-435, 0x24cedcbff8ad76ab); // still a normal f64, discarded bits round down
+}