add strict float mode to some math functions

fixes a test failure for acosh32
2018-05-26 14:57:53 -04:00 · 2018-05-26 14:57:53 -04:00 · 7fa97b752e
parent 8efb3f5e19
commit 7fa97b752e
4 changed files with 13 additions and 2 deletions
--- a/std/math/acosh.zig
+++ b/std/math/acosh.zig
@@ -19,6 +19,8 @@ pub fn acosh(x: var) @typeOf(x) {

 // acosh(x) = log(x + sqrt(x * x - 1))
 fn acosh32(x: f32) f32 {
+    @setFloatMode(this, builtin.FloatMode.Strict);
+
    const u = @bitCast(u32, x);
    const i = u & 0x7FFFFFFF;

@@ -37,6 +39,8 @@ fn acosh32(x: f32) f32 {
 }

 fn acosh64(x: f64) f64 {
+    @setFloatMode(this, builtin.FloatMode.Strict);
+
    const u = @bitCast(u64, x);
    const e = (u >> 52) & 0x7FF;

--- a/std/math/isnan.zig
+++ b/std/math/isnan.zig
@@ -19,8 +19,8 @@ pub fn isNan(x: var) bool {
    }
 }

-// Note: A signalling nan is identical to a standard right now by may have a different bit
-// representation in the future when required.
+/// Note: A signalling nan is identical to a standard nan right now but may have a different bit
+/// representation in the future when required.
 pub fn isSignalNan(x: var) bool {
    return isNan(x);
 }
--- a/std/math/log1p.zig
+++ b/std/math/log1p.zig
@@ -6,6 +6,7 @@
 // - log1p(x)     = nan if x < -1
 // - log1p(nan)   = nan

+const builtin = @import("builtin");
 const std = @import("../index.zig");
 const math = std.math;
 const assert = std.debug.assert;
@@ -20,6 +21,8 @@ pub fn log1p(x: var) @typeOf(x) {
 }

 fn log1p_32(x: f32) f32 {
+    @setFloatMode(this, builtin.FloatMode.Strict);
+
    const ln2_hi = 6.9313812256e-01;
    const ln2_lo = 9.0580006145e-06;
    const Lg1: f32 = 0xaaaaaa.0p-24;
@@ -96,6 +99,8 @@ fn log1p_32(x: f32) f32 {
 }

 fn log1p_64(x: f64) f64 {
+    @setFloatMode(this, builtin.FloatMode.Strict);
+
    const ln2_hi: f64 = 6.93147180369123816490e-01;
    const ln2_lo: f64 = 1.90821492927058770002e-10;
    const Lg1: f64 = 6.666666666666735130e-01;
--- a/std/special/builtin.zig
+++ b/std/special/builtin.zig
@@ -201,6 +201,8 @@ fn isNan(comptime T: type, bits: T) bool {
 // behaviour. Most intermediate i32 values are changed to u32 where appropriate but there are
 // potentially some edge cases remaining that are not handled in the same way.
 export fn sqrt(x: f64) f64 {
+    @setFloatMode(this, builtin.FloatMode.Strict);
+
    const tiny: f64 = 1.0e-300;
    const sign: u32 = 0x80000000;
    const u = @bitCast(u64, x);