add strict float mode to some math functions

fixes a test failure for acosh32
master
Andrew Kelley 2018-05-26 14:57:53 -04:00
parent 8efb3f5e19
commit 7fa97b752e
4 changed files with 13 additions and 2 deletions

View File

@ -19,6 +19,8 @@ pub fn acosh(x: var) @typeOf(x) {
// acosh(x) = log(x + sqrt(x * x - 1))
fn acosh32(x: f32) f32 {
@setFloatMode(this, builtin.FloatMode.Strict);
const u = @bitCast(u32, x);
const i = u & 0x7FFFFFFF;
@ -37,6 +39,8 @@ fn acosh32(x: f32) f32 {
}
fn acosh64(x: f64) f64 {
@setFloatMode(this, builtin.FloatMode.Strict);
const u = @bitCast(u64, x);
const e = (u >> 52) & 0x7FF;

View File

@ -19,8 +19,8 @@ pub fn isNan(x: var) bool {
}
}
// Note: A signalling nan is identical to a standard right now by may have a different bit
// representation in the future when required.
/// Reports whether `x` is a signalling nan.
/// Note: for now a signalling nan is not distinguished from a standard nan, so this
/// simply forwards to `isNan`; the bit representation may diverge in the future
/// when required.
pub fn isSignalNan(x: var) bool {
    const is_nan = isNan(x);
    return is_nan;
}

View File

@ -6,6 +6,7 @@
// - log1p(x) = nan if x < -1
// - log1p(nan) = nan
const builtin = @import("builtin");
const std = @import("../index.zig");
const math = std.math;
const assert = std.debug.assert;
@ -20,6 +21,8 @@ pub fn log1p(x: var) @typeOf(x) {
}
fn log1p_32(x: f32) f32 {
@setFloatMode(this, builtin.FloatMode.Strict);
const ln2_hi = 6.9313812256e-01;
const ln2_lo = 9.0580006145e-06;
const Lg1: f32 = 0xaaaaaa.0p-24;
@ -96,6 +99,8 @@ fn log1p_32(x: f32) f32 {
}
fn log1p_64(x: f64) f64 {
@setFloatMode(this, builtin.FloatMode.Strict);
const ln2_hi: f64 = 6.93147180369123816490e-01;
const ln2_lo: f64 = 1.90821492927058770002e-10;
const Lg1: f64 = 6.666666666666735130e-01;

View File

@ -201,6 +201,8 @@ fn isNan(comptime T: type, bits: T) bool {
// behaviour. Most intermediate i32 values are changed to u32 where appropriate but there are
// potentially some edge cases remaining that are not handled in the same way.
export fn sqrt(x: f64) f64 {
@setFloatMode(this, builtin.FloatMode.Strict);
const tiny: f64 = 1.0e-300;
const sign: u32 = 0x80000000;
const u = @bitCast(u64, x);