Default to strict IEEE floating point

Closes #1227.
2018-08-23 21:42:09 +12:00 · 2018-08-23 21:42:09 +12:00 · 353419f82d
commit 353419f82d
parent 4b68ef45af
18 changed files with 17 additions and 61 deletions
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@ -744,19 +744,19 @@ const yet_another_hex_float = 0x103.70P-5;
      {#code_end#}
      {#header_close#}
      {#header_open|Floating Point Operations#}
-      <p>By default floating point operations use <code>Optimized</code> mode,
+      <p>By default floating point operations use <code>Strict</code> mode,
-      but you can switch to <code>Strict</code> mode on a per-block basis:</p>
+      but you can switch to <code>Optimized</code> mode on a per-block basis:</p>
      {#code_begin|obj|foo#}
      {#code_release_fast#}
 const builtin = @import("builtin");
 const big = f64(1 << 40);
 export fn foo_strict(x: f64) f64 {
    @setFloatMode(this, builtin.FloatMode.Strict);
    return x + big - big;
 }
 export fn foo_optimized(x: f64) f64 {
    @setFloatMode(this, builtin.FloatMode.Optimized);
    return x + big - big;
 }
      {#code_end#}
@ -5948,7 +5948,7 @@ pub const FloatMode = enum {
      {#code_end#}
      <ul>
        <li>
-          <code>Optimized</code> (default) - Floating point operations may do all of the following:
+          <code>Optimized</code> - Floating point operations may do all of the following:
          <ul>
            <li>Assume the arguments and result are not NaN. Optimizations are required to retain defined behavior over NaNs, but the value of the result is undefined.</li>
            <li>Assume the arguments and result are not +/-Inf. Optimizations are required to retain defined behavior over +/-Inf, but the value of the result is undefined.</li>
@ -5960,7 +5960,7 @@ pub const FloatMode = enum {
          This is equivalent to <code>-ffast-math</code> in GCC.
        </li>
        <li>
-          <code>Strict</code> - Floating point operations follow strict IEEE compliance.
+          <code>Strict</code> (default) - Floating point operations follow strict IEEE compliance.
        </li>
      </ul>
      {#see_also|Floating Point Operations#}
--- a/src/all_types.hpp
+++ b/src/all_types.hpp
@ -1852,7 +1852,7 @@ struct ScopeDecls {
    HashMap<Buf *, Tld *, buf_hash, buf_eql_buf> decl_table;
    bool safety_off;
    AstNode *safety_set_node;
-    bool fast_math_off;
+    bool fast_math_on;
    AstNode *fast_math_set_node;
    ImportTableEntry *import;
    // If this is a scope from a container, this is the type entry, otherwise null
@ -1872,7 +1872,7 @@ struct ScopeBlock {
    bool safety_off;
    AstNode *safety_set_node;
-    bool fast_math_off;
+    bool fast_math_on;
    AstNode *fast_math_set_node;
 };
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@ -829,15 +829,15 @@ static bool ir_want_fast_math(CodeGen *g, IrInstruction *instruction) {
        if (scope->id == ScopeIdBlock) {
            ScopeBlock *block_scope = (ScopeBlock *)scope;
            if (block_scope->fast_math_set_node)
-                return !block_scope->fast_math_off;
+                return block_scope->fast_math_on;
        } else if (scope->id == ScopeIdDecls) {
            ScopeDecls *decls_scope = (ScopeDecls *)scope;
            if (decls_scope->fast_math_set_node)
-                return !decls_scope->fast_math_off;
+                return decls_scope->fast_math_on;
        }
        scope = scope->parent;
    }
-    return true;
+    return false;
 }
 static bool ir_want_runtime_safety(CodeGen *g, IrInstruction *instruction) {
--- a/src/ir.cpp
+++ b/src/ir.cpp
@ -15200,17 +15200,17 @@ static TypeTableEntry *ir_analyze_instruction_set_float_mode(IrAnalyze *ira,
        return ira->codegen->builtin_types.entry_void;
    }
-    bool *fast_math_off_ptr;
+    bool *fast_math_on_ptr;
    AstNode **fast_math_set_node_ptr;
    if (target_type->id == TypeTableEntryIdBlock) {
        ScopeBlock *block_scope = (ScopeBlock *)target_val->data.x_block;
-        fast_math_off_ptr = &block_scope->fast_math_off;
+        fast_math_on_ptr = &block_scope->fast_math_on;
        fast_math_set_node_ptr = &block_scope->fast_math_set_node;
    } else if (target_type->id == TypeTableEntryIdFn) {
        assert(target_val->data.x_ptr.special == ConstPtrSpecialFunction);
        FnTableEntry *target_fn = target_val->data.x_ptr.data.fn.fn_entry;
        assert(target_fn->def_scope);
-        fast_math_off_ptr = &target_fn->def_scope->fast_math_off;
+        fast_math_on_ptr = &target_fn->def_scope->fast_math_on;
        fast_math_set_node_ptr = &target_fn->def_scope->fast_math_set_node;
    } else if (target_type->id == TypeTableEntryIdMetaType) {
        ScopeDecls *decls_scope;
@ -15226,7 +15226,7 @@ static TypeTableEntry *ir_analyze_instruction_set_float_mode(IrAnalyze *ira,
                buf_sprintf("expected scope reference, found type '%s'", buf_ptr(&type_arg->name)));
            return ira->codegen->builtin_types.entry_invalid;
        }
-        fast_math_off_ptr = &decls_scope->fast_math_off;
+        fast_math_on_ptr = &decls_scope->fast_math_on;
        fast_math_set_node_ptr = &decls_scope->fast_math_set_node;
    } else {
        ir_add_error_node(ira, target_instruction->source_node,
@ -15248,7 +15248,7 @@ static TypeTableEntry *ir_analyze_instruction_set_float_mode(IrAnalyze *ira,
        return ira->codegen->builtin_types.entry_invalid;
    }
    *fast_math_set_node_ptr = source_node;
-    *fast_math_off_ptr = (float_mode_scalar == FloatModeStrict);
+    *fast_math_on_ptr = (float_mode_scalar == FloatModeOptimized);
    ir_build_const_from(ira, &instruction->base);
    return ira->codegen->builtin_types.entry_void;
--- a/std/fmt/errol/index.zig
+++ b/std/fmt/errol/index.zig
@ -253,11 +253,7 @@ fn gethi(in: f64) f64 {
 /// Normalize the number by factoring in the error.
 ///   @hp: The float pair.
 fn hpNormalize(hp: *HP) void {
-    // Required to avoid segfaults causing buffer overrun during errol3 digit output termination.
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
    const val = hp.val;
    hp.val += hp.off;
    hp.off += val - hp.val;
 }
--- a/std/math/ceil.zig
+++ b/std/math/ceil.zig
@ -61,10 +61,8 @@ fn ceil64(x: f64) f64 {
    }
    if (u >> 63 != 0) {
-        @setFloatMode(this, builtin.FloatMode.Strict);
        y = x - math.f64_toint + math.f64_toint - x;
    } else {
-        @setFloatMode(this, builtin.FloatMode.Strict);
        y = x + math.f64_toint - math.f64_toint - x;
    }
--- a/std/math/complex/exp.zig
+++ b/std/math/complex/exp.zig
@ -17,8 +17,6 @@ pub fn exp(z: var) @typeOf(z) {
 }
 fn exp32(z: Complex(f32)) Complex(f32) {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
    const exp_overflow = 0x42b17218; // max_exp * ln2 ~= 88.72283955
    const cexp_overflow = 0x43400074; // (max_exp - min_denom_exp) * ln2
--- a/std/math/cos.zig
+++ b/std/math/cos.zig
@ -37,8 +37,6 @@ const C5 = 4.16666666666665929218E-2;
 //
 // This may have slight differences on some edge cases and may need to replaced if so.
 fn cos32(x_: f32) f32 {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
    const pi4a = 7.85398125648498535156e-1;
    const pi4b = 3.77489470793079817668E-8;
    const pi4c = 2.69515142907905952645E-15;
--- a/std/math/exp.zig
+++ b/std/math/exp.zig
@ -18,8 +18,6 @@ pub fn exp(x: var) @typeOf(x) {
 }
 fn exp32(x_: f32) f32 {
-    @setFloatMode(this, builtin.FloatMode.Strict);
    const half = []f32{ 0.5, -0.5 };
    const ln2hi = 6.9314575195e-1;
    const ln2lo = 1.4286067653e-6;
@ -95,8 +93,6 @@ fn exp32(x_: f32) f32 {
 }
 fn exp64(x_: f64) f64 {
-    @setFloatMode(this, builtin.FloatMode.Strict);
    const half = []const f64{ 0.5, -0.5 };
    const ln2hi: f64 = 6.93147180369123816490e-01;
    const ln2lo: f64 = 1.90821492927058770002e-10;
--- a/std/math/exp2.zig
+++ b/std/math/exp2.zig
@ -36,8 +36,6 @@ const exp2ft = []const f64{
 };
 fn exp2_32(x: f32) f32 {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
    const tblsiz = @intCast(u32, exp2ft.len);
    const redux: f32 = 0x1.8p23 / @intToFloat(f32, tblsiz);
    const P1: f32 = 0x1.62e430p-1;
@ -353,8 +351,6 @@ const exp2dt = []f64{
 };
 fn exp2_64(x: f64) f64 {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
    const tblsiz = @intCast(u32, exp2dt.len / 2);
    const redux: f64 = 0x1.8p52 / @intToFloat(f64, tblsiz);
    const P1: f64 = 0x1.62e42fefa39efp-1;
--- a/std/math/expm1.zig
+++ b/std/math/expm1.zig
@ -19,8 +19,6 @@ pub fn expm1(x: var) @typeOf(x) {
 }
 fn expm1_32(x_: f32) f32 {
-    @setFloatMode(this, builtin.FloatMode.Strict);
    if (math.isNan(x_))
        return math.nan(f32);
@ -149,8 +147,6 @@ fn expm1_32(x_: f32) f32 {
 }
 fn expm1_64(x_: f64) f64 {
-    @setFloatMode(this, builtin.FloatMode.Strict);
    if (math.isNan(x_))
        return math.nan(f64);
--- a/std/math/floor.zig
+++ b/std/math/floor.zig
@ -97,10 +97,8 @@ fn floor64(x: f64) f64 {
    }
    if (u >> 63 != 0) {
-        @setFloatMode(this, builtin.FloatMode.Strict);
        y = x - math.f64_toint + math.f64_toint - x;
    } else {
-        @setFloatMode(this, builtin.FloatMode.Strict);
        y = x + math.f64_toint - math.f64_toint - x;
    }
--- a/std/math/ln.zig
+++ b/std/math/ln.zig
@ -35,8 +35,6 @@ pub fn ln(x: var) @typeOf(x) {
 }
 pub fn ln_32(x_: f32) f32 {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
    const ln2_hi: f32 = 6.9313812256e-01;
    const ln2_lo: f32 = 9.0580006145e-06;
    const Lg1: f32 = 0xaaaaaa.0p-24;
@ -89,8 +87,6 @@ pub fn ln_32(x_: f32) f32 {
 }
 pub fn ln_64(x_: f64) f64 {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
    const ln2_hi: f64 = 6.93147180369123816490e-01;
    const ln2_lo: f64 = 1.90821492927058770002e-10;
    const Lg1: f64 = 6.666666666666735130e-01;
--- a/std/math/pow.zig
+++ b/std/math/pow.zig
@ -28,8 +28,6 @@ const assert = std.debug.assert;
 // This implementation is taken from the go stlib, musl is a bit more complex.
 pub fn pow(comptime T: type, x: T, y: T) T {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
    if (T != f32 and T != f64) {
        @compileError("pow not implemented for " ++ @typeName(T));
    }
--- a/std/math/round.zig
+++ b/std/math/round.zig
@ -35,11 +35,7 @@ fn round32(x_: f32) f32 {
        return 0 * @bitCast(f32, u);
    }
-    {
-        @setFloatMode(this, builtin.FloatMode.Strict);
-        y = x + math.f32_toint - math.f32_toint - x;
-    }
+    y = x + math.f32_toint - math.f32_toint - x;
    if (y > 0.5) {
        y = y + x - 1;
    } else if (y <= -0.5) {
@ -72,11 +68,7 @@ fn round64(x_: f64) f64 {
        return 0 * @bitCast(f64, u);
    }
-    {
-        @setFloatMode(this, builtin.FloatMode.Strict);
-        y = x + math.f64_toint - math.f64_toint - x;
-    }
+    y = x + math.f64_toint - math.f64_toint - x;
    if (y > 0.5) {
        y = y + x - 1;
    } else if (y <= -0.5) {
--- a/std/math/sin.zig
+++ b/std/math/sin.zig
@ -38,8 +38,6 @@ const C5 = 4.16666666666665929218E-2;
 //
 // This may have slight differences on some edge cases and may need to replaced if so.
 fn sin32(x_: f32) f32 {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
    const pi4a = 7.85398125648498535156e-1;
    const pi4b = 3.77489470793079817668E-8;
    const pi4c = 2.69515142907905952645E-15;
--- a/std/math/sinh.zig
+++ b/std/math/sinh.zig
@ -54,8 +54,6 @@ fn sinh32(x: f32) f32 {
 }
 fn sinh64(x: f64) f64 {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
    const u = @bitCast(u64, x);
    const w = @intCast(u32, u >> 32);
    const ax = @bitCast(f64, u & (@maxValue(u64) >> 1));
--- a/std/math/tan.zig
+++ b/std/math/tan.zig
@ -31,8 +31,6 @@ const Tq4 = -5.38695755929454629881E7;
 //
 // This may have slight differences on some edge cases and may need to replaced if so.
 fn tan32(x_: f32) f32 {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
    const pi4a = 7.85398125648498535156e-1;
    const pi4b = 3.77489470793079817668E-8;
    const pi4c = 2.69515142907905952645E-15;