Default to strict IEEE floating point

Closes #1227.
This commit is contained in:
Marc Tiehuis 2018-08-23 21:42:09 +12:00
parent 4b68ef45af
commit 353419f82d
18 changed files with 17 additions and 61 deletions

View File

@ -744,19 +744,19 @@ const yet_another_hex_float = 0x103.70P-5;
{#code_end#} {#code_end#}
{#header_close#} {#header_close#}
{#header_open|Floating Point Operations#} {#header_open|Floating Point Operations#}
<p>By default floating point operations use <code>Optimized</code> mode, <p>By default floating point operations use <code>Strict</code> mode,
but you can switch to <code>Strict</code> mode on a per-block basis:</p> but you can switch to <code>Optimized</code> mode on a per-block basis:</p>
{#code_begin|obj|foo#} {#code_begin|obj|foo#}
{#code_release_fast#} {#code_release_fast#}
const builtin = @import("builtin"); const builtin = @import("builtin");
const big = f64(1 << 40); const big = f64(1 << 40);
export fn foo_strict(x: f64) f64 { export fn foo_strict(x: f64) f64 {
@setFloatMode(this, builtin.FloatMode.Strict);
return x + big - big; return x + big - big;
} }
export fn foo_optimized(x: f64) f64 { export fn foo_optimized(x: f64) f64 {
@setFloatMode(this, builtin.FloatMode.Optimized);
return x + big - big; return x + big - big;
} }
{#code_end#} {#code_end#}
@ -5948,7 +5948,7 @@ pub const FloatMode = enum {
{#code_end#} {#code_end#}
<ul> <ul>
<li> <li>
<code>Optimized</code> (default) - Floating point operations may do all of the following: <code>Optimized</code> - Floating point operations may do all of the following:
<ul> <ul>
<li>Assume the arguments and result are not NaN. Optimizations are required to retain defined behavior over NaNs, but the value of the result is undefined.</li> <li>Assume the arguments and result are not NaN. Optimizations are required to retain defined behavior over NaNs, but the value of the result is undefined.</li>
<li>Assume the arguments and result are not +/-Inf. Optimizations are required to retain defined behavior over +/-Inf, but the value of the result is undefined.</li> <li>Assume the arguments and result are not +/-Inf. Optimizations are required to retain defined behavior over +/-Inf, but the value of the result is undefined.</li>
@ -5960,7 +5960,7 @@ pub const FloatMode = enum {
This is equivalent to <code>-ffast-math</code> in GCC. This is equivalent to <code>-ffast-math</code> in GCC.
</li> </li>
<li> <li>
<code>Strict</code> - Floating point operations follow strict IEEE compliance. <code>Strict</code> (default) - Floating point operations follow strict IEEE compliance.
</li> </li>
</ul> </ul>
{#see_also|Floating Point Operations#} {#see_also|Floating Point Operations#}

View File

@ -1852,7 +1852,7 @@ struct ScopeDecls {
HashMap<Buf *, Tld *, buf_hash, buf_eql_buf> decl_table; HashMap<Buf *, Tld *, buf_hash, buf_eql_buf> decl_table;
bool safety_off; bool safety_off;
AstNode *safety_set_node; AstNode *safety_set_node;
bool fast_math_off; bool fast_math_on;
AstNode *fast_math_set_node; AstNode *fast_math_set_node;
ImportTableEntry *import; ImportTableEntry *import;
// If this is a scope from a container, this is the type entry, otherwise null // If this is a scope from a container, this is the type entry, otherwise null
@ -1872,7 +1872,7 @@ struct ScopeBlock {
bool safety_off; bool safety_off;
AstNode *safety_set_node; AstNode *safety_set_node;
bool fast_math_off; bool fast_math_on;
AstNode *fast_math_set_node; AstNode *fast_math_set_node;
}; };

View File

@ -829,15 +829,15 @@ static bool ir_want_fast_math(CodeGen *g, IrInstruction *instruction) {
if (scope->id == ScopeIdBlock) { if (scope->id == ScopeIdBlock) {
ScopeBlock *block_scope = (ScopeBlock *)scope; ScopeBlock *block_scope = (ScopeBlock *)scope;
if (block_scope->fast_math_set_node) if (block_scope->fast_math_set_node)
return !block_scope->fast_math_off; return block_scope->fast_math_on;
} else if (scope->id == ScopeIdDecls) { } else if (scope->id == ScopeIdDecls) {
ScopeDecls *decls_scope = (ScopeDecls *)scope; ScopeDecls *decls_scope = (ScopeDecls *)scope;
if (decls_scope->fast_math_set_node) if (decls_scope->fast_math_set_node)
return !decls_scope->fast_math_off; return decls_scope->fast_math_on;
} }
scope = scope->parent; scope = scope->parent;
} }
return true; return false;
} }
static bool ir_want_runtime_safety(CodeGen *g, IrInstruction *instruction) { static bool ir_want_runtime_safety(CodeGen *g, IrInstruction *instruction) {

View File

@ -15200,17 +15200,17 @@ static TypeTableEntry *ir_analyze_instruction_set_float_mode(IrAnalyze *ira,
return ira->codegen->builtin_types.entry_void; return ira->codegen->builtin_types.entry_void;
} }
bool *fast_math_off_ptr; bool *fast_math_on_ptr;
AstNode **fast_math_set_node_ptr; AstNode **fast_math_set_node_ptr;
if (target_type->id == TypeTableEntryIdBlock) { if (target_type->id == TypeTableEntryIdBlock) {
ScopeBlock *block_scope = (ScopeBlock *)target_val->data.x_block; ScopeBlock *block_scope = (ScopeBlock *)target_val->data.x_block;
fast_math_off_ptr = &block_scope->fast_math_off; fast_math_on_ptr = &block_scope->fast_math_on;
fast_math_set_node_ptr = &block_scope->fast_math_set_node; fast_math_set_node_ptr = &block_scope->fast_math_set_node;
} else if (target_type->id == TypeTableEntryIdFn) { } else if (target_type->id == TypeTableEntryIdFn) {
assert(target_val->data.x_ptr.special == ConstPtrSpecialFunction); assert(target_val->data.x_ptr.special == ConstPtrSpecialFunction);
FnTableEntry *target_fn = target_val->data.x_ptr.data.fn.fn_entry; FnTableEntry *target_fn = target_val->data.x_ptr.data.fn.fn_entry;
assert(target_fn->def_scope); assert(target_fn->def_scope);
fast_math_off_ptr = &target_fn->def_scope->fast_math_off; fast_math_on_ptr = &target_fn->def_scope->fast_math_on;
fast_math_set_node_ptr = &target_fn->def_scope->fast_math_set_node; fast_math_set_node_ptr = &target_fn->def_scope->fast_math_set_node;
} else if (target_type->id == TypeTableEntryIdMetaType) { } else if (target_type->id == TypeTableEntryIdMetaType) {
ScopeDecls *decls_scope; ScopeDecls *decls_scope;
@ -15226,7 +15226,7 @@ static TypeTableEntry *ir_analyze_instruction_set_float_mode(IrAnalyze *ira,
buf_sprintf("expected scope reference, found type '%s'", buf_ptr(&type_arg->name))); buf_sprintf("expected scope reference, found type '%s'", buf_ptr(&type_arg->name)));
return ira->codegen->builtin_types.entry_invalid; return ira->codegen->builtin_types.entry_invalid;
} }
fast_math_off_ptr = &decls_scope->fast_math_off; fast_math_on_ptr = &decls_scope->fast_math_on;
fast_math_set_node_ptr = &decls_scope->fast_math_set_node; fast_math_set_node_ptr = &decls_scope->fast_math_set_node;
} else { } else {
ir_add_error_node(ira, target_instruction->source_node, ir_add_error_node(ira, target_instruction->source_node,
@ -15248,7 +15248,7 @@ static TypeTableEntry *ir_analyze_instruction_set_float_mode(IrAnalyze *ira,
return ira->codegen->builtin_types.entry_invalid; return ira->codegen->builtin_types.entry_invalid;
} }
*fast_math_set_node_ptr = source_node; *fast_math_set_node_ptr = source_node;
*fast_math_off_ptr = (float_mode_scalar == FloatModeStrict); *fast_math_on_ptr = (float_mode_scalar == FloatModeOptimized);
ir_build_const_from(ira, &instruction->base); ir_build_const_from(ira, &instruction->base);
return ira->codegen->builtin_types.entry_void; return ira->codegen->builtin_types.entry_void;

View File

@ -253,11 +253,7 @@ fn gethi(in: f64) f64 {
/// Normalize the number by factoring in the error. /// Normalize the number by factoring in the error.
/// @hp: The float pair. /// @hp: The float pair.
fn hpNormalize(hp: *HP) void { fn hpNormalize(hp: *HP) void {
// Required to avoid segfaults causing buffer overrun during errol3 digit output termination.
@setFloatMode(this, @import("builtin").FloatMode.Strict);
const val = hp.val; const val = hp.val;
hp.val += hp.off; hp.val += hp.off;
hp.off += val - hp.val; hp.off += val - hp.val;
} }

View File

@ -61,10 +61,8 @@ fn ceil64(x: f64) f64 {
} }
if (u >> 63 != 0) { if (u >> 63 != 0) {
@setFloatMode(this, builtin.FloatMode.Strict);
y = x - math.f64_toint + math.f64_toint - x; y = x - math.f64_toint + math.f64_toint - x;
} else { } else {
@setFloatMode(this, builtin.FloatMode.Strict);
y = x + math.f64_toint - math.f64_toint - x; y = x + math.f64_toint - math.f64_toint - x;
} }

View File

@ -17,8 +17,6 @@ pub fn exp(z: var) @typeOf(z) {
} }
fn exp32(z: Complex(f32)) Complex(f32) { fn exp32(z: Complex(f32)) Complex(f32) {
@setFloatMode(this, @import("builtin").FloatMode.Strict);
const exp_overflow = 0x42b17218; // max_exp * ln2 ~= 88.72283955 const exp_overflow = 0x42b17218; // max_exp * ln2 ~= 88.72283955
const cexp_overflow = 0x43400074; // (max_exp - min_denom_exp) * ln2 const cexp_overflow = 0x43400074; // (max_exp - min_denom_exp) * ln2

View File

@ -37,8 +37,6 @@ const C5 = 4.16666666666665929218E-2;
// //
// This may have slight differences on some edge cases and may need to be replaced if so. // This may have slight differences on some edge cases and may need to be replaced if so.
fn cos32(x_: f32) f32 { fn cos32(x_: f32) f32 {
@setFloatMode(this, @import("builtin").FloatMode.Strict);
const pi4a = 7.85398125648498535156e-1; const pi4a = 7.85398125648498535156e-1;
const pi4b = 3.77489470793079817668E-8; const pi4b = 3.77489470793079817668E-8;
const pi4c = 2.69515142907905952645E-15; const pi4c = 2.69515142907905952645E-15;

View File

@ -18,8 +18,6 @@ pub fn exp(x: var) @typeOf(x) {
} }
fn exp32(x_: f32) f32 { fn exp32(x_: f32) f32 {
@setFloatMode(this, builtin.FloatMode.Strict);
const half = []f32{ 0.5, -0.5 }; const half = []f32{ 0.5, -0.5 };
const ln2hi = 6.9314575195e-1; const ln2hi = 6.9314575195e-1;
const ln2lo = 1.4286067653e-6; const ln2lo = 1.4286067653e-6;
@ -95,8 +93,6 @@ fn exp32(x_: f32) f32 {
} }
fn exp64(x_: f64) f64 { fn exp64(x_: f64) f64 {
@setFloatMode(this, builtin.FloatMode.Strict);
const half = []const f64{ 0.5, -0.5 }; const half = []const f64{ 0.5, -0.5 };
const ln2hi: f64 = 6.93147180369123816490e-01; const ln2hi: f64 = 6.93147180369123816490e-01;
const ln2lo: f64 = 1.90821492927058770002e-10; const ln2lo: f64 = 1.90821492927058770002e-10;

View File

@ -36,8 +36,6 @@ const exp2ft = []const f64{
}; };
fn exp2_32(x: f32) f32 { fn exp2_32(x: f32) f32 {
@setFloatMode(this, @import("builtin").FloatMode.Strict);
const tblsiz = @intCast(u32, exp2ft.len); const tblsiz = @intCast(u32, exp2ft.len);
const redux: f32 = 0x1.8p23 / @intToFloat(f32, tblsiz); const redux: f32 = 0x1.8p23 / @intToFloat(f32, tblsiz);
const P1: f32 = 0x1.62e430p-1; const P1: f32 = 0x1.62e430p-1;
@ -353,8 +351,6 @@ const exp2dt = []f64{
}; };
fn exp2_64(x: f64) f64 { fn exp2_64(x: f64) f64 {
@setFloatMode(this, @import("builtin").FloatMode.Strict);
const tblsiz = @intCast(u32, exp2dt.len / 2); const tblsiz = @intCast(u32, exp2dt.len / 2);
const redux: f64 = 0x1.8p52 / @intToFloat(f64, tblsiz); const redux: f64 = 0x1.8p52 / @intToFloat(f64, tblsiz);
const P1: f64 = 0x1.62e42fefa39efp-1; const P1: f64 = 0x1.62e42fefa39efp-1;

View File

@ -19,8 +19,6 @@ pub fn expm1(x: var) @typeOf(x) {
} }
fn expm1_32(x_: f32) f32 { fn expm1_32(x_: f32) f32 {
@setFloatMode(this, builtin.FloatMode.Strict);
if (math.isNan(x_)) if (math.isNan(x_))
return math.nan(f32); return math.nan(f32);
@ -149,8 +147,6 @@ fn expm1_32(x_: f32) f32 {
} }
fn expm1_64(x_: f64) f64 { fn expm1_64(x_: f64) f64 {
@setFloatMode(this, builtin.FloatMode.Strict);
if (math.isNan(x_)) if (math.isNan(x_))
return math.nan(f64); return math.nan(f64);

View File

@ -97,10 +97,8 @@ fn floor64(x: f64) f64 {
} }
if (u >> 63 != 0) { if (u >> 63 != 0) {
@setFloatMode(this, builtin.FloatMode.Strict);
y = x - math.f64_toint + math.f64_toint - x; y = x - math.f64_toint + math.f64_toint - x;
} else { } else {
@setFloatMode(this, builtin.FloatMode.Strict);
y = x + math.f64_toint - math.f64_toint - x; y = x + math.f64_toint - math.f64_toint - x;
} }

View File

@ -35,8 +35,6 @@ pub fn ln(x: var) @typeOf(x) {
} }
pub fn ln_32(x_: f32) f32 { pub fn ln_32(x_: f32) f32 {
@setFloatMode(this, @import("builtin").FloatMode.Strict);
const ln2_hi: f32 = 6.9313812256e-01; const ln2_hi: f32 = 6.9313812256e-01;
const ln2_lo: f32 = 9.0580006145e-06; const ln2_lo: f32 = 9.0580006145e-06;
const Lg1: f32 = 0xaaaaaa.0p-24; const Lg1: f32 = 0xaaaaaa.0p-24;
@ -89,8 +87,6 @@ pub fn ln_32(x_: f32) f32 {
} }
pub fn ln_64(x_: f64) f64 { pub fn ln_64(x_: f64) f64 {
@setFloatMode(this, @import("builtin").FloatMode.Strict);
const ln2_hi: f64 = 6.93147180369123816490e-01; const ln2_hi: f64 = 6.93147180369123816490e-01;
const ln2_lo: f64 = 1.90821492927058770002e-10; const ln2_lo: f64 = 1.90821492927058770002e-10;
const Lg1: f64 = 6.666666666666735130e-01; const Lg1: f64 = 6.666666666666735130e-01;

View File

@ -28,8 +28,6 @@ const assert = std.debug.assert;
// This implementation is taken from the go stlib, musl is a bit more complex. // This implementation is taken from the go stlib, musl is a bit more complex.
pub fn pow(comptime T: type, x: T, y: T) T { pub fn pow(comptime T: type, x: T, y: T) T {
@setFloatMode(this, @import("builtin").FloatMode.Strict);
if (T != f32 and T != f64) { if (T != f32 and T != f64) {
@compileError("pow not implemented for " ++ @typeName(T)); @compileError("pow not implemented for " ++ @typeName(T));
} }

View File

@ -35,11 +35,7 @@ fn round32(x_: f32) f32 {
return 0 * @bitCast(f32, u); return 0 * @bitCast(f32, u);
} }
{ y = x + math.f32_toint - math.f32_toint - x;
@setFloatMode(this, builtin.FloatMode.Strict);
y = x + math.f32_toint - math.f32_toint - x;
}
if (y > 0.5) { if (y > 0.5) {
y = y + x - 1; y = y + x - 1;
} else if (y <= -0.5) { } else if (y <= -0.5) {
@ -72,11 +68,7 @@ fn round64(x_: f64) f64 {
return 0 * @bitCast(f64, u); return 0 * @bitCast(f64, u);
} }
{ y = x + math.f64_toint - math.f64_toint - x;
@setFloatMode(this, builtin.FloatMode.Strict);
y = x + math.f64_toint - math.f64_toint - x;
}
if (y > 0.5) { if (y > 0.5) {
y = y + x - 1; y = y + x - 1;
} else if (y <= -0.5) { } else if (y <= -0.5) {

View File

@ -38,8 +38,6 @@ const C5 = 4.16666666666665929218E-2;
// //
// This may have slight differences on some edge cases and may need to be replaced if so. // This may have slight differences on some edge cases and may need to be replaced if so.
fn sin32(x_: f32) f32 { fn sin32(x_: f32) f32 {
@setFloatMode(this, @import("builtin").FloatMode.Strict);
const pi4a = 7.85398125648498535156e-1; const pi4a = 7.85398125648498535156e-1;
const pi4b = 3.77489470793079817668E-8; const pi4b = 3.77489470793079817668E-8;
const pi4c = 2.69515142907905952645E-15; const pi4c = 2.69515142907905952645E-15;

View File

@ -54,8 +54,6 @@ fn sinh32(x: f32) f32 {
} }
fn sinh64(x: f64) f64 { fn sinh64(x: f64) f64 {
@setFloatMode(this, @import("builtin").FloatMode.Strict);
const u = @bitCast(u64, x); const u = @bitCast(u64, x);
const w = @intCast(u32, u >> 32); const w = @intCast(u32, u >> 32);
const ax = @bitCast(f64, u & (@maxValue(u64) >> 1)); const ax = @bitCast(f64, u & (@maxValue(u64) >> 1));

View File

@ -31,8 +31,6 @@ const Tq4 = -5.38695755929454629881E7;
// //
// This may have slight differences on some edge cases and may need to be replaced if so. // This may have slight differences on some edge cases and may need to be replaced if so.
fn tan32(x_: f32) f32 { fn tan32(x_: f32) f32 {
@setFloatMode(this, @import("builtin").FloatMode.Strict);
const pi4a = 7.85398125648498535156e-1; const pi4a = 7.85398125648498535156e-1;
const pi4b = 3.77489470793079817668E-8; const pi4b = 3.77489470793079817668E-8;
const pi4c = 2.69515142907905952645E-15; const pi4c = 2.69515142907905952645E-15;