parent
4b68ef45af
commit
353419f82d
@ -744,19 +744,19 @@ const yet_another_hex_float = 0x103.70P-5;
|
||||
{#code_end#}
|
||||
{#header_close#}
|
||||
{#header_open|Floating Point Operations#}
|
||||
<p>By default floating point operations use <code>Optimized</code> mode,
|
||||
but you can switch to <code>Strict</code> mode on a per-block basis:</p>
|
||||
<p>By default floating point operations use <code>Strict</code> mode,
|
||||
but you can switch to <code>Optimized</code> mode on a per-block basis:</p>
|
||||
{#code_begin|obj|foo#}
|
||||
{#code_release_fast#}
|
||||
const builtin = @import("builtin");
|
||||
const big = f64(1 << 40);
|
||||
|
||||
export fn foo_strict(x: f64) f64 {
|
||||
@setFloatMode(this, builtin.FloatMode.Strict);
|
||||
return x + big - big;
|
||||
}
|
||||
|
||||
export fn foo_optimized(x: f64) f64 {
|
||||
@setFloatMode(this, builtin.FloatMode.Optimized);
|
||||
return x + big - big;
|
||||
}
|
||||
{#code_end#}
|
||||
@ -5948,7 +5948,7 @@ pub const FloatMode = enum {
|
||||
{#code_end#}
|
||||
<ul>
|
||||
<li>
|
||||
<code>Optimized</code> (default) - Floating point operations may do all of the following:
|
||||
<code>Optimized</code> - Floating point operations may do all of the following:
|
||||
<ul>
|
||||
<li>Assume the arguments and result are not NaN. Optimizations are required to retain defined behavior over NaNs, but the value of the result is undefined.</li>
|
||||
<li>Assume the arguments and result are not +/-Inf. Optimizations are required to retain defined behavior over +/-Inf, but the value of the result is undefined.</li>
|
||||
@ -5960,7 +5960,7 @@ pub const FloatMode = enum {
|
||||
This is equivalent to <code>-ffast-math</code> in GCC.
|
||||
</li>
|
||||
<li>
|
||||
<code>Strict</code> - Floating point operations follow strict IEEE compliance.
|
||||
<code>Strict</code> (default) - Floating point operations follow strict IEEE compliance.
|
||||
</li>
|
||||
</ul>
|
||||
{#see_also|Floating Point Operations#}
|
||||
|
@ -1852,7 +1852,7 @@ struct ScopeDecls {
|
||||
HashMap<Buf *, Tld *, buf_hash, buf_eql_buf> decl_table;
|
||||
bool safety_off;
|
||||
AstNode *safety_set_node;
|
||||
bool fast_math_off;
|
||||
bool fast_math_on;
|
||||
AstNode *fast_math_set_node;
|
||||
ImportTableEntry *import;
|
||||
// If this is a scope from a container, this is the type entry, otherwise null
|
||||
@ -1872,7 +1872,7 @@ struct ScopeBlock {
|
||||
|
||||
bool safety_off;
|
||||
AstNode *safety_set_node;
|
||||
bool fast_math_off;
|
||||
bool fast_math_on;
|
||||
AstNode *fast_math_set_node;
|
||||
};
|
||||
|
||||
|
@ -829,15 +829,15 @@ static bool ir_want_fast_math(CodeGen *g, IrInstruction *instruction) {
|
||||
if (scope->id == ScopeIdBlock) {
|
||||
ScopeBlock *block_scope = (ScopeBlock *)scope;
|
||||
if (block_scope->fast_math_set_node)
|
||||
return !block_scope->fast_math_off;
|
||||
return block_scope->fast_math_on;
|
||||
} else if (scope->id == ScopeIdDecls) {
|
||||
ScopeDecls *decls_scope = (ScopeDecls *)scope;
|
||||
if (decls_scope->fast_math_set_node)
|
||||
return !decls_scope->fast_math_off;
|
||||
return decls_scope->fast_math_on;
|
||||
}
|
||||
scope = scope->parent;
|
||||
}
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool ir_want_runtime_safety(CodeGen *g, IrInstruction *instruction) {
|
||||
|
10
src/ir.cpp
10
src/ir.cpp
@ -15200,17 +15200,17 @@ static TypeTableEntry *ir_analyze_instruction_set_float_mode(IrAnalyze *ira,
|
||||
return ira->codegen->builtin_types.entry_void;
|
||||
}
|
||||
|
||||
bool *fast_math_off_ptr;
|
||||
bool *fast_math_on_ptr;
|
||||
AstNode **fast_math_set_node_ptr;
|
||||
if (target_type->id == TypeTableEntryIdBlock) {
|
||||
ScopeBlock *block_scope = (ScopeBlock *)target_val->data.x_block;
|
||||
fast_math_off_ptr = &block_scope->fast_math_off;
|
||||
fast_math_on_ptr = &block_scope->fast_math_on;
|
||||
fast_math_set_node_ptr = &block_scope->fast_math_set_node;
|
||||
} else if (target_type->id == TypeTableEntryIdFn) {
|
||||
assert(target_val->data.x_ptr.special == ConstPtrSpecialFunction);
|
||||
FnTableEntry *target_fn = target_val->data.x_ptr.data.fn.fn_entry;
|
||||
assert(target_fn->def_scope);
|
||||
fast_math_off_ptr = &target_fn->def_scope->fast_math_off;
|
||||
fast_math_on_ptr = &target_fn->def_scope->fast_math_on;
|
||||
fast_math_set_node_ptr = &target_fn->def_scope->fast_math_set_node;
|
||||
} else if (target_type->id == TypeTableEntryIdMetaType) {
|
||||
ScopeDecls *decls_scope;
|
||||
@ -15226,7 +15226,7 @@ static TypeTableEntry *ir_analyze_instruction_set_float_mode(IrAnalyze *ira,
|
||||
buf_sprintf("expected scope reference, found type '%s'", buf_ptr(&type_arg->name)));
|
||||
return ira->codegen->builtin_types.entry_invalid;
|
||||
}
|
||||
fast_math_off_ptr = &decls_scope->fast_math_off;
|
||||
fast_math_on_ptr = &decls_scope->fast_math_on;
|
||||
fast_math_set_node_ptr = &decls_scope->fast_math_set_node;
|
||||
} else {
|
||||
ir_add_error_node(ira, target_instruction->source_node,
|
||||
@ -15248,7 +15248,7 @@ static TypeTableEntry *ir_analyze_instruction_set_float_mode(IrAnalyze *ira,
|
||||
return ira->codegen->builtin_types.entry_invalid;
|
||||
}
|
||||
*fast_math_set_node_ptr = source_node;
|
||||
*fast_math_off_ptr = (float_mode_scalar == FloatModeStrict);
|
||||
*fast_math_on_ptr = (float_mode_scalar == FloatModeOptimized);
|
||||
|
||||
ir_build_const_from(ira, &instruction->base);
|
||||
return ira->codegen->builtin_types.entry_void;
|
||||
|
@ -253,11 +253,7 @@ fn gethi(in: f64) f64 {
|
||||
/// Normalize the number by factoring in the error.
|
||||
/// @hp: The float pair.
|
||||
fn hpNormalize(hp: *HP) void {
|
||||
// Required to avoid segfaults causing buffer overrun during errol3 digit output termination.
|
||||
@setFloatMode(this, @import("builtin").FloatMode.Strict);
|
||||
|
||||
const val = hp.val;
|
||||
|
||||
hp.val += hp.off;
|
||||
hp.off += val - hp.val;
|
||||
}
|
||||
|
@ -61,10 +61,8 @@ fn ceil64(x: f64) f64 {
|
||||
}
|
||||
|
||||
if (u >> 63 != 0) {
|
||||
@setFloatMode(this, builtin.FloatMode.Strict);
|
||||
y = x - math.f64_toint + math.f64_toint - x;
|
||||
} else {
|
||||
@setFloatMode(this, builtin.FloatMode.Strict);
|
||||
y = x + math.f64_toint - math.f64_toint - x;
|
||||
}
|
||||
|
||||
|
@ -17,8 +17,6 @@ pub fn exp(z: var) @typeOf(z) {
|
||||
}
|
||||
|
||||
fn exp32(z: Complex(f32)) Complex(f32) {
|
||||
@setFloatMode(this, @import("builtin").FloatMode.Strict);
|
||||
|
||||
const exp_overflow = 0x42b17218; // max_exp * ln2 ~= 88.72283955
|
||||
const cexp_overflow = 0x43400074; // (max_exp - min_denom_exp) * ln2
|
||||
|
||||
|
@ -37,8 +37,6 @@ const C5 = 4.16666666666665929218E-2;
|
||||
//
|
||||
// This may have slight differences on some edge cases and may need to be replaced if so.
|
||||
fn cos32(x_: f32) f32 {
|
||||
@setFloatMode(this, @import("builtin").FloatMode.Strict);
|
||||
|
||||
const pi4a = 7.85398125648498535156e-1;
|
||||
const pi4b = 3.77489470793079817668E-8;
|
||||
const pi4c = 2.69515142907905952645E-15;
|
||||
|
@ -18,8 +18,6 @@ pub fn exp(x: var) @typeOf(x) {
|
||||
}
|
||||
|
||||
fn exp32(x_: f32) f32 {
|
||||
@setFloatMode(this, builtin.FloatMode.Strict);
|
||||
|
||||
const half = []f32{ 0.5, -0.5 };
|
||||
const ln2hi = 6.9314575195e-1;
|
||||
const ln2lo = 1.4286067653e-6;
|
||||
@ -95,8 +93,6 @@ fn exp32(x_: f32) f32 {
|
||||
}
|
||||
|
||||
fn exp64(x_: f64) f64 {
|
||||
@setFloatMode(this, builtin.FloatMode.Strict);
|
||||
|
||||
const half = []const f64{ 0.5, -0.5 };
|
||||
const ln2hi: f64 = 6.93147180369123816490e-01;
|
||||
const ln2lo: f64 = 1.90821492927058770002e-10;
|
||||
|
@ -36,8 +36,6 @@ const exp2ft = []const f64{
|
||||
};
|
||||
|
||||
fn exp2_32(x: f32) f32 {
|
||||
@setFloatMode(this, @import("builtin").FloatMode.Strict);
|
||||
|
||||
const tblsiz = @intCast(u32, exp2ft.len);
|
||||
const redux: f32 = 0x1.8p23 / @intToFloat(f32, tblsiz);
|
||||
const P1: f32 = 0x1.62e430p-1;
|
||||
@ -353,8 +351,6 @@ const exp2dt = []f64{
|
||||
};
|
||||
|
||||
fn exp2_64(x: f64) f64 {
|
||||
@setFloatMode(this, @import("builtin").FloatMode.Strict);
|
||||
|
||||
const tblsiz = @intCast(u32, exp2dt.len / 2);
|
||||
const redux: f64 = 0x1.8p52 / @intToFloat(f64, tblsiz);
|
||||
const P1: f64 = 0x1.62e42fefa39efp-1;
|
||||
|
@ -19,8 +19,6 @@ pub fn expm1(x: var) @typeOf(x) {
|
||||
}
|
||||
|
||||
fn expm1_32(x_: f32) f32 {
|
||||
@setFloatMode(this, builtin.FloatMode.Strict);
|
||||
|
||||
if (math.isNan(x_))
|
||||
return math.nan(f32);
|
||||
|
||||
@ -149,8 +147,6 @@ fn expm1_32(x_: f32) f32 {
|
||||
}
|
||||
|
||||
fn expm1_64(x_: f64) f64 {
|
||||
@setFloatMode(this, builtin.FloatMode.Strict);
|
||||
|
||||
if (math.isNan(x_))
|
||||
return math.nan(f64);
|
||||
|
||||
|
@ -97,10 +97,8 @@ fn floor64(x: f64) f64 {
|
||||
}
|
||||
|
||||
if (u >> 63 != 0) {
|
||||
@setFloatMode(this, builtin.FloatMode.Strict);
|
||||
y = x - math.f64_toint + math.f64_toint - x;
|
||||
} else {
|
||||
@setFloatMode(this, builtin.FloatMode.Strict);
|
||||
y = x + math.f64_toint - math.f64_toint - x;
|
||||
}
|
||||
|
||||
|
@ -35,8 +35,6 @@ pub fn ln(x: var) @typeOf(x) {
|
||||
}
|
||||
|
||||
pub fn ln_32(x_: f32) f32 {
|
||||
@setFloatMode(this, @import("builtin").FloatMode.Strict);
|
||||
|
||||
const ln2_hi: f32 = 6.9313812256e-01;
|
||||
const ln2_lo: f32 = 9.0580006145e-06;
|
||||
const Lg1: f32 = 0xaaaaaa.0p-24;
|
||||
@ -89,8 +87,6 @@ pub fn ln_32(x_: f32) f32 {
|
||||
}
|
||||
|
||||
pub fn ln_64(x_: f64) f64 {
|
||||
@setFloatMode(this, @import("builtin").FloatMode.Strict);
|
||||
|
||||
const ln2_hi: f64 = 6.93147180369123816490e-01;
|
||||
const ln2_lo: f64 = 1.90821492927058770002e-10;
|
||||
const Lg1: f64 = 6.666666666666735130e-01;
|
||||
|
@ -28,8 +28,6 @@ const assert = std.debug.assert;
|
||||
|
||||
// This implementation is taken from the Go stdlib; musl is a bit more complex.
|
||||
pub fn pow(comptime T: type, x: T, y: T) T {
|
||||
@setFloatMode(this, @import("builtin").FloatMode.Strict);
|
||||
|
||||
if (T != f32 and T != f64) {
|
||||
@compileError("pow not implemented for " ++ @typeName(T));
|
||||
}
|
||||
|
@ -35,11 +35,7 @@ fn round32(x_: f32) f32 {
|
||||
return 0 * @bitCast(f32, u);
|
||||
}
|
||||
|
||||
{
|
||||
@setFloatMode(this, builtin.FloatMode.Strict);
|
||||
y = x + math.f32_toint - math.f32_toint - x;
|
||||
}
|
||||
|
||||
if (y > 0.5) {
|
||||
y = y + x - 1;
|
||||
} else if (y <= -0.5) {
|
||||
@ -72,11 +68,7 @@ fn round64(x_: f64) f64 {
|
||||
return 0 * @bitCast(f64, u);
|
||||
}
|
||||
|
||||
{
|
||||
@setFloatMode(this, builtin.FloatMode.Strict);
|
||||
y = x + math.f64_toint - math.f64_toint - x;
|
||||
}
|
||||
|
||||
if (y > 0.5) {
|
||||
y = y + x - 1;
|
||||
} else if (y <= -0.5) {
|
||||
|
@ -38,8 +38,6 @@ const C5 = 4.16666666666665929218E-2;
|
||||
//
|
||||
// This may have slight differences on some edge cases and may need to be replaced if so.
|
||||
fn sin32(x_: f32) f32 {
|
||||
@setFloatMode(this, @import("builtin").FloatMode.Strict);
|
||||
|
||||
const pi4a = 7.85398125648498535156e-1;
|
||||
const pi4b = 3.77489470793079817668E-8;
|
||||
const pi4c = 2.69515142907905952645E-15;
|
||||
|
@ -54,8 +54,6 @@ fn sinh32(x: f32) f32 {
|
||||
}
|
||||
|
||||
fn sinh64(x: f64) f64 {
|
||||
@setFloatMode(this, @import("builtin").FloatMode.Strict);
|
||||
|
||||
const u = @bitCast(u64, x);
|
||||
const w = @intCast(u32, u >> 32);
|
||||
const ax = @bitCast(f64, u & (@maxValue(u64) >> 1));
|
||||
|
@ -31,8 +31,6 @@ const Tq4 = -5.38695755929454629881E7;
|
||||
//
|
||||
// This may have slight differences on some edge cases and may need to be replaced if so.
|
||||
fn tan32(x_: f32) f32 {
|
||||
@setFloatMode(this, @import("builtin").FloatMode.Strict);
|
||||
|
||||
const pi4a = 7.85398125648498535156e-1;
|
||||
const pi4b = 3.77489470793079817668E-8;
|
||||
const pi4c = 2.69515142907905952645E-15;
|
||||
|
Loading…
x
Reference in New Issue
Block a user