commit
1b4bae6d69
|
@ -261,12 +261,15 @@ endif()
|
|||
set(EMBEDDED_SOFTFLOAT_SOURCES
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/f128M_isSignalingNaN.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_commonNaNToF128M.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_commonNaNToF16UI.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_commonNaNToF32UI.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_commonNaNToF64UI.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_f128MToCommonNaN.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_f16UIToCommonNaN.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_f32UIToCommonNaN.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_f64UIToCommonNaN.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_propagateNaNF128M.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_propagateNaNF16UI.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/softfloat_raiseFlags.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f128M_add.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f128M_div.c"
|
||||
|
@ -293,8 +296,20 @@ set(EMBEDDED_SOFTFLOAT_SOURCES
|
|||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f128M_to_ui32_r_minMag.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f128M_to_ui64.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f128M_to_ui64_r_minMag.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_add.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_div.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_eq.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_lt.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_mul.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_rem.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_roundToInt.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_sqrt.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_sub.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_to_f128M.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_to_f64.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f32_to_f128M.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f64_to_f128M.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f64_to_f16.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/s_add256M.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/s_addCarryM.c"
|
||||
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/s_addComplCarryM.c"
|
||||
|
@ -572,6 +587,7 @@ set(ZIG_STD_FILES
|
|||
"special/compiler_rt/floatuntidf.zig"
|
||||
"special/compiler_rt/muloti4.zig"
|
||||
"special/compiler_rt/index.zig"
|
||||
"special/compiler_rt/truncXfYf2.zig"
|
||||
"special/compiler_rt/udivmod.zig"
|
||||
"special/compiler_rt/udivmoddi4.zig"
|
||||
"special/compiler_rt/udivmodti4.zig"
|
||||
|
|
|
@ -258,6 +258,7 @@ struct ConstExprValue {
|
|||
// populated if special == ConstValSpecialStatic
|
||||
BigInt x_bigint;
|
||||
BigFloat x_bigfloat;
|
||||
float16_t x_f16;
|
||||
float x_f32;
|
||||
double x_f64;
|
||||
float128_t x_f128;
|
||||
|
@ -1598,6 +1599,7 @@ struct CodeGen {
|
|||
TypeTableEntry *entry_i128;
|
||||
TypeTableEntry *entry_isize;
|
||||
TypeTableEntry *entry_usize;
|
||||
TypeTableEntry *entry_f16;
|
||||
TypeTableEntry *entry_f32;
|
||||
TypeTableEntry *entry_f64;
|
||||
TypeTableEntry *entry_f128;
|
||||
|
|
|
@ -4668,6 +4668,13 @@ static uint32_t hash_const_val(ConstExprValue *const_val) {
|
|||
}
|
||||
case TypeTableEntryIdFloat:
|
||||
switch (const_val->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
{
|
||||
uint16_t result;
|
||||
static_assert(sizeof(result) == sizeof(const_val->data.x_f16), "");
|
||||
memcpy(&result, &const_val->data.x_f16, sizeof(result));
|
||||
return result * 65537u;
|
||||
}
|
||||
case 32:
|
||||
{
|
||||
uint32_t result;
|
||||
|
@ -5128,6 +5135,9 @@ void init_const_float(ConstExprValue *const_val, TypeTableEntry *type, double va
|
|||
bigfloat_init_64(&const_val->data.x_bigfloat, value);
|
||||
} else if (type->id == TypeTableEntryIdFloat) {
|
||||
switch (type->data.floating.bit_count) {
|
||||
case 16:
|
||||
const_val->data.x_f16 = zig_double_to_f16(value);
|
||||
break;
|
||||
case 32:
|
||||
const_val->data.x_f32 = value;
|
||||
break;
|
||||
|
@ -5441,6 +5451,8 @@ bool const_values_equal(ConstExprValue *a, ConstExprValue *b) {
|
|||
case TypeTableEntryIdFloat:
|
||||
assert(a->type->data.floating.bit_count == b->type->data.floating.bit_count);
|
||||
switch (a->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
return f16_eq(a->data.x_f16, b->data.x_f16);
|
||||
case 32:
|
||||
return a->data.x_f32 == b->data.x_f32;
|
||||
case 64:
|
||||
|
@ -5614,6 +5626,9 @@ void render_const_value(CodeGen *g, Buf *buf, ConstExprValue *const_val) {
|
|||
return;
|
||||
case TypeTableEntryIdFloat:
|
||||
switch (type_entry->data.floating.bit_count) {
|
||||
case 16:
|
||||
buf_appendf(buf, "%f", zig_f16_to_double(const_val->data.x_f16));
|
||||
return;
|
||||
case 32:
|
||||
buf_appendf(buf, "%f", const_val->data.x_f32);
|
||||
return;
|
||||
|
|
|
@ -18,6 +18,10 @@ void bigfloat_init_128(BigFloat *dest, float128_t x) {
|
|||
dest->value = x;
|
||||
}
|
||||
|
||||
// Initializes |dest| from a half-precision value by widening it to the
// 128-bit representation that BigFloat stores in its |value| field.
void bigfloat_init_16(BigFloat *dest, float16_t x) {
    float128_t *widened = &dest->value;
    f16_to_f128M(x, widened);
}
|
||||
|
||||
void bigfloat_init_32(BigFloat *dest, float x) {
|
||||
float32_t f32_val;
|
||||
memcpy(&f32_val, &x, sizeof(float));
|
||||
|
@ -146,6 +150,10 @@ Cmp bigfloat_cmp(const BigFloat *op1, const BigFloat *op2) {
|
|||
}
|
||||
}
|
||||
|
||||
// Narrows the 128-bit value held by |bigfloat| to half precision using
// SoftFloat's f128M_to_f16 conversion.
float16_t bigfloat_to_f16(const BigFloat *bigfloat) {
    const float128_t *wide = &bigfloat->value;
    float16_t narrowed = f128M_to_f16(wide);
    return narrowed;
}
|
||||
|
||||
float bigfloat_to_f32(const BigFloat *bigfloat) {
|
||||
float32_t f32_value = f128M_to_f32(&bigfloat->value);
|
||||
float result;
|
||||
|
|
|
@ -22,6 +22,7 @@ struct BigFloat {
|
|||
|
||||
struct Buf;
|
||||
|
||||
void bigfloat_init_16(BigFloat *dest, float16_t x);
|
||||
void bigfloat_init_32(BigFloat *dest, float x);
|
||||
void bigfloat_init_64(BigFloat *dest, double x);
|
||||
void bigfloat_init_128(BigFloat *dest, float128_t x);
|
||||
|
@ -29,6 +30,7 @@ void bigfloat_init_bigfloat(BigFloat *dest, const BigFloat *x);
|
|||
void bigfloat_init_bigint(BigFloat *dest, const BigInt *op);
|
||||
int bigfloat_init_buf_base10(BigFloat *dest, const uint8_t *buf_ptr, size_t buf_len);
|
||||
|
||||
float16_t bigfloat_to_f16(const BigFloat *bigfloat);
|
||||
float bigfloat_to_f32(const BigFloat *bigfloat);
|
||||
double bigfloat_to_f64(const BigFloat *bigfloat);
|
||||
float128_t bigfloat_to_f128(const BigFloat *bigfloat);
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "os.hpp"
|
||||
#include "translate_c.hpp"
|
||||
#include "target.hpp"
|
||||
#include "util.hpp"
|
||||
#include "zig_llvm.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
@ -5211,6 +5212,8 @@ static LLVMValueRef gen_const_val(CodeGen *g, ConstExprValue *const_val, const c
|
|||
const_val->data.x_err_set->value, false);
|
||||
case TypeTableEntryIdFloat:
|
||||
switch (type_entry->data.floating.bit_count) {
|
||||
case 16:
|
||||
return LLVMConstReal(type_entry->type_ref, zig_f16_to_double(const_val->data.x_f16));
|
||||
case 32:
|
||||
return LLVMConstReal(type_entry->type_ref, const_val->data.x_f32);
|
||||
case 64:
|
||||
|
@ -6177,58 +6180,30 @@ static void define_builtin_types(CodeGen *g) {
|
|||
g->builtin_types.entry_usize = entry;
|
||||
}
|
||||
}
|
||||
{
|
||||
|
||||
auto add_fp_entry = [] (CodeGen *g,
|
||||
const char *name,
|
||||
uint32_t bit_count,
|
||||
LLVMTypeRef type_ref,
|
||||
TypeTableEntry **field) {
|
||||
TypeTableEntry *entry = new_type_table_entry(TypeTableEntryIdFloat);
|
||||
entry->type_ref = LLVMFloatType();
|
||||
buf_init_from_str(&entry->name, "f32");
|
||||
entry->data.floating.bit_count = 32;
|
||||
entry->type_ref = type_ref;
|
||||
buf_init_from_str(&entry->name, name);
|
||||
entry->data.floating.bit_count = bit_count;
|
||||
|
||||
uint64_t debug_size_in_bits = 8*LLVMStoreSizeOfType(g->target_data_ref, entry->type_ref);
|
||||
entry->di_type = ZigLLVMCreateDebugBasicType(g->dbuilder, buf_ptr(&entry->name),
|
||||
debug_size_in_bits,
|
||||
ZigLLVMEncoding_DW_ATE_float());
|
||||
g->builtin_types.entry_f32 = entry;
|
||||
*field = entry;
|
||||
g->primitive_type_table.put(&entry->name, entry);
|
||||
}
|
||||
{
|
||||
TypeTableEntry *entry = new_type_table_entry(TypeTableEntryIdFloat);
|
||||
entry->type_ref = LLVMDoubleType();
|
||||
buf_init_from_str(&entry->name, "f64");
|
||||
entry->data.floating.bit_count = 64;
|
||||
};
|
||||
add_fp_entry(g, "f16", 16, LLVMHalfType(), &g->builtin_types.entry_f16);
|
||||
add_fp_entry(g, "f32", 32, LLVMFloatType(), &g->builtin_types.entry_f32);
|
||||
add_fp_entry(g, "f64", 64, LLVMDoubleType(), &g->builtin_types.entry_f64);
|
||||
add_fp_entry(g, "f128", 128, LLVMFP128Type(), &g->builtin_types.entry_f128);
|
||||
add_fp_entry(g, "c_longdouble", 80, LLVMX86FP80Type(), &g->builtin_types.entry_c_longdouble);
|
||||
|
||||
uint64_t debug_size_in_bits = 8*LLVMStoreSizeOfType(g->target_data_ref, entry->type_ref);
|
||||
entry->di_type = ZigLLVMCreateDebugBasicType(g->dbuilder, buf_ptr(&entry->name),
|
||||
debug_size_in_bits,
|
||||
ZigLLVMEncoding_DW_ATE_float());
|
||||
g->builtin_types.entry_f64 = entry;
|
||||
g->primitive_type_table.put(&entry->name, entry);
|
||||
}
|
||||
{
|
||||
TypeTableEntry *entry = new_type_table_entry(TypeTableEntryIdFloat);
|
||||
entry->type_ref = LLVMFP128Type();
|
||||
buf_init_from_str(&entry->name, "f128");
|
||||
entry->data.floating.bit_count = 128;
|
||||
|
||||
uint64_t debug_size_in_bits = 8*LLVMStoreSizeOfType(g->target_data_ref, entry->type_ref);
|
||||
entry->di_type = ZigLLVMCreateDebugBasicType(g->dbuilder, buf_ptr(&entry->name),
|
||||
debug_size_in_bits,
|
||||
ZigLLVMEncoding_DW_ATE_float());
|
||||
g->builtin_types.entry_f128 = entry;
|
||||
g->primitive_type_table.put(&entry->name, entry);
|
||||
}
|
||||
{
|
||||
TypeTableEntry *entry = new_type_table_entry(TypeTableEntryIdFloat);
|
||||
entry->type_ref = LLVMX86FP80Type();
|
||||
buf_init_from_str(&entry->name, "c_longdouble");
|
||||
entry->data.floating.bit_count = 80;
|
||||
|
||||
uint64_t debug_size_in_bits = 8*LLVMStoreSizeOfType(g->target_data_ref, entry->type_ref);
|
||||
entry->di_type = ZigLLVMCreateDebugBasicType(g->dbuilder, buf_ptr(&entry->name),
|
||||
debug_size_in_bits,
|
||||
ZigLLVMEncoding_DW_ATE_float());
|
||||
g->builtin_types.entry_c_longdouble = entry;
|
||||
g->primitive_type_table.put(&entry->name, entry);
|
||||
}
|
||||
{
|
||||
TypeTableEntry *entry = new_type_table_entry(TypeTableEntryIdVoid);
|
||||
entry->type_ref = LLVMVoidType();
|
||||
|
|
156
src/ir.cpp
156
src/ir.cpp
|
@ -11,9 +11,10 @@
|
|||
#include "ir.hpp"
|
||||
#include "ir_print.hpp"
|
||||
#include "os.hpp"
|
||||
#include "translate_c.hpp"
|
||||
#include "range_set.hpp"
|
||||
#include "softfloat.hpp"
|
||||
#include "translate_c.hpp"
|
||||
#include "util.hpp"
|
||||
|
||||
struct IrExecContext {
|
||||
ConstExprValue *mem_slot_list;
|
||||
|
@ -7238,6 +7239,11 @@ static bool float_has_fraction(ConstExprValue *const_val) {
|
|||
return bigfloat_has_fraction(&const_val->data.x_bigfloat);
|
||||
} else if (const_val->type->id == TypeTableEntryIdFloat) {
|
||||
switch (const_val->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
{
|
||||
float16_t floored = f16_roundToInt(const_val->data.x_f16, softfloat_round_minMag, false);
|
||||
return !f16_eq(floored, const_val->data.x_f16);
|
||||
}
|
||||
case 32:
|
||||
return floorf(const_val->data.x_f32) != const_val->data.x_f32;
|
||||
case 64:
|
||||
|
@ -7261,6 +7267,9 @@ static void float_append_buf(Buf *buf, ConstExprValue *const_val) {
|
|||
bigfloat_append_buf(buf, &const_val->data.x_bigfloat);
|
||||
} else if (const_val->type->id == TypeTableEntryIdFloat) {
|
||||
switch (const_val->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
buf_appendf(buf, "%f", zig_f16_to_double(const_val->data.x_f16));
|
||||
break;
|
||||
case 32:
|
||||
buf_appendf(buf, "%f", const_val->data.x_f32);
|
||||
break;
|
||||
|
@ -7296,6 +7305,17 @@ static void float_init_bigint(BigInt *bigint, ConstExprValue *const_val) {
|
|||
bigint_init_bigfloat(bigint, &const_val->data.x_bigfloat);
|
||||
} else if (const_val->type->id == TypeTableEntryIdFloat) {
|
||||
switch (const_val->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
{
|
||||
double x = zig_f16_to_double(const_val->data.x_f16);
|
||||
if (x >= 0) {
|
||||
bigint_init_unsigned(bigint, (uint64_t)x);
|
||||
} else {
|
||||
bigint_init_unsigned(bigint, (uint64_t)-x);
|
||||
bigint->is_negative = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 32:
|
||||
if (const_val->data.x_f32 >= 0) {
|
||||
bigint_init_unsigned(bigint, (uint64_t)(const_val->data.x_f32));
|
||||
|
@ -7332,6 +7352,9 @@ static void float_init_bigfloat(ConstExprValue *dest_val, BigFloat *bigfloat) {
|
|||
bigfloat_init_bigfloat(&dest_val->data.x_bigfloat, bigfloat);
|
||||
} else if (dest_val->type->id == TypeTableEntryIdFloat) {
|
||||
switch (dest_val->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
dest_val->data.x_f16 = bigfloat_to_f16(bigfloat);
|
||||
break;
|
||||
case 32:
|
||||
dest_val->data.x_f32 = bigfloat_to_f32(bigfloat);
|
||||
break;
|
||||
|
@ -7349,11 +7372,39 @@ static void float_init_bigfloat(ConstExprValue *dest_val, BigFloat *bigfloat) {
|
|||
}
|
||||
}
|
||||
|
||||
// Stores the half-precision value |x| into |dest_val|, converting it to
// whatever float representation the destination type uses:
//   - comptime float: widened into the BigFloat payload
//   - f16: copied as-is
//   - f32 / f64: widened through zig_f16_to_double
//   - f128: widened through SoftFloat's f16_to_f128M
// Any other destination type or bit width is treated as unreachable.
static void float_init_f16(ConstExprValue *dest_val, float16_t x) {
    if (dest_val->type->id == TypeTableEntryIdComptimeFloat) {
        bigfloat_init_16(&dest_val->data.x_bigfloat, x);
        return;
    }

    if (dest_val->type->id == TypeTableEntryIdFloat) {
        switch (dest_val->type->data.floating.bit_count) {
            case 16:
                dest_val->data.x_f16 = x;
                break;
            case 32:
                // The double result is narrowed to float on assignment.
                dest_val->data.x_f32 = (float)zig_f16_to_double(x);
                break;
            case 64:
                dest_val->data.x_f64 = zig_f16_to_double(x);
                break;
            case 128:
                f16_to_f128M(x, &dest_val->data.x_f128);
                break;
            default:
                zig_unreachable();
        }
        return;
    }

    zig_unreachable();
}
|
||||
|
||||
static void float_init_f32(ConstExprValue *dest_val, float x) {
|
||||
if (dest_val->type->id == TypeTableEntryIdComptimeFloat) {
|
||||
bigfloat_init_32(&dest_val->data.x_bigfloat, x);
|
||||
} else if (dest_val->type->id == TypeTableEntryIdFloat) {
|
||||
switch (dest_val->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
dest_val->data.x_f16 = zig_double_to_f16(x);
|
||||
break;
|
||||
case 32:
|
||||
dest_val->data.x_f32 = x;
|
||||
break;
|
||||
|
@ -7380,6 +7431,9 @@ static void float_init_f64(ConstExprValue *dest_val, double x) {
|
|||
bigfloat_init_64(&dest_val->data.x_bigfloat, x);
|
||||
} else if (dest_val->type->id == TypeTableEntryIdFloat) {
|
||||
switch (dest_val->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
dest_val->data.x_f16 = zig_double_to_f16(x);
|
||||
break;
|
||||
case 32:
|
||||
dest_val->data.x_f32 = x;
|
||||
break;
|
||||
|
@ -7406,6 +7460,9 @@ static void float_init_f128(ConstExprValue *dest_val, float128_t x) {
|
|||
bigfloat_init_128(&dest_val->data.x_bigfloat, x);
|
||||
} else if (dest_val->type->id == TypeTableEntryIdFloat) {
|
||||
switch (dest_val->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
dest_val->data.x_f16 = f128M_to_f16(&x);
|
||||
break;
|
||||
case 32:
|
||||
{
|
||||
float32_t f32_val = f128M_to_f32(&x);
|
||||
|
@ -7436,6 +7493,9 @@ static void float_init_float(ConstExprValue *dest_val, ConstExprValue *src_val)
|
|||
float_init_bigfloat(dest_val, &src_val->data.x_bigfloat);
|
||||
} else if (src_val->type->id == TypeTableEntryIdFloat) {
|
||||
switch (src_val->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
float_init_f16(dest_val, src_val->data.x_f16);
|
||||
break;
|
||||
case 32:
|
||||
float_init_f32(dest_val, src_val->data.x_f32);
|
||||
break;
|
||||
|
@ -7459,6 +7519,14 @@ static Cmp float_cmp(ConstExprValue *op1, ConstExprValue *op2) {
|
|||
return bigfloat_cmp(&op1->data.x_bigfloat, &op2->data.x_bigfloat);
|
||||
} else if (op1->type->id == TypeTableEntryIdFloat) {
|
||||
switch (op1->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
if (f16_lt(op1->data.x_f16, op2->data.x_f16)) {
|
||||
return CmpLT;
|
||||
} else if (f16_lt(op2->data.x_f16, op1->data.x_f16)) {
|
||||
return CmpGT;
|
||||
} else {
|
||||
return CmpEQ;
|
||||
}
|
||||
case 32:
|
||||
if (op1->data.x_f32 > op2->data.x_f32) {
|
||||
return CmpGT;
|
||||
|
@ -7496,6 +7564,17 @@ static Cmp float_cmp_zero(ConstExprValue *op) {
|
|||
return bigfloat_cmp_zero(&op->data.x_bigfloat);
|
||||
} else if (op->type->id == TypeTableEntryIdFloat) {
|
||||
switch (op->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
{
|
||||
const float16_t zero = zig_double_to_f16(0);
|
||||
if (f16_lt(op->data.x_f16, zero)) {
|
||||
return CmpLT;
|
||||
} else if (f16_lt(zero, op->data.x_f16)) {
|
||||
return CmpGT;
|
||||
} else {
|
||||
return CmpEQ;
|
||||
}
|
||||
}
|
||||
case 32:
|
||||
if (op->data.x_f32 < 0.0) {
|
||||
return CmpLT;
|
||||
|
@ -7537,6 +7616,9 @@ static void float_add(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal
|
|||
bigfloat_add(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat);
|
||||
} else if (op1->type->id == TypeTableEntryIdFloat) {
|
||||
switch (op1->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
out_val->data.x_f16 = f16_add(op1->data.x_f16, op2->data.x_f16);
|
||||
return;
|
||||
case 32:
|
||||
out_val->data.x_f32 = op1->data.x_f32 + op2->data.x_f32;
|
||||
return;
|
||||
|
@ -7561,6 +7643,9 @@ static void float_sub(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal
|
|||
bigfloat_sub(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat);
|
||||
} else if (op1->type->id == TypeTableEntryIdFloat) {
|
||||
switch (op1->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
out_val->data.x_f16 = f16_sub(op1->data.x_f16, op2->data.x_f16);
|
||||
return;
|
||||
case 32:
|
||||
out_val->data.x_f32 = op1->data.x_f32 - op2->data.x_f32;
|
||||
return;
|
||||
|
@ -7585,6 +7670,9 @@ static void float_mul(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal
|
|||
bigfloat_mul(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat);
|
||||
} else if (op1->type->id == TypeTableEntryIdFloat) {
|
||||
switch (op1->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
out_val->data.x_f16 = f16_mul(op1->data.x_f16, op2->data.x_f16);
|
||||
return;
|
||||
case 32:
|
||||
out_val->data.x_f32 = op1->data.x_f32 * op2->data.x_f32;
|
||||
return;
|
||||
|
@ -7609,6 +7697,9 @@ static void float_div(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal
|
|||
bigfloat_div(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat);
|
||||
} else if (op1->type->id == TypeTableEntryIdFloat) {
|
||||
switch (op1->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
out_val->data.x_f16 = f16_div(op1->data.x_f16, op2->data.x_f16);
|
||||
return;
|
||||
case 32:
|
||||
out_val->data.x_f32 = op1->data.x_f32 / op2->data.x_f32;
|
||||
return;
|
||||
|
@ -7633,21 +7724,15 @@ static void float_div_trunc(ConstExprValue *out_val, ConstExprValue *op1, ConstE
|
|||
bigfloat_div_trunc(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat);
|
||||
} else if (op1->type->id == TypeTableEntryIdFloat) {
|
||||
switch (op1->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
out_val->data.x_f16 = f16_div(op1->data.x_f16, op2->data.x_f16);
|
||||
out_val->data.x_f16 = f16_roundToInt(out_val->data.x_f16, softfloat_round_minMag, false);
|
||||
return;
|
||||
case 32:
|
||||
out_val->data.x_f32 = op1->data.x_f32 / op2->data.x_f32;
|
||||
if (out_val->data.x_f32 >= 0.0) {
|
||||
out_val->data.x_f32 = floorf(out_val->data.x_f32);
|
||||
} else {
|
||||
out_val->data.x_f32 = ceilf(out_val->data.x_f32);
|
||||
}
|
||||
out_val->data.x_f32 = truncf(op1->data.x_f32 / op2->data.x_f32);
|
||||
return;
|
||||
case 64:
|
||||
out_val->data.x_f64 = op1->data.x_f64 / op2->data.x_f64;
|
||||
if (out_val->data.x_f64 >= 0.0) {
|
||||
out_val->data.x_f64 = floor(out_val->data.x_f64);
|
||||
} else {
|
||||
out_val->data.x_f64 = ceil(out_val->data.x_f64);
|
||||
}
|
||||
out_val->data.x_f64 = trunc(op1->data.x_f64 / op2->data.x_f64);
|
||||
return;
|
||||
case 128:
|
||||
f128M_div(&op1->data.x_f128, &op2->data.x_f128, &out_val->data.x_f128);
|
||||
|
@ -7668,6 +7753,10 @@ static void float_div_floor(ConstExprValue *out_val, ConstExprValue *op1, ConstE
|
|||
bigfloat_div_floor(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat);
|
||||
} else if (op1->type->id == TypeTableEntryIdFloat) {
|
||||
switch (op1->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
out_val->data.x_f16 = f16_div(op1->data.x_f16, op2->data.x_f16);
|
||||
out_val->data.x_f16 = f16_roundToInt(out_val->data.x_f16, softfloat_round_min, false);
|
||||
return;
|
||||
case 32:
|
||||
out_val->data.x_f32 = floorf(op1->data.x_f32 / op2->data.x_f32);
|
||||
return;
|
||||
|
@ -7693,6 +7782,9 @@ static void float_rem(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal
|
|||
bigfloat_rem(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat);
|
||||
} else if (op1->type->id == TypeTableEntryIdFloat) {
|
||||
switch (op1->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
out_val->data.x_f16 = f16_rem(op1->data.x_f16, op2->data.x_f16);
|
||||
return;
|
||||
case 32:
|
||||
out_val->data.x_f32 = fmodf(op1->data.x_f32, op2->data.x_f32);
|
||||
return;
|
||||
|
@ -7710,6 +7802,16 @@ static void float_rem(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal
|
|||
}
|
||||
}
|
||||
|
||||
// c = a - b * trunc(a / b)
|
||||
static float16_t zig_f16_mod(float16_t a, float16_t b) {
|
||||
float16_t c;
|
||||
c = f16_div(a, b);
|
||||
c = f16_roundToInt(c, softfloat_round_min, true);
|
||||
c = f16_mul(b, c);
|
||||
c = f16_sub(a, c);
|
||||
return c;
|
||||
}
|
||||
|
||||
// c = a - b * trunc(a / b)
|
||||
static void zig_f128M_mod(const float128_t* a, const float128_t* b, float128_t* c) {
|
||||
f128M_div(a, b, c);
|
||||
|
@ -7725,6 +7827,9 @@ static void float_mod(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal
|
|||
bigfloat_mod(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat);
|
||||
} else if (op1->type->id == TypeTableEntryIdFloat) {
|
||||
switch (op1->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
out_val->data.x_f16 = zig_f16_mod(op1->data.x_f16, op2->data.x_f16);
|
||||
return;
|
||||
case 32:
|
||||
out_val->data.x_f32 = fmodf(fmodf(op1->data.x_f32, op2->data.x_f32) + op2->data.x_f32, op2->data.x_f32);
|
||||
return;
|
||||
|
@ -7748,6 +7853,12 @@ static void float_negate(ConstExprValue *out_val, ConstExprValue *op) {
|
|||
bigfloat_negate(&out_val->data.x_bigfloat, &op->data.x_bigfloat);
|
||||
} else if (op->type->id == TypeTableEntryIdFloat) {
|
||||
switch (op->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
{
|
||||
const float16_t zero = zig_double_to_f16(0);
|
||||
out_val->data.x_f16 = f16_sub(zero, op->data.x_f16);
|
||||
return;
|
||||
}
|
||||
case 32:
|
||||
out_val->data.x_f32 = -op->data.x_f32;
|
||||
return;
|
||||
|
@ -7770,6 +7881,9 @@ static void float_negate(ConstExprValue *out_val, ConstExprValue *op) {
|
|||
void float_write_ieee597(ConstExprValue *op, uint8_t *buf, bool is_big_endian) {
|
||||
if (op->type->id == TypeTableEntryIdFloat) {
|
||||
switch (op->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
memcpy(buf, &op->data.x_f16, 2); // TODO wrong when compiler is big endian
|
||||
return;
|
||||
case 32:
|
||||
memcpy(buf, &op->data.x_f32, 4); // TODO wrong when compiler is big endian
|
||||
return;
|
||||
|
@ -7790,6 +7904,9 @@ void float_write_ieee597(ConstExprValue *op, uint8_t *buf, bool is_big_endian) {
|
|||
void float_read_ieee597(ConstExprValue *val, uint8_t *buf, bool is_big_endian) {
|
||||
if (val->type->id == TypeTableEntryIdFloat) {
|
||||
switch (val->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
memcpy(&val->data.x_f16, buf, 2); // TODO wrong when compiler is big endian
|
||||
return;
|
||||
case 32:
|
||||
memcpy(&val->data.x_f32, buf, 4); // TODO wrong when compiler is big endian
|
||||
return;
|
||||
|
@ -8817,6 +8934,9 @@ static bool eval_const_expr_implicit_cast(IrAnalyze *ira, IrInstruction *source_
|
|||
if (other_val->type->id == TypeTableEntryIdComptimeFloat) {
|
||||
assert(new_type->id == TypeTableEntryIdFloat);
|
||||
switch (new_type->data.floating.bit_count) {
|
||||
case 16:
|
||||
const_val->data.x_f16 = bigfloat_to_f16(&other_val->data.x_bigfloat);
|
||||
break;
|
||||
case 32:
|
||||
const_val->data.x_f32 = bigfloat_to_f32(&other_val->data.x_bigfloat);
|
||||
break;
|
||||
|
@ -8847,6 +8967,9 @@ static bool eval_const_expr_implicit_cast(IrAnalyze *ira, IrInstruction *source_
|
|||
BigFloat bigfloat;
|
||||
bigfloat_init_bigint(&bigfloat, &other_val->data.x_bigint);
|
||||
switch (new_type->data.floating.bit_count) {
|
||||
case 16:
|
||||
const_val->data.x_f16 = bigfloat_to_f16(&bigfloat);
|
||||
break;
|
||||
case 32:
|
||||
const_val->data.x_f32 = bigfloat_to_f32(&bigfloat);
|
||||
break;
|
||||
|
@ -20104,6 +20227,9 @@ static TypeTableEntry *ir_analyze_instruction_sqrt(IrAnalyze *ira, IrInstruction
|
|||
bigfloat_sqrt(&out_val->data.x_bigfloat, &val->data.x_bigfloat);
|
||||
} else if (float_type->id == TypeTableEntryIdFloat) {
|
||||
switch (float_type->data.floating.bit_count) {
|
||||
case 16:
|
||||
out_val->data.x_f16 = f16_sqrt(val->data.x_f16);
|
||||
break;
|
||||
case 32:
|
||||
out_val->data.x_f32 = sqrtf(val->data.x_f32);
|
||||
break;
|
||||
|
@ -20124,7 +20250,9 @@ static TypeTableEntry *ir_analyze_instruction_sqrt(IrAnalyze *ira, IrInstruction
|
|||
}
|
||||
|
||||
assert(float_type->id == TypeTableEntryIdFloat);
|
||||
if (float_type->data.floating.bit_count != 32 && float_type->data.floating.bit_count != 64) {
|
||||
if (float_type->data.floating.bit_count != 16 &&
|
||||
float_type->data.floating.bit_count != 32 &&
|
||||
float_type->data.floating.bit_count != 64) {
|
||||
ir_add_error(ira, instruction->type, buf_sprintf("compiler TODO: add implementation of sqrt for '%s'", buf_ptr(&float_type->name)));
|
||||
return ira->codegen->builtin_types.entry_invalid;
|
||||
}
|
||||
|
|
19
src/util.hpp
19
src/util.hpp
|
@ -31,6 +31,8 @@
|
|||
|
||||
#endif
|
||||
|
||||
#include "softfloat.hpp"
|
||||
|
||||
#define BREAKPOINT __asm("int $0x03")
|
||||
|
||||
ATTRIBUTE_COLD
|
||||
|
@ -165,4 +167,21 @@ static inline uint8_t log2_u64(uint64_t x) {
|
|||
return (63 - clzll(x));
|
||||
}
|
||||
|
||||
// Converts a native double to a SoftFloat half-precision value.
// The double's bytes are copied into a float64_t (no pointer casts), and the
// narrowing itself is done by SoftFloat's f64_to_f16.
static inline float16_t zig_double_to_f16(double x) {
    float64_t soft_f64;
    static_assert(sizeof(soft_f64) == sizeof(x), "");
    memcpy(&soft_f64, &x, sizeof(soft_f64));

    float16_t half = f64_to_f16(soft_f64);
    return half;
}
|
||||
|
||||
|
||||
// Return value is safe to coerce to float even when |x| is NaN or Infinity.
|
||||
static inline double zig_f16_to_double(float16_t x) {
|
||||
float64_t y = f16_to_f64(x);
|
||||
double z;
|
||||
static_assert(sizeof(y) == sizeof(z), "");
|
||||
memcpy(&z, &y, sizeof(y));
|
||||
return z;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -10,9 +10,13 @@ pub extern fn __extendsftf2(a: f32) f128 {
|
|||
return extendXfYf2(f128, f32, a);
|
||||
}
|
||||
|
||||
// Widens a half-precision value, passed as its raw 16-bit pattern, to f32.
pub extern fn __extendhfsf2(a: u16) f32 {
    // Reinterpret the integer bits as an f16 before handing it to the
    // generic extension routine.
    const half = @bitCast(f16, a);
    return extendXfYf2(f32, f16, half);
}
|
||||
|
||||
const CHAR_BIT = 8;
|
||||
|
||||
pub fn extendXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t {
|
||||
inline fn extendXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t {
|
||||
const src_rep_t = @IntType(false, @typeInfo(src_t).Float.bits);
|
||||
const dst_rep_t = @IntType(false, @typeInfo(dst_t).Float.bits);
|
||||
const srcSigBits = std.math.floatMantissaBits(src_t);
|
||||
|
@ -22,22 +26,22 @@ pub fn extendXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t {
|
|||
|
||||
// Various constants whose values follow from the type parameters.
|
||||
// Any reasonable optimizer will fold and propagate all of these.
|
||||
const srcBits: i32 = @sizeOf(src_t) * CHAR_BIT;
|
||||
const srcExpBits: i32 = srcBits - srcSigBits - 1;
|
||||
const srcInfExp: i32 = (1 << srcExpBits) - 1;
|
||||
const srcExpBias: i32 = srcInfExp >> 1;
|
||||
const srcBits = @sizeOf(src_t) * CHAR_BIT;
|
||||
const srcExpBits = srcBits - srcSigBits - 1;
|
||||
const srcInfExp = (1 << srcExpBits) - 1;
|
||||
const srcExpBias = srcInfExp >> 1;
|
||||
|
||||
const srcMinNormal: src_rep_t = src_rep_t(1) << srcSigBits;
|
||||
const srcInfinity: src_rep_t = src_rep_t(@bitCast(u32, srcInfExp)) << srcSigBits;
|
||||
const srcSignMask: src_rep_t = src_rep_t(1) << @intCast(SrcShift, srcSigBits +% srcExpBits);
|
||||
const srcAbsMask: src_rep_t = srcSignMask -% 1;
|
||||
const srcQNaN: src_rep_t = src_rep_t(1) << @intCast(SrcShift, srcSigBits -% 1);
|
||||
const srcNaNCode: src_rep_t = srcQNaN -% 1;
|
||||
const srcMinNormal = 1 << srcSigBits;
|
||||
const srcInfinity = srcInfExp << srcSigBits;
|
||||
const srcSignMask = 1 << (srcSigBits + srcExpBits);
|
||||
const srcAbsMask = srcSignMask - 1;
|
||||
const srcQNaN = 1 << (srcSigBits - 1);
|
||||
const srcNaNCode = srcQNaN - 1;
|
||||
|
||||
const dstBits: i32 = @sizeOf(dst_t) * CHAR_BIT;
|
||||
const dstExpBits: i32 = dstBits - dstSigBits - 1;
|
||||
const dstInfExp: i32 = (1 << dstExpBits) - 1;
|
||||
const dstExpBias: i32 = dstInfExp >> 1;
|
||||
const dstBits = @sizeOf(dst_t) * CHAR_BIT;
|
||||
const dstExpBits = dstBits - dstSigBits - 1;
|
||||
const dstInfExp = (1 << dstExpBits) - 1;
|
||||
const dstExpBias = dstInfExp >> 1;
|
||||
|
||||
const dstMinNormal: dst_rep_t = dst_rep_t(1) << dstSigBits;
|
||||
|
||||
|
@ -47,38 +51,36 @@ pub fn extendXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t {
|
|||
const sign: src_rep_t = aRep & srcSignMask;
|
||||
var absResult: dst_rep_t = undefined;
|
||||
|
||||
// If @sizeOf(src_rep_t) < @sizeOf(int), the subtraction result is promoted
|
||||
// to (signed) int. To avoid that, explicitly cast to src_rep_t.
|
||||
if ((src_rep_t)(aAbs -% srcMinNormal) < srcInfinity -% srcMinNormal) {
|
||||
if (aAbs -% srcMinNormal < srcInfinity - srcMinNormal) {
|
||||
// a is a normal number.
|
||||
// Extend to the destination type by shifting the significand and
|
||||
// exponent into the proper position and rebiasing the exponent.
|
||||
absResult = dst_rep_t(aAbs) << (dstSigBits -% srcSigBits);
|
||||
absResult += dst_rep_t(@bitCast(u32, dstExpBias -% srcExpBias)) << dstSigBits;
|
||||
absResult = dst_rep_t(aAbs) << (dstSigBits - srcSigBits);
|
||||
absResult += (dstExpBias - srcExpBias) << dstSigBits;
|
||||
} else if (aAbs >= srcInfinity) {
|
||||
// a is NaN or infinity.
|
||||
// Conjure the result by beginning with infinity, then setting the qNaN
|
||||
// bit (if needed) and right-aligning the rest of the trailing NaN
|
||||
// payload field.
|
||||
absResult = dst_rep_t(@bitCast(u32, dstInfExp)) << dstSigBits;
|
||||
absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits);
|
||||
absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits);
|
||||
absResult = dstInfExp << dstSigBits;
|
||||
absResult |= dst_rep_t(aAbs & srcQNaN) << (dstSigBits - srcSigBits);
|
||||
absResult |= dst_rep_t(aAbs & srcNaNCode) << (dstSigBits - srcSigBits);
|
||||
} else if (aAbs != 0) {
|
||||
// a is denormal.
|
||||
// renormalize the significand and clear the leading bit, then insert
|
||||
// the correct adjusted exponent in the destination type.
|
||||
const scale: i32 = @clz(aAbs) - @clz(srcMinNormal);
|
||||
const scale: u32 = @clz(aAbs) - @clz(src_rep_t(srcMinNormal));
|
||||
absResult = dst_rep_t(aAbs) << @intCast(DstShift, dstSigBits - srcSigBits + scale);
|
||||
absResult ^= dstMinNormal;
|
||||
const resultExponent: i32 = dstExpBias - srcExpBias - scale + 1;
|
||||
absResult |= dst_rep_t(@bitCast(u32, resultExponent)) << @intCast(DstShift, dstSigBits);
|
||||
const resultExponent: u32 = dstExpBias - srcExpBias - scale + 1;
|
||||
absResult |= @intCast(dst_rep_t, resultExponent) << dstSigBits;
|
||||
} else {
|
||||
// a is zero.
|
||||
absResult = 0;
|
||||
}
|
||||
|
||||
// Apply the signbit to (dst_t)abs(a).
|
||||
const result: dst_rep_t align(@alignOf(dst_t)) = absResult | dst_rep_t(sign) << @intCast(DstShift, dstBits - srcBits);
|
||||
const result: dst_rep_t align(@alignOf(dst_t)) = absResult | dst_rep_t(sign) << (dstBits - srcBits);
|
||||
return @bitCast(dst_t, result);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
const __extenddftf2 = @import("extendXfYf2.zig").__extenddftf2;
|
||||
const __extendhfsf2 = @import("extendXfYf2.zig").__extendhfsf2;
|
||||
const __extendsftf2 = @import("extendXfYf2.zig").__extendsftf2;
|
||||
const assert = @import("std").debug.assert;
|
||||
|
||||
|
@ -24,6 +25,22 @@ fn test__extenddftf2(a: f64, expectedHi: u64, expectedLo: u64) void {
|
|||
@panic("__extenddftf2 test failure");
|
||||
}
|
||||
|
||||
// Test helper: widens the 16-bit pattern `a` through __extendhfsf2 and panics
// unless the f32 that comes back has exactly the bit pattern `expected`.
fn test__extendhfsf2(a: u16, expected: u32) void {
    const widened = __extendhfsf2(a);
    const bits = @bitCast(u32, widened);

    // Any bit-level mismatch is a failure.
    if (bits != expected) {
        @panic("__extendhfsf2 test failure");
    }

    // Exponent all ones with a non-zero fraction means NaN. A NaN never
    // compares equal, so the bit-exact match above is all that can be checked.
    const magnitude = bits & 0x7fffffff;
    if (magnitude > 0x7f800000) {
        return;
    }

    // For every other value the float comparison has to agree as well.
    if (widened != @bitCast(f32, expected)) {
        @panic("__extendhfsf2 test failure");
    }
}
|
||||
|
||||
fn test__extendsftf2(a: f32, expectedHi: u64, expectedLo: u64) void {
|
||||
const x = __extendsftf2(a);
|
||||
|
||||
|
@ -68,6 +85,35 @@ test "extenddftf2" {
|
|||
test__extenddftf2(0x1.edcba987654321fp-45, 0x3fd2edcba9876543, 0x2000000000000000);
|
||||
}
|
||||
|
||||
test "extendhfsf2" {
    // Each entry pairs a binary16 bit pattern with the f32 bit pattern that
    // widening it must produce. Entries are checked in order.
    const Case = struct {
        half: u16,
        single: u32,
    };
    const cases = []Case{
        // NaNs: quiet NaN, then a NaN carrying an extra payload bit
        // (labelled "sNaN" upstream, although its quiet bit 0x0200 is set).
        Case{ .half = 0x7e00, .single = 0x7fc00000 },
        Case{ .half = 0x7f00, .single = 0x7fe00000 },

        // +0 and -0
        Case{ .half = 0, .single = 0 },
        Case{ .half = 0x8000, .single = 0x80000000 },

        // +inf and -inf
        Case{ .half = 0x7c00, .single = 0x7f800000 },
        Case{ .half = 0xfc00, .single = 0xff800000 },

        // smallest denormal, +/- 2**-24
        Case{ .half = 0x0001, .single = 0x33800000 },
        Case{ .half = 0x8001, .single = 0xb3800000 },

        // largest denormal, +/- (2**-14 - 2**-24)
        Case{ .half = 0x03ff, .single = 0x387fc000 },
        Case{ .half = 0x83ff, .single = 0xb87fc000 },

        // smallest normal, +/- 2**-14
        Case{ .half = 0x0400, .single = 0x38800000 },
        Case{ .half = 0x8400, .single = 0xb8800000 },

        // largest normal, +/- 65504
        Case{ .half = 0x7bff, .single = 0x477fe000 },
        Case{ .half = 0xfbff, .single = 0xc77fe000 },

        // +/- (1 + 2**-10)
        Case{ .half = 0x3c01, .single = 0x3f802000 },
        Case{ .half = 0xbc01, .single = 0xbf802000 },

        // approximately +/- 1/3
        Case{ .half = 0x3555, .single = 0x3eaaa000 },
        Case{ .half = 0xb555, .single = 0xbeaaa000 },
    };

    for (cases) |c| {
        test__extendhfsf2(c.half, c.single);
    }
}
|
||||
|
||||
test "extendsftf2" {
|
||||
// qNaN
|
||||
test__extendsftf2(makeQNaN32(), 0x7fff800000000000, 0x0);
|
||||
|
|
|
@ -15,6 +15,8 @@ comptime {
|
|||
@export("__lttf2", @import("comparetf2.zig").__letf2, linkage);
|
||||
@export("__netf2", @import("comparetf2.zig").__letf2, linkage);
|
||||
@export("__gttf2", @import("comparetf2.zig").__getf2, linkage);
|
||||
@export("__gnu_h2f_ieee", @import("extendXfYf2.zig").__extendhfsf2, linkage);
|
||||
@export("__gnu_f2h_ieee", @import("truncXfYf2.zig").__truncsfhf2, linkage);
|
||||
}
|
||||
|
||||
@export("__unordtf2", @import("comparetf2.zig").__unordtf2, linkage);
|
||||
|
@ -22,6 +24,9 @@ comptime {
|
|||
@export("__floatuntidf", @import("floatuntidf.zig").__floatuntidf, linkage);
|
||||
@export("__extenddftf2", @import("extendXfYf2.zig").__extenddftf2, linkage);
|
||||
@export("__extendsftf2", @import("extendXfYf2.zig").__extendsftf2, linkage);
|
||||
@export("__extendhfsf2", @import("extendXfYf2.zig").__extendhfsf2, linkage);
|
||||
|
||||
@export("__truncsfhf2", @import("truncXfYf2.zig").__truncsfhf2, linkage);
|
||||
|
||||
@export("__fixunssfsi", @import("fixunssfsi.zig").__fixunssfsi, linkage);
|
||||
@export("__fixunssfdi", @import("fixunssfdi.zig").__fixunssfdi, linkage);
|
||||
|
|
|
@ -0,0 +1,111 @@
|
|||
const std = @import("std");
|
||||
|
||||
// Narrows an f32 to half precision via truncXfYf2 and hands the result back
// as its raw 16-bit pattern.
pub extern fn __truncsfhf2(a: f32) u16 {
    const half: f16 = truncXfYf2(f16, f32, a);
    return @bitCast(u16, half);
}
|
||||
|
||||
const CHAR_BIT = 8;

// Narrows the float `a` of type `src_t` to the smaller float type `dst_t`.
//
// Works on the raw bit patterns:
//   * values whose exponent fits the destination's normal range are shifted
//     right and re-biased, rounding to nearest with ties to even;
//   * NaN becomes a quiet NaN that keeps the truncated payload bits;
//   * values at or above the overflow threshold (including infinity) become
//     infinity;
//   * everything smaller becomes a destination denormal or zero, rounded to
//     nearest with ties to even using a sticky bit for the discarded bits.
// The sign of `a` is carried over to the result in every case.
inline fn truncXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t {
    const src_rep_t = @IntType(false, @typeInfo(src_t).Float.bits);
    const dst_rep_t = @IntType(false, @typeInfo(dst_t).Float.bits);
    const srcSigBits = std.math.floatMantissaBits(src_t);
    const dstSigBits = std.math.floatMantissaBits(dst_t);
    const SrcShift = std.math.Log2Int(src_rep_t);

    // Various constants whose values follow from the type parameters.
    // Any reasonable optimizer will fold and propagate all of these.
    const srcBits = @sizeOf(src_t) * CHAR_BIT;
    const srcExpBits = srcBits - srcSigBits - 1;
    const srcInfExp = (1 << srcExpBits) - 1;
    const srcExpBias = srcInfExp >> 1;

    const srcMinNormal = 1 << srcSigBits;
    const srcSignificandMask = srcMinNormal - 1;
    const srcInfinity = srcInfExp << srcSigBits;
    const srcSignMask = 1 << (srcSigBits + srcExpBits);
    const srcAbsMask = srcSignMask - 1;
    const roundMask = (1 << (srcSigBits - dstSigBits)) - 1;
    const halfway = 1 << (srcSigBits - dstSigBits - 1);
    const srcQNaN = 1 << (srcSigBits - 1);
    const srcNaNCode = srcQNaN - 1;

    const dstBits = @sizeOf(dst_t) * CHAR_BIT;
    const dstExpBits = dstBits - dstSigBits - 1;
    const dstInfExp = (1 << dstExpBits) - 1;
    const dstExpBias = dstInfExp >> 1;

    const underflowExponent = srcExpBias + 1 - dstExpBias;
    const overflowExponent = srcExpBias + dstInfExp - dstExpBias;
    const underflow = underflowExponent << srcSigBits;
    const overflow = overflowExponent << srcSigBits;

    const dstQNaN = 1 << (dstSigBits - 1);
    const dstNaNCode = dstQNaN - 1;

    // Break a into a sign and representation of the absolute value
    const aRep: src_rep_t = @bitCast(src_rep_t, a);
    const aAbs: src_rep_t = aRep & srcAbsMask;
    const sign: src_rep_t = aRep & srcSignMask;
    var absResult: dst_rep_t = undefined;

    // Wrapping subtraction turns the two-sided range check
    // underflow <= aAbs < overflow into a single unsigned comparison.
    if (aAbs -% underflow < aAbs -% overflow) {
        // The exponent of a is within the range of normal numbers in the
        // destination format. We can convert by simply right-shifting with
        // rounding and adjusting the exponent.
        absResult = @truncate(dst_rep_t, aAbs >> (srcSigBits - dstSigBits));
        absResult -%= dst_rep_t(srcExpBias - dstExpBias) << dstSigBits;

        const roundBits: src_rep_t = aAbs & roundMask;
        if (roundBits > halfway) {
            // Round to nearest
            absResult += 1;
        } else if (roundBits == halfway) {
            // Ties to even
            absResult += absResult & 1;
        }
    } else if (aAbs > srcInfinity) {
        // a is NaN.
        // Conjure the result by beginning with infinity, setting the qNaN
        // bit and inserting the (truncated) trailing NaN field.
        absResult = @intCast(dst_rep_t, dstInfExp) << dstSigBits;
        absResult |= dstQNaN;
        absResult |= @intCast(dst_rep_t, ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode);
    } else if (aAbs >= overflow) {
        // a overflows to infinity.
        absResult = @intCast(dst_rep_t, dstInfExp) << dstSigBits;
    } else {
        // a underflows on conversion to the destination type or is an exact
        // zero. The result may be a denormal or zero. Extract the exponent
        // to get the shift amount for the denormalization.
        const aExp: u32 = aAbs >> srcSigBits;
        const shift: u32 = srcExpBias - dstExpBias - aExp + 1;

        const significand: src_rep_t = (aRep & srcSignificandMask) | srcMinNormal;

        // Right shift by the denormalization amount with sticky.
        if (shift > srcSigBits) {
            absResult = 0;
        } else {
            // The sticky bit is a single 0/1 flag recording whether any of
            // the bits shifted out below were non-zero. ORing in the shifted
            // value itself would plant stray high bits in the significand.
            const shiftedOut: src_rep_t = significand << @intCast(SrcShift, srcBits - shift);
            const sticky: src_rep_t = if (shiftedOut != 0) src_rep_t(1) else src_rep_t(0);
            const denormalizedSignificand: src_rep_t = (significand >> @intCast(SrcShift, shift)) | sticky;
            absResult = @intCast(dst_rep_t, denormalizedSignificand >> (srcSigBits - dstSigBits));
            const roundBits: src_rep_t = denormalizedSignificand & roundMask;
            if (roundBits > halfway) {
                // Round to nearest
                absResult += 1;
            } else if (roundBits == halfway) {
                // Ties to even
                absResult += absResult & 1;
            }
        }
    }

    // Move the sign bit down to the destination's top bit and merge it in.
    const result: dst_rep_t align(@alignOf(dst_t)) = absResult | @truncate(dst_rep_t, sign >> @intCast(SrcShift, srcBits - dstBits));
    return @bitCast(dst_t, result);
}
|
||||
|
||||
// Imports the sibling file truncXfYf2_test.zig from inside a test block and
// discards the result. Presumably this is what makes that file's tests run
// together with this one's; confirm against the test runner's behavior.
test "import truncXfYf2" {
|
||||
_ = @import("truncXfYf2_test.zig");
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
const __truncsfhf2 = @import("truncXfYf2.zig").__truncsfhf2;
|
||||
|
||||
// Test helper: reinterprets `a` as an f32, narrows it with __truncsfhf2 and
// panics unless the 16-bit result equals `expected`.
fn test__truncsfhf2(a: u32, expected: u16) void {
    const input = @bitCast(f32, a);
    const got = __truncsfhf2(input);

    if (got != expected) {
        @panic("__truncsfhf2 test failure");
    }
}
|
||||
|
||||
test "truncsfhf2" {
    // Each entry pairs an f32 bit pattern with the binary16 bit pattern that
    // narrowing it must produce. Entries are checked in order.
    const Case = struct {
        single: u32,
        half: u16,
    };
    const cases = []Case{
        // NaNs: quiet NaN, then a NaN carrying an extra payload bit
        // (labelled "sNaN" upstream, although its quiet bit is set).
        Case{ .single = 0x7fc00000, .half = 0x7e00 },
        Case{ .single = 0x7fe00000, .half = 0x7f00 },

        // +0 and -0
        Case{ .single = 0, .half = 0 },
        Case{ .single = 0x80000000, .half = 0x8000 },

        // +inf and -inf
        Case{ .single = 0x7f800000, .half = 0x7c00 },
        Case{ .single = 0xff800000, .half = 0xfc00 },

        // +/- 65520 -> +/- inf
        Case{ .single = 0x477ff000, .half = 0x7c00 },
        Case{ .single = 0xc77ff000, .half = 0xfc00 },

        // +/- 0x1.987124876876324p+100 -> +/- inf
        Case{ .single = 0x71cc3892, .half = 0x7c00 },
        Case{ .single = 0xf1cc3892, .half = 0xfc00 },

        // smallest normal, +/- 2**-14
        Case{ .single = 0x38800000, .half = 0x0400 },
        Case{ .single = 0xb8800000, .half = 0x8400 },

        // largest normal, +/- 65504
        Case{ .single = 0x477fe000, .half = 0x7bff },
        Case{ .single = 0xc77fe000, .half = 0xfbff },

        // +/- 65505 -> +/- 65504
        Case{ .single = 0x477fe100, .half = 0x7bff },
        Case{ .single = 0xc77fe100, .half = 0xfbff },

        // +/- 65519 -> +/- 65504
        Case{ .single = 0x477fef00, .half = 0x7bff },
        Case{ .single = 0xc77fef00, .half = 0xfbff },

        // +/- (1 + 2**-10)
        Case{ .single = 0x3f802000, .half = 0x3c01 },
        Case{ .single = 0xbf802000, .half = 0xbc01 },

        // approximately +/- 1/3
        Case{ .single = 0x3eaaa000, .half = 0x3555 },
        Case{ .single = 0xbeaaa000, .half = 0xb555 },

        // +/- 3.1415926535
        Case{ .single = 0x40490fdb, .half = 0x4248 },
        Case{ .single = 0xc0490fdb, .half = 0xc248 },

        // 0x1.987124876876324p+12
        Case{ .single = 0x45cc3892, .half = 0x6e62 },

        // 1, then 0x1.0p-14
        Case{ .single = 0x3f800000, .half = 0x3c00 },
        Case{ .single = 0x38800000, .half = 0x0400 },

        // smallest denormal, +/- 2**-24
        Case{ .single = 0x33800000, .half = 0x0001 },
        Case{ .single = 0xb3800000, .half = 0x8001 },

        // largest denormal, +/- (2**-14 - 2**-24)
        Case{ .single = 0x387fc000, .half = 0x03ff },
        Case{ .single = 0xb87fc000, .half = 0x83ff },

        // denormal 0x1.0p-20
        Case{ .single = 0x35800000, .half = 0x0010 },
        // 0x1.5p-25 rounds up to 0x1.0p-24
        Case{ .single = 0x33280000, .half = 0x0001 },
        // 0x1.0p-25 is an exact tie and rounds to zero
        Case{ .single = 0x33000000, .half = 0x0000 },
    };

    for (cases) |c| {
        test__truncsfhf2(c.single, c.half);
    }
}
|
|
@ -350,13 +350,16 @@ fn testFloatToInts() void {
|
|||
assert(x == 10000);
|
||||
const y = @floatToInt(i32, f32(1e4));
|
||||
assert(y == 10000);
|
||||
expectFloatToInt(u8, 255.1, 255);
|
||||
expectFloatToInt(i8, 127.2, 127);
|
||||
expectFloatToInt(i8, -128.2, -128);
|
||||
expectFloatToInt(f16, 255.1, u8, 255);
|
||||
expectFloatToInt(f16, 127.2, i8, 127);
|
||||
expectFloatToInt(f16, -128.2, i8, -128);
|
||||
expectFloatToInt(f32, 255.1, u8, 255);
|
||||
expectFloatToInt(f32, 127.2, i8, 127);
|
||||
expectFloatToInt(f32, -128.2, i8, -128);
|
||||
}
|
||||
|
||||
fn expectFloatToInt(comptime T: type, f: f32, i: T) void {
|
||||
assert(@floatToInt(T, f) == i);
|
||||
fn expectFloatToInt(comptime F: type, f: F, comptime I: type, i: I) void {
|
||||
assert(@floatToInt(I, f) == i);
|
||||
}
|
||||
|
||||
test "cast u128 to f128 and back" {
|
||||
|
@ -418,19 +421,39 @@ test "@intCast comptime_int" {
|
|||
}
|
||||
|
||||
// Checks that @floatCast accepts both an integer literal and a float literal
// and yields the requested float type with the expected value, for f16 and
// f32. Each case lives in its own scope so that `result` is declared only once
// per scope and every declared value is the one actually asserted on.
test "@floatCast comptime_int and comptime_float" {
    {
        const result = @floatCast(f16, 1234);
        assert(@typeOf(result) == f16);
        assert(result == 1234.0);
    }
    {
        const result = @floatCast(f16, 1234.0);
        assert(@typeOf(result) == f16);
        assert(result == 1234.0);
    }
    {
        const result = @floatCast(f32, 1234);
        assert(@typeOf(result) == f32);
        assert(result == 1234.0);
    }
    {
        const result = @floatCast(f32, 1234.0);
        assert(@typeOf(result) == f32);
        assert(result == 1234.0);
    }
}
|
||||
|
||||
// Checks that @intToFloat on an integer literal yields the requested float
// type with the expected value, for f16 and f32. Each case lives in its own
// scope so that `result` is declared only once per scope.
test "comptime_int @intToFloat" {
    {
        const result = @intToFloat(f16, 1234);
        assert(@typeOf(result) == f16);
        assert(result == 1234.0);
    }
    {
        const result = @intToFloat(f32, 1234);
        assert(@typeOf(result) == f32);
        assert(result == 1234.0);
    }
}
|
||||
|
||||
test "@bytesToSlice keeps pointer alignment" {
|
||||
|
|
|
@ -6,15 +6,20 @@ test "division" {
|
|||
}
|
||||
fn testDivision() void {
|
||||
assert(div(u32, 13, 3) == 4);
|
||||
assert(div(f16, 1.0, 2.0) == 0.5);
|
||||
assert(div(f32, 1.0, 2.0) == 0.5);
|
||||
|
||||
assert(divExact(u32, 55, 11) == 5);
|
||||
assert(divExact(i32, -55, 11) == -5);
|
||||
assert(divExact(f16, 55.0, 11.0) == 5.0);
|
||||
assert(divExact(f16, -55.0, 11.0) == -5.0);
|
||||
assert(divExact(f32, 55.0, 11.0) == 5.0);
|
||||
assert(divExact(f32, -55.0, 11.0) == -5.0);
|
||||
|
||||
assert(divFloor(i32, 5, 3) == 1);
|
||||
assert(divFloor(i32, -5, 3) == -2);
|
||||
assert(divFloor(f16, 5.0, 3.0) == 1.0);
|
||||
assert(divFloor(f16, -5.0, 3.0) == -2.0);
|
||||
assert(divFloor(f32, 5.0, 3.0) == 1.0);
|
||||
assert(divFloor(f32, -5.0, 3.0) == -2.0);
|
||||
assert(divFloor(i32, -0x80000000, -2) == 0x40000000);
|
||||
|
@ -24,8 +29,12 @@ fn testDivision() void {
|
|||
|
||||
assert(divTrunc(i32, 5, 3) == 1);
|
||||
assert(divTrunc(i32, -5, 3) == -1);
|
||||
assert(divTrunc(f16, 5.0, 3.0) == 1.0);
|
||||
assert(divTrunc(f16, -5.0, 3.0) == -1.0);
|
||||
assert(divTrunc(f32, 5.0, 3.0) == 1.0);
|
||||
assert(divTrunc(f32, -5.0, 3.0) == -1.0);
|
||||
assert(divTrunc(f64, 5.0, 3.0) == 1.0);
|
||||
assert(divTrunc(f64, -5.0, 3.0) == -1.0);
|
||||
|
||||
comptime {
|
||||
assert(
|
||||
|
@ -435,10 +444,11 @@ test "comptime float rem int" {
|
|||
}
|
||||
|
||||
test "remainder division" {
    // Compile-time evaluation for each float type.
    comptime {
        remdiv(f16);
        remdiv(f32);
        remdiv(f64);
        remdiv(f128);
    }

    // The same checks evaluated at run time.
    remdiv(f32);
    remdiv(f16);
    remdiv(f64);
    remdiv(f128);
}
|
||||
|
@ -453,6 +463,8 @@ test "@sqrt" {
|
|||
comptime testSqrt(f64, 12.0);
|
||||
testSqrt(f32, 13.0);
|
||||
comptime testSqrt(f32, 13.0);
|
||||
testSqrt(f16, 13.0);
|
||||
comptime testSqrt(f16, 13.0);
|
||||
|
||||
const x = 14.0;
|
||||
const y = x * x;
|
||||
|
|
|
@ -53,6 +53,7 @@ test "@IntType builtin" {
|
|||
}
|
||||
|
||||
test "floating point primitive bit counts" {
    // Each float primitive reports its width through `bit_count`.
    const half_width = f16.bit_count;
    assert(half_width == 16);

    const single_width = f32.bit_count;
    assert(single_width == 32);

    const double_width = f64.bit_count;
    assert(double_width == 64);
}
|
||||
|
|
Loading…
Reference in New Issue