From fd75e73ee9818f12fd81d8fdb3cb949c492d664a Mon Sep 17 00:00:00 2001 From: Ben Noordhuis Date: Wed, 27 Jun 2018 16:20:04 +0200 Subject: [PATCH] add f16 type Add support for half-precision floating point operations. Introduce `__extendhfsf2` and `__truncsfhf2` in std/special/compiler_rt. Add `__gnu_h2f_ieee` and `__gnu_f2h_ieee` as aliases that are used in Windows builds. The logic in std/special/compiler_rt/extendXfYf2.zig has been reworked and can now operate on 16 bits floating point types. `extendXfYf2()` and `truncXfYf2()` are marked `inline` to work around a not entirely understood stack alignment issue on Windows when calling the f16 versions of the builtins. closes #1122 --- CMakeLists.txt | 16 ++ src/all_types.hpp | 2 + src/analyze.cpp | 15 ++ src/bigfloat.cpp | 8 + src/bigfloat.hpp | 2 + src/codegen.cpp | 4 + src/ir.cpp | 151 ++++++++++++++++++- src/util.hpp | 19 +++ std/special/compiler_rt/extendXfYf2.zig | 56 +++---- std/special/compiler_rt/extendXfYf2_test.zig | 46 ++++++ std/special/compiler_rt/index.zig | 5 + std/special/compiler_rt/truncXfYf2.zig | 111 ++++++++++++++ std/special/compiler_rt/truncXfYf2_test.zig | 64 ++++++++ test/cases/cast.zig | 28 +++- test/cases/math.zig | 12 +- test/cases/misc.zig | 1 + 16 files changed, 505 insertions(+), 35 deletions(-) create mode 100644 std/special/compiler_rt/truncXfYf2.zig create mode 100644 std/special/compiler_rt/truncXfYf2_test.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index 99de2328d..789da4a8a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -261,12 +261,15 @@ endif() set(EMBEDDED_SOFTFLOAT_SOURCES "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/f128M_isSignalingNaN.c" "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_commonNaNToF128M.c" + "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_commonNaNToF16UI.c" "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_commonNaNToF32UI.c" "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_commonNaNToF64UI.c" "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_f128MToCommonNaN.c" + "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_f16UIToCommonNaN.c" "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_f32UIToCommonNaN.c" "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_f64UIToCommonNaN.c" "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_propagateNaNF128M.c" + "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_propagateNaNF16UI.c" "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/softfloat_raiseFlags.c" "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f128M_add.c" "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f128M_div.c" @@ -293,8 +296,20 @@ set(EMBEDDED_SOFTFLOAT_SOURCES "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f128M_to_ui32_r_minMag.c" "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f128M_to_ui64.c" "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f128M_to_ui64_r_minMag.c" + "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_add.c" + "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_div.c" + "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_eq.c" + "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_lt.c" + "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_mul.c" + "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_rem.c" + "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_roundToInt.c" + "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_sqrt.c" + "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_sub.c" + "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_to_f128M.c" + "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_to_f64.c" "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f32_to_f128M.c" "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f64_to_f128M.c" + "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f64_to_f16.c" "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/s_add256M.c" "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/s_addCarryM.c" "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/s_addComplCarryM.c" @@ -572,6 +587,7 @@ set(ZIG_STD_FILES "special/compiler_rt/floatuntidf.zig" "special/compiler_rt/muloti4.zig" "special/compiler_rt/index.zig" + "special/compiler_rt/truncXfYf2.zig" "special/compiler_rt/udivmod.zig" "special/compiler_rt/udivmoddi4.zig" "special/compiler_rt/udivmodti4.zig" diff --git a/src/all_types.hpp b/src/all_types.hpp index 019dcb182..5d449491c 100644 --- a/src/all_types.hpp +++ b/src/all_types.hpp @@ -258,6 +258,7 @@ struct ConstExprValue { // populated if special == ConstValSpecialStatic BigInt x_bigint; BigFloat x_bigfloat; + float16_t x_f16; float x_f32; double x_f64; float128_t x_f128; @@ -1598,6 +1599,7 @@ struct CodeGen { TypeTableEntry *entry_i128; TypeTableEntry *entry_isize; TypeTableEntry *entry_usize; + TypeTableEntry *entry_f16; TypeTableEntry *entry_f32; TypeTableEntry *entry_f64; TypeTableEntry *entry_f128; diff --git a/src/analyze.cpp b/src/analyze.cpp index c018ee4e9..25cc1c79d 100644 --- a/src/analyze.cpp +++ b/src/analyze.cpp @@ -4668,6 +4668,13 @@ static uint32_t hash_const_val(ConstExprValue *const_val) { } case TypeTableEntryIdFloat: switch (const_val->type->data.floating.bit_count) { + case 16: + { + uint16_t result; + static_assert(sizeof(result) == sizeof(const_val->data.x_f16), ""); + memcpy(&result, &const_val->data.x_f16, sizeof(result)); + return result * 65537u; + } case 32: { uint32_t result; @@ -5128,6 +5135,9 @@ void init_const_float(ConstExprValue *const_val, TypeTableEntry *type, double va bigfloat_init_64(&const_val->data.x_bigfloat, value); } else if (type->id == TypeTableEntryIdFloat) { switch (type->data.floating.bit_count) { + case 16: + const_val->data.x_f16 = zig_double_to_f16(value); + break; case 32: const_val->data.x_f32 = value; break; @@ -5441,6 +5451,8 @@ bool const_values_equal(ConstExprValue *a, ConstExprValue *b) { case TypeTableEntryIdFloat: assert(a->type->data.floating.bit_count == b->type->data.floating.bit_count); switch (a->type->data.floating.bit_count) { + case 16: + return f16_eq(a->data.x_f16, b->data.x_f16); case 32: return a->data.x_f32 == b->data.x_f32; case 64: @@ -5614,6 +5626,9 @@ void render_const_value(CodeGen *g, Buf *buf, ConstExprValue *const_val) { return; case TypeTableEntryIdFloat: switch (type_entry->data.floating.bit_count) { + case 16: + buf_appendf(buf, "%f", zig_f16_to_double(const_val->data.x_f16)); + return; case 32: buf_appendf(buf, "%f", const_val->data.x_f32); return; diff --git a/src/bigfloat.cpp b/src/bigfloat.cpp index dcb6db61d..cc442fa3b 100644 --- a/src/bigfloat.cpp +++ b/src/bigfloat.cpp @@ -18,6 +18,10 @@ void bigfloat_init_128(BigFloat *dest, float128_t x) { dest->value = x; } +void bigfloat_init_16(BigFloat *dest, float16_t x) { + f16_to_f128M(x, &dest->value); +} + void bigfloat_init_32(BigFloat *dest, float x) { float32_t f32_val; memcpy(&f32_val, &x, sizeof(float)); @@ -146,6 +150,10 @@ Cmp bigfloat_cmp(const BigFloat *op1, const BigFloat *op2) { } } +float16_t bigfloat_to_f16(const BigFloat *bigfloat) { + return f128M_to_f16(&bigfloat->value); +} + float bigfloat_to_f32(const BigFloat *bigfloat) { float32_t f32_value = f128M_to_f32(&bigfloat->value); float result; diff --git a/src/bigfloat.hpp b/src/bigfloat.hpp index e212c30c8..c6ae56794 100644 --- a/src/bigfloat.hpp +++ b/src/bigfloat.hpp @@ -22,6 +22,7 @@ struct BigFloat { struct Buf; +void bigfloat_init_16(BigFloat *dest, float16_t x); void bigfloat_init_32(BigFloat *dest, float x); void bigfloat_init_64(BigFloat *dest, double x); void bigfloat_init_128(BigFloat *dest, float128_t x); @@ -29,6 +30,7 @@ void bigfloat_init_bigfloat(BigFloat *dest, const BigFloat *x); void bigfloat_init_bigint(BigFloat *dest, const BigInt *op); int bigfloat_init_buf_base10(BigFloat *dest, const uint8_t *buf_ptr, size_t buf_len); +float16_t bigfloat_to_f16(const BigFloat *bigfloat); float bigfloat_to_f32(const BigFloat *bigfloat); double bigfloat_to_f64(const BigFloat *bigfloat); float128_t bigfloat_to_f128(const BigFloat *bigfloat); diff --git a/src/codegen.cpp b/src/codegen.cpp index abec5a8ec..4419f4fc8 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -17,6 +17,7 @@ #include "os.hpp" #include "translate_c.hpp" #include "target.hpp" +#include "util.hpp" #include "zig_llvm.h" #include @@ -5211,6 +5212,8 @@ static LLVMValueRef gen_const_val(CodeGen *g, ConstExprValue *const_val, const c const_val->data.x_err_set->value, false); case TypeTableEntryIdFloat: switch (type_entry->data.floating.bit_count) { + case 16: + return LLVMConstReal(type_entry->type_ref, zig_f16_to_double(const_val->data.x_f16)); case 32: return LLVMConstReal(type_entry->type_ref, const_val->data.x_f32); case 64: @@ -6195,6 +6198,7 @@ static void define_builtin_types(CodeGen *g) { *field = entry; g->primitive_type_table.put(&entry->name, entry); }; + add_fp_entry(g, "f16", 16, LLVMHalfType(), &g->builtin_types.entry_f16); add_fp_entry(g, "f32", 32, LLVMFloatType(), &g->builtin_types.entry_f32); add_fp_entry(g, "f64", 64, LLVMDoubleType(), &g->builtin_types.entry_f64); add_fp_entry(g, "f128", 128, LLVMFP128Type(), &g->builtin_types.entry_f128); diff --git a/src/ir.cpp b/src/ir.cpp index 76178f243..694f91214 100644 --- a/src/ir.cpp +++ b/src/ir.cpp @@ -11,9 +11,10 @@ #include "ir.hpp" #include "ir_print.hpp" #include "os.hpp" -#include "translate_c.hpp" #include "range_set.hpp" #include "softfloat.hpp" +#include "translate_c.hpp" +#include "util.hpp" struct IrExecContext { ConstExprValue *mem_slot_list; @@ -7238,6 +7239,11 @@ static bool float_has_fraction(ConstExprValue *const_val) { return bigfloat_has_fraction(&const_val->data.x_bigfloat); } else if (const_val->type->id == TypeTableEntryIdFloat) { switch (const_val->type->data.floating.bit_count) { + case 16: + { + float16_t floored = f16_roundToInt(const_val->data.x_f16, softfloat_round_minMag, false); + return !f16_eq(floored, const_val->data.x_f16); + } case 32: return floorf(const_val->data.x_f32) != const_val->data.x_f32; case 64: @@ -7261,6 +7267,9 @@ static void float_append_buf(Buf *buf, ConstExprValue *const_val) { bigfloat_append_buf(buf, &const_val->data.x_bigfloat); } else if (const_val->type->id == TypeTableEntryIdFloat) { switch (const_val->type->data.floating.bit_count) { + case 16: + buf_appendf(buf, "%f", zig_f16_to_double(const_val->data.x_f16)); + break; case 32: buf_appendf(buf, "%f", const_val->data.x_f32); break; @@ -7296,6 +7305,17 @@ static void float_init_bigint(BigInt *bigint, ConstExprValue *const_val) { bigint_init_bigfloat(bigint, &const_val->data.x_bigfloat); } else if (const_val->type->id == TypeTableEntryIdFloat) { switch (const_val->type->data.floating.bit_count) { + case 16: + { + double x = zig_f16_to_double(const_val->data.x_f16); + if (x >= 0) { + bigint_init_unsigned(bigint, (uint64_t)x); + } else { + bigint_init_unsigned(bigint, (uint64_t)-x); + bigint->is_negative = true; + } + break; + } case 32: if (const_val->data.x_f32 >= 0) { bigint_init_unsigned(bigint, (uint64_t)(const_val->data.x_f32)); @@ -7332,6 +7352,9 @@ static void float_init_bigfloat(ConstExprValue *dest_val, BigFloat *bigfloat) { bigfloat_init_bigfloat(&dest_val->data.x_bigfloat, bigfloat); } else if (dest_val->type->id == TypeTableEntryIdFloat) { switch (dest_val->type->data.floating.bit_count) { + case 16: + dest_val->data.x_f16 = bigfloat_to_f16(bigfloat); + break; case 32: dest_val->data.x_f32 = bigfloat_to_f32(bigfloat); break; @@ -7349,11 +7372,39 @@ static void float_init_bigfloat(ConstExprValue *dest_val, BigFloat *bigfloat) { } } +static void float_init_f16(ConstExprValue *dest_val, float16_t x) { + if (dest_val->type->id == TypeTableEntryIdComptimeFloat) { + bigfloat_init_16(&dest_val->data.x_bigfloat, x); + } else if (dest_val->type->id == TypeTableEntryIdFloat) { + switch (dest_val->type->data.floating.bit_count) { + case 16: + dest_val->data.x_f16 = x; + break; + case 32: + dest_val->data.x_f32 = zig_f16_to_double(x); + break; + case 64: + dest_val->data.x_f64 = zig_f16_to_double(x); + break; + case 128: + f16_to_f128M(x, &dest_val->data.x_f128); + break; + default: + zig_unreachable(); + } + } else { + zig_unreachable(); + } +} + static void float_init_f32(ConstExprValue *dest_val, float x) { if (dest_val->type->id == TypeTableEntryIdComptimeFloat) { bigfloat_init_32(&dest_val->data.x_bigfloat, x); } else if (dest_val->type->id == TypeTableEntryIdFloat) { switch (dest_val->type->data.floating.bit_count) { + case 16: + dest_val->data.x_f16 = zig_double_to_f16(x); + break; case 32: dest_val->data.x_f32 = x; break; @@ -7380,6 +7431,9 @@ static void float_init_f64(ConstExprValue *dest_val, double x) { bigfloat_init_64(&dest_val->data.x_bigfloat, x); } else if (dest_val->type->id == TypeTableEntryIdFloat) { switch (dest_val->type->data.floating.bit_count) { + case 16: + dest_val->data.x_f16 = zig_double_to_f16(x); + break; case 32: dest_val->data.x_f32 = x; break; @@ -7406,6 +7460,9 @@ static void float_init_f128(ConstExprValue *dest_val, float128_t x) { bigfloat_init_128(&dest_val->data.x_bigfloat, x); } else if (dest_val->type->id == TypeTableEntryIdFloat) { switch (dest_val->type->data.floating.bit_count) { + case 16: + dest_val->data.x_f16 = f128M_to_f16(&x); + break; case 32: { float32_t f32_val = f128M_to_f32(&x); @@ -7436,6 +7493,9 @@ static void float_init_float(ConstExprValue *dest_val, ConstExprValue *src_val) float_init_bigfloat(dest_val, &src_val->data.x_bigfloat); } else if (src_val->type->id == TypeTableEntryIdFloat) { switch (src_val->type->data.floating.bit_count) { + case 16: + float_init_f16(dest_val, src_val->data.x_f16); + break; case 32: float_init_f32(dest_val, src_val->data.x_f32); break; @@ -7459,6 +7519,14 @@ static Cmp float_cmp(ConstExprValue *op1, ConstExprValue *op2) { return bigfloat_cmp(&op1->data.x_bigfloat, &op2->data.x_bigfloat); } else if (op1->type->id == TypeTableEntryIdFloat) { switch (op1->type->data.floating.bit_count) { + case 16: + if (f16_lt(op1->data.x_f16, op2->data.x_f16)) { + return CmpLT; + } else if (f16_lt(op2->data.x_f16, op1->data.x_f16)) { + return CmpGT; + } else { + return CmpEQ; + } case 32: if (op1->data.x_f32 > op2->data.x_f32) { return CmpGT; @@ -7496,6 +7564,17 @@ static Cmp float_cmp_zero(ConstExprValue *op) { return bigfloat_cmp_zero(&op->data.x_bigfloat); } else if (op->type->id == TypeTableEntryIdFloat) { switch (op->type->data.floating.bit_count) { + case 16: + { + const float16_t zero = zig_double_to_f16(0); + if (f16_lt(op->data.x_f16, zero)) { + return CmpLT; + } else if (f16_lt(zero, op->data.x_f16)) { + return CmpGT; + } else { + return CmpEQ; + } + } case 32: if (op->data.x_f32 < 0.0) { return CmpLT; @@ -7537,6 +7616,9 @@ static void float_add(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal bigfloat_add(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat); } else if (op1->type->id == TypeTableEntryIdFloat) { switch (op1->type->data.floating.bit_count) { + case 16: + out_val->data.x_f16 = f16_add(op1->data.x_f16, op2->data.x_f16); + return; case 32: out_val->data.x_f32 = op1->data.x_f32 + op2->data.x_f32; return; @@ -7561,6 +7643,9 @@ static void float_sub(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal bigfloat_sub(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat); } else if (op1->type->id == TypeTableEntryIdFloat) { switch (op1->type->data.floating.bit_count) { + case 16: + out_val->data.x_f16 = f16_sub(op1->data.x_f16, op2->data.x_f16); + return; case 32: out_val->data.x_f32 = op1->data.x_f32 - op2->data.x_f32; return; @@ -7585,6 +7670,9 @@ static void float_mul(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal bigfloat_mul(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat); } else if (op1->type->id == TypeTableEntryIdFloat) { switch (op1->type->data.floating.bit_count) { + case 16: + out_val->data.x_f16 = f16_mul(op1->data.x_f16, op2->data.x_f16); + return; case 32: out_val->data.x_f32 = op1->data.x_f32 * op2->data.x_f32; return; @@ -7609,6 +7697,9 @@ static void float_div(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal bigfloat_div(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat); } else if (op1->type->id == TypeTableEntryIdFloat) { switch (op1->type->data.floating.bit_count) { + case 16: + out_val->data.x_f16 = f16_div(op1->data.x_f16, op2->data.x_f16); + return; case 32: out_val->data.x_f32 = op1->data.x_f32 / op2->data.x_f32; return; @@ -7633,6 +7724,19 @@ static void float_div_trunc(ConstExprValue *out_val, ConstExprValue *op1, ConstE bigfloat_div_trunc(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat); } else if (op1->type->id == TypeTableEntryIdFloat) { switch (op1->type->data.floating.bit_count) { + case 16: + { + double a = zig_f16_to_double(op1->data.x_f16); + double b = zig_f16_to_double(op2->data.x_f16); + double c = a / b; + if (c >= 0.0) { + c = floor(c); + } else { + c = ceil(c); + } + out_val->data.x_f16 = zig_double_to_f16(c); + return; + } case 32: out_val->data.x_f32 = op1->data.x_f32 / op2->data.x_f32; if (out_val->data.x_f32 >= 0.0) { @@ -7668,6 +7772,10 @@ static void float_div_floor(ConstExprValue *out_val, ConstExprValue *op1, ConstE bigfloat_div_floor(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat); } else if (op1->type->id == TypeTableEntryIdFloat) { switch (op1->type->data.floating.bit_count) { + case 16: + out_val->data.x_f16 = f16_div(op1->data.x_f16, op2->data.x_f16); + out_val->data.x_f16 = f16_roundToInt(out_val->data.x_f16, softfloat_round_min, false); + return; case 32: out_val->data.x_f32 = floorf(op1->data.x_f32 / op2->data.x_f32); return; @@ -7693,6 +7801,9 @@ static void float_rem(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal bigfloat_rem(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat); } else if (op1->type->id == TypeTableEntryIdFloat) { switch (op1->type->data.floating.bit_count) { + case 16: + out_val->data.x_f16 = f16_rem(op1->data.x_f16, op2->data.x_f16); + return; case 32: out_val->data.x_f32 = fmodf(op1->data.x_f32, op2->data.x_f32); return; @@ -7710,6 +7821,16 @@ static void float_rem(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal } } +// c = a - b * trunc(a / b) +static float16_t zig_f16_mod(float16_t a, float16_t b) { + float16_t c; + c = f16_div(a, b); + c = f16_roundToInt(c, softfloat_round_min, true); + c = f16_mul(b, c); + c = f16_sub(a, c); + return c; +} + // c = a - b * trunc(a / b) static void zig_f128M_mod(const float128_t* a, const float128_t* b, float128_t* c) { f128M_div(a, b, c); @@ -7725,6 +7846,9 @@ static void float_mod(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal bigfloat_mod(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat); } else if (op1->type->id == TypeTableEntryIdFloat) { switch (op1->type->data.floating.bit_count) { + case 16: + out_val->data.x_f16 = zig_f16_mod(op1->data.x_f16, op2->data.x_f16); + return; case 32: out_val->data.x_f32 = fmodf(fmodf(op1->data.x_f32, op2->data.x_f32) + op2->data.x_f32, op2->data.x_f32); return; @@ -7748,6 +7872,12 @@ static void float_negate(ConstExprValue *out_val, ConstExprValue *op) { bigfloat_negate(&out_val->data.x_bigfloat, &op->data.x_bigfloat); } else if (op->type->id == TypeTableEntryIdFloat) { switch (op->type->data.floating.bit_count) { + case 16: + { + const float16_t zero = zig_double_to_f16(0); + out_val->data.x_f16 = f16_sub(zero, op->data.x_f16); + return; + } case 32: out_val->data.x_f32 = -op->data.x_f32; return; @@ -7770,6 +7900,9 @@ static void float_negate(ConstExprValue *out_val, ConstExprValue *op) { void float_write_ieee597(ConstExprValue *op, uint8_t *buf, bool is_big_endian) { if (op->type->id == TypeTableEntryIdFloat) { switch (op->type->data.floating.bit_count) { + case 16: + memcpy(buf, &op->data.x_f16, 2); // TODO wrong when compiler is big endian + return; case 32: memcpy(buf, &op->data.x_f32, 4); // TODO wrong when compiler is big endian return; @@ -7790,6 +7923,9 @@ void float_write_ieee597(ConstExprValue *op, uint8_t *buf, bool is_big_endian) { void float_read_ieee597(ConstExprValue *val, uint8_t *buf, bool is_big_endian) { if (val->type->id == TypeTableEntryIdFloat) { switch (val->type->data.floating.bit_count) { + case 16: + memcpy(&val->data.x_f16, buf, 2); // TODO wrong when compiler is big endian + return; case 32: memcpy(&val->data.x_f32, buf, 4); // TODO wrong when compiler is big endian return; @@ -8817,6 +8953,9 @@ static bool eval_const_expr_implicit_cast(IrAnalyze *ira, IrInstruction *source_ if (other_val->type->id == TypeTableEntryIdComptimeFloat) { assert(new_type->id == TypeTableEntryIdFloat); switch (new_type->data.floating.bit_count) { + case 16: + const_val->data.x_f16 = bigfloat_to_f16(&other_val->data.x_bigfloat); + break; case 32: const_val->data.x_f32 = bigfloat_to_f32(&other_val->data.x_bigfloat); break; @@ -8847,6 +8986,9 @@ static bool eval_const_expr_implicit_cast(IrAnalyze *ira, IrInstruction *source_ BigFloat bigfloat; bigfloat_init_bigint(&bigfloat, &other_val->data.x_bigint); switch (new_type->data.floating.bit_count) { + case 16: + const_val->data.x_f16 = bigfloat_to_f16(&bigfloat); + break; case 32: const_val->data.x_f32 = bigfloat_to_f32(&bigfloat); break; @@ -20104,6 +20246,9 @@ static TypeTableEntry *ir_analyze_instruction_sqrt(IrAnalyze *ira, IrInstruction bigfloat_sqrt(&out_val->data.x_bigfloat, &val->data.x_bigfloat); } else if (float_type->id == TypeTableEntryIdFloat) { switch (float_type->data.floating.bit_count) { + case 16: + out_val->data.x_f16 = f16_sqrt(val->data.x_f16); + break; case 32: out_val->data.x_f32 = sqrtf(val->data.x_f32); break; @@ -20124,7 +20269,9 @@ static TypeTableEntry *ir_analyze_instruction_sqrt(IrAnalyze *ira, IrInstruction } assert(float_type->id == TypeTableEntryIdFloat); - if (float_type->data.floating.bit_count != 32 && float_type->data.floating.bit_count != 64) { + if (float_type->data.floating.bit_count != 16 && + float_type->data.floating.bit_count != 32 && + float_type->data.floating.bit_count != 64) { ir_add_error(ira, instruction->type, buf_sprintf("compiler TODO: add implementation of sqrt for '%s'", buf_ptr(&float_type->name))); return ira->codegen->builtin_types.entry_invalid; } diff --git a/src/util.hpp b/src/util.hpp index 52baab7ac..b0402137b 100644 --- a/src/util.hpp +++ b/src/util.hpp @@ -31,6 +31,8 @@ #endif +#include "softfloat.hpp" + #define BREAKPOINT __asm("int $0x03") ATTRIBUTE_COLD @@ -165,4 +167,21 @@ static inline uint8_t log2_u64(uint64_t x) { return (63 - clzll(x)); } +static inline float16_t zig_double_to_f16(double x) { + float64_t y; + static_assert(sizeof(x) == sizeof(y), ""); + memcpy(&y, &x, sizeof(x)); + return f64_to_f16(y); +} + + +// Return value is safe to coerce to float even when |x| is NaN or Infinity. +static inline double zig_f16_to_double(float16_t x) { + float64_t y = f16_to_f64(x); + double z; + static_assert(sizeof(y) == sizeof(z), ""); + memcpy(&z, &y, sizeof(y)); + return z; +} + #endif diff --git a/std/special/compiler_rt/extendXfYf2.zig b/std/special/compiler_rt/extendXfYf2.zig index 6fa8cf465..099e27b74 100644 --- a/std/special/compiler_rt/extendXfYf2.zig +++ b/std/special/compiler_rt/extendXfYf2.zig @@ -10,9 +10,13 @@ pub extern fn __extendsftf2(a: f32) f128 { return extendXfYf2(f128, f32, a); } +pub extern fn __extendhfsf2(a: u16) f32 { + return extendXfYf2(f32, f16, @bitCast(f16, a)); +} + const CHAR_BIT = 8; -pub fn extendXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t { +inline fn extendXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t { const src_rep_t = @IntType(false, @typeInfo(src_t).Float.bits); const dst_rep_t = @IntType(false, @typeInfo(dst_t).Float.bits); const srcSigBits = std.math.floatMantissaBits(src_t); @@ -22,22 +26,22 @@ pub fn extendXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t { // Various constants whose values follow from the type parameters. // Any reasonable optimizer will fold and propagate all of these. - const srcBits: i32 = @sizeOf(src_t) * CHAR_BIT; - const srcExpBits: i32 = srcBits - srcSigBits - 1; - const srcInfExp: i32 = (1 << srcExpBits) - 1; - const srcExpBias: i32 = srcInfExp >> 1; + const srcBits = @sizeOf(src_t) * CHAR_BIT; + const srcExpBits = srcBits - srcSigBits - 1; + const srcInfExp = (1 << srcExpBits) - 1; + const srcExpBias = srcInfExp >> 1; - const srcMinNormal: src_rep_t = src_rep_t(1) << srcSigBits; - const srcInfinity: src_rep_t = src_rep_t(@bitCast(u32, srcInfExp)) << srcSigBits; - const srcSignMask: src_rep_t = src_rep_t(1) << @intCast(SrcShift, srcSigBits +% srcExpBits); - const srcAbsMask: src_rep_t = srcSignMask -% 1; - const srcQNaN: src_rep_t = src_rep_t(1) << @intCast(SrcShift, srcSigBits -% 1); - const srcNaNCode: src_rep_t = srcQNaN -% 1; + const srcMinNormal = 1 << srcSigBits; + const srcInfinity = srcInfExp << srcSigBits; + const srcSignMask = 1 << (srcSigBits + srcExpBits); + const srcAbsMask = srcSignMask - 1; + const srcQNaN = 1 << (srcSigBits - 1); + const srcNaNCode = srcQNaN - 1; - const dstBits: i32 = @sizeOf(dst_t) * CHAR_BIT; - const dstExpBits: i32 = dstBits - dstSigBits - 1; - const dstInfExp: i32 = (1 << dstExpBits) - 1; - const dstExpBias: i32 = dstInfExp >> 1; + const dstBits = @sizeOf(dst_t) * CHAR_BIT; + const dstExpBits = dstBits - dstSigBits - 1; + const dstInfExp = (1 << dstExpBits) - 1; + const dstExpBias = dstInfExp >> 1; const dstMinNormal: dst_rep_t = dst_rep_t(1) << dstSigBits; @@ -47,38 +51,36 @@ pub fn extendXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t { const sign: src_rep_t = aRep & srcSignMask; var absResult: dst_rep_t = undefined; - // If @sizeOf(src_rep_t) < @sizeOf(int), the subtraction result is promoted - // to (signed) int. To avoid that, explicitly cast to src_rep_t. - if ((src_rep_t)(aAbs -% srcMinNormal) < srcInfinity -% srcMinNormal) { + if (aAbs -% srcMinNormal < srcInfinity - srcMinNormal) { // a is a normal number. // Extend to the destination type by shifting the significand and // exponent into the proper position and rebiasing the exponent. - absResult = dst_rep_t(aAbs) << (dstSigBits -% srcSigBits); - absResult += dst_rep_t(@bitCast(u32, dstExpBias -% srcExpBias)) << dstSigBits; + absResult = dst_rep_t(aAbs) << (dstSigBits - srcSigBits); + absResult += (dstExpBias - srcExpBias) << dstSigBits; } else if (aAbs >= srcInfinity) { // a is NaN or infinity. // Conjure the result by beginning with infinity, then setting the qNaN // bit (if needed) and right-aligning the rest of the trailing NaN // payload field. - absResult = dst_rep_t(@bitCast(u32, dstInfExp)) << dstSigBits; - absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits); - absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits); + absResult = dstInfExp << dstSigBits; + absResult |= dst_rep_t(aAbs & srcQNaN) << (dstSigBits - srcSigBits); + absResult |= dst_rep_t(aAbs & srcNaNCode) << (dstSigBits - srcSigBits); } else if (aAbs != 0) { // a is denormal. // renormalize the significand and clear the leading bit, then insert // the correct adjusted exponent in the destination type. - const scale: i32 = @clz(aAbs) - @clz(srcMinNormal); + const scale: u32 = @clz(aAbs) - @clz(src_rep_t(srcMinNormal)); absResult = dst_rep_t(aAbs) << @intCast(DstShift, dstSigBits - srcSigBits + scale); absResult ^= dstMinNormal; - const resultExponent: i32 = dstExpBias - srcExpBias - scale + 1; - absResult |= dst_rep_t(@bitCast(u32, resultExponent)) << @intCast(DstShift, dstSigBits); + const resultExponent: u32 = dstExpBias - srcExpBias - scale + 1; + absResult |= @intCast(dst_rep_t, resultExponent) << dstSigBits; } else { // a is zero. absResult = 0; } // Apply the signbit to (dst_t)abs(a). - const result: dst_rep_t align(@alignOf(dst_t)) = absResult | dst_rep_t(sign) << @intCast(DstShift, dstBits - srcBits); + const result: dst_rep_t align(@alignOf(dst_t)) = absResult | dst_rep_t(sign) << (dstBits - srcBits); return @bitCast(dst_t, result); } diff --git a/std/special/compiler_rt/extendXfYf2_test.zig b/std/special/compiler_rt/extendXfYf2_test.zig index 84fb410fb..0168de12a 100644 --- a/std/special/compiler_rt/extendXfYf2_test.zig +++ b/std/special/compiler_rt/extendXfYf2_test.zig @@ -1,4 +1,5 @@ const __extenddftf2 = @import("extendXfYf2.zig").__extenddftf2; +const __extendhfsf2 = @import("extendXfYf2.zig").__extendhfsf2; const __extendsftf2 = @import("extendXfYf2.zig").__extendsftf2; const assert = @import("std").debug.assert; @@ -24,6 +25,22 @@ fn test__extenddftf2(a: f64, expectedHi: u64, expectedLo: u64) void { @panic("__extenddftf2 test failure"); } +fn test__extendhfsf2(a: u16, expected: u32) void { + const x = __extendhfsf2(a); + const rep = @bitCast(u32, x); + + if (rep == expected) { + if (rep & 0x7fffffff > 0x7f800000) { + return; // NaN is always unequal. + } + if (x == @bitCast(f32, expected)) { + return; + } + } + + @panic("__extendhfsf2 test failure"); +} + fn test__extendsftf2(a: f32, expectedHi: u64, expectedLo: u64) void { const x = __extendsftf2(a); @@ -68,6 +85,35 @@ test "extenddftf2" { test__extenddftf2(0x1.edcba987654321fp-45, 0x3fd2edcba9876543, 0x2000000000000000); } +test "extendhfsf2" { + test__extendhfsf2(0x7e00, 0x7fc00000); // qNaN + test__extendhfsf2(0x7f00, 0x7fe00000); // sNaN + + test__extendhfsf2(0, 0); // 0 + test__extendhfsf2(0x8000, 0x80000000); // -0 + + test__extendhfsf2(0x7c00, 0x7f800000); // inf + test__extendhfsf2(0xfc00, 0xff800000); // -inf + + test__extendhfsf2(0x0001, 0x33800000); // denormal (min), 2**-24 + test__extendhfsf2(0x8001, 0xb3800000); // denormal (min), -2**-24 + + test__extendhfsf2(0x03ff, 0x387fc000); // denormal (max), 2**-14 - 2**-24 + test__extendhfsf2(0x83ff, 0xb87fc000); // denormal (max), -2**-14 + 2**-24 + + test__extendhfsf2(0x0400, 0x38800000); // normal (min), 2**-14 + test__extendhfsf2(0x8400, 0xb8800000); // normal (min), -2**-14 + + test__extendhfsf2(0x7bff, 0x477fe000); // normal (max), 65504 + test__extendhfsf2(0xfbff, 0xc77fe000); // normal (max), -65504 + + test__extendhfsf2(0x3c01, 0x3f802000); // normal, 1 + 2**-10 + test__extendhfsf2(0xbc01, 0xbf802000); // normal, -1 - 2**-10 + + test__extendhfsf2(0x3555, 0x3eaaa000); // normal, approx. 1/3 + test__extendhfsf2(0xb555, 0xbeaaa000); // normal, approx. -1/3 +} + test "extendsftf2" { // qNaN test__extendsftf2(makeQNaN32(), 0x7fff800000000000, 0x0); diff --git a/std/special/compiler_rt/index.zig b/std/special/compiler_rt/index.zig index c96e1587f..fda8d9d8a 100644 --- a/std/special/compiler_rt/index.zig +++ b/std/special/compiler_rt/index.zig @@ -15,6 +15,8 @@ comptime { @export("__lttf2", @import("comparetf2.zig").__letf2, linkage); @export("__netf2", @import("comparetf2.zig").__letf2, linkage); @export("__gttf2", @import("comparetf2.zig").__getf2, linkage); + @export("__gnu_h2f_ieee", @import("extendXfYf2.zig").__extendhfsf2, linkage); + @export("__gnu_f2h_ieee", @import("truncXfYf2.zig").__truncsfhf2, linkage); } @export("__unordtf2", @import("comparetf2.zig").__unordtf2, linkage); @@ -22,6 +24,9 @@ comptime { @export("__floatuntidf", @import("floatuntidf.zig").__floatuntidf, linkage); @export("__extenddftf2", @import("extendXfYf2.zig").__extenddftf2, linkage); @export("__extendsftf2", @import("extendXfYf2.zig").__extendsftf2, linkage); + @export("__extendhfsf2", @import("extendXfYf2.zig").__extendhfsf2, linkage); + + @export("__truncsfhf2", @import("truncXfYf2.zig").__truncsfhf2, linkage); @export("__fixunssfsi", @import("fixunssfsi.zig").__fixunssfsi, linkage); @export("__fixunssfdi", @import("fixunssfdi.zig").__fixunssfdi, linkage); diff --git a/std/special/compiler_rt/truncXfYf2.zig b/std/special/compiler_rt/truncXfYf2.zig new file mode 100644 index 000000000..f08c6ae34 --- /dev/null +++ b/std/special/compiler_rt/truncXfYf2.zig @@ -0,0 +1,111 @@ +const std = @import("std"); + +pub extern fn __truncsfhf2(a: f32) u16 { + return @bitCast(u16, truncXfYf2(f16, f32, a)); +} + +const CHAR_BIT = 8; + +inline fn truncXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t { + const src_rep_t = @IntType(false, @typeInfo(src_t).Float.bits); + const dst_rep_t = @IntType(false, @typeInfo(dst_t).Float.bits); + const srcSigBits = std.math.floatMantissaBits(src_t); + const dstSigBits = std.math.floatMantissaBits(dst_t); + const SrcShift = std.math.Log2Int(src_rep_t); + const DstShift = std.math.Log2Int(dst_rep_t); + + // Various constants whose values follow from the type parameters. + // Any reasonable optimizer will fold and propagate all of these. + const srcBits = @sizeOf(src_t) * CHAR_BIT; + const srcExpBits = srcBits - srcSigBits - 1; + const srcInfExp = (1 << srcExpBits) - 1; + const srcExpBias = srcInfExp >> 1; + + const srcMinNormal = 1 << srcSigBits; + const srcSignificandMask = srcMinNormal - 1; + const srcInfinity = srcInfExp << srcSigBits; + const srcSignMask = 1 << (srcSigBits + srcExpBits); + const srcAbsMask = srcSignMask - 1; + const roundMask = (1 << (srcSigBits - dstSigBits)) - 1; + const halfway = 1 << (srcSigBits - dstSigBits - 1); + const srcQNaN = 1 << (srcSigBits - 1); + const srcNaNCode = srcQNaN - 1; + + const dstBits = @sizeOf(dst_t) * CHAR_BIT; + const dstExpBits = dstBits - dstSigBits - 1; + const dstInfExp = (1 << dstExpBits) - 1; + const dstExpBias = dstInfExp >> 1; + + const underflowExponent = srcExpBias + 1 - dstExpBias; + const overflowExponent = srcExpBias + dstInfExp - dstExpBias; + const underflow = underflowExponent << srcSigBits; + const overflow = overflowExponent << srcSigBits; + + const dstQNaN = 1 << (dstSigBits - 1); + const dstNaNCode = dstQNaN - 1; + + // Break a into a sign and representation of the absolute value + const aRep: src_rep_t = @bitCast(src_rep_t, a); + const aAbs: src_rep_t = aRep & srcAbsMask; + const sign: src_rep_t = aRep & srcSignMask; + var absResult: dst_rep_t = undefined; + + if (aAbs -% underflow < aAbs -% overflow) { + // The exponent of a is within the range of normal numbers in the + // destination format. We can convert by simply right-shifting with + // rounding and adjusting the exponent. + absResult = @truncate(dst_rep_t, aAbs >> (srcSigBits - dstSigBits)); + absResult -%= dst_rep_t(srcExpBias - dstExpBias) << dstSigBits; + + const roundBits: src_rep_t = aAbs & roundMask; + if (roundBits > halfway) { + // Round to nearest + absResult += 1; + } else if (roundBits == halfway) { + // Ties to even + absResult += absResult & 1; + } + } else if (aAbs > srcInfinity) { + // a is NaN. + // Conjure the result by beginning with infinity, setting the qNaN + // bit and inserting the (truncated) trailing NaN field. + absResult = @intCast(dst_rep_t, dstInfExp) << dstSigBits; + absResult |= dstQNaN; + absResult |= @intCast(dst_rep_t, ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode); + } else if (aAbs >= overflow) { + // a overflows to infinity. + absResult = @intCast(dst_rep_t, dstInfExp) << dstSigBits; + } else { + // a underflows on conversion to the destination type or is an exact + // zero. The result may be a denormal or zero. Extract the exponent + // to get the shift amount for the denormalization. + const aExp: u32 = aAbs >> srcSigBits; + const shift: u32 = srcExpBias - dstExpBias - aExp + 1; + + const significand: src_rep_t = (aRep & srcSignificandMask) | srcMinNormal; + + // Right shift by the denormalization amount with sticky. + if (shift > srcSigBits) { + absResult = 0; + } else { + const sticky: src_rep_t = significand << @intCast(SrcShift, srcBits - shift); + const denormalizedSignificand: src_rep_t = significand >> @intCast(SrcShift, shift) | sticky; + absResult = @intCast(dst_rep_t, denormalizedSignificand >> (srcSigBits - dstSigBits)); + const roundBits: src_rep_t = denormalizedSignificand & roundMask; + if (roundBits > halfway) { + // Round to nearest + absResult += 1; + } else if (roundBits == halfway) { + // Ties to even + absResult += absResult & 1; + } + } + } + + const result: dst_rep_t align(@alignOf(dst_t)) = absResult | @truncate(dst_rep_t, sign >> @intCast(SrcShift, srcBits - dstBits)); + return @bitCast(dst_t, result); +} + +test "import truncXfYf2" { + _ = @import("truncXfYf2_test.zig"); +} diff --git a/std/special/compiler_rt/truncXfYf2_test.zig b/std/special/compiler_rt/truncXfYf2_test.zig new file mode 100644 index 000000000..e4dae7b5b --- /dev/null +++ b/std/special/compiler_rt/truncXfYf2_test.zig @@ -0,0 +1,64 @@ +const __truncsfhf2 = @import("truncXfYf2.zig").__truncsfhf2; + +fn test__truncsfhf2(a: u32, expected: u16) void { + const actual = __truncsfhf2(@bitCast(f32, a)); + + if (actual == expected) { + return; + } + + @panic("__truncsfhf2 test failure"); +} + +test "truncsfhf2" { + test__truncsfhf2(0x7fc00000, 0x7e00); // qNaN + test__truncsfhf2(0x7fe00000, 0x7f00); // sNaN + + test__truncsfhf2(0, 0); // 0 + test__truncsfhf2(0x80000000, 0x8000); // -0 + + test__truncsfhf2(0x7f800000, 0x7c00); // inf + test__truncsfhf2(0xff800000, 0xfc00); // -inf + + test__truncsfhf2(0x477ff000, 0x7c00); // 65520 -> inf + test__truncsfhf2(0xc77ff000, 0xfc00); // -65520 -> -inf + + test__truncsfhf2(0x71cc3892, 0x7c00); // 0x1.987124876876324p+100 -> inf + test__truncsfhf2(0xf1cc3892, 0xfc00); // -0x1.987124876876324p+100 -> -inf + + test__truncsfhf2(0x38800000, 0x0400); // normal (min), 2**-14 + test__truncsfhf2(0xb8800000, 0x8400); // normal (min), -2**-14 + + test__truncsfhf2(0x477fe000, 0x7bff); // normal (max), 65504 + test__truncsfhf2(0xc77fe000, 0xfbff); // normal (max), -65504 + + test__truncsfhf2(0x477fe100, 0x7bff); // normal, 65505 -> 65504 + test__truncsfhf2(0xc77fe100, 0xfbff); // normal, -65505 -> -65504 + + test__truncsfhf2(0x477fef00, 0x7bff); // normal, 65519 -> 65504 + test__truncsfhf2(0xc77fef00, 0xfbff); // normal, -65519 -> -65504 + + test__truncsfhf2(0x3f802000, 0x3c01); // normal, 1 + 2**-10 + test__truncsfhf2(0xbf802000, 0xbc01); // normal, -1 - 2**-10 + + test__truncsfhf2(0x3eaaa000, 0x3555); // normal, approx. 1/3 + test__truncsfhf2(0xbeaaa000, 0xb555); // normal, approx. -1/3 + + test__truncsfhf2(0x40490fdb, 0x4248); // normal, 3.1415926535 + test__truncsfhf2(0xc0490fdb, 0xc248); // normal, -3.1415926535 + + test__truncsfhf2(0x45cc3892, 0x6e62); // normal, 0x1.987124876876324p+12 + + test__truncsfhf2(0x3f800000, 0x3c00); // normal, 1 + test__truncsfhf2(0x38800000, 0x0400); // normal, 0x1.0p-14 + + test__truncsfhf2(0x33800000, 0x0001); // denormal (min), 2**-24 + test__truncsfhf2(0xb3800000, 0x8001); // denormal (min), -2**-24 + + test__truncsfhf2(0x387fc000, 0x03ff); // denormal (max), 2**-14 - 2**-24 + test__truncsfhf2(0xb87fc000, 0x83ff); // denormal (max), -2**-14 + 2**-24 + + test__truncsfhf2(0x35800000, 0x0010); // denormal, 0x1.0p-20 + test__truncsfhf2(0x33280000, 0x0001); // denormal, 0x1.5p-25 -> 0x1.0p-24 + test__truncsfhf2(0x33000000, 0x0000); // 0x1.0p-25 -> zero +} diff --git a/test/cases/cast.zig b/test/cases/cast.zig index 4209d87c1..5688d90e1 100644 --- a/test/cases/cast.zig +++ b/test/cases/cast.zig @@ -350,13 +350,16 @@ fn testFloatToInts() void { assert(x == 10000); const y = @floatToInt(i32, f32(1e4)); assert(y == 10000); - expectFloatToInt(u8, 255.1, 255); - expectFloatToInt(i8, 127.2, 127); - expectFloatToInt(i8, -128.2, -128); + expectFloatToInt(f16, 255.1, u8, 255); + expectFloatToInt(f16, 127.2, i8, 127); + expectFloatToInt(f16, -128.2, i8, -128); + expectFloatToInt(f32, 255.1, u8, 255); + expectFloatToInt(f32, 127.2, i8, 127); + expectFloatToInt(f32, -128.2, i8, -128); } -fn expectFloatToInt(comptime T: type, f: f32, i: T) void { - assert(@floatToInt(T, f) == i); +fn expectFloatToInt(comptime F: type, f: F, comptime I: type, i: I) void { + assert(@floatToInt(I, f) == i); } test "cast u128 to f128 and back" { @@ -418,6 +421,16 @@ test "@intCast comptime_int" { } test "@floatCast comptime_int and comptime_float" { + { + const result = @floatCast(f16, 1234); + assert(@typeOf(result) == f16); + assert(result == 1234.0); + } + { + const result = @floatCast(f16, 1234.0); + assert(@typeOf(result) == f16); + assert(result == 1234.0); + } { const result = @floatCast(f32, 1234); assert(@typeOf(result) == f32); @@ -431,6 +444,11 @@ test "@floatCast comptime_int and comptime_float" { } test "comptime_int @intToFloat" { + { + const result = @intToFloat(f16, 1234); + assert(@typeOf(result) == f16); + assert(result == 1234.0); + } { const result = @intToFloat(f32, 1234); assert(@typeOf(result) == f32); diff --git a/test/cases/math.zig b/test/cases/math.zig index 08388d3df..1807e5a1b 100644 --- a/test/cases/math.zig +++ b/test/cases/math.zig @@ -6,15 +6,20 @@ test "division" { } fn testDivision() void { assert(div(u32, 13, 3) == 4); + assert(div(f16, 1.0, 2.0) == 0.5); assert(div(f32, 1.0, 2.0) == 0.5); assert(divExact(u32, 55, 11) == 5); assert(divExact(i32, -55, 11) == -5); + assert(divExact(f16, 55.0, 11.0) == 5.0); + assert(divExact(f16, -55.0, 11.0) == -5.0); assert(divExact(f32, 55.0, 11.0) == 5.0); assert(divExact(f32, -55.0, 11.0) == -5.0); assert(divFloor(i32, 5, 3) == 1); assert(divFloor(i32, -5, 3) == -2); + assert(divFloor(f16, 5.0, 3.0) == 1.0); + assert(divFloor(f16, -5.0, 3.0) == -2.0); assert(divFloor(f32, 5.0, 3.0) == 1.0); assert(divFloor(f32, -5.0, 3.0) == -2.0); assert(divFloor(i32, -0x80000000, -2) == 0x40000000); @@ -24,6 +29,8 @@ fn testDivision() void { assert(divTrunc(i32, 5, 3) == 1); assert(divTrunc(i32, -5, 3) == -1); + assert(divTrunc(f16, 5.0, 3.0) == 1.0); + assert(divTrunc(f16, -5.0, 3.0) == -1.0); assert(divTrunc(f32, 5.0, 3.0) == 1.0); assert(divTrunc(f32, -5.0, 3.0) == -1.0); @@ -435,10 +442,11 @@ test "comptime float rem int" { } test "remainder division" { + comptime remdiv(f16); comptime remdiv(f32); comptime remdiv(f64); comptime remdiv(f128); - remdiv(f32); + remdiv(f16); remdiv(f64); remdiv(f128); } @@ -453,6 +461,8 @@ test "@sqrt" { comptime testSqrt(f64, 12.0); testSqrt(f32, 13.0); comptime testSqrt(f32, 13.0); + testSqrt(f16, 13.0); + comptime testSqrt(f16, 13.0); const x = 14.0; const y = x * x; diff --git a/test/cases/misc.zig b/test/cases/misc.zig index d539f79a5..0f181a7b4 100644 --- a/test/cases/misc.zig +++ b/test/cases/misc.zig @@ -53,6 +53,7 @@ test "@IntType builtin" { } test "floating point primitive bit counts" { + assert(f16.bit_count == 16); assert(f32.bit_count == 32); assert(f64.bit_count == 64); }