Merge pull request #1159 from bnoordhuis/f16

add f16 type
master
Andrew Kelley 2018-06-27 12:29:05 -04:00 committed by GitHub
commit 1b4bae6d69
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 530 additions and 101 deletions

View File

@ -261,12 +261,15 @@ endif()
set(EMBEDDED_SOFTFLOAT_SOURCES
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/f128M_isSignalingNaN.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_commonNaNToF128M.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_commonNaNToF16UI.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_commonNaNToF32UI.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_commonNaNToF64UI.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_f128MToCommonNaN.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_f16UIToCommonNaN.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_f32UIToCommonNaN.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_f64UIToCommonNaN.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_propagateNaNF128M.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/s_propagateNaNF16UI.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/8086/softfloat_raiseFlags.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f128M_add.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f128M_div.c"
@ -293,8 +296,20 @@ set(EMBEDDED_SOFTFLOAT_SOURCES
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f128M_to_ui32_r_minMag.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f128M_to_ui64.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f128M_to_ui64_r_minMag.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_add.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_div.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_eq.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_lt.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_mul.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_rem.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_roundToInt.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_sqrt.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_sub.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_to_f128M.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_to_f64.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f32_to_f128M.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f64_to_f128M.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f64_to_f16.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/s_add256M.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/s_addCarryM.c"
"${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/s_addComplCarryM.c"
@ -572,6 +587,7 @@ set(ZIG_STD_FILES
"special/compiler_rt/floatuntidf.zig"
"special/compiler_rt/muloti4.zig"
"special/compiler_rt/index.zig"
"special/compiler_rt/truncXfYf2.zig"
"special/compiler_rt/udivmod.zig"
"special/compiler_rt/udivmoddi4.zig"
"special/compiler_rt/udivmodti4.zig"

View File

@ -258,6 +258,7 @@ struct ConstExprValue {
// populated if special == ConstValSpecialStatic
BigInt x_bigint;
BigFloat x_bigfloat;
float16_t x_f16;
float x_f32;
double x_f64;
float128_t x_f128;
@ -1598,6 +1599,7 @@ struct CodeGen {
TypeTableEntry *entry_i128;
TypeTableEntry *entry_isize;
TypeTableEntry *entry_usize;
TypeTableEntry *entry_f16;
TypeTableEntry *entry_f32;
TypeTableEntry *entry_f64;
TypeTableEntry *entry_f128;

View File

@ -4668,6 +4668,13 @@ static uint32_t hash_const_val(ConstExprValue *const_val) {
}
case TypeTableEntryIdFloat:
switch (const_val->type->data.floating.bit_count) {
case 16:
{
uint16_t result;
static_assert(sizeof(result) == sizeof(const_val->data.x_f16), "");
memcpy(&result, &const_val->data.x_f16, sizeof(result));
return result * 65537u;
}
case 32:
{
uint32_t result;
@ -5128,6 +5135,9 @@ void init_const_float(ConstExprValue *const_val, TypeTableEntry *type, double va
bigfloat_init_64(&const_val->data.x_bigfloat, value);
} else if (type->id == TypeTableEntryIdFloat) {
switch (type->data.floating.bit_count) {
case 16:
const_val->data.x_f16 = zig_double_to_f16(value);
break;
case 32:
const_val->data.x_f32 = value;
break;
@ -5441,6 +5451,8 @@ bool const_values_equal(ConstExprValue *a, ConstExprValue *b) {
case TypeTableEntryIdFloat:
assert(a->type->data.floating.bit_count == b->type->data.floating.bit_count);
switch (a->type->data.floating.bit_count) {
case 16:
return f16_eq(a->data.x_f16, b->data.x_f16);
case 32:
return a->data.x_f32 == b->data.x_f32;
case 64:
@ -5614,6 +5626,9 @@ void render_const_value(CodeGen *g, Buf *buf, ConstExprValue *const_val) {
return;
case TypeTableEntryIdFloat:
switch (type_entry->data.floating.bit_count) {
case 16:
buf_appendf(buf, "%f", zig_f16_to_double(const_val->data.x_f16));
return;
case 32:
buf_appendf(buf, "%f", const_val->data.x_f32);
return;

View File

@ -18,6 +18,10 @@ void bigfloat_init_128(BigFloat *dest, float128_t x) {
dest->value = x;
}
// Initializes dest from the half-precision value x by converting it with
// f16_to_f128M and storing the result in dest->value.
void bigfloat_init_16(BigFloat *dest, float16_t x) {
f16_to_f128M(x, &dest->value);
}
void bigfloat_init_32(BigFloat *dest, float x) {
float32_t f32_val;
memcpy(&f32_val, &x, sizeof(float));
@ -146,6 +150,10 @@ Cmp bigfloat_cmp(const BigFloat *op1, const BigFloat *op2) {
}
}
// Returns the value held in bigfloat->value converted to half precision
// with f128M_to_f16.
float16_t bigfloat_to_f16(const BigFloat *bigfloat) {
return f128M_to_f16(&bigfloat->value);
}
float bigfloat_to_f32(const BigFloat *bigfloat) {
float32_t f32_value = f128M_to_f32(&bigfloat->value);
float result;

View File

@ -22,6 +22,7 @@ struct BigFloat {
struct Buf;
void bigfloat_init_16(BigFloat *dest, float16_t x);
void bigfloat_init_32(BigFloat *dest, float x);
void bigfloat_init_64(BigFloat *dest, double x);
void bigfloat_init_128(BigFloat *dest, float128_t x);
@ -29,6 +30,7 @@ void bigfloat_init_bigfloat(BigFloat *dest, const BigFloat *x);
void bigfloat_init_bigint(BigFloat *dest, const BigInt *op);
int bigfloat_init_buf_base10(BigFloat *dest, const uint8_t *buf_ptr, size_t buf_len);
float16_t bigfloat_to_f16(const BigFloat *bigfloat);
float bigfloat_to_f32(const BigFloat *bigfloat);
double bigfloat_to_f64(const BigFloat *bigfloat);
float128_t bigfloat_to_f128(const BigFloat *bigfloat);

View File

@ -17,6 +17,7 @@
#include "os.hpp"
#include "translate_c.hpp"
#include "target.hpp"
#include "util.hpp"
#include "zig_llvm.h"
#include <stdio.h>
@ -5211,6 +5212,8 @@ static LLVMValueRef gen_const_val(CodeGen *g, ConstExprValue *const_val, const c
const_val->data.x_err_set->value, false);
case TypeTableEntryIdFloat:
switch (type_entry->data.floating.bit_count) {
case 16:
return LLVMConstReal(type_entry->type_ref, zig_f16_to_double(const_val->data.x_f16));
case 32:
return LLVMConstReal(type_entry->type_ref, const_val->data.x_f32);
case 64:
@ -6177,58 +6180,30 @@ static void define_builtin_types(CodeGen *g) {
g->builtin_types.entry_usize = entry;
}
}
{
auto add_fp_entry = [] (CodeGen *g,
const char *name,
uint32_t bit_count,
LLVMTypeRef type_ref,
TypeTableEntry **field) {
TypeTableEntry *entry = new_type_table_entry(TypeTableEntryIdFloat);
entry->type_ref = LLVMFloatType();
buf_init_from_str(&entry->name, "f32");
entry->data.floating.bit_count = 32;
entry->type_ref = type_ref;
buf_init_from_str(&entry->name, name);
entry->data.floating.bit_count = bit_count;
uint64_t debug_size_in_bits = 8*LLVMStoreSizeOfType(g->target_data_ref, entry->type_ref);
entry->di_type = ZigLLVMCreateDebugBasicType(g->dbuilder, buf_ptr(&entry->name),
debug_size_in_bits,
ZigLLVMEncoding_DW_ATE_float());
g->builtin_types.entry_f32 = entry;
*field = entry;
g->primitive_type_table.put(&entry->name, entry);
}
{
TypeTableEntry *entry = new_type_table_entry(TypeTableEntryIdFloat);
entry->type_ref = LLVMDoubleType();
buf_init_from_str(&entry->name, "f64");
entry->data.floating.bit_count = 64;
};
add_fp_entry(g, "f16", 16, LLVMHalfType(), &g->builtin_types.entry_f16);
add_fp_entry(g, "f32", 32, LLVMFloatType(), &g->builtin_types.entry_f32);
add_fp_entry(g, "f64", 64, LLVMDoubleType(), &g->builtin_types.entry_f64);
add_fp_entry(g, "f128", 128, LLVMFP128Type(), &g->builtin_types.entry_f128);
add_fp_entry(g, "c_longdouble", 80, LLVMX86FP80Type(), &g->builtin_types.entry_c_longdouble);
uint64_t debug_size_in_bits = 8*LLVMStoreSizeOfType(g->target_data_ref, entry->type_ref);
entry->di_type = ZigLLVMCreateDebugBasicType(g->dbuilder, buf_ptr(&entry->name),
debug_size_in_bits,
ZigLLVMEncoding_DW_ATE_float());
g->builtin_types.entry_f64 = entry;
g->primitive_type_table.put(&entry->name, entry);
}
{
TypeTableEntry *entry = new_type_table_entry(TypeTableEntryIdFloat);
entry->type_ref = LLVMFP128Type();
buf_init_from_str(&entry->name, "f128");
entry->data.floating.bit_count = 128;
uint64_t debug_size_in_bits = 8*LLVMStoreSizeOfType(g->target_data_ref, entry->type_ref);
entry->di_type = ZigLLVMCreateDebugBasicType(g->dbuilder, buf_ptr(&entry->name),
debug_size_in_bits,
ZigLLVMEncoding_DW_ATE_float());
g->builtin_types.entry_f128 = entry;
g->primitive_type_table.put(&entry->name, entry);
}
{
TypeTableEntry *entry = new_type_table_entry(TypeTableEntryIdFloat);
entry->type_ref = LLVMX86FP80Type();
buf_init_from_str(&entry->name, "c_longdouble");
entry->data.floating.bit_count = 80;
uint64_t debug_size_in_bits = 8*LLVMStoreSizeOfType(g->target_data_ref, entry->type_ref);
entry->di_type = ZigLLVMCreateDebugBasicType(g->dbuilder, buf_ptr(&entry->name),
debug_size_in_bits,
ZigLLVMEncoding_DW_ATE_float());
g->builtin_types.entry_c_longdouble = entry;
g->primitive_type_table.put(&entry->name, entry);
}
{
TypeTableEntry *entry = new_type_table_entry(TypeTableEntryIdVoid);
entry->type_ref = LLVMVoidType();

View File

@ -11,9 +11,10 @@
#include "ir.hpp"
#include "ir_print.hpp"
#include "os.hpp"
#include "translate_c.hpp"
#include "range_set.hpp"
#include "softfloat.hpp"
#include "translate_c.hpp"
#include "util.hpp"
struct IrExecContext {
ConstExprValue *mem_slot_list;
@ -7238,6 +7239,11 @@ static bool float_has_fraction(ConstExprValue *const_val) {
return bigfloat_has_fraction(&const_val->data.x_bigfloat);
} else if (const_val->type->id == TypeTableEntryIdFloat) {
switch (const_val->type->data.floating.bit_count) {
case 16:
{
float16_t floored = f16_roundToInt(const_val->data.x_f16, softfloat_round_minMag, false);
return !f16_eq(floored, const_val->data.x_f16);
}
case 32:
return floorf(const_val->data.x_f32) != const_val->data.x_f32;
case 64:
@ -7261,6 +7267,9 @@ static void float_append_buf(Buf *buf, ConstExprValue *const_val) {
bigfloat_append_buf(buf, &const_val->data.x_bigfloat);
} else if (const_val->type->id == TypeTableEntryIdFloat) {
switch (const_val->type->data.floating.bit_count) {
case 16:
buf_appendf(buf, "%f", zig_f16_to_double(const_val->data.x_f16));
break;
case 32:
buf_appendf(buf, "%f", const_val->data.x_f32);
break;
@ -7296,6 +7305,17 @@ static void float_init_bigint(BigInt *bigint, ConstExprValue *const_val) {
bigint_init_bigfloat(bigint, &const_val->data.x_bigfloat);
} else if (const_val->type->id == TypeTableEntryIdFloat) {
switch (const_val->type->data.floating.bit_count) {
case 16:
{
double x = zig_f16_to_double(const_val->data.x_f16);
if (x >= 0) {
bigint_init_unsigned(bigint, (uint64_t)x);
} else {
bigint_init_unsigned(bigint, (uint64_t)-x);
bigint->is_negative = true;
}
break;
}
case 32:
if (const_val->data.x_f32 >= 0) {
bigint_init_unsigned(bigint, (uint64_t)(const_val->data.x_f32));
@ -7332,6 +7352,9 @@ static void float_init_bigfloat(ConstExprValue *dest_val, BigFloat *bigfloat) {
bigfloat_init_bigfloat(&dest_val->data.x_bigfloat, bigfloat);
} else if (dest_val->type->id == TypeTableEntryIdFloat) {
switch (dest_val->type->data.floating.bit_count) {
case 16:
dest_val->data.x_f16 = bigfloat_to_f16(bigfloat);
break;
case 32:
dest_val->data.x_f32 = bigfloat_to_f32(bigfloat);
break;
@ -7349,11 +7372,39 @@ static void float_init_bigfloat(ConstExprValue *dest_val, BigFloat *bigfloat) {
}
}
// Stores the half-precision value x into dest_val, converted to the
// representation selected by dest_val->type: a BigFloat for comptime floats,
// or the matching fixed-width field for 16/32/64/128-bit float types.
// Any other destination type or bit count ends in zig_unreachable().
static void float_init_f16(ConstExprValue *dest_val, float16_t x) {
    if (dest_val->type->id == TypeTableEntryIdComptimeFloat) {
        bigfloat_init_16(&dest_val->data.x_bigfloat, x);
        return;
    }
    if (dest_val->type->id != TypeTableEntryIdFloat) {
        zig_unreachable();
        return;
    }
    switch (dest_val->type->data.floating.bit_count) {
        case 16:
            // Same width: copy the value unchanged.
            dest_val->data.x_f16 = x;
            return;
        case 32:
            // Widened through double, then narrowed by the assignment to x_f32.
            dest_val->data.x_f32 = zig_f16_to_double(x);
            return;
        case 64:
            dest_val->data.x_f64 = zig_f16_to_double(x);
            return;
        case 128:
            f16_to_f128M(x, &dest_val->data.x_f128);
            return;
        default:
            zig_unreachable();
    }
}
static void float_init_f32(ConstExprValue *dest_val, float x) {
if (dest_val->type->id == TypeTableEntryIdComptimeFloat) {
bigfloat_init_32(&dest_val->data.x_bigfloat, x);
} else if (dest_val->type->id == TypeTableEntryIdFloat) {
switch (dest_val->type->data.floating.bit_count) {
case 16:
dest_val->data.x_f16 = zig_double_to_f16(x);
break;
case 32:
dest_val->data.x_f32 = x;
break;
@ -7380,6 +7431,9 @@ static void float_init_f64(ConstExprValue *dest_val, double x) {
bigfloat_init_64(&dest_val->data.x_bigfloat, x);
} else if (dest_val->type->id == TypeTableEntryIdFloat) {
switch (dest_val->type->data.floating.bit_count) {
case 16:
dest_val->data.x_f16 = zig_double_to_f16(x);
break;
case 32:
dest_val->data.x_f32 = x;
break;
@ -7406,6 +7460,9 @@ static void float_init_f128(ConstExprValue *dest_val, float128_t x) {
bigfloat_init_128(&dest_val->data.x_bigfloat, x);
} else if (dest_val->type->id == TypeTableEntryIdFloat) {
switch (dest_val->type->data.floating.bit_count) {
case 16:
dest_val->data.x_f16 = f128M_to_f16(&x);
break;
case 32:
{
float32_t f32_val = f128M_to_f32(&x);
@ -7436,6 +7493,9 @@ static void float_init_float(ConstExprValue *dest_val, ConstExprValue *src_val)
float_init_bigfloat(dest_val, &src_val->data.x_bigfloat);
} else if (src_val->type->id == TypeTableEntryIdFloat) {
switch (src_val->type->data.floating.bit_count) {
case 16:
float_init_f16(dest_val, src_val->data.x_f16);
break;
case 32:
float_init_f32(dest_val, src_val->data.x_f32);
break;
@ -7459,6 +7519,14 @@ static Cmp float_cmp(ConstExprValue *op1, ConstExprValue *op2) {
return bigfloat_cmp(&op1->data.x_bigfloat, &op2->data.x_bigfloat);
} else if (op1->type->id == TypeTableEntryIdFloat) {
switch (op1->type->data.floating.bit_count) {
case 16:
if (f16_lt(op1->data.x_f16, op2->data.x_f16)) {
return CmpLT;
} else if (f16_lt(op2->data.x_f16, op1->data.x_f16)) {
return CmpGT;
} else {
return CmpEQ;
}
case 32:
if (op1->data.x_f32 > op2->data.x_f32) {
return CmpGT;
@ -7496,6 +7564,17 @@ static Cmp float_cmp_zero(ConstExprValue *op) {
return bigfloat_cmp_zero(&op->data.x_bigfloat);
} else if (op->type->id == TypeTableEntryIdFloat) {
switch (op->type->data.floating.bit_count) {
case 16:
{
const float16_t zero = zig_double_to_f16(0);
if (f16_lt(op->data.x_f16, zero)) {
return CmpLT;
} else if (f16_lt(zero, op->data.x_f16)) {
return CmpGT;
} else {
return CmpEQ;
}
}
case 32:
if (op->data.x_f32 < 0.0) {
return CmpLT;
@ -7537,6 +7616,9 @@ static void float_add(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal
bigfloat_add(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat);
} else if (op1->type->id == TypeTableEntryIdFloat) {
switch (op1->type->data.floating.bit_count) {
case 16:
out_val->data.x_f16 = f16_add(op1->data.x_f16, op2->data.x_f16);
return;
case 32:
out_val->data.x_f32 = op1->data.x_f32 + op2->data.x_f32;
return;
@ -7561,6 +7643,9 @@ static void float_sub(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal
bigfloat_sub(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat);
} else if (op1->type->id == TypeTableEntryIdFloat) {
switch (op1->type->data.floating.bit_count) {
case 16:
out_val->data.x_f16 = f16_sub(op1->data.x_f16, op2->data.x_f16);
return;
case 32:
out_val->data.x_f32 = op1->data.x_f32 - op2->data.x_f32;
return;
@ -7585,6 +7670,9 @@ static void float_mul(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal
bigfloat_mul(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat);
} else if (op1->type->id == TypeTableEntryIdFloat) {
switch (op1->type->data.floating.bit_count) {
case 16:
out_val->data.x_f16 = f16_mul(op1->data.x_f16, op2->data.x_f16);
return;
case 32:
out_val->data.x_f32 = op1->data.x_f32 * op2->data.x_f32;
return;
@ -7609,6 +7697,9 @@ static void float_div(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal
bigfloat_div(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat);
} else if (op1->type->id == TypeTableEntryIdFloat) {
switch (op1->type->data.floating.bit_count) {
case 16:
out_val->data.x_f16 = f16_div(op1->data.x_f16, op2->data.x_f16);
return;
case 32:
out_val->data.x_f32 = op1->data.x_f32 / op2->data.x_f32;
return;
@ -7633,21 +7724,15 @@ static void float_div_trunc(ConstExprValue *out_val, ConstExprValue *op1, ConstE
bigfloat_div_trunc(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat);
} else if (op1->type->id == TypeTableEntryIdFloat) {
switch (op1->type->data.floating.bit_count) {
case 16:
out_val->data.x_f16 = f16_div(op1->data.x_f16, op2->data.x_f16);
out_val->data.x_f16 = f16_roundToInt(out_val->data.x_f16, softfloat_round_minMag, false);
return;
case 32:
out_val->data.x_f32 = op1->data.x_f32 / op2->data.x_f32;
if (out_val->data.x_f32 >= 0.0) {
out_val->data.x_f32 = floorf(out_val->data.x_f32);
} else {
out_val->data.x_f32 = ceilf(out_val->data.x_f32);
}
out_val->data.x_f32 = truncf(op1->data.x_f32 / op2->data.x_f32);
return;
case 64:
out_val->data.x_f64 = op1->data.x_f64 / op2->data.x_f64;
if (out_val->data.x_f64 >= 0.0) {
out_val->data.x_f64 = floor(out_val->data.x_f64);
} else {
out_val->data.x_f64 = ceil(out_val->data.x_f64);
}
out_val->data.x_f64 = trunc(op1->data.x_f64 / op2->data.x_f64);
return;
case 128:
f128M_div(&op1->data.x_f128, &op2->data.x_f128, &out_val->data.x_f128);
@ -7668,6 +7753,10 @@ static void float_div_floor(ConstExprValue *out_val, ConstExprValue *op1, ConstE
bigfloat_div_floor(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat);
} else if (op1->type->id == TypeTableEntryIdFloat) {
switch (op1->type->data.floating.bit_count) {
case 16:
out_val->data.x_f16 = f16_div(op1->data.x_f16, op2->data.x_f16);
out_val->data.x_f16 = f16_roundToInt(out_val->data.x_f16, softfloat_round_min, false);
return;
case 32:
out_val->data.x_f32 = floorf(op1->data.x_f32 / op2->data.x_f32);
return;
@ -7693,6 +7782,9 @@ static void float_rem(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal
bigfloat_rem(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat);
} else if (op1->type->id == TypeTableEntryIdFloat) {
switch (op1->type->data.floating.bit_count) {
case 16:
out_val->data.x_f16 = f16_rem(op1->data.x_f16, op2->data.x_f16);
return;
case 32:
out_val->data.x_f32 = fmodf(op1->data.x_f32, op2->data.x_f32);
return;
@ -7710,6 +7802,16 @@ static void float_rem(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal
}
}
// Floored modulo on half-precision values: returns a - b * floor(a / b).
// The quotient is rounded to an integer with softfloat_round_min (toward
// negative infinity), so this is a floor-based remainder, not a truncating one.
static float16_t zig_f16_mod(float16_t a, float16_t b) {
    const float16_t quotient = f16_div(a, b);
    const float16_t floored = f16_roundToInt(quotient, softfloat_round_min, true);
    const float16_t scaled = f16_mul(b, floored);
    return f16_sub(a, scaled);
}
// c = a - b * trunc(a / b)
static void zig_f128M_mod(const float128_t* a, const float128_t* b, float128_t* c) {
f128M_div(a, b, c);
@ -7725,6 +7827,9 @@ static void float_mod(ConstExprValue *out_val, ConstExprValue *op1, ConstExprVal
bigfloat_mod(&out_val->data.x_bigfloat, &op1->data.x_bigfloat, &op2->data.x_bigfloat);
} else if (op1->type->id == TypeTableEntryIdFloat) {
switch (op1->type->data.floating.bit_count) {
case 16:
out_val->data.x_f16 = zig_f16_mod(op1->data.x_f16, op2->data.x_f16);
return;
case 32:
out_val->data.x_f32 = fmodf(fmodf(op1->data.x_f32, op2->data.x_f32) + op2->data.x_f32, op2->data.x_f32);
return;
@ -7748,6 +7853,12 @@ static void float_negate(ConstExprValue *out_val, ConstExprValue *op) {
bigfloat_negate(&out_val->data.x_bigfloat, &op->data.x_bigfloat);
} else if (op->type->id == TypeTableEntryIdFloat) {
switch (op->type->data.floating.bit_count) {
case 16:
{
const float16_t zero = zig_double_to_f16(0);
out_val->data.x_f16 = f16_sub(zero, op->data.x_f16);
return;
}
case 32:
out_val->data.x_f32 = -op->data.x_f32;
return;
@ -7770,6 +7881,9 @@ static void float_negate(ConstExprValue *out_val, ConstExprValue *op) {
void float_write_ieee597(ConstExprValue *op, uint8_t *buf, bool is_big_endian) {
if (op->type->id == TypeTableEntryIdFloat) {
switch (op->type->data.floating.bit_count) {
case 16:
memcpy(buf, &op->data.x_f16, 2); // TODO wrong when compiler is big endian
return;
case 32:
memcpy(buf, &op->data.x_f32, 4); // TODO wrong when compiler is big endian
return;
@ -7790,6 +7904,9 @@ void float_write_ieee597(ConstExprValue *op, uint8_t *buf, bool is_big_endian) {
void float_read_ieee597(ConstExprValue *val, uint8_t *buf, bool is_big_endian) {
if (val->type->id == TypeTableEntryIdFloat) {
switch (val->type->data.floating.bit_count) {
case 16:
memcpy(&val->data.x_f16, buf, 2); // TODO wrong when compiler is big endian
return;
case 32:
memcpy(&val->data.x_f32, buf, 4); // TODO wrong when compiler is big endian
return;
@ -8817,6 +8934,9 @@ static bool eval_const_expr_implicit_cast(IrAnalyze *ira, IrInstruction *source_
if (other_val->type->id == TypeTableEntryIdComptimeFloat) {
assert(new_type->id == TypeTableEntryIdFloat);
switch (new_type->data.floating.bit_count) {
case 16:
const_val->data.x_f16 = bigfloat_to_f16(&other_val->data.x_bigfloat);
break;
case 32:
const_val->data.x_f32 = bigfloat_to_f32(&other_val->data.x_bigfloat);
break;
@ -8847,6 +8967,9 @@ static bool eval_const_expr_implicit_cast(IrAnalyze *ira, IrInstruction *source_
BigFloat bigfloat;
bigfloat_init_bigint(&bigfloat, &other_val->data.x_bigint);
switch (new_type->data.floating.bit_count) {
case 16:
const_val->data.x_f16 = bigfloat_to_f16(&bigfloat);
break;
case 32:
const_val->data.x_f32 = bigfloat_to_f32(&bigfloat);
break;
@ -20104,6 +20227,9 @@ static TypeTableEntry *ir_analyze_instruction_sqrt(IrAnalyze *ira, IrInstruction
bigfloat_sqrt(&out_val->data.x_bigfloat, &val->data.x_bigfloat);
} else if (float_type->id == TypeTableEntryIdFloat) {
switch (float_type->data.floating.bit_count) {
case 16:
out_val->data.x_f16 = f16_sqrt(val->data.x_f16);
break;
case 32:
out_val->data.x_f32 = sqrtf(val->data.x_f32);
break;
@ -20124,7 +20250,9 @@ static TypeTableEntry *ir_analyze_instruction_sqrt(IrAnalyze *ira, IrInstruction
}
assert(float_type->id == TypeTableEntryIdFloat);
if (float_type->data.floating.bit_count != 32 && float_type->data.floating.bit_count != 64) {
if (float_type->data.floating.bit_count != 16 &&
float_type->data.floating.bit_count != 32 &&
float_type->data.floating.bit_count != 64) {
ir_add_error(ira, instruction->type, buf_sprintf("compiler TODO: add implementation of sqrt for '%s'", buf_ptr(&float_type->name)));
return ira->codegen->builtin_types.entry_invalid;
}

View File

@ -31,6 +31,8 @@
#endif
#include "softfloat.hpp"
#define BREAKPOINT __asm("int $0x03")
ATTRIBUTE_COLD
@ -165,4 +167,21 @@ static inline uint8_t log2_u64(uint64_t x) {
return (63 - clzll(x));
}
// Converts a host double to half precision.
// The double's bytes are copied into a float64_t (the two types are asserted
// to have the same size) and then narrowed with f64_to_f16.
static inline float16_t zig_double_to_f16(double x) {
    float64_t as_f64;
    static_assert(sizeof(as_f64) == sizeof(x), "");
    memcpy(&as_f64, &x, sizeof(as_f64));
    return f64_to_f16(as_f64);
}
// Widens a half-precision value to a host double: f16_to_f64 produces a
// float64_t whose bytes are then copied into a double of the same size.
// The returned value is safe to coerce to float even when x is NaN or Infinity.
static inline double zig_f16_to_double(float16_t x) {
    const float64_t widened = f16_to_f64(x);
    double result;
    static_assert(sizeof(widened) == sizeof(result), "");
    memcpy(&result, &widened, sizeof(result));
    return result;
}
#endif

View File

@ -10,9 +10,13 @@ pub extern fn __extendsftf2(a: f32) f128 {
return extendXfYf2(f128, f32, a);
}
// Widens a half-precision float to f32. The argument is taken as a raw u16
// bit pattern and reinterpreted as f16 before calling extendXfYf2.
pub extern fn __extendhfsf2(a: u16) f32 {
return extendXfYf2(f32, f16, @bitCast(f16, a));
}
const CHAR_BIT = 8;
pub fn extendXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t {
inline fn extendXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t {
const src_rep_t = @IntType(false, @typeInfo(src_t).Float.bits);
const dst_rep_t = @IntType(false, @typeInfo(dst_t).Float.bits);
const srcSigBits = std.math.floatMantissaBits(src_t);
@ -22,22 +26,22 @@ pub fn extendXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t {
// Various constants whose values follow from the type parameters.
// Any reasonable optimizer will fold and propagate all of these.
const srcBits: i32 = @sizeOf(src_t) * CHAR_BIT;
const srcExpBits: i32 = srcBits - srcSigBits - 1;
const srcInfExp: i32 = (1 << srcExpBits) - 1;
const srcExpBias: i32 = srcInfExp >> 1;
const srcBits = @sizeOf(src_t) * CHAR_BIT;
const srcExpBits = srcBits - srcSigBits - 1;
const srcInfExp = (1 << srcExpBits) - 1;
const srcExpBias = srcInfExp >> 1;
const srcMinNormal: src_rep_t = src_rep_t(1) << srcSigBits;
const srcInfinity: src_rep_t = src_rep_t(@bitCast(u32, srcInfExp)) << srcSigBits;
const srcSignMask: src_rep_t = src_rep_t(1) << @intCast(SrcShift, srcSigBits +% srcExpBits);
const srcAbsMask: src_rep_t = srcSignMask -% 1;
const srcQNaN: src_rep_t = src_rep_t(1) << @intCast(SrcShift, srcSigBits -% 1);
const srcNaNCode: src_rep_t = srcQNaN -% 1;
const srcMinNormal = 1 << srcSigBits;
const srcInfinity = srcInfExp << srcSigBits;
const srcSignMask = 1 << (srcSigBits + srcExpBits);
const srcAbsMask = srcSignMask - 1;
const srcQNaN = 1 << (srcSigBits - 1);
const srcNaNCode = srcQNaN - 1;
const dstBits: i32 = @sizeOf(dst_t) * CHAR_BIT;
const dstExpBits: i32 = dstBits - dstSigBits - 1;
const dstInfExp: i32 = (1 << dstExpBits) - 1;
const dstExpBias: i32 = dstInfExp >> 1;
const dstBits = @sizeOf(dst_t) * CHAR_BIT;
const dstExpBits = dstBits - dstSigBits - 1;
const dstInfExp = (1 << dstExpBits) - 1;
const dstExpBias = dstInfExp >> 1;
const dstMinNormal: dst_rep_t = dst_rep_t(1) << dstSigBits;
@ -47,38 +51,36 @@ pub fn extendXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t {
const sign: src_rep_t = aRep & srcSignMask;
var absResult: dst_rep_t = undefined;
// If @sizeOf(src_rep_t) < @sizeOf(int), the subtraction result is promoted
// to (signed) int. To avoid that, explicitly cast to src_rep_t.
if ((src_rep_t)(aAbs -% srcMinNormal) < srcInfinity -% srcMinNormal) {
if (aAbs -% srcMinNormal < srcInfinity - srcMinNormal) {
// a is a normal number.
// Extend to the destination type by shifting the significand and
// exponent into the proper position and rebiasing the exponent.
absResult = dst_rep_t(aAbs) << (dstSigBits -% srcSigBits);
absResult += dst_rep_t(@bitCast(u32, dstExpBias -% srcExpBias)) << dstSigBits;
absResult = dst_rep_t(aAbs) << (dstSigBits - srcSigBits);
absResult += (dstExpBias - srcExpBias) << dstSigBits;
} else if (aAbs >= srcInfinity) {
// a is NaN or infinity.
// Conjure the result by beginning with infinity, then setting the qNaN
// bit (if needed) and right-aligning the rest of the trailing NaN
// payload field.
absResult = dst_rep_t(@bitCast(u32, dstInfExp)) << dstSigBits;
absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits);
absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits);
absResult = dstInfExp << dstSigBits;
absResult |= dst_rep_t(aAbs & srcQNaN) << (dstSigBits - srcSigBits);
absResult |= dst_rep_t(aAbs & srcNaNCode) << (dstSigBits - srcSigBits);
} else if (aAbs != 0) {
// a is denormal.
// renormalize the significand and clear the leading bit, then insert
// the correct adjusted exponent in the destination type.
const scale: i32 = @clz(aAbs) - @clz(srcMinNormal);
const scale: u32 = @clz(aAbs) - @clz(src_rep_t(srcMinNormal));
absResult = dst_rep_t(aAbs) << @intCast(DstShift, dstSigBits - srcSigBits + scale);
absResult ^= dstMinNormal;
const resultExponent: i32 = dstExpBias - srcExpBias - scale + 1;
absResult |= dst_rep_t(@bitCast(u32, resultExponent)) << @intCast(DstShift, dstSigBits);
const resultExponent: u32 = dstExpBias - srcExpBias - scale + 1;
absResult |= @intCast(dst_rep_t, resultExponent) << dstSigBits;
} else {
// a is zero.
absResult = 0;
}
// Apply the signbit to (dst_t)abs(a).
const result: dst_rep_t align(@alignOf(dst_t)) = absResult | dst_rep_t(sign) << @intCast(DstShift, dstBits - srcBits);
const result: dst_rep_t align(@alignOf(dst_t)) = absResult | dst_rep_t(sign) << (dstBits - srcBits);
return @bitCast(dst_t, result);
}

View File

@ -1,4 +1,5 @@
const __extenddftf2 = @import("extendXfYf2.zig").__extenddftf2;
const __extendhfsf2 = @import("extendXfYf2.zig").__extendhfsf2;
const __extendsftf2 = @import("extendXfYf2.zig").__extendsftf2;
const assert = @import("std").debug.assert;
@ -24,6 +25,22 @@ fn test__extenddftf2(a: f64, expectedHi: u64, expectedLo: u64) void {
@panic("__extenddftf2 test failure");
}
// Checks that __extendhfsf2(a) yields exactly the f32 bit pattern `expected`.
// Results are compared by bit pattern first; a matching NaN pattern is
// accepted without a float comparison because NaN never compares equal.
fn test__extendhfsf2(a: u16, expected: u32) void {
    const x = __extendhfsf2(a);
    const rep = @bitCast(u32, x);
    if (rep != expected) {
        @panic("__extendhfsf2 test failure");
    }
    // Anything above the infinity pattern (sign bit masked off) is a NaN.
    const is_nan = (rep & 0x7fffffff) > 0x7f800000;
    if (is_nan) {
        return; // NaN is always unequal.
    }
    if (x == @bitCast(f32, expected)) {
        return;
    }
    @panic("__extendhfsf2 test failure");
}
fn test__extendsftf2(a: f32, expectedHi: u64, expectedLo: u64) void {
const x = __extendsftf2(a);
@ -68,6 +85,35 @@ test "extenddftf2" {
test__extenddftf2(0x1.edcba987654321fp-45, 0x3fd2edcba9876543, 0x2000000000000000);
}
// Exercises __extendhfsf2 on raw f16 bit patterns (first argument) against the
// expected f32 bit patterns (second argument): NaNs, signed zeros, infinities,
// the smallest/largest denormals, the smallest/largest normals, and a few
// ordinary values.
test "extendhfsf2" {
test__extendhfsf2(0x7e00, 0x7fc00000); // qNaN
test__extendhfsf2(0x7f00, 0x7fe00000); // NaN with a payload bit (quiet bit 0x0200 is set, so still quiet)
test__extendhfsf2(0, 0); // 0
test__extendhfsf2(0x8000, 0x80000000); // -0
test__extendhfsf2(0x7c00, 0x7f800000); // inf
test__extendhfsf2(0xfc00, 0xff800000); // -inf
test__extendhfsf2(0x0001, 0x33800000); // denormal (min), 2**-24
test__extendhfsf2(0x8001, 0xb3800000); // denormal (min), -2**-24
test__extendhfsf2(0x03ff, 0x387fc000); // denormal (max), 2**-14 - 2**-24
test__extendhfsf2(0x83ff, 0xb87fc000); // denormal (max), -2**-14 + 2**-24
test__extendhfsf2(0x0400, 0x38800000); // normal (min), 2**-14
test__extendhfsf2(0x8400, 0xb8800000); // normal (min), -2**-14
test__extendhfsf2(0x7bff, 0x477fe000); // normal (max), 65504
test__extendhfsf2(0xfbff, 0xc77fe000); // normal (max), -65504
test__extendhfsf2(0x3c01, 0x3f802000); // normal, 1 + 2**-10
test__extendhfsf2(0xbc01, 0xbf802000); // normal, -1 - 2**-10
test__extendhfsf2(0x3555, 0x3eaaa000); // normal, approx. 1/3
test__extendhfsf2(0xb555, 0xbeaaa000); // normal, approx. -1/3
}
test "extendsftf2" {
// qNaN
test__extendsftf2(makeQNaN32(), 0x7fff800000000000, 0x0);

View File

@ -15,6 +15,8 @@ comptime {
@export("__lttf2", @import("comparetf2.zig").__letf2, linkage);
@export("__netf2", @import("comparetf2.zig").__letf2, linkage);
@export("__gttf2", @import("comparetf2.zig").__getf2, linkage);
@export("__gnu_h2f_ieee", @import("extendXfYf2.zig").__extendhfsf2, linkage);
@export("__gnu_f2h_ieee", @import("truncXfYf2.zig").__truncsfhf2, linkage);
}
@export("__unordtf2", @import("comparetf2.zig").__unordtf2, linkage);
@ -22,6 +24,9 @@ comptime {
@export("__floatuntidf", @import("floatuntidf.zig").__floatuntidf, linkage);
@export("__extenddftf2", @import("extendXfYf2.zig").__extenddftf2, linkage);
@export("__extendsftf2", @import("extendXfYf2.zig").__extendsftf2, linkage);
@export("__extendhfsf2", @import("extendXfYf2.zig").__extendhfsf2, linkage);
@export("__truncsfhf2", @import("truncXfYf2.zig").__truncsfhf2, linkage);
@export("__fixunssfsi", @import("fixunssfsi.zig").__fixunssfsi, linkage);
@export("__fixunssfdi", @import("fixunssfdi.zig").__fixunssfdi, linkage);

View File

@ -0,0 +1,111 @@
const std = @import("std");
// Narrows an f32 to half precision via truncXfYf2 and returns the result as
// its raw u16 bit pattern.
pub extern fn __truncsfhf2(a: f32) u16 {
return @bitCast(u16, truncXfYf2(f16, f32, a));
}
const CHAR_BIT = 8;

// Converts `a` from the float type `src_t` to the narrower float type `dst_t`,
// rounding to nearest with ties to even.
//
//  - Inputs whose exponent fits the destination's normal range are shifted,
//    re-biased and rounded.
//  - NaN inputs yield a quiet NaN that keeps the (truncated) payload.
//  - Inputs at or above the overflow threshold yield infinity.
//  - All remaining inputs yield a denormal or zero.
//
// The sign bit of `a` is copied into the result in every case.
inline fn truncXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t {
    const src_rep_t = @IntType(false, @typeInfo(src_t).Float.bits);
    const dst_rep_t = @IntType(false, @typeInfo(dst_t).Float.bits);
    const srcSigBits = std.math.floatMantissaBits(src_t);
    const dstSigBits = std.math.floatMantissaBits(dst_t);
    const SrcShift = std.math.Log2Int(src_rep_t);

    // Various constants whose values follow from the type parameters.
    // Any reasonable optimizer will fold and propagate all of these.
    const srcBits = @sizeOf(src_t) * CHAR_BIT;
    const srcExpBits = srcBits - srcSigBits - 1;
    const srcInfExp = (1 << srcExpBits) - 1;
    const srcExpBias = srcInfExp >> 1;

    const srcMinNormal = 1 << srcSigBits;
    const srcSignificandMask = srcMinNormal - 1;
    const srcInfinity = srcInfExp << srcSigBits;
    const srcSignMask = 1 << (srcSigBits + srcExpBits);
    const srcAbsMask = srcSignMask - 1;
    // Source significand bits that do not fit in the destination significand.
    const roundMask = (1 << (srcSigBits - dstSigBits)) - 1;
    const halfway = 1 << (srcSigBits - dstSigBits - 1);
    const srcQNaN = 1 << (srcSigBits - 1);
    const srcNaNCode = srcQNaN - 1;

    const dstBits = @sizeOf(dst_t) * CHAR_BIT;
    const dstExpBits = dstBits - dstSigBits - 1;
    const dstInfExp = (1 << dstExpBits) - 1;
    const dstExpBias = dstInfExp >> 1;

    const underflowExponent = srcExpBias + 1 - dstExpBias;
    const overflowExponent = srcExpBias + dstInfExp - dstExpBias;
    const underflow = underflowExponent << srcSigBits;
    const overflow = overflowExponent << srcSigBits;

    const dstQNaN = 1 << (dstSigBits - 1);
    const dstNaNCode = dstQNaN - 1;

    // Break a into a sign and representation of the absolute value
    const aRep: src_rep_t = @bitCast(src_rep_t, a);
    const aAbs: src_rep_t = aRep & srcAbsMask;
    const sign: src_rep_t = aRep & srcSignMask;
    var absResult: dst_rep_t = undefined;

    // The wrapping subtractions turn the range check
    // `underflow <= aAbs < overflow` into a single unsigned comparison.
    if (aAbs -% underflow < aAbs -% overflow) {
        // The exponent of a is within the range of normal numbers in the
        // destination format. We can convert by simply right-shifting with
        // rounding and adjusting the exponent.
        absResult = @truncate(dst_rep_t, aAbs >> (srcSigBits - dstSigBits));
        absResult -%= dst_rep_t(srcExpBias - dstExpBias) << dstSigBits;

        const roundBits: src_rep_t = aAbs & roundMask;
        if (roundBits > halfway) {
            // Round to nearest
            absResult += 1;
        } else if (roundBits == halfway) {
            // Ties to even
            absResult += absResult & 1;
        }
    } else if (aAbs > srcInfinity) {
        // a is NaN.
        // Conjure the result by beginning with infinity, setting the qNaN
        // bit and inserting the (truncated) trailing NaN field.
        absResult = @intCast(dst_rep_t, dstInfExp) << dstSigBits;
        absResult |= dstQNaN;
        absResult |= @intCast(dst_rep_t, ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode);
    } else if (aAbs >= overflow) {
        // a overflows to infinity.
        absResult = @intCast(dst_rep_t, dstInfExp) << dstSigBits;
    } else {
        // a underflows on conversion to the destination type or is an exact
        // zero. The result may be a denormal or zero. Extract the exponent
        // to get the shift amount for the denormalization.
        const aExp: u32 = aAbs >> srcSigBits;
        const shift: u32 = srcExpBias - dstExpBias - aExp + 1;

        const significand: src_rep_t = (aRep & srcSignificandMask) | srcMinNormal;

        // Right shift by the denormalization amount with sticky.
        if (shift > srcSigBits) {
            absResult = 0;
        } else {
            // Collapse every bit that the right shift below discards into a
            // single 0/1 flag. OR-ing the raw shifted value instead would set
            // high bits of the significand and lose the flag for rounding.
            const sticky: src_rep_t = @boolToInt((significand << @intCast(SrcShift, srcBits - shift)) != 0);
            const denormalizedSignificand: src_rep_t = (significand >> @intCast(SrcShift, shift)) | sticky;
            absResult = @intCast(dst_rep_t, denormalizedSignificand >> (srcSigBits - dstSigBits));

            const roundBits: src_rep_t = denormalizedSignificand & roundMask;
            if (roundBits > halfway) {
                // Round to nearest
                absResult += 1;
            } else if (roundBits == halfway) {
                // Ties to even
                absResult += absResult & 1;
            }
        }
    }

    // Move the source sign bit down to the destination's sign position.
    const result: dst_rep_t align(@alignOf(dst_t)) = absResult | @truncate(dst_rep_t, sign >> @intCast(SrcShift, srcBits - dstBits));
    return @bitCast(dst_t, result);
}
// Imports truncXfYf2_test.zig and discards the result.
// NOTE(review): presumably this is what makes that file's tests run together
// with this file's — confirm against the test runner.
test "import truncXfYf2" {
_ = @import("truncXfYf2_test.zig");
}

View File

@ -0,0 +1,64 @@
const __truncsfhf2 = @import("truncXfYf2.zig").__truncsfhf2;
// Feeds the f32 whose bit pattern is `a` to __truncsfhf2 and panics unless the
// returned half-precision bit pattern equals `expected`.
fn test__truncsfhf2(a: u32, expected: u16) void {
    const input = @bitCast(f32, a);
    const actual = __truncsfhf2(input);
    if (actual != expected) {
        @panic("__truncsfhf2 test failure");
    }
}
// Test vectors for __truncsfhf2. Each call passes an f32 bit pattern and the
// f16 bit pattern expected back; the trailing comment names the value tested.
test "truncsfhf2" {
test__truncsfhf2(0x7fc00000, 0x7e00); // qNaN
test__truncsfhf2(0x7fe00000, 0x7f00); // qNaN with payload (quiet bit 22 is set, so this is not an sNaN)
test__truncsfhf2(0, 0); // 0
test__truncsfhf2(0x80000000, 0x8000); // -0
test__truncsfhf2(0x7f800000, 0x7c00); // inf
test__truncsfhf2(0xff800000, 0xfc00); // -inf
test__truncsfhf2(0x477ff000, 0x7c00); // 65520 -> inf
test__truncsfhf2(0xc77ff000, 0xfc00); // -65520 -> -inf
test__truncsfhf2(0x71cc3892, 0x7c00); // 0x1.987124876876324p+100 -> inf
test__truncsfhf2(0xf1cc3892, 0xfc00); // -0x1.987124876876324p+100 -> -inf
test__truncsfhf2(0x38800000, 0x0400); // normal (min), 2**-14
test__truncsfhf2(0xb8800000, 0x8400); // normal (min), -2**-14
test__truncsfhf2(0x477fe000, 0x7bff); // normal (max), 65504
test__truncsfhf2(0xc77fe000, 0xfbff); // normal (max), -65504
test__truncsfhf2(0x477fe100, 0x7bff); // normal, 65505 -> 65504
test__truncsfhf2(0xc77fe100, 0xfbff); // normal, -65505 -> -65504
test__truncsfhf2(0x477fef00, 0x7bff); // normal, 65519 -> 65504
test__truncsfhf2(0xc77fef00, 0xfbff); // normal, -65519 -> -65504
test__truncsfhf2(0x3f802000, 0x3c01); // normal, 1 + 2**-10
test__truncsfhf2(0xbf802000, 0xbc01); // normal, -1 - 2**-10
test__truncsfhf2(0x3eaaa000, 0x3555); // normal, approx. 1/3
test__truncsfhf2(0xbeaaa000, 0xb555); // normal, approx. -1/3
test__truncsfhf2(0x40490fdb, 0x4248); // normal, 3.1415926535
test__truncsfhf2(0xc0490fdb, 0xc248); // normal, -3.1415926535
test__truncsfhf2(0x45cc3892, 0x6e62); // normal, 0x1.987124876876324p+12
test__truncsfhf2(0x3f800000, 0x3c00); // normal, 1
test__truncsfhf2(0x38800000, 0x0400); // normal, 0x1.0p-14 (same vector as the min-normal case above)
test__truncsfhf2(0x33800000, 0x0001); // denormal (min), 2**-24
test__truncsfhf2(0xb3800000, 0x8001); // denormal (min), -2**-24
test__truncsfhf2(0x387fc000, 0x03ff); // denormal (max), 2**-14 - 2**-24
test__truncsfhf2(0xb87fc000, 0x83ff); // denormal (max), -2**-14 + 2**-24
test__truncsfhf2(0x35800000, 0x0010); // denormal, 0x1.0p-20
test__truncsfhf2(0x33280000, 0x0001); // denormal, 0x1.5p-25 -> 0x1.0p-24
test__truncsfhf2(0x33000000, 0x0000); // 0x1.0p-25 -> zero
}

View File

@ -350,13 +350,16 @@ fn testFloatToInts() void {
assert(x == 10000);
const y = @floatToInt(i32, f32(1e4));
assert(y == 10000);
expectFloatToInt(u8, 255.1, 255);
expectFloatToInt(i8, 127.2, 127);
expectFloatToInt(i8, -128.2, -128);
expectFloatToInt(f16, 255.1, u8, 255);
expectFloatToInt(f16, 127.2, i8, 127);
expectFloatToInt(f16, -128.2, i8, -128);
expectFloatToInt(f32, 255.1, u8, 255);
expectFloatToInt(f32, 127.2, i8, 127);
expectFloatToInt(f32, -128.2, i8, -128);
}
fn expectFloatToInt(comptime T: type, f: f32, i: T) void {
assert(@floatToInt(T, f) == i);
// Asserts that converting `f` (a float of type `F`) to the integer type `I`
// with @floatToInt produces `i`.
fn expectFloatToInt(comptime F: type, f: F, comptime I: type, i: I) void {
    const converted: I = @floatToInt(I, f);
    assert(converted == i);
}
test "cast u128 to f128 and back" {
@ -418,19 +421,39 @@ test "@intCast comptime_int" {
}
test "@floatCast comptime_int and comptime_float" {
const result = @floatCast(f32, 1234);
assert(@typeOf(result) == f32);
assert(result == 1234.0);
const result2 = @floatCast(f32, 1234.0);
assert(@typeOf(result) == f32);
assert(result == 1234.0);
{
const result = @floatCast(f16, 1234);
assert(@typeOf(result) == f16);
assert(result == 1234.0);
}
{
const result = @floatCast(f16, 1234.0);
assert(@typeOf(result) == f16);
assert(result == 1234.0);
}
{
const result = @floatCast(f32, 1234);
assert(@typeOf(result) == f32);
assert(result == 1234.0);
}
{
const result = @floatCast(f32, 1234.0);
assert(@typeOf(result) == f32);
assert(result == 1234.0);
}
}
test "comptime_int @intToFloat" {
const result = @intToFloat(f32, 1234);
assert(@typeOf(result) == f32);
assert(result == 1234.0);
{
const result = @intToFloat(f16, 1234);
assert(@typeOf(result) == f16);
assert(result == 1234.0);
}
{
const result = @intToFloat(f32, 1234);
assert(@typeOf(result) == f32);
assert(result == 1234.0);
}
}
test "@bytesToSlice keeps pointer alignment" {

View File

@ -6,15 +6,20 @@ test "division" {
}
fn testDivision() void {
assert(div(u32, 13, 3) == 4);
assert(div(f16, 1.0, 2.0) == 0.5);
assert(div(f32, 1.0, 2.0) == 0.5);
assert(divExact(u32, 55, 11) == 5);
assert(divExact(i32, -55, 11) == -5);
assert(divExact(f16, 55.0, 11.0) == 5.0);
assert(divExact(f16, -55.0, 11.0) == -5.0);
assert(divExact(f32, 55.0, 11.0) == 5.0);
assert(divExact(f32, -55.0, 11.0) == -5.0);
assert(divFloor(i32, 5, 3) == 1);
assert(divFloor(i32, -5, 3) == -2);
assert(divFloor(f16, 5.0, 3.0) == 1.0);
assert(divFloor(f16, -5.0, 3.0) == -2.0);
assert(divFloor(f32, 5.0, 3.0) == 1.0);
assert(divFloor(f32, -5.0, 3.0) == -2.0);
assert(divFloor(i32, -0x80000000, -2) == 0x40000000);
@ -24,8 +29,12 @@ fn testDivision() void {
assert(divTrunc(i32, 5, 3) == 1);
assert(divTrunc(i32, -5, 3) == -1);
assert(divTrunc(f16, 5.0, 3.0) == 1.0);
assert(divTrunc(f16, -5.0, 3.0) == -1.0);
assert(divTrunc(f32, 5.0, 3.0) == 1.0);
assert(divTrunc(f32, -5.0, 3.0) == -1.0);
assert(divTrunc(f64, 5.0, 3.0) == 1.0);
assert(divTrunc(f64, -5.0, 3.0) == -1.0);
comptime {
assert(
@ -435,10 +444,11 @@ test "comptime float rem int" {
}
test "remainder division" {
comptime remdiv(f16);
comptime remdiv(f32);
comptime remdiv(f64);
comptime remdiv(f128);
remdiv(f32);
remdiv(f16);
remdiv(f64);
remdiv(f128);
}
@ -453,6 +463,8 @@ test "@sqrt" {
comptime testSqrt(f64, 12.0);
testSqrt(f32, 13.0);
comptime testSqrt(f32, 13.0);
testSqrt(f16, 13.0);
comptime testSqrt(f16, 13.0);
const x = 14.0;
const y = x * x;

View File

@ -53,6 +53,7 @@ test "@IntType builtin" {
}
// Checks the `bit_count` value reported by the f16, f32 and f64 types.
test "floating point primitive bit counts" {
assert(f16.bit_count == 16);
assert(f32.bit_count == 32);
assert(f64.bit_count == 64);
}