Merge remote-tracking branch 'origin/master' into copy-elision-3

2019-06-26 14:44:01 -04:00 · 2019-06-26 14:44:01 -04:00 · 3085d29af8
parent 5cd4753bea 07c0d484ee
commit 3085d29af8
21 changed files with 1481 additions and 513 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -389,6 +389,8 @@ set(EMBEDDED_SOFTFLOAT_SOURCES
    "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/s_subMagsF32.c"
    "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/s_subMagsF64.c"
    "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/s_tryPropagateNaNF128M.c"
+    "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_mulAdd.c"
+    "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f128M_mulAdd.c"
    "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/softfloat_state.c"
    "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/ui32_to_f128M.c"
    "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/ui64_to_f128M.c"
@ -6653,15 +6655,18 @@ set(OPTIMIZED_C_FLAGS "-std=c99 -O3")
 set(EXE_LDFLAGS " ")
 if(MSVC)
    set(EXE_LDFLAGS "/STACK:16777216")
-elseif(ZIG_STATIC)
+elseif(MINGW)
+    set(EXE_LDFLAGS "${EXE_LDFLAGS} -Wl,--stack,16777216")
+endif()
+
+# Static-link flags are appended so the stack-size flag chosen above survives.
+# MSVC is excluded: it never received -static before this change, and
+# overwriting here would also drop its /STACK setting.
+if(ZIG_STATIC AND NOT MSVC)
    if(APPLE)
-        set(EXE_LDFLAGS "-static-libgcc -static-libstdc++")
+        set(EXE_LDFLAGS "${EXE_LDFLAGS} -static-libgcc -static-libstdc++")
    else()
-        set(EXE_LDFLAGS "-static")
+        set(EXE_LDFLAGS "${EXE_LDFLAGS} -static")
    endif()
-else()
-    set(EXE_LDFLAGS " ")
 endif()
+
 if(ZIG_TEST_COVERAGE)
    set(EXE_CFLAGS "${EXE_CFLAGS} -fprofile-arcs -ftest-coverage")
    set(EXE_LDFLAGS "${EXE_LDFLAGS} -fprofile-arcs -ftest-coverage")
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@ -6259,6 +6259,13 @@ comptime {
      This function is only valid within function scope.
      </p>

+      {#header_close#}
+      {#header_open|@mulAdd#}
+      <pre>{#syntax#}@mulAdd(comptime T: type, a: T, b: T, c: T) T{#endsyntax#}</pre>
+      <p>
+      Fused multiply add (for floats), similar to {#syntax#}(a * b) + c{#endsyntax#}, except
+      only rounds once, and is thus more accurate.
+      </p>
      {#header_close#}

      {#header_open|@byteSwap#}
@ -7347,10 +7354,91 @@ test "@setRuntimeSafety" {
      <pre>{#syntax#}@sqrt(comptime T: type, value: T) T{#endsyntax#}</pre>
      <p>
      Performs the square root of a floating point number. Uses a dedicated hardware instruction
-      when available. Currently only supports f32 and f64 at runtime. f128 at runtime is TODO.
+      when available. Supports f16, f32, f64, and f128, as well as vectors.
      </p>
+      {#header_close#}
+      {#header_open|@sin#}
+      <pre>{#syntax#}@sin(comptime T: type, value: T) T{#endsyntax#}</pre>
      <p>
-      This is a low-level intrinsic. Most code can use {#syntax#}std.math.sqrt{#endsyntax#} instead.
+      Sine trigonometric function on a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@cos#}
+      <pre>{#syntax#}@cos(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Cosine trigonometric function on a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@exp#}
+      <pre>{#syntax#}@exp(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Base-e exponential function on a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@exp2#}
+      <pre>{#syntax#}@exp2(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Base-2 exponential function on a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@ln#}
+      <pre>{#syntax#}@ln(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Returns the natural logarithm of a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@log2#}
+      <pre>{#syntax#}@log2(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Returns the logarithm to the base 2 of a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@log10#}
+      <pre>{#syntax#}@log10(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Returns the logarithm to the base 10 of a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@fabs#}
+      <pre>{#syntax#}@fabs(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Returns the absolute value of a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@floor#}
+      <pre>{#syntax#}@floor(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Returns the largest integral value not greater than the given floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@ceil#}
+      <pre>{#syntax#}@ceil(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Returns the smallest integral value not less than the given floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@trunc#}
+      <pre>{#syntax#}@trunc(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Rounds the given floating point number to an integer, towards zero. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@round#}
+      <pre>{#syntax#}@round(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Rounds the given floating point number to the nearest integer, with halfway cases rounded away from zero. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
      </p>
      {#header_close#}

--- a/src-self-hosted/dep_tokenizer.zig
+++ b/src-self-hosted/dep_tokenizer.zig
@ -998,7 +998,7 @@ fn printCharValues(out: var, bytes: []const u8) !void {

 fn printUnderstandableChar(out: var, char: u8) !void {
    if (!std.ascii.isPrint(char) or char == ' ') {
-        std.fmt.format(out.context, anyerror, out.output, "\\x{X2}", char) catch {};
+        std.fmt.format(out.context, anyerror, out.output, "\\x{X:2}", char) catch {};
    } else {
        try out.write("'");
        try out.write([_]u8{printable_char_tab[char]});
--- a/src/all_types.hpp
+++ b/src/all_types.hpp
@ -1419,6 +1419,7 @@ enum BuiltinFnId {
    BuiltinFnIdSubWithOverflow,
    BuiltinFnIdMulWithOverflow,
    BuiltinFnIdShlWithOverflow,
+    BuiltinFnIdMulAdd,
    BuiltinFnIdCInclude,
    BuiltinFnIdCDefine,
    BuiltinFnIdCUndef,
@ -1446,6 +1447,19 @@ enum BuiltinFnId {
    BuiltinFnIdRem,
    BuiltinFnIdMod,
    BuiltinFnIdSqrt,
+    BuiltinFnIdSin,
+    BuiltinFnIdCos,
+    BuiltinFnIdExp,
+    BuiltinFnIdExp2,
+    BuiltinFnIdLn,
+    BuiltinFnIdLog2,
+    BuiltinFnIdLog10,
+    BuiltinFnIdFabs,
+    BuiltinFnIdFloor,
+    BuiltinFnIdCeil,
+    BuiltinFnIdTrunc,
+    BuiltinFnIdNearbyInt,
+    BuiltinFnIdRound,
    BuiltinFnIdTruncate,
    BuiltinFnIdIntCast,
    BuiltinFnIdFloatCast,
@ -1567,9 +1581,8 @@ enum ZigLLVMFnId {
    ZigLLVMFnIdClz,
    ZigLLVMFnIdPopCount,
    ZigLLVMFnIdOverflowArithmetic,
-    ZigLLVMFnIdFloor,
-    ZigLLVMFnIdCeil,
-    ZigLLVMFnIdSqrt,
+    ZigLLVMFnIdFMA,
+    ZigLLVMFnIdFloatOp,
    ZigLLVMFnIdBswap,
    ZigLLVMFnIdBitReverse,
 };
@ -1596,7 +1609,9 @@ struct ZigLLVMFnKey {
            uint32_t bit_count;
        } pop_count;
        struct {
+            BuiltinFnId op;
            uint32_t bit_count;
+            uint32_t vector_len; // 0 means not a vector
        } floating;
        struct {
            AddSubMul add_sub_mul;
@ -2260,6 +2275,8 @@ enum IrInstructionId {
    IrInstructionIdOverflowOp,
    IrInstructionIdTestErrSrc,
    IrInstructionIdTestErrGen,
+    IrInstructionIdMulAdd,
+    IrInstructionIdFloatOp,
    IrInstructionIdUnwrapErrCode,
    IrInstructionIdUnwrapErrPayload,
    IrInstructionIdErrWrapCode,
@ -2324,7 +2341,6 @@ enum IrInstructionId {
    IrInstructionIdAddImplicitReturnType,
    IrInstructionIdMergeErrRetTraces,
    IrInstructionIdMarkErrRetTracePtr,
-    IrInstructionIdSqrt,
    IrInstructionIdErrSetCast,
    IrInstructionIdToBytes,
    IrInstructionIdFromBytes,
@ -3080,6 +3096,15 @@ struct IrInstructionOverflowOp {
    ZigType *result_ptr_type;
 };

+// IR instruction for the @mulAdd builtin. It carries the four call arguments
+// in call order: the type argument followed by the three value operands.
+struct IrInstructionMulAdd {
+    IrInstruction base;
+
+    IrInstruction *type_value;
+    IrInstruction *op1;
+    IrInstruction *op2;
+    IrInstruction *op3;
+};
+
 struct IrInstructionAlignOf {
    IrInstruction base;

@ -3512,11 +3537,13 @@ struct IrInstructionMarkErrRetTracePtr {
    IrInstruction *err_ret_trace_ptr;
 };

-struct IrInstructionSqrt {
+// IR instruction shared by the float builtins which take a single operand.
+// `op` records which builtin this instance stands for, `op1` is the operand,
+// and `type` may be nullptr (ir_build_float_op only references it when set).
+struct IrInstructionFloatOp {
    IrInstruction base;

+    BuiltinFnId op;
    IrInstruction *type;
-    IrInstruction *op;
+    IrInstruction *op1;
 };

 struct IrInstructionCheckRuntimeScope {
--- a/src/analyze.cpp
+++ b/src/analyze.cpp
@ -5736,12 +5736,13 @@ uint32_t zig_llvm_fn_key_hash(ZigLLVMFnKey x) {
            return (uint32_t)(x.data.clz.bit_count) * (uint32_t)2428952817;
        case ZigLLVMFnIdPopCount:
            return (uint32_t)(x.data.clz.bit_count) * (uint32_t)101195049;
-        case ZigLLVMFnIdFloor:
-            return (uint32_t)(x.data.floating.bit_count) * (uint32_t)1899859168;
-        case ZigLLVMFnIdCeil:
-            return (uint32_t)(x.data.floating.bit_count) * (uint32_t)1953839089;
-        case ZigLLVMFnIdSqrt:
-            return (uint32_t)(x.data.floating.bit_count) * (uint32_t)2225366385;
+        case ZigLLVMFnIdFloatOp:
+            return (uint32_t)(x.data.floating.bit_count) * ((uint32_t)x.id + 1025) +
+                   (uint32_t)(x.data.floating.vector_len) * (((uint32_t)x.id << 5) + 1025) +
+                   (uint32_t)(x.data.floating.op) * (uint32_t)43789879;
+        case ZigLLVMFnIdFMA:
+            return (uint32_t)(x.data.floating.bit_count) * ((uint32_t)x.id + 1025) +
+                   (uint32_t)(x.data.floating.vector_len) * (((uint32_t)x.id << 5) + 1025);
        case ZigLLVMFnIdBswap:
            return (uint32_t)(x.data.bswap.bit_count) * (uint32_t)3661994335;
        case ZigLLVMFnIdBitReverse:
@ -5769,10 +5770,13 @@ bool zig_llvm_fn_key_eql(ZigLLVMFnKey a, ZigLLVMFnKey b) {
            return a.data.bswap.bit_count == b.data.bswap.bit_count;
        case ZigLLVMFnIdBitReverse:
            return a.data.bit_reverse.bit_count == b.data.bit_reverse.bit_count;
-        case ZigLLVMFnIdFloor:
-        case ZigLLVMFnIdCeil:
-        case ZigLLVMFnIdSqrt:
-            return a.data.floating.bit_count == b.data.floating.bit_count;
+        case ZigLLVMFnIdFloatOp:
+            return a.data.floating.bit_count == b.data.floating.bit_count &&
+                   a.data.floating.vector_len == b.data.floating.vector_len &&
+                   a.data.floating.op == b.data.floating.op;
+        case ZigLLVMFnIdFMA:
+            return a.data.floating.bit_count == b.data.floating.bit_count &&
+                   a.data.floating.vector_len == b.data.floating.vector_len;
        case ZigLLVMFnIdOverflowArithmetic:
            return (a.data.overflow_arithmetic.bit_count == b.data.overflow_arithmetic.bit_count) &&
                (a.data.overflow_arithmetic.add_sub_mul == b.data.overflow_arithmetic.add_sub_mul) &&
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@ -808,32 +808,47 @@ static LLVMValueRef get_int_overflow_fn(CodeGen *g, ZigType *operand_type, AddSu
    return fn_val;
 }

-static LLVMValueRef get_float_fn(CodeGen *g, ZigType *type_entry, ZigLLVMFnId fn_id) {
-    assert(type_entry->id == ZigTypeIdFloat);
+static LLVMValueRef get_float_fn(CodeGen *g, ZigType *type_entry, ZigLLVMFnId fn_id, BuiltinFnId op) {
+    assert(type_entry->id == ZigTypeIdFloat ||
+           type_entry->id == ZigTypeIdVector);
+
+    bool is_vector = (type_entry->id == ZigTypeIdVector);
+    ZigType *float_type = is_vector ? type_entry->data.vector.elem_type : type_entry;

    ZigLLVMFnKey key = {};
    key.id = fn_id;
-    key.data.floating.bit_count = (uint32_t)type_entry->data.floating.bit_count;
+    key.data.floating.bit_count = (uint32_t)float_type->data.floating.bit_count;
+    key.data.floating.vector_len = is_vector ? (uint32_t)type_entry->data.vector.len : 0;
+    key.data.floating.op = op;

    auto existing_entry = g->llvm_fn_table.maybe_get(key);
    if (existing_entry)
        return existing_entry->value;

    const char *name;
-    if (fn_id == ZigLLVMFnIdFloor) {
-        name = "floor";
-    } else if (fn_id == ZigLLVMFnIdCeil) {
-        name = "ceil";
-    } else if (fn_id == ZigLLVMFnIdSqrt) {
-        name = "sqrt";
+    uint32_t num_args;
+    if (fn_id == ZigLLVMFnIdFMA) {
+        name = "fma";
+        num_args = 3;
+    } else if (fn_id == ZigLLVMFnIdFloatOp) {
+        name = float_op_to_name(op, true);
+        num_args = 1;
    } else {
        zig_unreachable();
    }

    char fn_name[64];
-    sprintf(fn_name, "llvm.%s.f%" ZIG_PRI_usize "", name, type_entry->data.floating.bit_count);
+    if (is_vector)
+        sprintf(fn_name, "llvm.%s.v%" PRIu32 "f%" PRIu32, name, key.data.floating.vector_len, key.data.floating.bit_count);
+    else
+        sprintf(fn_name, "llvm.%s.f%" PRIu32, name, key.data.floating.bit_count);
    LLVMTypeRef float_type_ref = get_llvm_type(g, type_entry);
-    LLVMTypeRef fn_type = LLVMFunctionType(float_type_ref, &float_type_ref, 1, false);
+    LLVMTypeRef return_elem_types[3] = {
+        float_type_ref,
+        float_type_ref,
+        float_type_ref,
+    };
+    LLVMTypeRef fn_type = LLVMFunctionType(float_type_ref, return_elem_types, num_args, false);
    LLVMValueRef fn_val = LLVMAddFunction(g->module, fn_name, fn_type);
    assert(LLVMGetIntrinsicID(fn_val));

@ -2483,22 +2498,17 @@ static LLVMValueRef gen_overflow_shr_op(CodeGen *g, ZigType *type_entry,
    return result;
 }

-static LLVMValueRef gen_floor(CodeGen *g, LLVMValueRef val, ZigType *type_entry) {
-    if (type_entry->id == ZigTypeIdInt)
+// Emits a call to the LLVM intrinsic for the single-operand float builtin `op`
+// applied to `val`. Floor and ceil of an integer-typed value return `val`
+// unchanged; every other combination is asserted to have a scalar float type.
+static LLVMValueRef gen_float_op(CodeGen *g, LLVMValueRef val, ZigType *type_entry, BuiltinFnId op) {
+    if ((op == BuiltinFnIdCeil ||
+         op == BuiltinFnIdFloor) &&
+        type_entry->id == ZigTypeIdInt)
        return val;
+    assert(type_entry->id == ZigTypeIdFloat);

-    LLVMValueRef floor_fn = get_float_fn(g, type_entry, ZigLLVMFnIdFloor);
+    // `floor_fn` keeps its earlier name but holds whichever intrinsic `op` selects.
+    LLVMValueRef floor_fn = get_float_fn(g, type_entry, ZigLLVMFnIdFloatOp, op);
    return LLVMBuildCall(g->builder, floor_fn, &val, 1, "");
 }

-static LLVMValueRef gen_ceil(CodeGen *g, LLVMValueRef val, ZigType *type_entry) {
-    if (type_entry->id == ZigTypeIdInt)
-        return val;
-
-    LLVMValueRef ceil_fn = get_float_fn(g, type_entry, ZigLLVMFnIdCeil);
-    return LLVMBuildCall(g->builder, ceil_fn, &val, 1, "");
-}
-
 enum DivKind {
    DivKindFloat,
    DivKindTrunc,
@ -2574,7 +2584,7 @@ static LLVMValueRef gen_div(CodeGen *g, bool want_runtime_safety, bool want_fast
                return result;
            case DivKindExact:
                if (want_runtime_safety) {
-                    LLVMValueRef floored = gen_floor(g, result, type_entry);
+                    LLVMValueRef floored = gen_float_op(g, result, type_entry, BuiltinFnIdFloor);
                    LLVMBasicBlockRef ok_block = LLVMAppendBasicBlock(g->cur_fn_val, "DivExactOk");
                    LLVMBasicBlockRef fail_block = LLVMAppendBasicBlock(g->cur_fn_val, "DivExactFail");
                    LLVMValueRef ok_bit = LLVMBuildFCmp(g->builder, LLVMRealOEQ, floored, result, "");
@ -2596,12 +2606,12 @@ static LLVMValueRef gen_div(CodeGen *g, bool want_runtime_safety, bool want_fast
                    LLVMBuildCondBr(g->builder, ltz, ltz_block, gez_block);

                    LLVMPositionBuilderAtEnd(g->builder, ltz_block);
-                    LLVMValueRef ceiled = gen_ceil(g, result, type_entry);
+                    LLVMValueRef ceiled = gen_float_op(g, result, type_entry, BuiltinFnIdCeil);
                    LLVMBasicBlockRef ceiled_end_block = LLVMGetInsertBlock(g->builder);
                    LLVMBuildBr(g->builder, end_block);

                    LLVMPositionBuilderAtEnd(g->builder, gez_block);
-                    LLVMValueRef floored = gen_floor(g, result, type_entry);
+                    LLVMValueRef floored = gen_float_op(g, result, type_entry, BuiltinFnIdFloor);
                    LLVMBasicBlockRef floored_end_block = LLVMGetInsertBlock(g->builder);
                    LLVMBuildBr(g->builder, end_block);

@ -2613,7 +2623,7 @@ static LLVMValueRef gen_div(CodeGen *g, bool want_runtime_safety, bool want_fast
                    return phi;
                }
            case DivKindFloor:
-                return gen_floor(g, result, type_entry);
+                return gen_float_op(g, result, type_entry, BuiltinFnIdFloor);
        }
        zig_unreachable();
    }
@ -5417,13 +5427,28 @@ static LLVMValueRef ir_render_mark_err_ret_trace_ptr(CodeGen *g, IrExecutable *e
    return nullptr;
 }

-static LLVMValueRef ir_render_sqrt(CodeGen *g, IrExecutable *executable, IrInstructionSqrt *instruction) {
-    LLVMValueRef op = ir_llvm_value(g, instruction->op);
-    assert(instruction->base.value.type->id == ZigTypeIdFloat);
-    LLVMValueRef fn_val = get_float_fn(g, instruction->base.value.type, ZigLLVMFnIdSqrt);
+// Lowers a single-operand float builtin to a call of the matching LLVM intrinsic.
+static LLVMValueRef ir_render_float_op(CodeGen *g, IrExecutable *executable, IrInstructionFloatOp *instruction) {
+    LLVMValueRef op = ir_llvm_value(g, instruction->op1);
+    // get_float_fn builds both scalar and vector intrinsics, so either result
+    // type is accepted here, mirroring the check in ir_render_mul_add.
+    assert(instruction->base.value.type->id == ZigTypeIdFloat ||
+           instruction->base.value.type->id == ZigTypeIdVector);
+    LLVMValueRef fn_val = get_float_fn(g, instruction->base.value.type, ZigLLVMFnIdFloatOp, instruction->op);
    return LLVMBuildCall(g->builder, fn_val, &op, 1, "");
 }

+// Lowers @mulAdd to a call of the fma intrinsic built for the result type,
+// which must be a float or a vector.
+static LLVMValueRef ir_render_mul_add(CodeGen *g, IrExecutable *executable, IrInstructionMulAdd *instruction) {
+    // Braced initialization evaluates the operands left to right, i.e. op1, op2, op3.
+    LLVMValueRef args[3] = {
+        ir_llvm_value(g, instruction->op1),
+        ir_llvm_value(g, instruction->op2),
+        ir_llvm_value(g, instruction->op3),
+    };
+
+    ZigType *result_type = instruction->base.value.type;
+    assert(result_type->id == ZigTypeIdFloat ||
+           result_type->id == ZigTypeIdVector);
+
+    LLVMValueRef fn_val = get_float_fn(g, result_type, ZigLLVMFnIdFMA, BuiltinFnIdMulAdd);
+    return LLVMBuildCall(g->builder, fn_val, args, 3, "");
+}
+
 static LLVMValueRef ir_render_bswap(CodeGen *g, IrExecutable *executable, IrInstructionBswap *instruction) {
    LLVMValueRef op = ir_llvm_value(g, instruction->op);
    ZigType *int_type = instruction->base.value.type;
@ -5770,8 +5795,10 @@ static LLVMValueRef ir_render_instruction(CodeGen *g, IrExecutable *executable,
            return ir_render_merge_err_ret_traces(g, executable, (IrInstructionMergeErrRetTraces *)instruction);
        case IrInstructionIdMarkErrRetTracePtr:
            return ir_render_mark_err_ret_trace_ptr(g, executable, (IrInstructionMarkErrRetTracePtr *)instruction);
-        case IrInstructionIdSqrt:
-            return ir_render_sqrt(g, executable, (IrInstructionSqrt *)instruction);
+        case IrInstructionIdFloatOp:
+            return ir_render_float_op(g, executable, (IrInstructionFloatOp *)instruction);
+        case IrInstructionIdMulAdd:
+            return ir_render_mul_add(g, executable, (IrInstructionMulAdd *)instruction);
        case IrInstructionIdArrayToVector:
            return ir_render_array_to_vector(g, executable, (IrInstructionArrayToVector *)instruction);
        case IrInstructionIdVectorToArray:
@ -7356,6 +7383,21 @@ static void define_builtin_fns(CodeGen *g) {
    create_builtin_fn(g, BuiltinFnIdRem, "rem", 2);
    create_builtin_fn(g, BuiltinFnIdMod, "mod", 2);
    create_builtin_fn(g, BuiltinFnIdSqrt, "sqrt", 2);
+    create_builtin_fn(g, BuiltinFnIdSin, "sin", 2);
+    create_builtin_fn(g, BuiltinFnIdCos, "cos", 2);
+    create_builtin_fn(g, BuiltinFnIdExp, "exp", 2);
+    create_builtin_fn(g, BuiltinFnIdExp2, "exp2", 2);
+    create_builtin_fn(g, BuiltinFnIdLn, "ln", 2);
+    create_builtin_fn(g, BuiltinFnIdLog2, "log2", 2);
+    create_builtin_fn(g, BuiltinFnIdLog10, "log10", 2);
+    create_builtin_fn(g, BuiltinFnIdFabs, "fabs", 2);
+    create_builtin_fn(g, BuiltinFnIdFloor, "floor", 2);
+    create_builtin_fn(g, BuiltinFnIdCeil, "ceil", 2);
+    create_builtin_fn(g, BuiltinFnIdTrunc, "trunc", 2);
+    //Needs library support on Windows
+    //create_builtin_fn(g, BuiltinFnIdNearbyInt, "nearbyInt", 2);
+    create_builtin_fn(g, BuiltinFnIdRound, "round", 2);
+    create_builtin_fn(g, BuiltinFnIdMulAdd, "mulAdd", 4);
    create_builtin_fn(g, BuiltinFnIdInlineCall, "inlineCall", SIZE_MAX);
    create_builtin_fn(g, BuiltinFnIdNoInlineCall, "noInlineCall", SIZE_MAX);
    create_builtin_fn(g, BuiltinFnIdNewStackCall, "newStackCall", SIZE_MAX);
--- a/src/ir.cpp
+++ b/src/ir.cpp
@ -777,6 +777,10 @@ static constexpr IrInstructionId ir_instruction_id(IrInstructionTestErrGen *) {
    return IrInstructionIdTestErrGen;
 }

+// Maps the IrInstructionMulAdd struct type to its IrInstructionId tag.
+static constexpr IrInstructionId ir_instruction_id(IrInstructionMulAdd *) {
+    return IrInstructionIdMulAdd;
+}
+
 static constexpr IrInstructionId ir_instruction_id(IrInstructionUnwrapErrCode *) {
    return IrInstructionIdUnwrapErrCode;
 }
@ -1037,8 +1041,8 @@ static constexpr IrInstructionId ir_instruction_id(IrInstructionMarkErrRetTraceP
    return IrInstructionIdMarkErrRetTracePtr;
 }

-static constexpr IrInstructionId ir_instruction_id(IrInstructionSqrt *) {
-    return IrInstructionIdSqrt;
+// Maps the IrInstructionFloatOp struct type to its IrInstructionId tag.
+static constexpr IrInstructionId ir_instruction_id(IrInstructionFloatOp *) {
+    return IrInstructionIdFloatOp;
 }

 static constexpr IrInstructionId ir_instruction_id(IrInstructionCheckRuntimeScope *) {
@ -2437,6 +2441,75 @@ static IrInstruction *ir_build_overflow_op(IrBuilder *irb, Scope *scope, AstNode
    return &instruction->base;
 }

+
+// Maps a single-operand float builtin to its spelling. With llvm_name set the
+// LLVM intrinsic spelling is returned, otherwise the Zig builtin spelling; the
+// two differ only for ln and nearbyInt. Any other BuiltinFnId is unreachable.
+//
+// TODO: Powi, Pow, minnum, maxnum, maximum, minimum, copysign,
+// lround, llround, lrint, llrint.
+// So far only the functions with uncomplicated types are covered.
+const char *float_op_to_name(BuiltinFnId op, bool llvm_name) {
+    switch (op) {
+    // Spelling depends on the requested naming scheme.
+    case BuiltinFnIdLn:
+        return llvm_name ? "log" : "ln";
+    case BuiltinFnIdNearbyInt:
+        return llvm_name ? "nearbyint" : "nearbyInt";
+
+    // Spelled identically in both schemes.
+    case BuiltinFnIdSqrt:
+        return "sqrt";
+    case BuiltinFnIdSin:
+        return "sin";
+    case BuiltinFnIdCos:
+        return "cos";
+    case BuiltinFnIdExp:
+        return "exp";
+    case BuiltinFnIdExp2:
+        return "exp2";
+    case BuiltinFnIdLog2:
+        return "log2";
+    case BuiltinFnIdLog10:
+        return "log10";
+    case BuiltinFnIdFabs:
+        return "fabs";
+    case BuiltinFnIdFloor:
+        return "floor";
+    case BuiltinFnIdCeil:
+        return "ceil";
+    case BuiltinFnIdTrunc:
+        return "trunc";
+    case BuiltinFnIdRound:
+        return "round";
+
+    default:
+        zig_unreachable();
+    }
+}
+
+// Builds an IrInstructionFloatOp for builtin `op` with operand `op1`.
+// `type` may be nullptr, in which case only the operand is referenced.
+static IrInstruction *ir_build_float_op(IrBuilder *irb, Scope *scope, AstNode *source_node,
+        IrInstruction *type, IrInstruction *op1, BuiltinFnId op)
+{
+    IrInstructionFloatOp *float_op = ir_build_instruction<IrInstructionFloatOp>(irb, scope, source_node);
+    float_op->op = op;
+    float_op->type = type;
+    float_op->op1 = op1;
+
+    if (type != nullptr) {
+        ir_ref_instruction(type, irb->current_basic_block);
+    }
+    ir_ref_instruction(op1, irb->current_basic_block);
+
+    return &float_op->base;
+}
+
+// Builds an IrInstructionMulAdd and references each of its four operands
+// (type first, then op1, op2, op3) from the current basic block.
+static IrInstruction *ir_build_mul_add(IrBuilder *irb, Scope *scope, AstNode *source_node,
+        IrInstruction *type_value, IrInstruction *op1, IrInstruction *op2, IrInstruction *op3)
+{
+    IrInstructionMulAdd *mul_add = ir_build_instruction<IrInstructionMulAdd>(irb, scope, source_node);
+    mul_add->type_value = type_value;
+    mul_add->op1 = op1;
+    mul_add->op2 = op2;
+    mul_add->op3 = op3;
+
+    IrInstruction *operands[] = { type_value, op1, op2, op3 };
+    for (IrInstruction *operand : operands) {
+        ir_ref_instruction(operand, irb->current_basic_block);
+    }
+
+    return &mul_add->base;
+}
+
 static IrInstruction *ir_build_align_of(IrBuilder *irb, Scope *scope, AstNode *source_node, IrInstruction *type_value) {
    IrInstructionAlignOf *instruction = ir_build_instruction<IrInstructionAlignOf>(irb, scope, source_node);
    instruction->type_value = type_value;
@ -3201,17 +3274,6 @@ static IrInstruction *ir_build_mark_err_ret_trace_ptr(IrBuilder *irb, Scope *sco
    return &instruction->base;
 }

-static IrInstruction *ir_build_sqrt(IrBuilder *irb, Scope *scope, AstNode *source_node, IrInstruction *type, IrInstruction *op) {
-    IrInstructionSqrt *instruction = ir_build_instruction<IrInstructionSqrt>(irb, scope, source_node);
-    instruction->type = type;
-    instruction->op = op;
-
-    if (type != nullptr) ir_ref_instruction(type, irb->current_basic_block);
-    ir_ref_instruction(op, irb->current_basic_block);
-
-    return &instruction->base;
-}
-
 static IrInstruction *ir_build_has_decl(IrBuilder *irb, Scope *scope, AstNode *source_node,
        IrInstruction *container, IrInstruction *name)
 {
@ -4380,6 +4442,33 @@ static IrInstruction *ir_gen_overflow_op(IrBuilder *irb, Scope *scope, AstNode *
    return ir_build_overflow_op(irb, scope, node, op, type_value, op1, op2, result_ptr, nullptr);
 }

+// Generates IR for a @mulAdd call: the four call arguments are generated in
+// order, and the first invalid one aborts with the invalid instruction.
+static IrInstruction *ir_gen_mul_add(IrBuilder *irb, Scope *scope, AstNode *node) {
+    assert(node->type == NodeTypeFnCallExpr);
+
+    // All four parameter nodes are fetched before any of them is generated.
+    AstNode *arg_nodes[4];
+    for (size_t i = 0; i < 4; i += 1) {
+        arg_nodes[i] = node->data.fn_call_expr.params.at(i);
+    }
+
+    // args[0] is the type argument; args[1..3] are the value operands.
+    IrInstruction *args[4];
+    for (size_t i = 0; i < 4; i += 1) {
+        args[i] = ir_gen_node(irb, arg_nodes[i], scope);
+        if (args[i] == irb->codegen->invalid_instruction)
+            return irb->codegen->invalid_instruction;
+    }
+
+    return ir_build_mul_add(irb, scope, node, args[0], args[1], args[2], args[3]);
+}
+
 static IrInstruction *ir_gen_this(IrBuilder *irb, Scope *orig_scope, AstNode *node) {
    for (Scope *it_scope = orig_scope; it_scope != nullptr; it_scope = it_scope->parent) {
        if (it_scope->id == ScopeIdDecls) {
@ -4708,6 +4797,19 @@ static IrInstruction *ir_gen_builtin_fn_call(IrBuilder *irb, Scope *scope, AstNo
                return ir_lval_wrap(irb, scope, bin_op, lval, result_loc);
            }
        case BuiltinFnIdSqrt:
+        case BuiltinFnIdSin:
+        case BuiltinFnIdCos:
+        case BuiltinFnIdExp:
+        case BuiltinFnIdExp2:
+        case BuiltinFnIdLn:
+        case BuiltinFnIdLog2:
+        case BuiltinFnIdLog10:
+        case BuiltinFnIdFabs:
+        case BuiltinFnIdFloor:
+        case BuiltinFnIdCeil:
+        case BuiltinFnIdTrunc:
+        case BuiltinFnIdNearbyInt:
+        case BuiltinFnIdRound:
            {
                AstNode *arg0_node = node->data.fn_call_expr.params.at(0);
                IrInstruction *arg0_value = ir_gen_node(irb, arg0_node, scope);
@ -4719,7 +4821,7 @@ static IrInstruction *ir_gen_builtin_fn_call(IrBuilder *irb, Scope *scope, AstNo
                if (arg1_value == irb->codegen->invalid_instruction)
                    return arg1_value;

-                IrInstruction *ir_sqrt = ir_build_sqrt(irb, scope, node, arg0_value, arg1_value);
+                IrInstruction *ir_sqrt = ir_build_float_op(irb, scope, node, arg0_value, arg1_value, builtin_fn->id);
                return ir_lval_wrap(irb, scope, ir_sqrt, lval, result_loc);
            }
        case BuiltinFnIdTruncate:
@ -5043,6 +5145,8 @@ static IrInstruction *ir_gen_builtin_fn_call(IrBuilder *irb, Scope *scope, AstNo
            return ir_lval_wrap(irb, scope, ir_gen_overflow_op(irb, scope, node, IrOverflowOpMul), lval, result_loc);
        case BuiltinFnIdShlWithOverflow:
            return ir_lval_wrap(irb, scope, ir_gen_overflow_op(irb, scope, node, IrOverflowOpShl), lval, result_loc);
+        case BuiltinFnIdMulAdd:
+            return ir_lval_wrap(irb, scope, ir_gen_mul_add(irb, scope, node), lval, result_loc);
        case BuiltinFnIdTypeName:
            {
                AstNode *arg0_node = node->data.fn_call_expr.params.at(0);
@ -22709,6 +22813,125 @@ static IrInstruction *ir_analyze_instruction_result_ptr(IrAnalyze *ira, IrInstru
    return ir_get_ref(ira, &instruction->base, result, true, false);
 }

+// Computes the fused multiply-add (op1 * op2) + op3 for one scalar constant and
+// stores it in out_val. float_type selects which union member is read and
+// written: x_bigfloat.value for comptime_float, x_f16 / x_f32 / x_f64 / x_f128
+// for sized floats. Any other type or bit width is unreachable.
+static void ir_eval_mul_add(IrAnalyze *ira, IrInstructionMulAdd *source_instr, ZigType *float_type,
+    ConstExprValue *op1, ConstExprValue *op2, ConstExprValue *op3, ConstExprValue *out_val) {
+    if (float_type->id == ZigTypeIdComptimeFloat) {
+        // SoftFloat's *M functions take the operands first and the result
+        // pointer last (same order as the 128-bit case below).
+        f128M_mulAdd(&op1->data.x_bigfloat.value, &op2->data.x_bigfloat.value, &op3->data.x_bigfloat.value,
+            &out_val->data.x_bigfloat.value);
+    } else if (float_type->id == ZigTypeIdFloat) {
+        switch (float_type->data.floating.bit_count) {
+            case 16:
+                out_val->data.x_f16 = f16_mulAdd(op1->data.x_f16, op2->data.x_f16, op3->data.x_f16);
+                break;
+            case 32:
+                out_val->data.x_f32 = fmaf(op1->data.x_f32, op2->data.x_f32, op3->data.x_f32);
+                break;
+            case 64:
+                out_val->data.x_f64 = fma(op1->data.x_f64, op2->data.x_f64, op3->data.x_f64);
+                break;
+            case 128:
+                f128M_mulAdd(&op1->data.x_f128, &op2->data.x_f128, &op3->data.x_f128, &out_val->data.x_f128);
+                break;
+            default:
+                zig_unreachable();
+        }
+    } else {
+        zig_unreachable();
+    }
+}
+
+// Analyzes @mulAdd(T, a, b, c). T must be a float type or a vector of floats;
+// all three operands are implicitly cast to T. When every operand is
+// comptime-known the result is folded with ir_eval_mul_add (element by element
+// for vectors); otherwise a runtime MulAdd instruction of type T is emitted.
+// Returns ira->codegen->invalid_instruction on any error.
+static IrInstruction *ir_analyze_instruction_mul_add(IrAnalyze *ira, IrInstructionMulAdd *instruction) {
+    IrInstruction *type_value = instruction->type_value->child;
+    if (type_is_invalid(type_value->value.type))
+        return ira->codegen->invalid_instruction;
+
+    ZigType *expr_type = ir_resolve_type(ira, type_value);
+    if (type_is_invalid(expr_type))
+        return ira->codegen->invalid_instruction;
+
+    // Only allow float types, and vectors of floats.
+    ZigType *float_type = (expr_type->id == ZigTypeIdVector) ? expr_type->data.vector.elem_type : expr_type;
+    if (float_type->id != ZigTypeIdFloat) {
+        ir_add_error(ira, type_value,
+            buf_sprintf("expected float or vector of float type, found '%s'", buf_ptr(&float_type->name)));
+        return ira->codegen->invalid_instruction;
+    }
+
+    IrInstruction *op1 = instruction->op1->child;
+    if (type_is_invalid(op1->value.type))
+        return ira->codegen->invalid_instruction;
+
+    IrInstruction *casted_op1 = ir_implicit_cast(ira, op1, expr_type);
+    if (type_is_invalid(casted_op1->value.type))
+        return ira->codegen->invalid_instruction;
+
+    IrInstruction *op2 = instruction->op2->child;
+    if (type_is_invalid(op2->value.type))
+        return ira->codegen->invalid_instruction;
+
+    IrInstruction *casted_op2 = ir_implicit_cast(ira, op2, expr_type);
+    if (type_is_invalid(casted_op2->value.type))
+        return ira->codegen->invalid_instruction;
+
+    IrInstruction *op3 = instruction->op3->child;
+    if (type_is_invalid(op3->value.type))
+        return ira->codegen->invalid_instruction;
+
+    IrInstruction *casted_op3 = ir_implicit_cast(ira, op3, expr_type);
+    if (type_is_invalid(casted_op3->value.type))
+        return ira->codegen->invalid_instruction;
+
+    if (instr_is_comptime(casted_op1) &&
+        instr_is_comptime(casted_op2) &&
+        instr_is_comptime(casted_op3)) {
+        ConstExprValue *op1_const = ir_resolve_const(ira, casted_op1, UndefBad);
+        if (!op1_const)
+            return ira->codegen->invalid_instruction;
+        ConstExprValue *op2_const = ir_resolve_const(ira, casted_op2, UndefBad);
+        if (!op2_const)
+            return ira->codegen->invalid_instruction;
+        ConstExprValue *op3_const = ir_resolve_const(ira, casted_op3, UndefBad);
+        if (!op3_const)
+            return ira->codegen->invalid_instruction;
+
+        IrInstruction *result = ir_const(ira, &instruction->base, expr_type);
+        ConstExprValue *out_val = &result->value;
+
+        if (expr_type->id == ZigTypeIdVector) {
+            expand_undef_array(ira->codegen, op1_const);
+            expand_undef_array(ira->codegen, op2_const);
+            expand_undef_array(ira->codegen, op3_const);
+            // Mark the output undef so expand_undef_array allocates its elements.
+            out_val->special = ConstValSpecialUndef;
+            expand_undef_array(ira->codegen, out_val);
+            size_t len = expr_type->data.vector.len;
+            for (size_t i = 0; i < len; i += 1) {
+                ConstExprValue *float_operand_op1 = &op1_const->data.x_array.data.s_none.elements[i];
+                ConstExprValue *float_operand_op2 = &op2_const->data.x_array.data.s_none.elements[i];
+                ConstExprValue *float_operand_op3 = &op3_const->data.x_array.data.s_none.elements[i];
+                ConstExprValue *float_out_val = &out_val->data.x_array.data.s_none.elements[i];
+                assert(float_operand_op1->type == float_type);
+                assert(float_operand_op2->type == float_type);
+                assert(float_operand_op3->type == float_type);
+                assert(float_out_val->type == float_type);
+                // Evaluate on the i-th scalar elements, not on the vector
+                // constants themselves (their data is an array, not a float).
+                ir_eval_mul_add(ira, instruction, float_type,
+                        float_operand_op1, float_operand_op2, float_operand_op3, float_out_val);
+                float_out_val->type = float_type;
+            }
+            out_val->type = expr_type;
+            out_val->special = ConstValSpecialStatic;
+        } else {
+            ir_eval_mul_add(ira, instruction, float_type, op1_const, op2_const, op3_const, out_val);
+        }
+        return result;
+    }
+
+    IrInstruction *result = ir_build_mul_add(&ira->new_irb,
+            instruction->base.scope, instruction->base.source_node,
+            type_value, casted_op1, casted_op2, casted_op3);
+    result->value.type = expr_type;
+    return result;
+}
+
 static IrInstruction *ir_analyze_instruction_test_err(IrAnalyze *ira, IrInstructionTestErrSrc *instruction) {
    IrInstruction *base_ptr = instruction->base_ptr->child;
    if (type_is_invalid(base_ptr->value.type))
@ -24542,70 +24765,254 @@ static IrInstruction *ir_analyze_instruction_mark_err_ret_trace_ptr(IrAnalyze *i
    return result;
 }

-static IrInstruction *ir_analyze_instruction_sqrt(IrAnalyze *ira, IrInstructionSqrt *instruction) {
-    ZigType *float_type = ir_resolve_type(ira, instruction->type->child);
-    if (type_is_invalid(float_type))
+// Evaluates the unary float builtin selected by source_instr->op on the constant
+// `op` and stores the result in `out_val`. The union member that is read and
+// written is picked from float_type: x_f16 / x_f32 / x_f64 / x_f128 for
+// ZigTypeIdFloat, or x_bigfloat.value for ZigTypeIdComptimeFloat (which is
+// handled on the 128-bit path). For 16-bit and 128-bit operands only
+// BuiltinFnIdSqrt is evaluated; every other listed builtin hits zig_panic.
+static void ir_eval_float_op(IrAnalyze *ira, IrInstructionFloatOp *source_instr, ZigType *float_type,
+    ConstExprValue *op, ConstExprValue *out_val) {
+    assert(ira && source_instr && float_type && out_val && op);
+    assert(float_type->id == ZigTypeIdFloat ||
+           float_type->id == ZigTypeIdComptimeFloat);
+
+    BuiltinFnId fop = source_instr->op;
+    unsigned bits;
+
+    switch (float_type->id) {
+    case ZigTypeIdComptimeFloat:
+        bits = 128;
+        break;
+    case ZigTypeIdFloat:
+        bits = float_type->data.floating.bit_count;
+        break;
+    default:
+        zig_unreachable();
+    }
+
+    switch (bits) {
+    case 16: {
+        switch (fop) {
+        case BuiltinFnIdSqrt:
+            out_val->data.x_f16 = f16_sqrt(op->data.x_f16);
+            break;
+        case BuiltinFnIdSin:
+        case BuiltinFnIdCos:
+        case BuiltinFnIdExp:
+        case BuiltinFnIdExp2:
+        case BuiltinFnIdLn:
+        case BuiltinFnIdLog10:
+        case BuiltinFnIdLog2:
+        case BuiltinFnIdFabs:
+        case BuiltinFnIdFloor:
+        case BuiltinFnIdCeil:
+        case BuiltinFnIdTrunc:
+        case BuiltinFnIdNearbyInt:
+        case BuiltinFnIdRound:
+            zig_panic("unimplemented f16 builtin");
+        default:
+            zig_unreachable();
+        };
+        break;
+    };
+    case 32: {
+        switch (fop) {
+        case BuiltinFnIdSqrt:
+            out_val->data.x_f32 = sqrtf(op->data.x_f32);
+            break;
+        case BuiltinFnIdSin:
+            out_val->data.x_f32 = sinf(op->data.x_f32);
+            break;
+        case BuiltinFnIdCos:
+            out_val->data.x_f32 = cosf(op->data.x_f32);
+            break;
+        case BuiltinFnIdExp:
+            out_val->data.x_f32 = expf(op->data.x_f32);
+            break;
+        case BuiltinFnIdExp2:
+            out_val->data.x_f32 = exp2f(op->data.x_f32);
+            break;
+        case BuiltinFnIdLn:
+            out_val->data.x_f32 = logf(op->data.x_f32);
+            break;
+        case BuiltinFnIdLog10:
+            out_val->data.x_f32 = log10f(op->data.x_f32);
+            break;
+        case BuiltinFnIdLog2:
+            out_val->data.x_f32 = log2f(op->data.x_f32);
+            break;
+        case BuiltinFnIdFabs:
+            out_val->data.x_f32 = fabsf(op->data.x_f32);
+            break;
+        case BuiltinFnIdFloor:
+            out_val->data.x_f32 = floorf(op->data.x_f32);
+            break;
+        case BuiltinFnIdCeil:
+            out_val->data.x_f32 = ceilf(op->data.x_f32);
+            break;
+        case BuiltinFnIdTrunc:
+            out_val->data.x_f32 = truncf(op->data.x_f32);
+            break;
+        case BuiltinFnIdNearbyInt:
+            out_val->data.x_f32 = nearbyintf(op->data.x_f32);
+            break;
+        case BuiltinFnIdRound:
+            out_val->data.x_f32 = roundf(op->data.x_f32);
+            break;
+        default:
+            zig_unreachable();
+        };
+        break;
+    };
+    case 64: {
+        switch (fop) {
+        case BuiltinFnIdSqrt:
+            out_val->data.x_f64 = sqrt(op->data.x_f64);
+            break;
+        case BuiltinFnIdSin:
+            out_val->data.x_f64 = sin(op->data.x_f64);
+            break;
+        case BuiltinFnIdCos:
+            out_val->data.x_f64 = cos(op->data.x_f64);
+            break;
+        case BuiltinFnIdExp:
+            out_val->data.x_f64 = exp(op->data.x_f64);
+            break;
+        case BuiltinFnIdExp2:
+            out_val->data.x_f64 = exp2(op->data.x_f64);
+            break;
+        case BuiltinFnIdLn:
+            out_val->data.x_f64 = log(op->data.x_f64);
+            break;
+        case BuiltinFnIdLog10:
+            out_val->data.x_f64 = log10(op->data.x_f64);
+            break;
+        case BuiltinFnIdLog2:
+            out_val->data.x_f64 = log2(op->data.x_f64);
+            break;
+        case BuiltinFnIdFabs:
+            out_val->data.x_f64 = fabs(op->data.x_f64);
+            break;
+        case BuiltinFnIdFloor:
+            out_val->data.x_f64 = floor(op->data.x_f64);
+            break;
+        case BuiltinFnIdCeil:
+            out_val->data.x_f64 = ceil(op->data.x_f64);
+            break;
+        case BuiltinFnIdTrunc:
+            out_val->data.x_f64 = trunc(op->data.x_f64);
+            break;
+        case BuiltinFnIdNearbyInt:
+            out_val->data.x_f64 = nearbyint(op->data.x_f64);
+            break;
+        case BuiltinFnIdRound:
+            out_val->data.x_f64 = round(op->data.x_f64);
+            break;
+        default:
+            zig_unreachable();
+        }
+        break;
+    };
+    case 128: {
+        // comptime_float and f128 share this path; only the storage location differs.
+        float128_t *out, *in;
+        if (float_type->id == ZigTypeIdComptimeFloat) {
+            out = &out_val->data.x_bigfloat.value;
+            in = &op->data.x_bigfloat.value;
+        } else {
+            out = &out_val->data.x_f128;
+            in = &op->data.x_f128;
+        }
+        switch (fop) {
+        case BuiltinFnIdSqrt:
+            f128M_sqrt(in, out);
+            break;
+        case BuiltinFnIdNearbyInt:
+        case BuiltinFnIdSin:
+        case BuiltinFnIdCos:
+        case BuiltinFnIdExp:
+        case BuiltinFnIdExp2:
+        case BuiltinFnIdLn:
+        case BuiltinFnIdLog10:
+        case BuiltinFnIdLog2:
+        case BuiltinFnIdFabs:
+        case BuiltinFnIdFloor:
+        case BuiltinFnIdCeil:
+        case BuiltinFnIdTrunc:
+        case BuiltinFnIdRound:
+            zig_panic("unimplemented f128 builtin");
+        default:
+            zig_unreachable();
+        }
+        break;
+    };
+    default:
+        zig_unreachable();
+    }
+}
+
+// Analyzes a unary float builtin (@sqrt, @sin, ...) identified by
+// instruction->op. The type argument must be a float, comptime_float, or a
+// vector of those. A comptime-known operand is folded through ir_eval_float_op
+// (element by element for vectors); otherwise a runtime FloatOp instruction of
+// type expr_type is built. Returns ira->codegen->invalid_instruction on error.
+static IrInstruction *ir_analyze_instruction_float_op(IrAnalyze *ira, IrInstructionFloatOp *instruction) {
+    IrInstruction *type = instruction->type->child;
+    if (type_is_invalid(type->value.type))
+        return ira->codegen->invalid_instruction;
+
+    ZigType *expr_type = ir_resolve_type(ira, type);
+    if (type_is_invalid(expr_type))
        return ira->codegen->invalid_instruction;

-    IrInstruction *op = instruction->op->child;
-    if (type_is_invalid(op->value.type))
-        return ira->codegen->invalid_instruction;
-
-    bool ok_type = float_type->id == ZigTypeIdComptimeFloat || float_type->id == ZigTypeIdFloat;
-    if (!ok_type) {
-        ir_add_error(ira, instruction->type, buf_sprintf("@sqrt does not support type '%s'", buf_ptr(&float_type->name)));
+    // Only allow float types, and vectors of floats.
+    ZigType *float_type = (expr_type->id == ZigTypeIdVector) ? expr_type->data.vector.elem_type : expr_type;
+    if (float_type->id != ZigTypeIdFloat && float_type->id != ZigTypeIdComptimeFloat) {
+        ir_add_error(ira, instruction->type, buf_sprintf("@%s does not support type '%s'", float_op_to_name(instruction->op, false), buf_ptr(&float_type->name)));
        return ira->codegen->invalid_instruction;
    }

-    IrInstruction *casted_op = ir_implicit_cast(ira, op, float_type);
-    if (type_is_invalid(casted_op->value.type))
+    IrInstruction *op1 = instruction->op1->child;
+    if (type_is_invalid(op1->value.type))
        return ira->codegen->invalid_instruction;

-    if (instr_is_comptime(casted_op)) {
-        ConstExprValue *val = ir_resolve_const(ira, casted_op, UndefBad);
-        if (!val)
+    // Cast to the full result type (not the element type) so that a vector
+    // operand stays a vector; for scalars expr_type and float_type are the same.
+    IrInstruction *casted_op1 = ir_implicit_cast(ira, op1, expr_type);
+    if (type_is_invalid(casted_op1->value.type))
+        return ira->codegen->invalid_instruction;
+
+    if (instr_is_comptime(casted_op1)) {
+        // Our comptime 16-bit and 128-bit support is quite limited.
+        if ((float_type->id == ZigTypeIdComptimeFloat ||
+            float_type->data.floating.bit_count == 16 ||
+            float_type->data.floating.bit_count == 128) &&
+            instruction->op != BuiltinFnIdSqrt) {
+            ir_add_error(ira, instruction->type, buf_sprintf("@%s does not support type '%s'", float_op_to_name(instruction->op, false), buf_ptr(&float_type->name)));
            return ira->codegen->invalid_instruction;
-
-        IrInstruction *result = ir_const(ira, &instruction->base, float_type);
-        ConstExprValue *out_val = &result->value;
-
-        if (float_type->id == ZigTypeIdComptimeFloat) {
-            bigfloat_sqrt(&out_val->data.x_bigfloat, &val->data.x_bigfloat);
-        } else if (float_type->id == ZigTypeIdFloat) {
-            switch (float_type->data.floating.bit_count) {
-                case 16:
-                    out_val->data.x_f16 = f16_sqrt(val->data.x_f16);
-                    break;
-                case 32:
-                    out_val->data.x_f32 = sqrtf(val->data.x_f32);
-                    break;
-                case 64:
-                    out_val->data.x_f64 = sqrt(val->data.x_f64);
-                    break;
-                case 128:
-                    f128M_sqrt(&val->data.x_f128, &out_val->data.x_f128);
-                    break;
-                default:
-                    zig_unreachable();
-            }
-        } else {
-            zig_unreachable();
        }

+        ConstExprValue *op1_const = ir_resolve_const(ira, casted_op1, UndefBad);
+        if (!op1_const)
+            return ira->codegen->invalid_instruction;
+
+        IrInstruction *result = ir_const(ira, &instruction->base, expr_type);
+        ConstExprValue *out_val = &result->value;
+
+        if (expr_type->id == ZigTypeIdVector) {
+            expand_undef_array(ira->codegen, op1_const);
+            // Mark the output undef so expand_undef_array allocates its elements.
+            out_val->special = ConstValSpecialUndef;
+            expand_undef_array(ira->codegen, out_val);
+            size_t len = expr_type->data.vector.len;
+            for (size_t i = 0; i < len; i += 1) {
+                ConstExprValue *float_operand_op1 = &op1_const->data.x_array.data.s_none.elements[i];
+                ConstExprValue *float_out_val = &out_val->data.x_array.data.s_none.elements[i];
+                assert(float_operand_op1->type == float_type);
+                assert(float_out_val->type == float_type);
+                // Evaluate the i-th scalar element, not the vector constant itself.
+                ir_eval_float_op(ira, instruction, float_type,
+                        float_operand_op1, float_out_val);
+                float_out_val->type = float_type;
+            }
+            out_val->type = expr_type;
+            out_val->special = ConstValSpecialStatic;
+        } else {
+            ir_eval_float_op(ira, instruction, float_type, op1_const, out_val);
+        }
        return result;
    }

    ir_assert(float_type->id == ZigTypeIdFloat, &instruction->base);
-    if (float_type->data.floating.bit_count != 16 &&
-        float_type->data.floating.bit_count != 32 &&
-        float_type->data.floating.bit_count != 64) {
-        ir_add_error(ira, instruction->type, buf_sprintf("compiler TODO: add implementation of sqrt for '%s'", buf_ptr(&float_type->name)));
-        return ira->codegen->invalid_instruction;
-    }

-    IrInstruction *result = ir_build_sqrt(&ira->new_irb, instruction->base.scope,
-            instruction->base.source_node, nullptr, casted_op);
-    result->value.type = float_type;
+    IrInstruction *result = ir_build_float_op(&ira->new_irb, instruction->base.scope,
+            instruction->base.source_node, nullptr, casted_op1, instruction->op);
+    result->value.type = expr_type;
    return result;
 }

@ -25143,8 +25550,10 @@ static IrInstruction *ir_analyze_instruction_base(IrAnalyze *ira, IrInstruction
            return ir_analyze_instruction_merge_err_ret_traces(ira, (IrInstructionMergeErrRetTraces *)instruction);
        case IrInstructionIdMarkErrRetTracePtr:
            return ir_analyze_instruction_mark_err_ret_trace_ptr(ira, (IrInstructionMarkErrRetTracePtr *)instruction);
-        case IrInstructionIdSqrt:
-            return ir_analyze_instruction_sqrt(ira, (IrInstructionSqrt *)instruction);
+        case IrInstructionIdFloatOp:
+            return ir_analyze_instruction_float_op(ira, (IrInstructionFloatOp *)instruction);
+        case IrInstructionIdMulAdd:
+            return ir_analyze_instruction_mul_add(ira, (IrInstructionMulAdd *)instruction);
        case IrInstructionIdIntToErr:
            return ir_analyze_instruction_int_to_err(ira, (IrInstructionIntToErr *)instruction);
        case IrInstructionIdErrToInt:
@ -25391,7 +25800,8 @@ bool ir_has_side_effects(IrInstruction *instruction) {
        case IrInstructionIdCoroFree:
        case IrInstructionIdCoroPromise:
        case IrInstructionIdPromiseResultType:
-        case IrInstructionIdSqrt:
+        case IrInstructionIdFloatOp:
+        case IrInstructionIdMulAdd:
        case IrInstructionIdAtomicLoad:
        case IrInstructionIdIntCast:
        case IrInstructionIdFloatCast:
--- a/src/ir.hpp
+++ b/src/ir.hpp
@ -26,5 +26,6 @@ bool ir_has_side_effects(IrInstruction *instruction);
 struct IrAnalyze;
 ConstExprValue *const_ptr_pointee(IrAnalyze *ira, CodeGen *codegen, ConstExprValue *const_val,
        AstNode *source_node);
+const char *float_op_to_name(BuiltinFnId op, bool llvm_name);

 #endif
--- a/src/ir_print.cpp
+++ b/src/ir_print.cpp
@ -1563,15 +1563,32 @@ static void ir_print_mark_err_ret_trace_ptr(IrPrint *irp, IrInstructionMarkErrRe
    fprintf(irp->f, ")");
 }

-static void ir_print_sqrt(IrPrint *irp, IrInstructionSqrt *instruction) {
-    fprintf(irp->f, "@sqrt(");
+// Writes "@<name>(<type>,<op1>)" to irp->f, where <name> comes from
+// float_op_to_name(instruction->op, false) and <type> is printed as "null"
+// when the instruction carries no type operand.
+static void ir_print_float_op(IrPrint *irp, IrInstructionFloatOp *instruction) {
+
+    fprintf(irp->f, "@%s(", float_op_to_name(instruction->op, false));
    if (instruction->type != nullptr) {
        ir_print_other_instruction(irp, instruction->type);
    } else {
        fprintf(irp->f, "null");
    }
    fprintf(irp->f, ",");
-    ir_print_other_instruction(irp, instruction->op);
+    ir_print_other_instruction(irp, instruction->op1);
+    fprintf(irp->f, ")");
+}
+
+// Writes "@mulAdd(<type>,<op1>,<op2>,<op3>)" to irp->f; <type> is printed as
+// "null" when instruction->type_value is absent.
+static void ir_print_mul_add(IrPrint *irp, IrInstructionMulAdd *instruction) {
+    fprintf(irp->f, "@mulAdd(");
+    if (instruction->type_value != nullptr) {
+        ir_print_other_instruction(irp, instruction->type_value);
+    } else {
+        fprintf(irp->f, "null");
+    }
+    fprintf(irp->f, ",");
+    ir_print_other_instruction(irp, instruction->op1);
+    fprintf(irp->f, ",");
+    ir_print_other_instruction(irp, instruction->op2);
+    fprintf(irp->f, ",");
+    ir_print_other_instruction(irp, instruction->op3);
    fprintf(irp->f, ")");
 }

@ -2053,8 +2070,11 @@ static void ir_print_instruction(IrPrint *irp, IrInstruction *instruction) {
        case IrInstructionIdMarkErrRetTracePtr:
            ir_print_mark_err_ret_trace_ptr(irp, (IrInstructionMarkErrRetTracePtr *)instruction);
            break;
-        case IrInstructionIdSqrt:
-            ir_print_sqrt(irp, (IrInstructionSqrt *)instruction);
+        case IrInstructionIdFloatOp:
+            ir_print_float_op(irp, (IrInstructionFloatOp *)instruction);
+            break;
+        case IrInstructionIdMulAdd:
+            ir_print_mul_add(irp, (IrInstructionMulAdd *)instruction);
            break;
        case IrInstructionIdAtomicLoad:
            ir_print_atomic_load(irp, (IrInstructionAtomicLoad *)instruction);
--- a/std/fmt.zig
+++ b/std/fmt.zig
--- a/std/math/big/int.zig
+++ b/std/math/big/int.zig
@ -519,6 +519,7 @@ pub const Int = struct {
    pub fn format(
        self: Int,
        comptime fmt: []const u8,
+        comptime options: std.fmt.FormatOptions,
        context: var,
        comptime FmtError: type,
        output: fn (@typeOf(context), []const u8) FmtError!void,
--- a/std/net.zig
+++ b/std/net.zig
@ -33,7 +33,6 @@ pub const Address = struct {

    pub fn initIp6(ip6: *const Ip6Addr, _port: u16) Address {
        return Address{
-            .family = os.AF_INET6,
            .os_addr = os.sockaddr{
                .in6 = os.sockaddr_in6{
                    .family = os.AF_INET6,
--- a/std/special/build_runner.zig
+++ b/std/special/build_runner.zig
@ -167,7 +167,7 @@ fn usage(builder: *Builder, already_ran_build: bool, out_stream: var) !void {

    const allocator = builder.allocator;
    for (builder.top_level_steps.toSliceConst()) |top_level_step| {
-        try out_stream.print("  {s22} {}\n", top_level_step.step.name, top_level_step.description);
+        try out_stream.print("  {s:22} {}\n", top_level_step.step.name, top_level_step.description);
    }

    try out_stream.write(
@ -188,7 +188,7 @@ fn usage(builder: *Builder, already_ran_build: bool, out_stream: var) !void {
        for (builder.available_options_list.toSliceConst()) |option| {
            const name = try fmt.allocPrint(allocator, "  -D{}=[{}]", option.name, Builder.typeIdName(option.type_id));
            defer allocator.free(name);
-            try out_stream.print("{s24} {}\n", name, option.description);
+            try out_stream.print("{s:24} {}\n", name, option.description);
        }
    }

--- a/std/special/c.zig
+++ b/std/special/c.zig
@ -254,19 +254,32 @@ export fn fmod(x: f64, y: f64) f64 {

 // TODO add intrinsics for these (and probably the double version too)
 // and have the math stuff use the intrinsic. same as @mod and @rem
-export fn floorf(x: f32) f32 {
-    return math.floor(x);
-}
-export fn ceilf(x: f32) f32 {
-    return math.ceil(x);
-}
-export fn floor(x: f64) f64 {
-    return math.floor(x);
-}
-export fn ceil(x: f64) f64 {
-    return math.ceil(x);
-}
-
+// Exported C math symbols; each one forwards to the matching std.math routine.
+export fn floorf(x: f32) f32 {
+    return math.floor(x);
+}
+export fn ceilf(x: f32) f32 {
+    return math.ceil(x);
+}
+export fn floor(x: f64) f64 {
+    return math.floor(x);
+}
+export fn ceil(x: f64) f64 {
+    return math.ceil(x);
+}
+export fn fma(a: f64, b: f64, c: f64) f64 {
+    return math.fma(f64, a, b, c);
+}
+export fn fmaf(a: f32, b: f32, c: f32) f32 {
+    return math.fma(f32, a, b, c);
+}
+export fn sin(a: f64) f64 {
+    return math.sin(a);
+}
+export fn sinf(a: f32) f32 {
+    return math.sin(a);
+}
+export fn cos(a: f64) f64 {
+    return math.cos(a);
+}
+export fn cosf(a: f32) f32 {
+    return math.cos(a);
+}
+export fn exp(a: f64) f64 {
+    return math.exp(a);
+}
+export fn expf(a: f32) f32 {
+    return math.exp(a);
+}
+export fn exp2(a: f64) f64 {
+    return math.exp2(a);
+}
+export fn exp2f(a: f32) f32 {
+    return math.exp2(a);
+}
+// C's natural logarithm is named `log`; std.math calls it `ln`.
+export fn log(a: f64) f64 {
+    return math.ln(a);
+}
+export fn logf(a: f32) f32 {
+    return math.ln(a);
+}
+export fn log2(a: f64) f64 {
+    return math.log2(a);
+}
+export fn log2f(a: f32) f32 {
+    return math.log2(a);
+}
+export fn log10(a: f64) f64 {
+    return math.log10(a);
+}
+export fn log10f(a: f32) f32 {
+    return math.log10(a);
+}
+export fn fabs(a: f64) f64 {
+    return math.fabs(a);
+}
+export fn fabsf(a: f32) f32 {
+    return math.fabs(a);
+}
+export fn trunc(a: f64) f64 {
+    return math.trunc(a);
+}
+export fn truncf(a: f32) f32 {
+    return math.trunc(a);
+}
+export fn round(a: f64) f64 {
+    return math.round(a);
+}
+export fn roundf(a: f32) f32 {
+    return math.round(a);
+}
+
 fn generic_fmod(comptime T: type, x: T, y: T) T {
    @setRuntimeSafety(false);

--- a/std/special/compiler_rt.zig
+++ b/std/special/compiler_rt.zig
@ -405,15 +405,15 @@ const use_thumb_1 = usesThumb1(builtin.arch);

 fn usesThumb1(arch: builtin.Arch) bool {
    return switch (arch) {
-        .arm => switch (arch.arm) {
+        .arm => |sub_arch| switch (sub_arch) {
            .v6m => true,
            else => false,
        },
-        .armeb => switch (arch.armeb) {
+        .armeb => |sub_arch| switch (sub_arch) {
            .v6m => true,
            else => false,
        },
-        .thumb => switch (arch.thumb) {
+        .thumb => |sub_arch| switch (sub_arch) {
            .v5,
            .v5te,
            .v4t,
@ -423,7 +423,7 @@ fn usesThumb1(arch: builtin.Arch) bool {
            => true,
            else => false,
        },
-        .thumbeb => switch (arch.thumbeb) {
+        .thumbeb => |sub_arch| switch (sub_arch) {
            .v5,
            .v5te,
            .v4t,
@ -471,6 +471,22 @@ test "usesThumb1" {
    //etc.
 }

+const use_thumb_1_pre_armv6 = usesThumb1PreArmv6(builtin.arch);
+
+/// Returns true only when `arch` is `.thumb` or `.thumbeb` and its
+/// sub-architecture is one of v4t, v5 or v5te; false for everything else.
+fn usesThumb1PreArmv6(arch: builtin.Arch) bool {
+    switch (arch) {
+        .thumb => |variant| switch (variant) {
+            .v4t, .v5, .v5te => return true,
+            else => return false,
+        },
+        .thumbeb => |variant| switch (variant) {
+            .v4t, .v5, .v5te => return true,
+            else => return false,
+        },
+        else => return false,
+    }
+}
+
 nakedcc fn __aeabi_memcpy() noreturn {
    @setRuntimeSafety(false);
    if (use_thumb_1) {
@ -505,7 +521,16 @@ nakedcc fn __aeabi_memmove() noreturn {

 nakedcc fn __aeabi_memset() noreturn {
    @setRuntimeSafety(false);
-    if (use_thumb_1) {
+    if (use_thumb_1_pre_armv6) {
+        asm volatile (
+            \\ eors    r1, r2
+            \\ eors    r2, r1
+            \\ eors    r1, r2
+            \\ push    {r7, lr}
+            \\ b       memset
+            \\ pop     {r7, pc}
+        );
+    } else if (use_thumb_1) {
        asm volatile (
            \\ mov     r3, r1
            \\ mov     r1, r2
@ -527,7 +552,15 @@ nakedcc fn __aeabi_memset() noreturn {

 nakedcc fn __aeabi_memclr() noreturn {
    @setRuntimeSafety(false);
-    if (use_thumb_1) {
+    if (use_thumb_1_pre_armv6) {
+        asm volatile (
+            \\ adds    r2, r1, #0
+            \\ movs    r1, #0
+            \\ push    {r7, lr}
+            \\ bl      memset
+            \\ pop     {r7, pc}
+        );
+    } else if (use_thumb_1) {
        asm volatile (
            \\ mov     r2, r1
            \\ movs    r1, #0
--- a/std/zig/parse.zig
+++ b/std/zig/parse.zig
@ -2833,8 +2833,8 @@ fn parseIf(arena: *Allocator, it: *TokenIterator, tree: *Tree, bodyParseFn: Node

    const else_token = eatToken(it, .Keyword_else) orelse return node;
    const payload = try parsePayload(arena, it, tree);
-    const else_expr = try expectNode(arena, it, tree, parseExpr, AstError{
-        .ExpectedExpr = AstError.ExpectedExpr{ .token = it.index },
+    const else_expr = try expectNode(arena, it, tree, bodyParseFn, AstError{
+        .InvalidToken = AstError.InvalidToken{ .token = it.index },
    });
    const else_node = try arena.create(Node.Else);
    else_node.* = Node.Else{
--- a/std/zig/parser_test.zig
+++ b/std/zig/parser_test.zig
@ -2234,6 +2234,18 @@ test "zig fmt: multiline string in array" {
    );
 }

+// Feeds testCanonical a function whose return type is an `if ... else ...`
+// expression (`if (mycond) i32 else void`).
+// NOTE(review): testCanonical is defined outside this view; presumably it
+// checks that the formatter leaves the source unchanged — confirm.
+test "zig fmt: if type expr" {
+    try testCanonical(
+        \\const mycond = true;
+        \\pub fn foo() if (mycond) i32 else void {
+        \\    if (mycond) {
+        \\        return 42;
+        \\    }
+        \\}
+        \\
+    );
+}
+
 const std = @import("std");
 const mem = std.mem;
 const warn = std.debug.warn;
--- a/test/compare_output.zig
+++ b/test/compare_output.zig
@ -122,7 +122,7 @@ pub fn addCases(cases: *tests.CompareOutputContext) void {
        \\
        \\pub fn main() void {
        \\    const stdout = &(io.getStdOut() catch unreachable).outStream().stream;
-        \\    stdout.print("Hello, world!\n{d4} {x3} {c}\n", u32(12), u16(0x12), u8('a')) catch unreachable;
+        \\    stdout.print("Hello, world!\n{d:4} {x:3} {c}\n", u32(12), u16(0x12), u8('a')) catch unreachable;
        \\}
    , "Hello, world!\n0012 012 a\n");

--- a/test/stage1/behavior.zig
+++ b/test/stage1/behavior.zig
@ -69,6 +69,8 @@ comptime {
    _ = @import("behavior/optional.zig");
    _ = @import("behavior/pointers.zig");
    _ = @import("behavior/popcount.zig");
+    _ = @import("behavior/muladd.zig");
+    _ = @import("behavior/floatop.zig");
    _ = @import("behavior/ptrcast.zig");
    _ = @import("behavior/pub_enum.zig");
    _ = @import("behavior/ref_var_in_if_after_if_2nd_switch_prong.zig");
--- a/test/stage1/behavior/floatop.zig
+++ b/test/stage1/behavior/floatop.zig
@ -0,0 +1,243 @@
+const expect = @import("std").testing.expect;
+const pi = @import("std").math.pi;
+const e = @import("std").math.e;
+
+// Exercises @sqrt twice: once evaluated at compile time and once executed
+// at runtime.
+test "@sqrt" {
+    comptime testSqrt();
+    testSqrt();
+}
+
+// Takes the square root of a perfect square in each float type and compares
+// it against the exact root.
+fn testSqrt() void {
+    var four: f16 = 4;
+    var nine: f32 = 9;
+    var twenty_five: f64 = 25;
+    const literal: comptime_float = 25.0;
+
+    expect(@sqrt(f16, four) == 2);
+    expect(@sqrt(f32, nine) == 3);
+    expect(@sqrt(f64, twenty_five) == 5);
+    // Same check on a comptime_float constant.
+    expect(@sqrt(comptime_float, literal) == 5.0);
+
+    // Disabled f128 case.
+    // Waiting on a c.zig implementation
+    //{
+    //    var a: f128 = 49;
+    //    expect(@sqrt(f128, a) == 7);
+    //}
+}
+
+// Exercises @sin both at compile time and at runtime.
+test "@sin" {
+    comptime testSin();
+    testSin();
+}
+
+// Checks sin(0) == 0 for f32 and f64; the f16 and f128 cases are disabled.
+fn testSin() void {
+    // TODO - this is actually useful and should be implemented
+    // (all the trig functions for f16)
+    // but will probably wait till self-hosted
+    //{
+    //    var a: f16 = pi;
+    //    expect(@sin(f16, a/2) == 1);
+    //}
+
+    var zero32: f32 = 0;
+    var zero64: f64 = 0;
+    expect(@sin(f32, zero32) == 0);
+    expect(@sin(f64, zero64) == 0);
+
+    // TODO: f128
+    //{
+    //    var a: f128 = pi;
+    //    expect(@sin(f128, a/2) == 1);
+    //}
+}
+
+// Exercises @cos both at compile time and at runtime.
+test "@cos" {
+    comptime testCos();
+    testCos();
+}
+
+// Checks cos(0) == 1 for f32 and f64.
+// Operands are `var`, presumably so the runtime pass is not constant-folded.
+fn testCos() void {
+    var zero32: f32 = 0;
+    var zero64: f64 = 0;
+
+    expect(@cos(f32, zero32) == 1);
+    expect(@cos(f64, zero64) == 1);
+}
+
+// Exercises @exp both at compile time and at runtime.
+test "@exp" {
+    comptime testExp();
+    testExp();
+}
+
+// Checks exp(0) == 1 for f32 and f64.
+fn testExp() void {
+    // f32
+    var zero32: f32 = 0;
+    expect(@exp(f32, zero32) == 1);
+    // f64
+    var zero64: f64 = 0;
+    expect(@exp(f64, zero64) == 1);
+}
+
+// Exercises @exp2 both at compile time and at runtime.
+test "@exp2" {
+    comptime testExp2();
+    testExp2();
+}
+
+// Checks that @exp2 of 2 compares equal to 4, once per float width
+// (f32 and f64).
+fn testExp2() void {
+    var two32: f32 = 2;
+    expect(@exp2(f32, two32) == 4);
+
+    var two64: f64 = 2;
+    expect(@exp2(f64, two64) == 4);
+}
+
+test "@ln" {
+    // Old musl (and glibc?), and our current math.ln implementation do not return 1
+    // so also accept those values.
+    comptime testLn();
+    testLn();
+}
+
+// Checks ln(e) for f32 and f64. Accepts exactly 1 or the largest value below 1
+// in the same type (bit patterns 0x3f7fffff and 0x3fefffffffffffff).
+fn testLn() void {
+    var e32: f32 = e;
+    expect(@ln(f32, e32) == 1 or @ln(f32, e32) == @bitCast(f32, u32(0x3f7fffff)));
+
+    var e64: f64 = e;
+    // Not 0x3ff0000000000000: that pattern is 1.0 itself, which `== 1` already covers.
+    expect(@ln(f64, e64) == 1 or @ln(f64, e64) == @bitCast(f64, u64(0x3fefffffffffffff)));
+}
+
+// Exercises @log2 both at compile time and at runtime.
+test "@log2" {
+    comptime testLog2();
+    testLog2();
+}
+
+// Checks log2(4) == 2 for f32 and f64.
+fn testLog2() void {
+    var four32: f32 = 4;
+    var four64: f64 = 4;
+
+    // 4 is a power of two, so the expected logarithm is the integer 2.
+    expect(@log2(f32, four32) == 2);
+    expect(@log2(f64, four64) == 2);
+}
+
+// Exercises @log10 both at compile time and at runtime.
+test "@log10" {
+    comptime testLog10();
+    testLog10();
+}
+
+// Checks log10 on powers of ten: 100 -> 2 as f32, 1000 -> 3 as f64.
+fn testLog10() void {
+    var hundred: f32 = 100;
+    var thousand: f64 = 1000;
+
+    // Note the two widths deliberately use different inputs.
+    expect(@log10(f32, hundred) == 2);
+    expect(@log10(f64, thousand) == 3);
+}
+
+// Exercises @fabs both at compile time and at runtime.
+test "@fabs" {
+    comptime testFabs();
+    testFabs();
+}
+
+// Checks that @fabs maps both -2.5 and 2.5 to 2.5, first for f32 and then
+// for f64.
+fn testFabs() void {
+    var neg32: f32 = -2.5;
+    var pos32: f32 = 2.5;
+    expect(@fabs(f32, neg32) == 2.5);
+    expect(@fabs(f32, pos32) == 2.5);
+
+    var neg64: f64 = -2.5;
+    var pos64: f64 = 2.5;
+    expect(@fabs(f64, neg64) == 2.5);
+    expect(@fabs(f64, pos64) == 2.5);
+}
+
+// Exercises @floor both at compile time and at runtime.
+test "@floor" {
+    comptime testFloor();
+    testFloor();
+}
+
+// Checks @floor on positive non-integers: 2.1 -> 2 (f32) and 3.5 -> 3 (f64).
+fn testFloor() void {
+    var x32: f32 = 2.1;
+    var x64: f64 = 3.5;
+
+    // Both inputs are positive, so the expected result is the integer part.
+    expect(@floor(f32, x32) == 2);
+    expect(@floor(f64, x64) == 3);
+}
+
+// Exercises @ceil both at compile time and at runtime.
+test "@ceil" {
+    comptime testCeil();
+    testCeil();
+}
+
+// Checks @ceil on positive non-integers: 2.1 -> 3 (f32) and 3.5 -> 4 (f64).
+fn testCeil() void {
+    // f32
+    var x32: f32 = 2.1;
+    expect(@ceil(f32, x32) == 3);
+    // f64
+    var x64: f64 = 3.5;
+    expect(@ceil(f64, x64) == 4);
+}
+
+// Exercises @trunc both at compile time and at runtime.
+test "@trunc" {
+    comptime testTrunc();
+    testTrunc();
+}
+
+// Checks @trunc on one positive and one negative input.
+fn testTrunc() void {
+    var pos32: f32 = 2.1;
+    var neg64: f64 = -3.5;
+
+    expect(@trunc(f32, pos32) == 2);
+    // The negative input is expected to move toward zero: -3.5 -> -3.
+    expect(@trunc(f64, neg64) == -3);
+}
+
+// This is waiting on library support for the Windows build (not sure why the others don't need it)
+//test "@nearbyInt" {
+//    comptime testNearbyInt();
+//    testNearbyInt();
+//}
+
+//fn testNearbyInt() void {
+//    {
+//        var a: f32 = 2.1;
+//        expect(@nearbyInt(f32, a) == 2);
+//    }
+//    {
+//        var a: f64 = -3.75;
+//        expect(@nearbyInt(f64, a) == -4);
+//    }
+//}
--- a/test/stage1/behavior/muladd.zig
+++ b/test/stage1/behavior/muladd.zig
@ -0,0 +1,34 @@
+const expect = @import("std").testing.expect;
+
+// Exercises @mulAdd both at compile time and at runtime.
+test "@mulAdd" {
+    comptime testMulAdd();
+    testMulAdd();
+}
+
+// Checks 5.5 * 2.5 + 6.25 == 20 through @mulAdd for f16, f32 and f64.
+// The f128 case is still disabled.
+fn testMulAdd() void {
+    var x16: f16 = 5.5;
+    var y16: f16 = 2.5;
+    var z16: f16 = 6.25;
+    expect(@mulAdd(f16, x16, y16, z16) == 20);
+
+    var x32: f32 = 5.5;
+    var y32: f32 = 2.5;
+    var z32: f32 = 6.25;
+    expect(@mulAdd(f32, x32, y32, z32) == 20);
+
+    var x64: f64 = 5.5;
+    var y64: f64 = 2.5;
+    var z64: f64 = 6.25;
+    expect(@mulAdd(f64, x64, y64, z64) == 20);
+
+    // Awaits implementation in libm.zig
+    //{
+    //    var a: f128 = 5.5;
+    //    var b: f128 = 2.5;
+    //    var c: f128 = 6.25;
+    //    expect(@mulAdd(f128, a, b, c) == 20);
+    //}
+}