/** * This file has no copyright assigned and is placed in the Public Domain. * This file is part of the mingw-w64 runtime package. * No warranty is given; refer to the file DISCLAIMER.PD within this package. */ double fma(double x, double y, double z); #if defined(_ARM_) || defined(__arm__) /* Use hardware FMA on ARM. */ double fma(double x, double y, double z){ __asm__ ( "fmacd %0, %1, %2 \n" : "+w"(z) : "w"(x), "w"(y) ); return z; } #elif defined(_ARM64_) || defined(__aarch64__) /* Use hardware FMA on ARM64. */ double fma(double x, double y, double z){ __asm__ ( "fmadd %d0, %d1, %d2, %d0 \n" : "+w"(z) : "w"(x), "w"(y) ); return z; } #elif defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__) #include #include /* This is in accordance with the IEC 559 double-precision format. * Be advised that due to the hidden bit, the higher half actually has 26 bits. * Multiplying two 27-bit numbers will cause a 1-ULP error, which we cannot * avoid. It is kept in the very last position. */ typedef union iec559_double_ { struct __attribute__((__packed__)) { uint64_t mlo : 27; uint64_t mhi : 25; uint64_t exp : 11; uint64_t sgn : 1; }; double f; } iec559_double; static inline void break_down(iec559_double *restrict lo, iec559_double *restrict hi, double x) { hi->f = x; /* Erase low-order significant bits. `hi->f` now has only 26 significant bits. */ hi->mlo = 0; /* Store the low-order half. It will be normalized by the hardware. */ lo->f = x - hi->f; /* Preserve signness in case of zero. */ lo->sgn = hi->sgn; } double fma(double x, double y, double z) { /* POSIX-2013: 1. If x or y are NaN, a NaN shall be returned. 2. If x multiplied by y is an exact infinity and z is also an infinity but with the opposite sign, a domain error shall occur, and either a NaN (if supported), or an implementation-defined value shall be returned. 3. If one of x and y is infinite, the other is zero, and z is not a NaN, a domain error shall occur, and either a NaN (if supported), or an implementation-defined value shall be returned. 4. If one of x and y is infinite, the other is zero, and z is a NaN, a NaN shall be returned and a domain error may occur. 5. If x* y is not 0*Inf nor Inf*0 and z is a NaN, a NaN shall be returned. */ /* Check whether the result is finite. */ double ret = x * y + z; if(!isfinite(ret)) { return ret; /* If this naive check doesn't yield a finite value, the FMA isn't likely to return one either. Forward the value as is. */ } iec559_double xlo, xhi, ylo, yhi; break_down(&xlo, &xhi, x); break_down(&ylo, &yhi, y); /* The order of these four statements is essential. Don't move them around. */ ret = z; ret += xhi.f * yhi.f; /* The most significant item comes first. */ ret += xhi.f * ylo.f + xlo.f * yhi.f; /* They are equally significant. */ ret += xlo.f * ylo.f; /* The least significant item comes last. */ return ret; } #else #error Please add FMA implementation for this platform. #endif