zig/lib/include/f16cintrin.h

/*===---- f16cintrin.h - F16C intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#if !defined __IMMINTRIN_H
#error "Never use <f16cintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __F16CINTRIN_H
#define __F16CINTRIN_H

/* Define the default attributes for the functions in this file.
 *
 * Both variants request forced inlining, suppress debug info, and enable the
 * "f16c" target feature for the annotated function. They differ only in the
 * minimum vector width they declare: 128 bits for the functions operating on
 * 128-bit vectors, 256 bits for the ones that produce a 256-bit vector.
 * Both macros are #undef'd at the end of this header.
 */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256)))

/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,
 * but that's because icc can emulate these without f16c using a library call.
 * Since we don't do that let's leave these in f16cintrin.h.
 */

/// Converts a single 16-bit half-precision float value to a 32-bit float
///    value.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
///
/// \param __a
///    The 16-bit half-precision float value to convert.
/// \returns The 32-bit float result of the conversion.
static __inline float __DEFAULT_FN_ATTRS128
_cvtsh_ss(unsigned short __a)
{
  /* The packed conversion builtin takes a full vector, so the scalar input
   * goes into element 0 and the remaining elements are zero-filled. */
  __v8hi __halves = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
  __v4sf __floats = __builtin_ia32_vcvtph2ps(__halves);

  /* Only element 0 corresponds to the caller's value. */
  return __floats[0];
}

/// Converts a 32-bit single-precision float value to a 16-bit
///    half-precision float value.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned short _cvtss_sh(float a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 32-bit single-precision float value to be converted to a 16-bit
///    half-precision float value. It is placed in element 0 of a vector whose
///    other elements are zero.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns The converted 16-bit half-precision float value, taken from
///    element 0 of the conversion result.
/* The whole expansion is parenthesized so that it behaves as one primary
 * expression: without the outer parentheses, an operator applied to the macro
 * invocation (for example sizeof) would bind to the leading cast instead of
 * to the complete result. */
#define _cvtss_sh(a, imm) \
  ((unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){(a), 0, 0, 0}, \
                                                      (imm)))[0]))

/// Converts a 128-bit vector containing 32-bit float values into a
///    128-bit vector containing 16-bit half-precision float values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_cvtps_ph(__m128 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 128-bit vector containing 32-bit float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing converted 16-bit half-precision float
///    values. The lower 64 bits are used to store the converted 16-bit
///    half-precision floating-point values.
/* The whole expansion is parenthesized so that it behaves as one primary
 * expression: without the outer parentheses, a postfix operator written after
 * the macro invocation would apply to the builtin call before the (__m128i)
 * cast is performed. */
#define _mm_cvtps_ph(a, imm) \
  ((__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)))

/// Converts 16-bit half-precision float values held in a 128-bit vector
///    into a 128-bit vector of 32-bit float values.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
///
/// \param __a
///    A 128-bit vector containing 16-bit half-precision float values. Only
///    the lower 64 bits take part in the conversion.
/// \returns A 128-bit vector of [4 x float] holding the converted values.
static __inline __m128 __DEFAULT_FN_ATTRS128
_mm_cvtph_ps(__m128i __a)
{
  /* Reinterpret the integer vector as eight 16-bit elements for the builtin. */
  __v8hi __halves = (__v8hi)__a;
  __v4sf __floats = __builtin_ia32_vcvtph2ps(__halves);
  return (__m128)__floats;
}

/// Converts a 256-bit vector of [8 x float] into a 128-bit vector
///    containing 16-bit half-precision float values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 256-bit vector containing 32-bit single-precision float values to be
///    converted to 16-bit half-precision float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing the converted 16-bit half-precision
///    float values.
/* The whole expansion is parenthesized so that it behaves as one primary
 * expression: without the outer parentheses, a postfix operator written after
 * the macro invocation would apply to the builtin call before the (__m128i)
 * cast is performed. */
#define _mm256_cvtps_ph(a, imm) \
  ((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)))

/// Converts 16-bit half-precision float values held in a 128-bit vector
///    into a 256-bit vector of [8 x float].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
///
/// \param __a
///    A 128-bit vector containing the 16-bit half-precision float values to
///    convert to 32-bit single-precision float values.
/// \returns A vector of [8 x float] holding the converted 32-bit
///    single-precision float values.
static __inline __m256 __DEFAULT_FN_ATTRS256
_mm256_cvtph_ps(__m128i __a)
{
  /* Reinterpret the integer vector as eight 16-bit elements for the builtin. */
  __v8hi __halves = (__v8hi)__a;
  return (__m256)__builtin_ia32_vcvtph2ps256(__halves);
}

#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256

#endif /* __F16CINTRIN_H */
parseh: add c header files 2015-12-08 16:51:59 -08:00			`/*===---- f16cintrin.h - F16C intrinsics -----------------------------------===`
			`*`
update C headers to llvm9 upstream commit 1931d3cb20a00da732c5210b123656632982fde0 2019-07-19 13:50:45 -07:00			`* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.`
			`* See https://llvm.org/LICENSE.txt for license information.`
			`* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception`
parseh: add c header files 2015-12-08 16:51:59 -08:00			`*`
			`*===-----------------------------------------------------------------------===`
			`*/`

update c_headers/* to LLVM 7.0.0rc1 2018-08-04 23:20:05 -07:00			`#if !defined __IMMINTRIN_H`
			`#error "Never use <f16cintrin.h> directly; include <immintrin.h> instead."`
parseh: add c header files 2015-12-08 16:51:59 -08:00			`#endif`

			`#ifndef __F16CINTRIN_H`
			`#define __F16CINTRIN_H`

			`/* Define the default attributes for the functions in this file. */`
update c_headers/* to LLVM 7.0.0rc1 2018-08-04 23:20:05 -07:00			`#define __DEFAULT_FN_ATTRS128 \`
			`__attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(128)))`
			`#define __DEFAULT_FN_ATTRS256 \`
			`__attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256)))`
update C headers to clang 4.0.0 closes #389 2017-06-16 11:35:00 -07:00
update c_headers/* to LLVM 7.0.0rc1 2018-08-04 23:20:05 -07:00			`/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,`
			`* but that's because icc can emulate these without f16c using a library call.`
			`* Since we don't do that let's leave these in f16cintrin.h.`
			`*/`

			`/// Converts a 16-bit half-precision float value into a 32-bit float`
update C headers to clang 4.0.0 closes #389 2017-06-16 11:35:00 -07:00			`/// value.`
			`///`
			`/// \headerfile <x86intrin.h>`
			`///`
			`/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.`
			`///`
			`/// \param __a`
			`/// A 16-bit half-precision float value.`
			`/// \returns The converted 32-bit float value.`
update c_headers/* to LLVM 7.0.0rc1 2018-08-04 23:20:05 -07:00			`static __inline float __DEFAULT_FN_ATTRS128`
update C headers to clang 4.0.0 closes #389 2017-06-16 11:35:00 -07:00			`_cvtsh_ss(unsigned short __a)`
			`{`
update C headers to llvm9 upstream commit 1931d3cb20a00da732c5210b123656632982fde0 2019-07-19 13:50:45 -07:00			`__v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};`
			`__v4sf __r = __builtin_ia32_vcvtph2ps(__v);`
			`return __r[0];`
update C headers to clang 4.0.0 closes #389 2017-06-16 11:35:00 -07:00			`}`
parseh: add c header files 2015-12-08 16:51:59 -08:00
update c_headers/* to LLVM 7.0.0rc1 2018-08-04 23:20:05 -07:00			`/// Converts a 32-bit single-precision float value to a 16-bit`
update C headers to clang 4.0.0 closes #389 2017-06-16 11:35:00 -07:00			`/// half-precision float value.`
			`///`
			`/// \headerfile <x86intrin.h>`
			`///`
			`/// \code`
			`/// unsigned short _cvtss_sh(float a, const int imm);`
			`/// \endcode`
			`///`
			`/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.`
			`///`
			`/// \param a`
			`/// A 32-bit single-precision float value to be converted to a 16-bit`
			`/// half-precision float value.`
			`/// \param imm`
			`/// An immediate value controlling rounding using bits [2:0]: \n`
			`/// 000: Nearest \n`
			`/// 001: Down \n`
			`/// 010: Up \n`
			`/// 011: Truncate \n`
			`/// 1XX: Use MXCSR.RC for rounding`
			`/// \returns The converted 16-bit half-precision float value.`
update c_headers/* to LLVM 7.0.0rc1 2018-08-04 23:20:05 -07:00			`#define _cvtss_sh(a, imm) \`
update C headers to clang 5.0.0 2017-09-30 15:20:12 -07:00			`(unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \`
update c_headers/* to LLVM 7.0.0rc1 2018-08-04 23:20:05 -07:00			`(imm)))[0])`
parseh: add c header files 2015-12-08 16:51:59 -08:00
update c_headers/* to LLVM 7.0.0rc1 2018-08-04 23:20:05 -07:00			`/// Converts a 128-bit vector containing 32-bit float values into a`
update C headers to clang 4.0.0 closes #389 2017-06-16 11:35:00 -07:00			`/// 128-bit vector containing 16-bit half-precision float values.`
			`///`
			`/// \headerfile <x86intrin.h>`
			`///`
			`/// \code`
			`/// __m128i _mm_cvtps_ph(__m128 a, const int imm);`
			`/// \endcode`
			`///`
			`/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.`
			`///`
			`/// \param a`
			`/// A 128-bit vector containing 32-bit float values.`
			`/// \param imm`
			`/// An immediate value controlling rounding using bits [2:0]: \n`
			`/// 000: Nearest \n`
			`/// 001: Down \n`
			`/// 010: Up \n`
			`/// 011: Truncate \n`
			`/// 1XX: Use MXCSR.RC for rounding`
			`/// \returns A 128-bit vector containing converted 16-bit half-precision float`
			`/// values. The lower 64 bits are used to store the converted 16-bit`
			`/// half-precision floating-point values.`
update c_headers/* to LLVM 7.0.0rc1 2018-08-04 23:20:05 -07:00			`#define _mm_cvtps_ph(a, imm) \`
			`(__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm))`
parseh: add c header files 2015-12-08 16:51:59 -08:00
update c_headers/* to LLVM 7.0.0rc1 2018-08-04 23:20:05 -07:00			`/// Converts a 128-bit vector containing 16-bit half-precision float`
update C headers to clang 4.0.0 closes #389 2017-06-16 11:35:00 -07:00			`/// values into a 128-bit vector containing 32-bit float values.`
			`///`
			`/// \headerfile <x86intrin.h>`
			`///`
			`/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.`
			`///`
			`/// \param __a`
			`/// A 128-bit vector containing 16-bit half-precision float values. The lower`
			`/// 64 bits are used in the conversion.`
			`/// \returns A 128-bit vector of [4 x float] containing converted float values.`
update c_headers/* to LLVM 7.0.0rc1 2018-08-04 23:20:05 -07:00			`static __inline __m128 __DEFAULT_FN_ATTRS128`
parseh: add c header files 2015-12-08 16:51:59 -08:00			`_mm_cvtph_ps(__m128i __a)`
			`{`
			`return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);`
			`}`

update c_headers/* to LLVM 7.0.0rc1 2018-08-04 23:20:05 -07:00			`/// Converts a 256-bit vector of [8 x float] into a 128-bit vector`
			`/// containing 16-bit half-precision float values.`
			`///`
			`/// \headerfile <x86intrin.h>`
			`///`
			`/// \code`
			`/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);`
			`/// \endcode`
			`///`
			`/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.`
			`///`
			`/// \param a`
			`/// A 256-bit vector containing 32-bit single-precision float values to be`
			`/// converted to 16-bit half-precision float values.`
			`/// \param imm`
			`/// An immediate value controlling rounding using bits [2:0]: \n`
			`/// 000: Nearest \n`
			`/// 001: Down \n`
			`/// 010: Up \n`
			`/// 011: Truncate \n`
			`/// 1XX: Use MXCSR.RC for rounding`
			`/// \returns A 128-bit vector containing the converted 16-bit half-precision`
			`/// float values.`
			`#define _mm256_cvtps_ph(a, imm) \`
			`(__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm))`

			`/// Converts a 128-bit vector containing 16-bit half-precision float`
			`/// values into a 256-bit vector of [8 x float].`
			`///`
			`/// \headerfile <x86intrin.h>`
			`///`
			`/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.`
			`///`
			`/// \param __a`
			`/// A 128-bit vector containing 16-bit half-precision float values to be`
			`/// converted to 32-bit single-precision float values.`
			`/// \returns A vector of [8 x float] containing the converted 32-bit`
			`/// single-precision float values.`
			`static __inline __m256 __DEFAULT_FN_ATTRS256`
			`_mm256_cvtph_ps(__m128i __a)`
			`{`
			`return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);`
			`}`

			`#undef __DEFAULT_FN_ATTRS128`
			`#undef __DEFAULT_FN_ATTRS256`
parseh: add c header files 2015-12-08 16:51:59 -08:00
			`#endif /* __F16CINTRIN_H */`