/* zig/lib/include/ia32intrin.h */

/* ===-------- ia32intrin.h ---------------------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __X86INTRIN_H
#error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead."
#endif

#ifndef __IA32INTRIN_H
#define __IA32INTRIN_H

/** Returns the index of the least significant set bit of the operand. The
 *  result is undefined when the operand is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSF </c> instruction or the
 *  <c> TZCNT </c> instruction.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer holding the zero-based position of the lowest
 *     set bit, counted from bit 0.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsfd(int __A)
{
  /* __builtin_ctz works on unsigned int; make the conversion explicit. */
  const unsigned int __bits = (unsigned int)__A;
  return __builtin_ctz(__bits);
}

/** Returns the index of the most significant set bit of the operand. The
 *  result is undefined when the operand is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSR </c> instruction or the
 *  <c> LZCNT </c> instruction and an <c> XOR </c>.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer holding the zero-based position of the highest
 *     set bit, counted from bit 0.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsrd(int __A)
{
  /* The number of leading zeros, subtracted from the top bit index (31),
   * gives the index of the highest set bit. */
  const int __lead = __builtin_clz((unsigned int)__A);
  return 31 - __lead;
}

/** Reverses the byte order of the operand, converting a little-endian value
 *  to big-endian or the other way around.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSWAP </c> instruction.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the bytes of \a __A in reverse order.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bswapd(int __A)
{
  /* The builtin is defined on unsigned values; convert in and out
   * explicitly instead of relying on implicit conversions. */
  const unsigned int __swapped = __builtin_bswap32((unsigned int)__A);
  return (int)__swapped;
}

/** Reverses the byte order of a 32-bit integer. Performs the same operation
 *  as the <c> __builtin_bswap32 </c> builtin it wraps.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the bytes of \a __A in reverse order.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bswap(int __A)
{
  /* Explicit conversions around the unsigned builtin. */
  const unsigned int __swapped = __builtin_bswap32((unsigned int)__A);
  return (int)__swapped;
}

/* Alternate spellings of the 32-bit bit scans: each macro forwards its
 * (parenthesized) argument to __bsfd or __bsrd respectively. */
#define _bit_scan_forward(__A) __bsfd((__A))
#define _bit_scan_reverse(__A) __bsrd((__A))

#ifdef __x86_64__
/** Returns the index of the least significant set bit of a 64-bit operand.
 *  The result is undefined when the operand is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSF </c> instruction or the
 *  <c> TZCNT </c> instruction.
 *
 *  \param __A
 *     A 64-bit integer operand.
 *  \returns A 32-bit integer holding the zero-based position of the lowest
 *     set bit, counted from bit 0.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsfq(long long __A)
{
  /* __builtin_ctzll works on unsigned long long; convert explicitly. */
  const unsigned long long __bits = (unsigned long long)__A;
  return __builtin_ctzll(__bits);
}

/** Returns the index of the most significant set bit of a 64-bit operand.
 *  The result is undefined when the operand is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSR </c> instruction or the
 *  <c> LZCNT </c> instruction and an <c> XOR </c>.
 *
 *  \param __A
 *     A 64-bit integer operand.
 *  \returns A 32-bit integer holding the zero-based position of the highest
 *     set bit, counted from bit 0.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsrq(long long __A)
{
  /* The number of leading zeros, subtracted from the top bit index (63),
   * gives the index of the highest set bit. */
  const int __lead = __builtin_clzll((unsigned long long)__A);
  return 63 - __lead;
}

/** Reverses the byte order of a 64-bit operand, converting a little-endian
 *  value to big-endian or the other way around.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSWAP </c> instruction.
 *
 *  \param __A
 *     A 64-bit integer operand.
 *  \returns A 64-bit integer containing the bytes of \a __A in reverse order.
 */
static __inline__ long long __attribute__((__always_inline__, __nodebug__))
__bswapq(long long __A)
{
  /* The builtin is defined on unsigned values; convert in and out
   * explicitly instead of relying on implicit conversions. */
  const unsigned long long __swapped =
      __builtin_bswap64((unsigned long long)__A);
  return (long long)__swapped;
}

/* Alternate spelling that forwards to __bswapq. */
#define _bswap64(__A) __bswapq((__A))
#endif

/** Returns how many bits of the source operand are set to 1.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> POPCNT </c> instruction or a
 *  sequence of arithmetic and logic ops to calculate it.
 *
 *  \param __A
 *     An unsigned 32-bit integer operand.
 *  \returns A 32-bit integer containing the number of bits with value 1 in the
 *     source operand.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__popcntd(unsigned int __A)
{
  const int __ones = __builtin_popcount(__A);
  return __ones;
}

/* Alternate spelling that forwards to __popcntd. */
#define _popcnt32(__A) __popcntd((__A))

#ifdef __x86_64__
/** Returns how many bits of the 64-bit source operand are set to 1.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> POPCNT </c> instruction or a
 *  sequence of arithmetic and logic ops to calculate it.
 *
 *  \param __A
 *     An unsigned 64-bit integer operand.
 *  \returns A 64-bit integer containing the number of bits with value 1 in the
 *     source operand.
 */
static __inline__ long long __attribute__((__always_inline__, __nodebug__))
__popcntq(unsigned long long __A)
{
  /* The builtin yields an int; widen it explicitly to the return type. */
  const int __ones = __builtin_popcountll(__A);
  return (long long)__ones;
}

/* Alternate spelling that forwards to __popcntq. */
#define _popcnt64(__A) __popcntq((__A))
#endif /* __x86_64__ */

#if defined(__x86_64__)
/** Reads the processor flags register on 64-bit targets.
 *
 *  \headerfile <x86intrin.h>
 *
 *  \returns The 64-bit value produced by the
 *     <c> __builtin_ia32_readeflags_u64 </c> builtin.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__readeflags(void)
{
  const unsigned long long __flags = __builtin_ia32_readeflags_u64();
  return __flags;
}

/** Writes a new value to the processor flags register on 64-bit targets.
 *
 *  \headerfile <x86intrin.h>
 *
 *  \param __f
 *     The 64-bit value handed to the
 *     <c> __builtin_ia32_writeeflags_u64 </c> builtin.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__writeeflags(unsigned long long __f)
{
  __builtin_ia32_writeeflags_u64(__f);
}

#else /* !defined(__x86_64__) */
/** Reads the processor flags register on 32-bit targets.
 *
 *  \headerfile <x86intrin.h>
 *
 *  \returns The 32-bit value produced by the
 *     <c> __builtin_ia32_readeflags_u32 </c> builtin.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__readeflags(void)
{
  const unsigned int __flags = __builtin_ia32_readeflags_u32();
  return __flags;
}

/** Writes a new value to the processor flags register on 32-bit targets.
 *
 *  \headerfile <x86intrin.h>
 *
 *  \param __f
 *     The 32-bit value handed to the
 *     <c> __builtin_ia32_writeeflags_u32 </c> builtin.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__writeeflags(unsigned int __f)
{
  __builtin_ia32_writeeflags_u32(__f);
}
#endif /* !defined(__x86_64__) */

/** Reinterprets the bits of a 32-bit float value as a 32-bit unsigned
 *  integer. No numeric conversion takes place; the object representation is
 *  copied unchanged.
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction in x86_64,
 *  and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
 *
 *  \param __A
 *     A 32-bit float value.
 *  \returns a 32-bit unsigned integer containing the bit pattern of \a __A.
 */
static __inline__ unsigned int __attribute__((__always_inline__))
_castf32_u32(float __A) {
  /* The local uses a reserved identifier so that a user macro defined before
   * this header is included (for example "#define D ...") cannot break it. */
  unsigned int __U;
  __builtin_memcpy(&__U, &__A, sizeof(__U));
  return __U;
}

/** Reinterprets the bits of a 64-bit float value as a 64-bit unsigned
 *  integer. No numeric conversion takes place; the object representation is
 *  copied unchanged.
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64,
 *  and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
 *
 *  \param __A
 *     A 64-bit float value.
 *  \returns a 64-bit unsigned integer containing the bit pattern of \a __A.
 */
static __inline__ unsigned long long __attribute__((__always_inline__))
_castf64_u64(double __A) {
  /* The local uses a reserved identifier so that a user macro defined before
   * this header is included (for example "#define D ...") cannot break it. */
  unsigned long long __U;
  __builtin_memcpy(&__U, &__A, sizeof(__U));
  return __U;
}

/** Reinterprets the bits of a 32-bit unsigned integer as a 32-bit float
 *  value. No numeric conversion takes place; the object representation is
 *  copied unchanged.
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction in x86_64,
 *  and corresponds to the <c> FLDS </c> instruction in ia32.
 *
 *  \param __A
 *     A 32-bit unsigned integer value.
 *  \returns a 32-bit float value whose bit pattern is \a __A.
 */
static __inline__ float __attribute__((__always_inline__))
_castu32_f32(unsigned int __A) {
  /* The local uses a reserved identifier so that a user macro defined before
   * this header is included (for example "#define D ...") cannot break it. */
  float __F;
  __builtin_memcpy(&__F, &__A, sizeof(__F));
  return __F;
}

/** Reinterprets the bits of a 64-bit unsigned integer as a 64-bit float
 *  value. No numeric conversion takes place; the object representation is
 *  copied unchanged.
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64,
 *  and corresponds to the <c> FLDL </c> instruction in ia32.
 *
 *  \param __A
 *     A 64-bit unsigned integer value.
 *  \returns a 64-bit float value whose bit pattern is \a __A.
 */
static __inline__ double __attribute__((__always_inline__))
_castu64_f64(unsigned long long __A) {
  /* The local uses a reserved identifier so that a user macro defined before
   * this header is included (for example "#define D ...") cannot break it. */
  double __F;
  __builtin_memcpy(&__F, &__A, sizeof(__F));
  return __F;
}

/** Combines an unsigned integer operand with the CRC-32C checksum of an
 *     unsigned char operand.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32B </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a  __D.
 *  \param __D
 *     An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32b(unsigned int __C, unsigned char __D)
{
  const unsigned int __crc = __builtin_ia32_crc32qi(__C, __D);
  return __crc;
}

/** Combines an unsigned integer operand with the CRC-32C checksum of an
 *     unsigned short operand.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32W </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a  __D.
 *  \param __D
 *     An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32w(unsigned int __C, unsigned short __D)
{
  const unsigned int __crc = __builtin_ia32_crc32hi(__C, __D);
  return __crc;
}

/** Combines an unsigned integer operand with the CRC-32C checksum of a
 *     second unsigned integer operand.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32D </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a  __D.
 *  \param __D
 *     An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32d(unsigned int __C, unsigned int __D)
{
  const unsigned int __crc = __builtin_ia32_crc32si(__C, __D);
  return __crc;
}

#ifdef __x86_64__
/** Combines an unsigned integer operand with the CRC-32C checksum of an
 *     unsigned 64-bit integer operand.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32Q </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a  __D.
 *  \param __D
 *     An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32q(unsigned long long __C, unsigned long long __D)
{
  const unsigned long long __crc = __builtin_ia32_crc32di(__C, __D);
  return __crc;
}
#endif /* __x86_64__ */

/** Reads a performance-monitoring counter through the
 *  <c> __builtin_ia32_rdpmc </c> builtin.
 *
 *  \param __A
 *     The counter selector passed unchanged to the builtin.
 *  \returns The 64-bit value returned by the builtin.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__rdpmc(int __A)
{
  const unsigned long long __count = __builtin_ia32_rdpmc(__A);
  return __count;
}

/** Reads the time stamp counter through the
 *  <c> __builtin_ia32_rdtscp </c> builtin.
 *
 *  \param __A
 *     Pointer to an unsigned int that is handed to the builtin; presumably it
 *     receives the auxiliary value reported by the <c> RDTSCP </c>
 *     instruction (confirm against the instruction reference).
 *  \returns The 64-bit value returned by the builtin.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__rdtscp(unsigned int *__A)
{
  const unsigned long long __tsc = __builtin_ia32_rdtscp(__A);
  return __tsc;
}

/* Single-underscore spelling that forwards to __rdtsc(). */
#define _rdtsc() __rdtsc()

/* Single-underscore spelling that forwards to __rdpmc(). The argument is
 * parenthesized, like in the other forwarding macros of this header, so that
 * it always reaches __rdpmc() as one expression. */
#define _rdpmc(A) __rdpmc((A))

/** Invokes the <c> __builtin_ia32_wbinvd </c> builtin, which corresponds to
 *  the <c> WBINVD </c> (write back and invalidate cache) instruction.
 *
 *  Takes no arguments and returns nothing.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_wbinvd(void)
{
  /* Executed purely for its side effect on the processor caches. */
  __builtin_ia32_wbinvd();
}

static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
__rolb(unsigned char __X, int __C) {
  return __builtin_rotateleft8(__X, __C);
}

static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
__rorb(unsigned char __X, int __C) {
  return __builtin_rotateright8(__X, __C);
}

static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
__rolw(unsigned short __X, int __C) {
  return __builtin_rotateleft16(__X, __C);
}

static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
__rorw(unsigned short __X, int __C) {
  return __builtin_rotateright16(__X, __C);
}

static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__rold(unsigned int __X, int __C) {
  return __builtin_rotateleft32(__X, __C);
}

static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__rord(unsigned int __X, int __C) {
  return __builtin_rotateright32(__X, __C);
}

#ifdef __x86_64__
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__rolq(unsigned long long __X, int __C) {
  return __builtin_rotateleft64(__X, __C);
}

static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__rorq(unsigned long long __X, int __C) {
  return __builtin_rotateright64(__X, __C);
}
#endif /* __x86_64__ */

#if !defined(_MSC_VER)
/* MSVC already provides these names as builtins, so they are only defined
 * for other compilers. */
/* _lrotl/_lrotr rotate a long: forward to the 64-bit helpers when long is
 * 64 bits wide (__LP64__), and to the 32-bit helpers otherwise. */
#if defined(__LP64__)
#define _lrotl(__X, __C) __rolq((__X), (__C))
#define _lrotr(__X, __C) __rorq((__X), (__C))
#else /* !defined(__LP64__) */
#define _lrotl(__X, __C) __rold((__X), (__C))
#define _lrotr(__X, __C) __rord((__X), (__C))
#endif /* defined(__LP64__) */
/* _rotl/_rotr always forward to the 32-bit helpers. */
#define _rotl(__X, __C) __rold((__X), (__C))
#define _rotr(__X, __C) __rord((__X), (__C))
#endif /* !defined(_MSC_VER) */

/* The 16-bit rotate names are not compiler builtins, so they are defined in
 * every mode; each forwards to the matching 16-bit helper. */
#define _rotwl(__X, __C) __rolw((__X), (__C))
#define _rotwr(__X, __C) __rorw((__X), (__C))

#endif /* __IA32INTRIN_H */