update C headers to llvm9

upstream commit 1931d3cb20a00da732c5210b123656632982fde0
master
Andrew Kelley 2019-07-19 16:50:45 -04:00
parent 70da0762f7
commit 2117fbdae3
No known key found for this signature in database
GPG Key ID: 7C5F548F728501A9
130 changed files with 9519 additions and 3542 deletions

View File

@ -1,22 +1,8 @@
/*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -44,12 +30,32 @@
// implementation. Declaring in the global namespace and pulling into namespace
// std covers all of the known knowns.
#ifdef _OPENMP
#define __DEVICE__ static __attribute__((always_inline))
#else
#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
#endif
// For C++ 17 we need to include noexcept attribute to be compatible
// with the header-defined version. This may be removed once
// variant is supported.
#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L
#define __NOEXCEPT noexcept
#else
#define __NOEXCEPT
#endif
#if !(defined(_OPENMP) && defined(__cplusplus))
__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
__DEVICE__ long abs(long __n) { return ::labs(__n); }
__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
__DEVICE__ double abs(double __x) { return ::fabs(__x); }
#endif
// TODO: remove once variat is supported.
#if defined(_OPENMP) && defined(__cplusplus)
__DEVICE__ const float abs(const float __x) { return ::fabsf((float)__x); }
__DEVICE__ const double abs(const double __x) { return ::fabs((double)__x); }
#endif
__DEVICE__ float acos(float __x) { return ::acosf(__x); }
__DEVICE__ float asin(float __x) { return ::asinf(__x); }
__DEVICE__ float atan(float __x) { return ::atanf(__x); }
@ -58,9 +64,11 @@ __DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
__DEVICE__ float cos(float __x) { return ::cosf(__x); }
__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
__DEVICE__ float exp(float __x) { return ::expf(__x); }
__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
__DEVICE__ float fabs(float __x) __NOEXCEPT { return ::fabsf(__x); }
__DEVICE__ float floor(float __x) { return ::floorf(__x); }
__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
// TODO: remove when variant is supported
#ifndef _OPENMP
__DEVICE__ int fpclassify(float __x) {
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
FP_ZERO, __x);
@ -69,6 +77,7 @@ __DEVICE__ int fpclassify(double __x) {
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
FP_ZERO, __x);
}
#endif
__DEVICE__ float frexp(float __arg, int *__exp) {
return ::frexpf(__arg, __exp);
}
@ -448,7 +457,10 @@ using ::remainderf;
using ::remquof;
using ::rintf;
using ::roundf;
// TODO: remove once variant is supported
#ifndef _OPENMP
using ::scalblnf;
#endif
using ::scalbnf;
using ::sinf;
using ::sinhf;
@ -467,6 +479,7 @@ _GLIBCXX_END_NAMESPACE_VERSION
} // namespace std
#endif
#undef __NOEXCEPT
#undef __DEVICE__
#endif

View File

@ -1,22 +1,8 @@
/*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- __clang_cuda_device_functions.h - CUDA runtime support -----------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -24,15 +10,21 @@
#ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__
#define __CLANG_CUDA_DEVICE_FUNCTIONS_H__
#ifndef _OPENMP
#if CUDA_VERSION < 9000
#error This file is intended to be used with CUDA-9+ only.
#endif
#endif
// __DEVICE__ is a helper macro with common set of attributes for the wrappers
// we implement in this file. We need static in order to avoid emitting unused
// functions and __forceinline__ helps inlining these wrappers at -O1.
#pragma push_macro("__DEVICE__")
#ifdef _OPENMP
#define __DEVICE__ static __attribute__((always_inline))
#else
#define __DEVICE__ static __device__ __forceinline__
#endif
// libdevice provides fast low precision and slow full-recision implementations
// for some functions. Which one gets selected depends on
@ -45,6 +37,15 @@
#define __FAST_OR_SLOW(fast, slow) slow
#endif
// For C++ 17 we need to include noexcept attribute to be compatible
// with the header-defined version. This may be removed once
// variant is supported.
#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L
#define __NOEXCEPT noexcept
#else
#define __NOEXCEPT
#endif
__DEVICE__ int __all(int __a) { return __nvvm_vote_all(__a); }
__DEVICE__ int __any(int __a) { return __nvvm_vote_any(__a); }
__DEVICE__ unsigned int __ballot(int __a) { return __nvvm_vote_ballot(__a); }
@ -52,8 +53,13 @@ __DEVICE__ unsigned int __brev(unsigned int __a) { return __nv_brev(__a); }
__DEVICE__ unsigned long long __brevll(unsigned long long __a) {
return __nv_brevll(__a);
}
#if defined(__cplusplus)
__DEVICE__ void __brkpt() { asm volatile("brkpt;"); }
__DEVICE__ void __brkpt(int __a) { __brkpt(); }
#else
__DEVICE__ void __attribute__((overloadable)) __brkpt(void) { asm volatile("brkpt;"); }
__DEVICE__ void __attribute__((overloadable)) __brkpt(int __a) { __brkpt(); }
#endif
__DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b,
unsigned int __c) {
return __nv_byte_perm(__a, __b, __c);
@ -237,6 +243,9 @@ __DEVICE__ int __ffs(int __a) { return __nv_ffs(__a); }
__DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); }
__DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); }
__DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); }
#ifdef _MSC_VER
__DEVICE__ int __finitel(long double __a);
#endif
__DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); }
__DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); }
__DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); }
@ -445,8 +454,14 @@ __DEVICE__ float __int_as_float(int __a) { return __nv_int_as_float(__a); }
__DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); }
__DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); }
__DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); }
#ifdef _MSC_VER
__DEVICE__ int __isinfl(long double __a);
#endif
__DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); }
__DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); }
#ifdef _MSC_VER
__DEVICE__ int __isnanl(long double __a);
#endif
__DEVICE__ double __ll2double_rd(long long __a) {
return __nv_ll2double_rd(__a);
}
@ -520,8 +535,8 @@ __DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) {
__DEVICE__ float __saturatef(float __a) { return __nv_saturatef(__a); }
__DEVICE__ int __signbitd(double __a) { return __nv_signbitd(__a); }
__DEVICE__ int __signbitf(float __a) { return __nv_signbitf(__a); }
__DEVICE__ void __sincosf(float __a, float *__sptr, float *__cptr) {
return __nv_fast_sincosf(__a, __sptr, __cptr);
__DEVICE__ void __sincosf(float __a, float *__s, float *__c) {
return __nv_fast_sincosf(__a, __s, __c);
}
__DEVICE__ float __sinf(float __a) { return __nv_fast_sinf(__a); }
__DEVICE__ int __syncthreads_and(int __a) { return __nvvm_bar0_and(__a); }
@ -1468,7 +1483,8 @@ __DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {
return r;
}
#endif // CUDA_VERSION >= 9020
__DEVICE__ int abs(int __a) { return __nv_abs(__a); }
__DEVICE__ int abs(int __a) __NOEXCEPT { return __nv_abs(__a); }
__DEVICE__ double fabs(double __a) __NOEXCEPT { return __nv_fabs(__a); }
__DEVICE__ double acos(double __a) { return __nv_acos(__a); }
__DEVICE__ float acosf(float __a) { return __nv_acosf(__a); }
__DEVICE__ double acosh(double __a) { return __nv_acosh(__a); }
@ -1487,8 +1503,10 @@ __DEVICE__ double cbrt(double __a) { return __nv_cbrt(__a); }
__DEVICE__ float cbrtf(float __a) { return __nv_cbrtf(__a); }
__DEVICE__ double ceil(double __a) { return __nv_ceil(__a); }
__DEVICE__ float ceilf(float __a) { return __nv_ceilf(__a); }
#ifndef _OPENMP
__DEVICE__ int clock() { return __nvvm_read_ptx_sreg_clock(); }
__DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); }
#endif
__DEVICE__ double copysign(double __a, double __b) {
return __nv_copysign(__a, __b);
}
@ -1525,7 +1543,6 @@ __DEVICE__ float exp2f(float __a) { return __nv_exp2f(__a); }
__DEVICE__ float expf(float __a) { return __nv_expf(__a); }
__DEVICE__ double expm1(double __a) { return __nv_expm1(__a); }
__DEVICE__ float expm1f(float __a) { return __nv_expm1f(__a); }
__DEVICE__ double fabs(double __a) { return __nv_fabs(__a); }
__DEVICE__ float fabsf(float __a) { return __nv_fabsf(__a); }
__DEVICE__ double fdim(double __a, double __b) { return __nv_fdim(__a, __b); }
__DEVICE__ float fdimf(float __a, float __b) { return __nv_fdimf(__a, __b); }
@ -1563,16 +1580,16 @@ __DEVICE__ double j1(double __a) { return __nv_j1(__a); }
__DEVICE__ float j1f(float __a) { return __nv_j1f(__a); }
__DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); }
__DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); }
#if defined(__LP64__)
__DEVICE__ long labs(long __a) { return llabs(__a); };
#if defined(__LP64__) || defined(_WIN64)
__DEVICE__ long labs(long __a) __NOEXCEPT { return __nv_llabs(__a); };
#else
__DEVICE__ long labs(long __a) { return __nv_abs(__a); };
__DEVICE__ long labs(long __a) __NOEXCEPT { return __nv_abs(__a); };
#endif
__DEVICE__ double ldexp(double __a, int __b) { return __nv_ldexp(__a, __b); }
__DEVICE__ float ldexpf(float __a, int __b) { return __nv_ldexpf(__a, __b); }
__DEVICE__ double lgamma(double __a) { return __nv_lgamma(__a); }
__DEVICE__ float lgammaf(float __a) { return __nv_lgammaf(__a); }
__DEVICE__ long long llabs(long long __a) { return __nv_llabs(__a); }
__DEVICE__ long long llabs(long long __a) __NOEXCEPT { return __nv_llabs(__a); }
__DEVICE__ long long llmax(long long __a, long long __b) {
return __nv_llmax(__a, __b);
}
@ -1597,7 +1614,7 @@ __DEVICE__ float logbf(float __a) { return __nv_logbf(__a); }
__DEVICE__ float logf(float __a) {
return __FAST_OR_SLOW(__nv_fast_logf, __nv_logf)(__a);
}
#if defined(__LP64__)
#if defined(__LP64__) || defined(_WIN64)
__DEVICE__ long lrint(double __a) { return llrint(__a); }
__DEVICE__ long lrintf(float __a) { return __float2ll_rn(__a); }
__DEVICE__ long lround(double __a) { return llround(__a); }
@ -1609,12 +1626,16 @@ __DEVICE__ long lround(double __a) { return round(__a); }
__DEVICE__ long lroundf(float __a) { return roundf(__a); }
#endif
__DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); }
// These functions shouldn't be declared when including this header
// for math function resolution purposes.
#ifndef _OPENMP
__DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) {
return __builtin_memcpy(__a, __b, __c);
}
__DEVICE__ void *memset(void *__a, int __b, size_t __c) {
return __builtin_memset(__a, __b, __c);
}
#endif
__DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); }
__DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); }
__DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); }
@ -1698,6 +1719,8 @@ __DEVICE__ double rsqrt(double __a) { return __nv_rsqrt(__a); }
__DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); }
__DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); }
__DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); }
// TODO: remove once variant is supported
#ifndef _OPENMP
__DEVICE__ double scalbln(double __a, long __b) {
if (__b > INT_MAX)
return __a > 0 ? HUGE_VAL : -HUGE_VAL;
@ -1712,18 +1735,19 @@ __DEVICE__ float scalblnf(float __a, long __b) {
return __a > 0 ? 0.f : -0.f;
return scalbnf(__a, (int)__b);
}
#endif
__DEVICE__ double sin(double __a) { return __nv_sin(__a); }
__DEVICE__ void sincos(double __a, double *__sptr, double *__cptr) {
return __nv_sincos(__a, __sptr, __cptr);
__DEVICE__ void sincos(double __a, double *__s, double *__c) {
return __nv_sincos(__a, __s, __c);
}
__DEVICE__ void sincosf(float __a, float *__sptr, float *__cptr) {
return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __sptr, __cptr);
__DEVICE__ void sincosf(float __a, float *__s, float *__c) {
return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __s, __c);
}
__DEVICE__ void sincospi(double __a, double *__sptr, double *__cptr) {
return __nv_sincospi(__a, __sptr, __cptr);
__DEVICE__ void sincospi(double __a, double *__s, double *__c) {
return __nv_sincospi(__a, __s, __c);
}
__DEVICE__ void sincospif(float __a, float *__sptr, float *__cptr) {
return __nv_sincospif(__a, __sptr, __cptr);
__DEVICE__ void sincospif(float __a, float *__s, float *__c) {
return __nv_sincospif(__a, __s, __c);
}
__DEVICE__ float sinf(float __a) {
return __FAST_OR_SLOW(__nv_fast_sinf, __nv_sinf)(__a);
@ -1763,6 +1787,7 @@ __DEVICE__ float y1f(float __a) { return __nv_y1f(__a); }
__DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); }
__DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }
#undef __NOEXCEPT
#pragma pop_macro("__DEVICE__")
#pragma pop_macro("__FAST_OR_SLOW")
#endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__

View File

@ -1,22 +1,8 @@
/*===--- __clang_cuda_intrinsics.h - Device-side CUDA intrinsic wrappers ---===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===-- __clang_cuda_libdevice_declares.h - decls for libdevice functions --===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -24,443 +10,453 @@
#ifndef __CLANG_CUDA_LIBDEVICE_DECLARES_H__
#define __CLANG_CUDA_LIBDEVICE_DECLARES_H__
#if defined(__cplusplus)
extern "C" {
#endif
__device__ int __nv_abs(int __a);
__device__ double __nv_acos(double __a);
__device__ float __nv_acosf(float __a);
__device__ double __nv_acosh(double __a);
__device__ float __nv_acoshf(float __a);
__device__ double __nv_asin(double __a);
__device__ float __nv_asinf(float __a);
__device__ double __nv_asinh(double __a);
__device__ float __nv_asinhf(float __a);
__device__ double __nv_atan2(double __a, double __b);
__device__ float __nv_atan2f(float __a, float __b);
__device__ double __nv_atan(double __a);
__device__ float __nv_atanf(float __a);
__device__ double __nv_atanh(double __a);
__device__ float __nv_atanhf(float __a);
__device__ int __nv_brev(int __a);
__device__ long long __nv_brevll(long long __a);
__device__ int __nv_byte_perm(int __a, int __b, int __c);
__device__ double __nv_cbrt(double __a);
__device__ float __nv_cbrtf(float __a);
__device__ double __nv_ceil(double __a);
__device__ float __nv_ceilf(float __a);
__device__ int __nv_clz(int __a);
__device__ int __nv_clzll(long long __a);
__device__ double __nv_copysign(double __a, double __b);
__device__ float __nv_copysignf(float __a, float __b);
__device__ double __nv_cos(double __a);
__device__ float __nv_cosf(float __a);
__device__ double __nv_cosh(double __a);
__device__ float __nv_coshf(float __a);
__device__ double __nv_cospi(double __a);
__device__ float __nv_cospif(float __a);
__device__ double __nv_cyl_bessel_i0(double __a);
__device__ float __nv_cyl_bessel_i0f(float __a);
__device__ double __nv_cyl_bessel_i1(double __a);
__device__ float __nv_cyl_bessel_i1f(float __a);
__device__ double __nv_dadd_rd(double __a, double __b);
__device__ double __nv_dadd_rn(double __a, double __b);
__device__ double __nv_dadd_ru(double __a, double __b);
__device__ double __nv_dadd_rz(double __a, double __b);
__device__ double __nv_ddiv_rd(double __a, double __b);
__device__ double __nv_ddiv_rn(double __a, double __b);
__device__ double __nv_ddiv_ru(double __a, double __b);
__device__ double __nv_ddiv_rz(double __a, double __b);
__device__ double __nv_dmul_rd(double __a, double __b);
__device__ double __nv_dmul_rn(double __a, double __b);
__device__ double __nv_dmul_ru(double __a, double __b);
__device__ double __nv_dmul_rz(double __a, double __b);
__device__ float __nv_double2float_rd(double __a);
__device__ float __nv_double2float_rn(double __a);
__device__ float __nv_double2float_ru(double __a);
__device__ float __nv_double2float_rz(double __a);
__device__ int __nv_double2hiint(double __a);
__device__ int __nv_double2int_rd(double __a);
__device__ int __nv_double2int_rn(double __a);
__device__ int __nv_double2int_ru(double __a);
__device__ int __nv_double2int_rz(double __a);
__device__ long long __nv_double2ll_rd(double __a);
__device__ long long __nv_double2ll_rn(double __a);
__device__ long long __nv_double2ll_ru(double __a);
__device__ long long __nv_double2ll_rz(double __a);
__device__ int __nv_double2loint(double __a);
__device__ unsigned int __nv_double2uint_rd(double __a);
__device__ unsigned int __nv_double2uint_rn(double __a);
__device__ unsigned int __nv_double2uint_ru(double __a);
__device__ unsigned int __nv_double2uint_rz(double __a);
__device__ unsigned long long __nv_double2ull_rd(double __a);
__device__ unsigned long long __nv_double2ull_rn(double __a);
__device__ unsigned long long __nv_double2ull_ru(double __a);
__device__ unsigned long long __nv_double2ull_rz(double __a);
__device__ unsigned long long __nv_double_as_longlong(double __a);
__device__ double __nv_drcp_rd(double __a);
__device__ double __nv_drcp_rn(double __a);
__device__ double __nv_drcp_ru(double __a);
__device__ double __nv_drcp_rz(double __a);
__device__ double __nv_dsqrt_rd(double __a);
__device__ double __nv_dsqrt_rn(double __a);
__device__ double __nv_dsqrt_ru(double __a);
__device__ double __nv_dsqrt_rz(double __a);
__device__ double __nv_dsub_rd(double __a, double __b);
__device__ double __nv_dsub_rn(double __a, double __b);
__device__ double __nv_dsub_ru(double __a, double __b);
__device__ double __nv_dsub_rz(double __a, double __b);
__device__ double __nv_erfc(double __a);
__device__ float __nv_erfcf(float __a);
__device__ double __nv_erfcinv(double __a);
__device__ float __nv_erfcinvf(float __a);
__device__ double __nv_erfcx(double __a);
__device__ float __nv_erfcxf(float __a);
__device__ double __nv_erf(double __a);
__device__ float __nv_erff(float __a);
__device__ double __nv_erfinv(double __a);
__device__ float __nv_erfinvf(float __a);
__device__ double __nv_exp10(double __a);
__device__ float __nv_exp10f(float __a);
__device__ double __nv_exp2(double __a);
__device__ float __nv_exp2f(float __a);
__device__ double __nv_exp(double __a);
__device__ float __nv_expf(float __a);
__device__ double __nv_expm1(double __a);
__device__ float __nv_expm1f(float __a);
__device__ double __nv_fabs(double __a);
__device__ float __nv_fabsf(float __a);
__device__ float __nv_fadd_rd(float __a, float __b);
__device__ float __nv_fadd_rn(float __a, float __b);
__device__ float __nv_fadd_ru(float __a, float __b);
__device__ float __nv_fadd_rz(float __a, float __b);
__device__ float __nv_fast_cosf(float __a);
__device__ float __nv_fast_exp10f(float __a);
__device__ float __nv_fast_expf(float __a);
__device__ float __nv_fast_fdividef(float __a, float __b);
__device__ float __nv_fast_log10f(float __a);
__device__ float __nv_fast_log2f(float __a);
__device__ float __nv_fast_logf(float __a);
__device__ float __nv_fast_powf(float __a, float __b);
__device__ void __nv_fast_sincosf(float __a, float *__sptr, float *__cptr);
__device__ float __nv_fast_sinf(float __a);
__device__ float __nv_fast_tanf(float __a);
__device__ double __nv_fdim(double __a, double __b);
__device__ float __nv_fdimf(float __a, float __b);
__device__ float __nv_fdiv_rd(float __a, float __b);
__device__ float __nv_fdiv_rn(float __a, float __b);
__device__ float __nv_fdiv_ru(float __a, float __b);
__device__ float __nv_fdiv_rz(float __a, float __b);
__device__ int __nv_ffs(int __a);
__device__ int __nv_ffsll(long long __a);
__device__ int __nv_finitef(float __a);
__device__ unsigned short __nv_float2half_rn(float __a);
__device__ int __nv_float2int_rd(float __a);
__device__ int __nv_float2int_rn(float __a);
__device__ int __nv_float2int_ru(float __a);
__device__ int __nv_float2int_rz(float __a);
__device__ long long __nv_float2ll_rd(float __a);
__device__ long long __nv_float2ll_rn(float __a);
__device__ long long __nv_float2ll_ru(float __a);
__device__ long long __nv_float2ll_rz(float __a);
__device__ unsigned int __nv_float2uint_rd(float __a);
__device__ unsigned int __nv_float2uint_rn(float __a);
__device__ unsigned int __nv_float2uint_ru(float __a);
__device__ unsigned int __nv_float2uint_rz(float __a);
__device__ unsigned long long __nv_float2ull_rd(float __a);
__device__ unsigned long long __nv_float2ull_rn(float __a);
__device__ unsigned long long __nv_float2ull_ru(float __a);
__device__ unsigned long long __nv_float2ull_rz(float __a);
__device__ int __nv_float_as_int(float __a);
__device__ unsigned int __nv_float_as_uint(float __a);
__device__ double __nv_floor(double __a);
__device__ float __nv_floorf(float __a);
__device__ double __nv_fma(double __a, double __b, double __c);
__device__ float __nv_fmaf(float __a, float __b, float __c);
__device__ float __nv_fmaf_ieee_rd(float __a, float __b, float __c);
__device__ float __nv_fmaf_ieee_rn(float __a, float __b, float __c);
__device__ float __nv_fmaf_ieee_ru(float __a, float __b, float __c);
__device__ float __nv_fmaf_ieee_rz(float __a, float __b, float __c);
__device__ float __nv_fmaf_rd(float __a, float __b, float __c);
__device__ float __nv_fmaf_rn(float __a, float __b, float __c);
__device__ float __nv_fmaf_ru(float __a, float __b, float __c);
__device__ float __nv_fmaf_rz(float __a, float __b, float __c);
__device__ double __nv_fma_rd(double __a, double __b, double __c);
__device__ double __nv_fma_rn(double __a, double __b, double __c);
__device__ double __nv_fma_ru(double __a, double __b, double __c);
__device__ double __nv_fma_rz(double __a, double __b, double __c);
__device__ double __nv_fmax(double __a, double __b);
__device__ float __nv_fmaxf(float __a, float __b);
__device__ double __nv_fmin(double __a, double __b);
__device__ float __nv_fminf(float __a, float __b);
__device__ double __nv_fmod(double __a, double __b);
__device__ float __nv_fmodf(float __a, float __b);
__device__ float __nv_fmul_rd(float __a, float __b);
__device__ float __nv_fmul_rn(float __a, float __b);
__device__ float __nv_fmul_ru(float __a, float __b);
__device__ float __nv_fmul_rz(float __a, float __b);
__device__ float __nv_frcp_rd(float __a);
__device__ float __nv_frcp_rn(float __a);
__device__ float __nv_frcp_ru(float __a);
__device__ float __nv_frcp_rz(float __a);
__device__ double __nv_frexp(double __a, int *__b);
__device__ float __nv_frexpf(float __a, int *__b);
__device__ float __nv_frsqrt_rn(float __a);
__device__ float __nv_fsqrt_rd(float __a);
__device__ float __nv_fsqrt_rn(float __a);
__device__ float __nv_fsqrt_ru(float __a);
__device__ float __nv_fsqrt_rz(float __a);
__device__ float __nv_fsub_rd(float __a, float __b);
__device__ float __nv_fsub_rn(float __a, float __b);
__device__ float __nv_fsub_ru(float __a, float __b);
__device__ float __nv_fsub_rz(float __a, float __b);
__device__ int __nv_hadd(int __a, int __b);
__device__ float __nv_half2float(unsigned short __h);
__device__ double __nv_hiloint2double(int __a, int __b);
__device__ double __nv_hypot(double __a, double __b);
__device__ float __nv_hypotf(float __a, float __b);
__device__ int __nv_ilogb(double __a);
__device__ int __nv_ilogbf(float __a);
__device__ double __nv_int2double_rn(int __a);
__device__ float __nv_int2float_rd(int __a);
__device__ float __nv_int2float_rn(int __a);
__device__ float __nv_int2float_ru(int __a);
__device__ float __nv_int2float_rz(int __a);
__device__ float __nv_int_as_float(int __a);
__device__ int __nv_isfinited(double __a);
__device__ int __nv_isinfd(double __a);
__device__ int __nv_isinff(float __a);
__device__ int __nv_isnand(double __a);
__device__ int __nv_isnanf(float __a);
__device__ double __nv_j0(double __a);
__device__ float __nv_j0f(float __a);
__device__ double __nv_j1(double __a);
__device__ float __nv_j1f(float __a);
__device__ float __nv_jnf(int __a, float __b);
__device__ double __nv_jn(int __a, double __b);
__device__ double __nv_ldexp(double __a, int __b);
__device__ float __nv_ldexpf(float __a, int __b);
__device__ double __nv_lgamma(double __a);
__device__ float __nv_lgammaf(float __a);
__device__ double __nv_ll2double_rd(long long __a);
__device__ double __nv_ll2double_rn(long long __a);
__device__ double __nv_ll2double_ru(long long __a);
__device__ double __nv_ll2double_rz(long long __a);
__device__ float __nv_ll2float_rd(long long __a);
__device__ float __nv_ll2float_rn(long long __a);
__device__ float __nv_ll2float_ru(long long __a);
__device__ float __nv_ll2float_rz(long long __a);
__device__ long long __nv_llabs(long long __a);
__device__ long long __nv_llmax(long long __a, long long __b);
__device__ long long __nv_llmin(long long __a, long long __b);
__device__ long long __nv_llrint(double __a);
__device__ long long __nv_llrintf(float __a);
__device__ long long __nv_llround(double __a);
__device__ long long __nv_llroundf(float __a);
__device__ double __nv_log10(double __a);
__device__ float __nv_log10f(float __a);
__device__ double __nv_log1p(double __a);
__device__ float __nv_log1pf(float __a);
__device__ double __nv_log2(double __a);
__device__ float __nv_log2f(float __a);
__device__ double __nv_logb(double __a);
__device__ float __nv_logbf(float __a);
__device__ double __nv_log(double __a);
__device__ float __nv_logf(float __a);
__device__ double __nv_longlong_as_double(long long __a);
__device__ int __nv_max(int __a, int __b);
__device__ int __nv_min(int __a, int __b);
__device__ double __nv_modf(double __a, double *__b);
__device__ float __nv_modff(float __a, float *__b);
__device__ int __nv_mul24(int __a, int __b);
__device__ long long __nv_mul64hi(long long __a, long long __b);
__device__ int __nv_mulhi(int __a, int __b);
__device__ double __nv_nan(const signed char *__a);
__device__ float __nv_nanf(const signed char *__a);
__device__ double __nv_nearbyint(double __a);
__device__ float __nv_nearbyintf(float __a);
__device__ double __nv_nextafter(double __a, double __b);
__device__ float __nv_nextafterf(float __a, float __b);
__device__ double __nv_norm3d(double __a, double __b, double __c);
__device__ float __nv_norm3df(float __a, float __b, float __c);
__device__ double __nv_norm4d(double __a, double __b, double __c, double __d);
__device__ float __nv_norm4df(float __a, float __b, float __c, float __d);
__device__ double __nv_normcdf(double __a);
__device__ float __nv_normcdff(float __a);
__device__ double __nv_normcdfinv(double __a);
__device__ float __nv_normcdfinvf(float __a);
__device__ float __nv_normf(int __a, const float *__b);
__device__ double __nv_norm(int __a, const double *__b);
__device__ int __nv_popc(int __a);
__device__ int __nv_popcll(long long __a);
__device__ double __nv_pow(double __a, double __b);
__device__ float __nv_powf(float __a, float __b);
__device__ double __nv_powi(double __a, int __b);
__device__ float __nv_powif(float __a, int __b);
__device__ double __nv_rcbrt(double __a);
__device__ float __nv_rcbrtf(float __a);
__device__ double __nv_rcp64h(double __a);
__device__ double __nv_remainder(double __a, double __b);
__device__ float __nv_remainderf(float __a, float __b);
__device__ double __nv_remquo(double __a, double __b, int *__c);
__device__ float __nv_remquof(float __a, float __b, int *__c);
__device__ int __nv_rhadd(int __a, int __b);
__device__ double __nv_rhypot(double __a, double __b);
__device__ float __nv_rhypotf(float __a, float __b);
__device__ double __nv_rint(double __a);
__device__ float __nv_rintf(float __a);
__device__ double __nv_rnorm3d(double __a, double __b, double __c);
__device__ float __nv_rnorm3df(float __a, float __b, float __c);
__device__ double __nv_rnorm4d(double __a, double __b, double __c, double __d);
__device__ float __nv_rnorm4df(float __a, float __b, float __c, float __d);
__device__ float __nv_rnormf(int __a, const float *__b);
__device__ double __nv_rnorm(int __a, const double *__b);
__device__ double __nv_round(double __a);
__device__ float __nv_roundf(float __a);
__device__ double __nv_rsqrt(double __a);
__device__ float __nv_rsqrtf(float __a);
__device__ int __nv_sad(int __a, int __b, int __c);
__device__ float __nv_saturatef(float __a);
__device__ double __nv_scalbn(double __a, int __b);
__device__ float __nv_scalbnf(float __a, int __b);
__device__ int __nv_signbitd(double __a);
__device__ int __nv_signbitf(float __a);
__device__ void __nv_sincos(double __a, double *__b, double *__c);
__device__ void __nv_sincosf(float __a, float *__b, float *__c);
__device__ void __nv_sincospi(double __a, double *__b, double *__c);
__device__ void __nv_sincospif(float __a, float *__b, float *__c);
__device__ double __nv_sin(double __a);
__device__ float __nv_sinf(float __a);
__device__ double __nv_sinh(double __a);
__device__ float __nv_sinhf(float __a);
__device__ double __nv_sinpi(double __a);
__device__ float __nv_sinpif(float __a);
__device__ double __nv_sqrt(double __a);
__device__ float __nv_sqrtf(float __a);
__device__ double __nv_tan(double __a);
__device__ float __nv_tanf(float __a);
__device__ double __nv_tanh(double __a);
__device__ float __nv_tanhf(float __a);
__device__ double __nv_tgamma(double __a);
__device__ float __nv_tgammaf(float __a);
__device__ double __nv_trunc(double __a);
__device__ float __nv_truncf(float __a);
__device__ int __nv_uhadd(unsigned int __a, unsigned int __b);
__device__ double __nv_uint2double_rn(unsigned int __i);
__device__ float __nv_uint2float_rd(unsigned int __a);
__device__ float __nv_uint2float_rn(unsigned int __a);
__device__ float __nv_uint2float_ru(unsigned int __a);
__device__ float __nv_uint2float_rz(unsigned int __a);
__device__ float __nv_uint_as_float(unsigned int __a);
__device__ double __nv_ull2double_rd(unsigned long long __a);
__device__ double __nv_ull2double_rn(unsigned long long __a);
__device__ double __nv_ull2double_ru(unsigned long long __a);
__device__ double __nv_ull2double_rz(unsigned long long __a);
__device__ float __nv_ull2float_rd(unsigned long long __a);
__device__ float __nv_ull2float_rn(unsigned long long __a);
__device__ float __nv_ull2float_ru(unsigned long long __a);
__device__ float __nv_ull2float_rz(unsigned long long __a);
__device__ unsigned long long __nv_ullmax(unsigned long long __a,
#if defined(_OPENMP)
#define __DEVICE__
#elif defined(__CUDA__)
#define __DEVICE__ __device__
#endif
__DEVICE__ int __nv_abs(int __a);
__DEVICE__ double __nv_acos(double __a);
__DEVICE__ float __nv_acosf(float __a);
__DEVICE__ double __nv_acosh(double __a);
__DEVICE__ float __nv_acoshf(float __a);
__DEVICE__ double __nv_asin(double __a);
__DEVICE__ float __nv_asinf(float __a);
__DEVICE__ double __nv_asinh(double __a);
__DEVICE__ float __nv_asinhf(float __a);
__DEVICE__ double __nv_atan2(double __a, double __b);
__DEVICE__ float __nv_atan2f(float __a, float __b);
__DEVICE__ double __nv_atan(double __a);
__DEVICE__ float __nv_atanf(float __a);
__DEVICE__ double __nv_atanh(double __a);
__DEVICE__ float __nv_atanhf(float __a);
__DEVICE__ int __nv_brev(int __a);
__DEVICE__ long long __nv_brevll(long long __a);
__DEVICE__ int __nv_byte_perm(int __a, int __b, int __c);
__DEVICE__ double __nv_cbrt(double __a);
__DEVICE__ float __nv_cbrtf(float __a);
__DEVICE__ double __nv_ceil(double __a);
__DEVICE__ float __nv_ceilf(float __a);
__DEVICE__ int __nv_clz(int __a);
__DEVICE__ int __nv_clzll(long long __a);
__DEVICE__ double __nv_copysign(double __a, double __b);
__DEVICE__ float __nv_copysignf(float __a, float __b);
__DEVICE__ double __nv_cos(double __a);
__DEVICE__ float __nv_cosf(float __a);
__DEVICE__ double __nv_cosh(double __a);
__DEVICE__ float __nv_coshf(float __a);
__DEVICE__ double __nv_cospi(double __a);
__DEVICE__ float __nv_cospif(float __a);
__DEVICE__ double __nv_cyl_bessel_i0(double __a);
__DEVICE__ float __nv_cyl_bessel_i0f(float __a);
__DEVICE__ double __nv_cyl_bessel_i1(double __a);
__DEVICE__ float __nv_cyl_bessel_i1f(float __a);
__DEVICE__ double __nv_dadd_rd(double __a, double __b);
__DEVICE__ double __nv_dadd_rn(double __a, double __b);
__DEVICE__ double __nv_dadd_ru(double __a, double __b);
__DEVICE__ double __nv_dadd_rz(double __a, double __b);
__DEVICE__ double __nv_ddiv_rd(double __a, double __b);
__DEVICE__ double __nv_ddiv_rn(double __a, double __b);
__DEVICE__ double __nv_ddiv_ru(double __a, double __b);
__DEVICE__ double __nv_ddiv_rz(double __a, double __b);
__DEVICE__ double __nv_dmul_rd(double __a, double __b);
__DEVICE__ double __nv_dmul_rn(double __a, double __b);
__DEVICE__ double __nv_dmul_ru(double __a, double __b);
__DEVICE__ double __nv_dmul_rz(double __a, double __b);
__DEVICE__ float __nv_double2float_rd(double __a);
__DEVICE__ float __nv_double2float_rn(double __a);
__DEVICE__ float __nv_double2float_ru(double __a);
__DEVICE__ float __nv_double2float_rz(double __a);
__DEVICE__ int __nv_double2hiint(double __a);
__DEVICE__ int __nv_double2int_rd(double __a);
__DEVICE__ int __nv_double2int_rn(double __a);
__DEVICE__ int __nv_double2int_ru(double __a);
__DEVICE__ int __nv_double2int_rz(double __a);
__DEVICE__ long long __nv_double2ll_rd(double __a);
__DEVICE__ long long __nv_double2ll_rn(double __a);
__DEVICE__ long long __nv_double2ll_ru(double __a);
__DEVICE__ long long __nv_double2ll_rz(double __a);
__DEVICE__ int __nv_double2loint(double __a);
__DEVICE__ unsigned int __nv_double2uint_rd(double __a);
__DEVICE__ unsigned int __nv_double2uint_rn(double __a);
__DEVICE__ unsigned int __nv_double2uint_ru(double __a);
__DEVICE__ unsigned int __nv_double2uint_rz(double __a);
__DEVICE__ unsigned long long __nv_double2ull_rd(double __a);
__DEVICE__ unsigned long long __nv_double2ull_rn(double __a);
__DEVICE__ unsigned long long __nv_double2ull_ru(double __a);
__DEVICE__ unsigned long long __nv_double2ull_rz(double __a);
__DEVICE__ unsigned long long __nv_double_as_longlong(double __a);
__DEVICE__ double __nv_drcp_rd(double __a);
__DEVICE__ double __nv_drcp_rn(double __a);
__DEVICE__ double __nv_drcp_ru(double __a);
__DEVICE__ double __nv_drcp_rz(double __a);
__DEVICE__ double __nv_dsqrt_rd(double __a);
__DEVICE__ double __nv_dsqrt_rn(double __a);
__DEVICE__ double __nv_dsqrt_ru(double __a);
__DEVICE__ double __nv_dsqrt_rz(double __a);
__DEVICE__ double __nv_dsub_rd(double __a, double __b);
__DEVICE__ double __nv_dsub_rn(double __a, double __b);
__DEVICE__ double __nv_dsub_ru(double __a, double __b);
__DEVICE__ double __nv_dsub_rz(double __a, double __b);
__DEVICE__ double __nv_erfc(double __a);
__DEVICE__ float __nv_erfcf(float __a);
__DEVICE__ double __nv_erfcinv(double __a);
__DEVICE__ float __nv_erfcinvf(float __a);
__DEVICE__ double __nv_erfcx(double __a);
__DEVICE__ float __nv_erfcxf(float __a);
__DEVICE__ double __nv_erf(double __a);
__DEVICE__ float __nv_erff(float __a);
__DEVICE__ double __nv_erfinv(double __a);
__DEVICE__ float __nv_erfinvf(float __a);
__DEVICE__ double __nv_exp10(double __a);
__DEVICE__ float __nv_exp10f(float __a);
__DEVICE__ double __nv_exp2(double __a);
__DEVICE__ float __nv_exp2f(float __a);
__DEVICE__ double __nv_exp(double __a);
__DEVICE__ float __nv_expf(float __a);
__DEVICE__ double __nv_expm1(double __a);
__DEVICE__ float __nv_expm1f(float __a);
__DEVICE__ double __nv_fabs(double __a);
__DEVICE__ float __nv_fabsf(float __a);
__DEVICE__ float __nv_fadd_rd(float __a, float __b);
__DEVICE__ float __nv_fadd_rn(float __a, float __b);
__DEVICE__ float __nv_fadd_ru(float __a, float __b);
__DEVICE__ float __nv_fadd_rz(float __a, float __b);
__DEVICE__ float __nv_fast_cosf(float __a);
__DEVICE__ float __nv_fast_exp10f(float __a);
__DEVICE__ float __nv_fast_expf(float __a);
__DEVICE__ float __nv_fast_fdividef(float __a, float __b);
__DEVICE__ float __nv_fast_log10f(float __a);
__DEVICE__ float __nv_fast_log2f(float __a);
__DEVICE__ float __nv_fast_logf(float __a);
__DEVICE__ float __nv_fast_powf(float __a, float __b);
__DEVICE__ void __nv_fast_sincosf(float __a, float *__s, float *__c);
__DEVICE__ float __nv_fast_sinf(float __a);
__DEVICE__ float __nv_fast_tanf(float __a);
__DEVICE__ double __nv_fdim(double __a, double __b);
__DEVICE__ float __nv_fdimf(float __a, float __b);
__DEVICE__ float __nv_fdiv_rd(float __a, float __b);
__DEVICE__ float __nv_fdiv_rn(float __a, float __b);
__DEVICE__ float __nv_fdiv_ru(float __a, float __b);
__DEVICE__ float __nv_fdiv_rz(float __a, float __b);
__DEVICE__ int __nv_ffs(int __a);
__DEVICE__ int __nv_ffsll(long long __a);
__DEVICE__ int __nv_finitef(float __a);
__DEVICE__ unsigned short __nv_float2half_rn(float __a);
__DEVICE__ int __nv_float2int_rd(float __a);
__DEVICE__ int __nv_float2int_rn(float __a);
__DEVICE__ int __nv_float2int_ru(float __a);
__DEVICE__ int __nv_float2int_rz(float __a);
__DEVICE__ long long __nv_float2ll_rd(float __a);
__DEVICE__ long long __nv_float2ll_rn(float __a);
__DEVICE__ long long __nv_float2ll_ru(float __a);
__DEVICE__ long long __nv_float2ll_rz(float __a);
__DEVICE__ unsigned int __nv_float2uint_rd(float __a);
__DEVICE__ unsigned int __nv_float2uint_rn(float __a);
__DEVICE__ unsigned int __nv_float2uint_ru(float __a);
__DEVICE__ unsigned int __nv_float2uint_rz(float __a);
__DEVICE__ unsigned long long __nv_float2ull_rd(float __a);
__DEVICE__ unsigned long long __nv_float2ull_rn(float __a);
__DEVICE__ unsigned long long __nv_float2ull_ru(float __a);
__DEVICE__ unsigned long long __nv_float2ull_rz(float __a);
__DEVICE__ int __nv_float_as_int(float __a);
__DEVICE__ unsigned int __nv_float_as_uint(float __a);
__DEVICE__ double __nv_floor(double __a);
__DEVICE__ float __nv_floorf(float __a);
__DEVICE__ double __nv_fma(double __a, double __b, double __c);
__DEVICE__ float __nv_fmaf(float __a, float __b, float __c);
__DEVICE__ float __nv_fmaf_ieee_rd(float __a, float __b, float __c);
__DEVICE__ float __nv_fmaf_ieee_rn(float __a, float __b, float __c);
__DEVICE__ float __nv_fmaf_ieee_ru(float __a, float __b, float __c);
__DEVICE__ float __nv_fmaf_ieee_rz(float __a, float __b, float __c);
__DEVICE__ float __nv_fmaf_rd(float __a, float __b, float __c);
__DEVICE__ float __nv_fmaf_rn(float __a, float __b, float __c);
__DEVICE__ float __nv_fmaf_ru(float __a, float __b, float __c);
__DEVICE__ float __nv_fmaf_rz(float __a, float __b, float __c);
__DEVICE__ double __nv_fma_rd(double __a, double __b, double __c);
__DEVICE__ double __nv_fma_rn(double __a, double __b, double __c);
__DEVICE__ double __nv_fma_ru(double __a, double __b, double __c);
__DEVICE__ double __nv_fma_rz(double __a, double __b, double __c);
__DEVICE__ double __nv_fmax(double __a, double __b);
__DEVICE__ float __nv_fmaxf(float __a, float __b);
__DEVICE__ double __nv_fmin(double __a, double __b);
__DEVICE__ float __nv_fminf(float __a, float __b);
__DEVICE__ double __nv_fmod(double __a, double __b);
__DEVICE__ float __nv_fmodf(float __a, float __b);
__DEVICE__ float __nv_fmul_rd(float __a, float __b);
__DEVICE__ float __nv_fmul_rn(float __a, float __b);
__DEVICE__ float __nv_fmul_ru(float __a, float __b);
__DEVICE__ float __nv_fmul_rz(float __a, float __b);
__DEVICE__ float __nv_frcp_rd(float __a);
__DEVICE__ float __nv_frcp_rn(float __a);
__DEVICE__ float __nv_frcp_ru(float __a);
__DEVICE__ float __nv_frcp_rz(float __a);
__DEVICE__ double __nv_frexp(double __a, int *__b);
__DEVICE__ float __nv_frexpf(float __a, int *__b);
__DEVICE__ float __nv_frsqrt_rn(float __a);
__DEVICE__ float __nv_fsqrt_rd(float __a);
__DEVICE__ float __nv_fsqrt_rn(float __a);
__DEVICE__ float __nv_fsqrt_ru(float __a);
__DEVICE__ float __nv_fsqrt_rz(float __a);
__DEVICE__ float __nv_fsub_rd(float __a, float __b);
__DEVICE__ float __nv_fsub_rn(float __a, float __b);
__DEVICE__ float __nv_fsub_ru(float __a, float __b);
__DEVICE__ float __nv_fsub_rz(float __a, float __b);
__DEVICE__ int __nv_hadd(int __a, int __b);
__DEVICE__ float __nv_half2float(unsigned short __h);
__DEVICE__ double __nv_hiloint2double(int __a, int __b);
__DEVICE__ double __nv_hypot(double __a, double __b);
__DEVICE__ float __nv_hypotf(float __a, float __b);
__DEVICE__ int __nv_ilogb(double __a);
__DEVICE__ int __nv_ilogbf(float __a);
__DEVICE__ double __nv_int2double_rn(int __a);
__DEVICE__ float __nv_int2float_rd(int __a);
__DEVICE__ float __nv_int2float_rn(int __a);
__DEVICE__ float __nv_int2float_ru(int __a);
__DEVICE__ float __nv_int2float_rz(int __a);
__DEVICE__ float __nv_int_as_float(int __a);
__DEVICE__ int __nv_isfinited(double __a);
__DEVICE__ int __nv_isinfd(double __a);
__DEVICE__ int __nv_isinff(float __a);
__DEVICE__ int __nv_isnand(double __a);
__DEVICE__ int __nv_isnanf(float __a);
__DEVICE__ double __nv_j0(double __a);
__DEVICE__ float __nv_j0f(float __a);
__DEVICE__ double __nv_j1(double __a);
__DEVICE__ float __nv_j1f(float __a);
__DEVICE__ float __nv_jnf(int __a, float __b);
__DEVICE__ double __nv_jn(int __a, double __b);
__DEVICE__ double __nv_ldexp(double __a, int __b);
__DEVICE__ float __nv_ldexpf(float __a, int __b);
__DEVICE__ double __nv_lgamma(double __a);
__DEVICE__ float __nv_lgammaf(float __a);
__DEVICE__ double __nv_ll2double_rd(long long __a);
__DEVICE__ double __nv_ll2double_rn(long long __a);
__DEVICE__ double __nv_ll2double_ru(long long __a);
__DEVICE__ double __nv_ll2double_rz(long long __a);
__DEVICE__ float __nv_ll2float_rd(long long __a);
__DEVICE__ float __nv_ll2float_rn(long long __a);
__DEVICE__ float __nv_ll2float_ru(long long __a);
__DEVICE__ float __nv_ll2float_rz(long long __a);
__DEVICE__ long long __nv_llabs(long long __a);
__DEVICE__ long long __nv_llmax(long long __a, long long __b);
__DEVICE__ long long __nv_llmin(long long __a, long long __b);
__DEVICE__ long long __nv_llrint(double __a);
__DEVICE__ long long __nv_llrintf(float __a);
__DEVICE__ long long __nv_llround(double __a);
__DEVICE__ long long __nv_llroundf(float __a);
__DEVICE__ double __nv_log10(double __a);
__DEVICE__ float __nv_log10f(float __a);
__DEVICE__ double __nv_log1p(double __a);
__DEVICE__ float __nv_log1pf(float __a);
__DEVICE__ double __nv_log2(double __a);
__DEVICE__ float __nv_log2f(float __a);
__DEVICE__ double __nv_logb(double __a);
__DEVICE__ float __nv_logbf(float __a);
__DEVICE__ double __nv_log(double __a);
__DEVICE__ float __nv_logf(float __a);
__DEVICE__ double __nv_longlong_as_double(long long __a);
__DEVICE__ int __nv_max(int __a, int __b);
__DEVICE__ int __nv_min(int __a, int __b);
__DEVICE__ double __nv_modf(double __a, double *__b);
__DEVICE__ float __nv_modff(float __a, float *__b);
__DEVICE__ int __nv_mul24(int __a, int __b);
__DEVICE__ long long __nv_mul64hi(long long __a, long long __b);
__DEVICE__ int __nv_mulhi(int __a, int __b);
__DEVICE__ double __nv_nan(const signed char *__a);
__DEVICE__ float __nv_nanf(const signed char *__a);
__DEVICE__ double __nv_nearbyint(double __a);
__DEVICE__ float __nv_nearbyintf(float __a);
__DEVICE__ double __nv_nextafter(double __a, double __b);
__DEVICE__ float __nv_nextafterf(float __a, float __b);
__DEVICE__ double __nv_norm3d(double __a, double __b, double __c);
__DEVICE__ float __nv_norm3df(float __a, float __b, float __c);
__DEVICE__ double __nv_norm4d(double __a, double __b, double __c, double __d);
__DEVICE__ float __nv_norm4df(float __a, float __b, float __c, float __d);
__DEVICE__ double __nv_normcdf(double __a);
__DEVICE__ float __nv_normcdff(float __a);
__DEVICE__ double __nv_normcdfinv(double __a);
__DEVICE__ float __nv_normcdfinvf(float __a);
__DEVICE__ float __nv_normf(int __a, const float *__b);
__DEVICE__ double __nv_norm(int __a, const double *__b);
__DEVICE__ int __nv_popc(int __a);
__DEVICE__ int __nv_popcll(long long __a);
__DEVICE__ double __nv_pow(double __a, double __b);
__DEVICE__ float __nv_powf(float __a, float __b);
__DEVICE__ double __nv_powi(double __a, int __b);
__DEVICE__ float __nv_powif(float __a, int __b);
__DEVICE__ double __nv_rcbrt(double __a);
__DEVICE__ float __nv_rcbrtf(float __a);
__DEVICE__ double __nv_rcp64h(double __a);
__DEVICE__ double __nv_remainder(double __a, double __b);
__DEVICE__ float __nv_remainderf(float __a, float __b);
__DEVICE__ double __nv_remquo(double __a, double __b, int *__c);
__DEVICE__ float __nv_remquof(float __a, float __b, int *__c);
__DEVICE__ int __nv_rhadd(int __a, int __b);
__DEVICE__ double __nv_rhypot(double __a, double __b);
__DEVICE__ float __nv_rhypotf(float __a, float __b);
__DEVICE__ double __nv_rint(double __a);
__DEVICE__ float __nv_rintf(float __a);
__DEVICE__ double __nv_rnorm3d(double __a, double __b, double __c);
__DEVICE__ float __nv_rnorm3df(float __a, float __b, float __c);
__DEVICE__ double __nv_rnorm4d(double __a, double __b, double __c, double __d);
__DEVICE__ float __nv_rnorm4df(float __a, float __b, float __c, float __d);
__DEVICE__ float __nv_rnormf(int __a, const float *__b);
__DEVICE__ double __nv_rnorm(int __a, const double *__b);
__DEVICE__ double __nv_round(double __a);
__DEVICE__ float __nv_roundf(float __a);
__DEVICE__ double __nv_rsqrt(double __a);
__DEVICE__ float __nv_rsqrtf(float __a);
__DEVICE__ int __nv_sad(int __a, int __b, int __c);
__DEVICE__ float __nv_saturatef(float __a);
__DEVICE__ double __nv_scalbn(double __a, int __b);
__DEVICE__ float __nv_scalbnf(float __a, int __b);
__DEVICE__ int __nv_signbitd(double __a);
__DEVICE__ int __nv_signbitf(float __a);
__DEVICE__ void __nv_sincos(double __a, double *__b, double *__c);
__DEVICE__ void __nv_sincosf(float __a, float *__b, float *__c);
__DEVICE__ void __nv_sincospi(double __a, double *__b, double *__c);
__DEVICE__ void __nv_sincospif(float __a, float *__b, float *__c);
__DEVICE__ double __nv_sin(double __a);
__DEVICE__ float __nv_sinf(float __a);
__DEVICE__ double __nv_sinh(double __a);
__DEVICE__ float __nv_sinhf(float __a);
__DEVICE__ double __nv_sinpi(double __a);
__DEVICE__ float __nv_sinpif(float __a);
__DEVICE__ double __nv_sqrt(double __a);
__DEVICE__ float __nv_sqrtf(float __a);
__DEVICE__ double __nv_tan(double __a);
__DEVICE__ float __nv_tanf(float __a);
__DEVICE__ double __nv_tanh(double __a);
__DEVICE__ float __nv_tanhf(float __a);
__DEVICE__ double __nv_tgamma(double __a);
__DEVICE__ float __nv_tgammaf(float __a);
__DEVICE__ double __nv_trunc(double __a);
__DEVICE__ float __nv_truncf(float __a);
__DEVICE__ int __nv_uhadd(unsigned int __a, unsigned int __b);
__DEVICE__ double __nv_uint2double_rn(unsigned int __i);
__DEVICE__ float __nv_uint2float_rd(unsigned int __a);
__DEVICE__ float __nv_uint2float_rn(unsigned int __a);
__DEVICE__ float __nv_uint2float_ru(unsigned int __a);
__DEVICE__ float __nv_uint2float_rz(unsigned int __a);
__DEVICE__ float __nv_uint_as_float(unsigned int __a);
__DEVICE__ double __nv_ull2double_rd(unsigned long long __a);
__DEVICE__ double __nv_ull2double_rn(unsigned long long __a);
__DEVICE__ double __nv_ull2double_ru(unsigned long long __a);
__DEVICE__ double __nv_ull2double_rz(unsigned long long __a);
__DEVICE__ float __nv_ull2float_rd(unsigned long long __a);
__DEVICE__ float __nv_ull2float_rn(unsigned long long __a);
__DEVICE__ float __nv_ull2float_ru(unsigned long long __a);
__DEVICE__ float __nv_ull2float_rz(unsigned long long __a);
__DEVICE__ unsigned long long __nv_ullmax(unsigned long long __a,
unsigned long long __b);
__device__ unsigned long long __nv_ullmin(unsigned long long __a,
__DEVICE__ unsigned long long __nv_ullmin(unsigned long long __a,
unsigned long long __b);
__device__ unsigned int __nv_umax(unsigned int __a, unsigned int __b);
__device__ unsigned int __nv_umin(unsigned int __a, unsigned int __b);
__device__ unsigned int __nv_umul24(unsigned int __a, unsigned int __b);
__device__ unsigned long long __nv_umul64hi(unsigned long long __a,
__DEVICE__ unsigned int __nv_umax(unsigned int __a, unsigned int __b);
__DEVICE__ unsigned int __nv_umin(unsigned int __a, unsigned int __b);
__DEVICE__ unsigned int __nv_umul24(unsigned int __a, unsigned int __b);
__DEVICE__ unsigned long long __nv_umul64hi(unsigned long long __a,
unsigned long long __b);
__device__ unsigned int __nv_umulhi(unsigned int __a, unsigned int __b);
__device__ unsigned int __nv_urhadd(unsigned int __a, unsigned int __b);
__device__ unsigned int __nv_usad(unsigned int __a, unsigned int __b,
__DEVICE__ unsigned int __nv_umulhi(unsigned int __a, unsigned int __b);
__DEVICE__ unsigned int __nv_urhadd(unsigned int __a, unsigned int __b);
__DEVICE__ unsigned int __nv_usad(unsigned int __a, unsigned int __b,
unsigned int __c);
#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020
__device__ int __nv_vabs2(int __a);
__device__ int __nv_vabs4(int __a);
__device__ int __nv_vabsdiffs2(int __a, int __b);
__device__ int __nv_vabsdiffs4(int __a, int __b);
__device__ int __nv_vabsdiffu2(int __a, int __b);
__device__ int __nv_vabsdiffu4(int __a, int __b);
__device__ int __nv_vabsss2(int __a);
__device__ int __nv_vabsss4(int __a);
__device__ int __nv_vadd2(int __a, int __b);
__device__ int __nv_vadd4(int __a, int __b);
__device__ int __nv_vaddss2(int __a, int __b);
__device__ int __nv_vaddss4(int __a, int __b);
__device__ int __nv_vaddus2(int __a, int __b);
__device__ int __nv_vaddus4(int __a, int __b);
__device__ int __nv_vavgs2(int __a, int __b);
__device__ int __nv_vavgs4(int __a, int __b);
__device__ int __nv_vavgu2(int __a, int __b);
__device__ int __nv_vavgu4(int __a, int __b);
__device__ int __nv_vcmpeq2(int __a, int __b);
__device__ int __nv_vcmpeq4(int __a, int __b);
__device__ int __nv_vcmpges2(int __a, int __b);
__device__ int __nv_vcmpges4(int __a, int __b);
__device__ int __nv_vcmpgeu2(int __a, int __b);
__device__ int __nv_vcmpgeu4(int __a, int __b);
__device__ int __nv_vcmpgts2(int __a, int __b);
__device__ int __nv_vcmpgts4(int __a, int __b);
__device__ int __nv_vcmpgtu2(int __a, int __b);
__device__ int __nv_vcmpgtu4(int __a, int __b);
__device__ int __nv_vcmples2(int __a, int __b);
__device__ int __nv_vcmples4(int __a, int __b);
__device__ int __nv_vcmpleu2(int __a, int __b);
__device__ int __nv_vcmpleu4(int __a, int __b);
__device__ int __nv_vcmplts2(int __a, int __b);
__device__ int __nv_vcmplts4(int __a, int __b);
__device__ int __nv_vcmpltu2(int __a, int __b);
__device__ int __nv_vcmpltu4(int __a, int __b);
__device__ int __nv_vcmpne2(int __a, int __b);
__device__ int __nv_vcmpne4(int __a, int __b);
__device__ int __nv_vhaddu2(int __a, int __b);
__device__ int __nv_vhaddu4(int __a, int __b);
__device__ int __nv_vmaxs2(int __a, int __b);
__device__ int __nv_vmaxs4(int __a, int __b);
__device__ int __nv_vmaxu2(int __a, int __b);
__device__ int __nv_vmaxu4(int __a, int __b);
__device__ int __nv_vmins2(int __a, int __b);
__device__ int __nv_vmins4(int __a, int __b);
__device__ int __nv_vminu2(int __a, int __b);
__device__ int __nv_vminu4(int __a, int __b);
__device__ int __nv_vneg2(int __a);
__device__ int __nv_vneg4(int __a);
__device__ int __nv_vnegss2(int __a);
__device__ int __nv_vnegss4(int __a);
__device__ int __nv_vsads2(int __a, int __b);
__device__ int __nv_vsads4(int __a, int __b);
__device__ int __nv_vsadu2(int __a, int __b);
__device__ int __nv_vsadu4(int __a, int __b);
__device__ int __nv_vseteq2(int __a, int __b);
__device__ int __nv_vseteq4(int __a, int __b);
__device__ int __nv_vsetges2(int __a, int __b);
__device__ int __nv_vsetges4(int __a, int __b);
__device__ int __nv_vsetgeu2(int __a, int __b);
__device__ int __nv_vsetgeu4(int __a, int __b);
__device__ int __nv_vsetgts2(int __a, int __b);
__device__ int __nv_vsetgts4(int __a, int __b);
__device__ int __nv_vsetgtu2(int __a, int __b);
__device__ int __nv_vsetgtu4(int __a, int __b);
__device__ int __nv_vsetles2(int __a, int __b);
__device__ int __nv_vsetles4(int __a, int __b);
__device__ int __nv_vsetleu2(int __a, int __b);
__device__ int __nv_vsetleu4(int __a, int __b);
__device__ int __nv_vsetlts2(int __a, int __b);
__device__ int __nv_vsetlts4(int __a, int __b);
__device__ int __nv_vsetltu2(int __a, int __b);
__device__ int __nv_vsetltu4(int __a, int __b);
__device__ int __nv_vsetne2(int __a, int __b);
__device__ int __nv_vsetne4(int __a, int __b);
__device__ int __nv_vsub2(int __a, int __b);
__device__ int __nv_vsub4(int __a, int __b);
__device__ int __nv_vsubss2(int __a, int __b);
__device__ int __nv_vsubss4(int __a, int __b);
__device__ int __nv_vsubus2(int __a, int __b);
__device__ int __nv_vsubus4(int __a, int __b);
__DEVICE__ int __nv_vabs2(int __a);
__DEVICE__ int __nv_vabs4(int __a);
__DEVICE__ int __nv_vabsdiffs2(int __a, int __b);
__DEVICE__ int __nv_vabsdiffs4(int __a, int __b);
__DEVICE__ int __nv_vabsdiffu2(int __a, int __b);
__DEVICE__ int __nv_vabsdiffu4(int __a, int __b);
__DEVICE__ int __nv_vabsss2(int __a);
__DEVICE__ int __nv_vabsss4(int __a);
__DEVICE__ int __nv_vadd2(int __a, int __b);
__DEVICE__ int __nv_vadd4(int __a, int __b);
__DEVICE__ int __nv_vaddss2(int __a, int __b);
__DEVICE__ int __nv_vaddss4(int __a, int __b);
__DEVICE__ int __nv_vaddus2(int __a, int __b);
__DEVICE__ int __nv_vaddus4(int __a, int __b);
__DEVICE__ int __nv_vavgs2(int __a, int __b);
__DEVICE__ int __nv_vavgs4(int __a, int __b);
__DEVICE__ int __nv_vavgu2(int __a, int __b);
__DEVICE__ int __nv_vavgu4(int __a, int __b);
__DEVICE__ int __nv_vcmpeq2(int __a, int __b);
__DEVICE__ int __nv_vcmpeq4(int __a, int __b);
__DEVICE__ int __nv_vcmpges2(int __a, int __b);
__DEVICE__ int __nv_vcmpges4(int __a, int __b);
__DEVICE__ int __nv_vcmpgeu2(int __a, int __b);
__DEVICE__ int __nv_vcmpgeu4(int __a, int __b);
__DEVICE__ int __nv_vcmpgts2(int __a, int __b);
__DEVICE__ int __nv_vcmpgts4(int __a, int __b);
__DEVICE__ int __nv_vcmpgtu2(int __a, int __b);
__DEVICE__ int __nv_vcmpgtu4(int __a, int __b);
__DEVICE__ int __nv_vcmples2(int __a, int __b);
__DEVICE__ int __nv_vcmples4(int __a, int __b);
__DEVICE__ int __nv_vcmpleu2(int __a, int __b);
__DEVICE__ int __nv_vcmpleu4(int __a, int __b);
__DEVICE__ int __nv_vcmplts2(int __a, int __b);
__DEVICE__ int __nv_vcmplts4(int __a, int __b);
__DEVICE__ int __nv_vcmpltu2(int __a, int __b);
__DEVICE__ int __nv_vcmpltu4(int __a, int __b);
__DEVICE__ int __nv_vcmpne2(int __a, int __b);
__DEVICE__ int __nv_vcmpne4(int __a, int __b);
__DEVICE__ int __nv_vhaddu2(int __a, int __b);
__DEVICE__ int __nv_vhaddu4(int __a, int __b);
__DEVICE__ int __nv_vmaxs2(int __a, int __b);
__DEVICE__ int __nv_vmaxs4(int __a, int __b);
__DEVICE__ int __nv_vmaxu2(int __a, int __b);
__DEVICE__ int __nv_vmaxu4(int __a, int __b);
__DEVICE__ int __nv_vmins2(int __a, int __b);
__DEVICE__ int __nv_vmins4(int __a, int __b);
__DEVICE__ int __nv_vminu2(int __a, int __b);
__DEVICE__ int __nv_vminu4(int __a, int __b);
__DEVICE__ int __nv_vneg2(int __a);
__DEVICE__ int __nv_vneg4(int __a);
__DEVICE__ int __nv_vnegss2(int __a);
__DEVICE__ int __nv_vnegss4(int __a);
__DEVICE__ int __nv_vsads2(int __a, int __b);
__DEVICE__ int __nv_vsads4(int __a, int __b);
__DEVICE__ int __nv_vsadu2(int __a, int __b);
__DEVICE__ int __nv_vsadu4(int __a, int __b);
__DEVICE__ int __nv_vseteq2(int __a, int __b);
__DEVICE__ int __nv_vseteq4(int __a, int __b);
__DEVICE__ int __nv_vsetges2(int __a, int __b);
__DEVICE__ int __nv_vsetges4(int __a, int __b);
__DEVICE__ int __nv_vsetgeu2(int __a, int __b);
__DEVICE__ int __nv_vsetgeu4(int __a, int __b);
__DEVICE__ int __nv_vsetgts2(int __a, int __b);
__DEVICE__ int __nv_vsetgts4(int __a, int __b);
__DEVICE__ int __nv_vsetgtu2(int __a, int __b);
__DEVICE__ int __nv_vsetgtu4(int __a, int __b);
__DEVICE__ int __nv_vsetles2(int __a, int __b);
__DEVICE__ int __nv_vsetles4(int __a, int __b);
__DEVICE__ int __nv_vsetleu2(int __a, int __b);
__DEVICE__ int __nv_vsetleu4(int __a, int __b);
__DEVICE__ int __nv_vsetlts2(int __a, int __b);
__DEVICE__ int __nv_vsetlts4(int __a, int __b);
__DEVICE__ int __nv_vsetltu2(int __a, int __b);
__DEVICE__ int __nv_vsetltu4(int __a, int __b);
__DEVICE__ int __nv_vsetne2(int __a, int __b);
__DEVICE__ int __nv_vsetne4(int __a, int __b);
__DEVICE__ int __nv_vsub2(int __a, int __b);
__DEVICE__ int __nv_vsub4(int __a, int __b);
__DEVICE__ int __nv_vsubss2(int __a, int __b);
__DEVICE__ int __nv_vsubss4(int __a, int __b);
__DEVICE__ int __nv_vsubus2(int __a, int __b);
__DEVICE__ int __nv_vsubus4(int __a, int __b);
#endif // CUDA_VERSION
__device__ double __nv_y0(double __a);
__device__ float __nv_y0f(float __a);
__device__ double __nv_y1(double __a);
__device__ float __nv_y1f(float __a);
__device__ float __nv_ynf(int __a, float __b);
__device__ double __nv_yn(int __a, double __b);
__DEVICE__ double __nv_y0(double __a);
__DEVICE__ float __nv_y0f(float __a);
__DEVICE__ double __nv_y1(double __a);
__DEVICE__ float __nv_y1f(float __a);
__DEVICE__ float __nv_ynf(int __a, float __b);
__DEVICE__ double __nv_yn(int __a, double __b);
#if defined(__cplusplus)
} // extern "C"
#endif
#endif // __CLANG_CUDA_LIBDEVICE_DECLARES_H__

View File

@ -1,22 +1,8 @@
/*===- __clang_math_forward_declares.h - Prototypes of __device__ math fns --===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -34,14 +20,37 @@
// would preclude the use of our own __device__ overloads for these functions.
#pragma push_macro("__DEVICE__")
#ifdef _OPENMP
#define __DEVICE__ static __inline__ __attribute__((always_inline))
#else
#define __DEVICE__ \
static __inline__ __attribute__((always_inline)) __attribute__((device))
#endif
__DEVICE__ double abs(double);
__DEVICE__ float abs(float);
__DEVICE__ int abs(int);
// For C++ 17 we need to include noexcept attribute to be compatible
// with the header-defined version. This may be removed once
// variant is supported.
#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L
#define __NOEXCEPT noexcept
#else
#define __NOEXCEPT
#endif
#if !(defined(_OPENMP) && defined(__cplusplus))
__DEVICE__ long abs(long);
__DEVICE__ long long abs(long long);
__DEVICE__ double abs(double);
__DEVICE__ float abs(float);
#endif
// While providing the CUDA declarations and definitions for math functions,
// we may manually define additional functions.
// TODO: Once variant is supported the additional functions will have
// to be removed.
#if defined(_OPENMP) && defined(__cplusplus)
__DEVICE__ const double abs(const double);
__DEVICE__ const float abs(const float);
#endif
__DEVICE__ int abs(int) __NOEXCEPT;
__DEVICE__ double acos(double);
__DEVICE__ float acos(float);
__DEVICE__ double acosh(double);
@ -76,8 +85,8 @@ __DEVICE__ double exp(double);
__DEVICE__ float exp(float);
__DEVICE__ double expm1(double);
__DEVICE__ float expm1(float);
__DEVICE__ double fabs(double);
__DEVICE__ float fabs(float);
__DEVICE__ double fabs(double) __NOEXCEPT;
__DEVICE__ float fabs(float) __NOEXCEPT;
__DEVICE__ double fdim(double, double);
__DEVICE__ float fdim(float, float);
__DEVICE__ double floor(double);
@ -98,12 +107,18 @@ __DEVICE__ double hypot(double, double);
__DEVICE__ float hypot(float, float);
__DEVICE__ int ilogb(double);
__DEVICE__ int ilogb(float);
#ifdef _MSC_VER
__DEVICE__ bool isfinite(long double);
#endif
__DEVICE__ bool isfinite(double);
__DEVICE__ bool isfinite(float);
__DEVICE__ bool isgreater(double, double);
__DEVICE__ bool isgreaterequal(double, double);
__DEVICE__ bool isgreaterequal(float, float);
__DEVICE__ bool isgreater(float, float);
#ifdef _MSC_VER
__DEVICE__ bool isinf(long double);
#endif
__DEVICE__ bool isinf(double);
__DEVICE__ bool isinf(float);
__DEVICE__ bool isless(double, double);
@ -112,18 +127,21 @@ __DEVICE__ bool islessequal(float, float);
__DEVICE__ bool isless(float, float);
__DEVICE__ bool islessgreater(double, double);
__DEVICE__ bool islessgreater(float, float);
#ifdef _MSC_VER
__DEVICE__ bool isnan(long double);
#endif
__DEVICE__ bool isnan(double);
__DEVICE__ bool isnan(float);
__DEVICE__ bool isnormal(double);
__DEVICE__ bool isnormal(float);
__DEVICE__ bool isunordered(double, double);
__DEVICE__ bool isunordered(float, float);
__DEVICE__ long labs(long);
__DEVICE__ long labs(long) __NOEXCEPT;
__DEVICE__ double ldexp(double, int);
__DEVICE__ float ldexp(float, int);
__DEVICE__ double lgamma(double);
__DEVICE__ float lgamma(float);
__DEVICE__ long long llabs(long long);
__DEVICE__ long long llabs(long long) __NOEXCEPT;
__DEVICE__ long long llrint(double);
__DEVICE__ long long llrint(float);
__DEVICE__ double log10(double);
@ -134,6 +152,9 @@ __DEVICE__ double log2(double);
__DEVICE__ float log2(float);
__DEVICE__ double logb(double);
__DEVICE__ float logb(float);
#if defined(_OPENMP) && defined(__cplusplus)
__DEVICE__ long double log(long double);
#endif
__DEVICE__ double log(double);
__DEVICE__ float log(float);
__DEVICE__ long lrint(double);
@ -281,6 +302,7 @@ _GLIBCXX_END_NAMESPACE_VERSION
} // namespace std
#endif
#undef __NOEXCEPT
#pragma pop_macro("__DEVICE__")
#endif

View File

@ -1,22 +1,8 @@
/*===---- __clang_cuda_runtime_wrapper.h - CUDA runtime support -------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -62,7 +48,7 @@
#include "cuda.h"
#if !defined(CUDA_VERSION)
#error "cuda.h did not define CUDA_VERSION"
#elif CUDA_VERSION < 7000 || CUDA_VERSION > 10000
#elif CUDA_VERSION < 7000 || CUDA_VERSION > 10010
#error "Unsupported CUDA version!"
#endif
@ -426,5 +412,15 @@ __device__ inline __cuda_builtin_gridDim_t::operator dim3() const {
#pragma pop_macro("__USE_FAST_MATH__")
#pragma pop_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__")
// CUDA runtime uses this undocumented function to access kernel launch
// configuration. The declaration is in crt/device_functions.h but that file
// includes a lot of other stuff we don't want. Instead, we'll provide our own
// declaration for it here.
#if CUDA_VERSION >= 9020
extern "C" unsigned __cudaPushCallConfiguration(dim3 gridDim, dim3 blockDim,
size_t sharedMem = 0,
void *stream = 0);
#endif
#endif // __CUDA__
#endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__

View File

@ -1,24 +1,8 @@
/*===---- __stddef_max_align_t.h - Definition of max_align_t for modules ---===
*
* Copyright (c) 2014 Chandler Carruth
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- __wmmintrin_aes.h - AES intrinsics -------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- __wmmintrin_pclmul.h - PCMUL intrinsics ---------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- adxintrin.h - ADX intrinsics -------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

20
lib/include/altivec.h vendored
View File

@ -1,22 +1,8 @@
/*===---- altivec.h - Standard header for type generic math ---------------===*\
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
\*===----------------------------------------------------------------------===*/

View File

@ -1,22 +1,8 @@
/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- arm64intr.h - ARM64 Windows intrinsics -------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -611,6 +597,14 @@ __crc32cd(uint32_t __a, uint64_t __b) {
}
#endif
/* Armv8.3-A Javascript conversion intrinsic */
#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_JCVT)
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__jcvt(double __a) {
return __builtin_arm_jcvt(__a);
}
#endif
/* 10.1 Special register intrinsics */
#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
@ -619,6 +613,16 @@ __crc32cd(uint32_t __a, uint64_t __b) {
#define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v)
#define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v)
// Memory Tagging Extensions (MTE) Intrinsics
#if __ARM_FEATURE_MEMORY_TAGGING
#define __arm_mte_create_random_tag(__ptr, __mask) __builtin_arm_irg(__ptr, __mask)
#define __arm_mte_increment_tag(__ptr, __tag_offset) __builtin_arm_addg(__ptr, __tag_offset)
#define __arm_mte_exclude_tag(__ptr, __excluded) __builtin_arm_gmi(__ptr, __excluded)
#define __arm_mte_get_tag(__ptr) __builtin_arm_ldg(__ptr)
#define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr)
#define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
#endif
#if defined(__cplusplus)
}
#endif

396
lib/include/arm_neon.h vendored

File diff suppressed because it is too large Load Diff

20
lib/include/armintr.h vendored
View File

@ -1,22 +1,8 @@
/*===---- armintr.h - ARM Windows intrinsics -------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -146,21 +132,13 @@ _mm256_andnot_si256(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_avg_epu8(__m256i __a, __m256i __b)
{
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
return (__m256i)__builtin_convertvector(
((__builtin_convertvector((__v32qu)__a, __v32hu) +
__builtin_convertvector((__v32qu)__b, __v32hu)) + 1)
>> 1, __v32qu);
return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_avg_epu16(__m256i __a, __m256i __b)
{
typedef unsigned int __v16su __attribute__((__vector_size__(64)));
return (__m256i)__builtin_convertvector(
((__builtin_convertvector((__v16hu)__a, __v16su) +
__builtin_convertvector((__v16hu)__b, __v16su)) + 1)
>> 1, __v16hu);
return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256

279
lib/include/avx512bf16intrin.h vendored Normal file
View File

@ -0,0 +1,279 @@
/*===------------ avx512bf16intrin.h - AVX512_BF16 intrinsics --------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512bf16intrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512BF16INTRIN_H
#define __AVX512BF16INTRIN_H
typedef short __m512bh __attribute__((__vector_size__(64), __aligned__(64)));
typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
typedef unsigned short __bfloat16;
#define __DEFAULT_FN_ATTRS512 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \
__min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512bf16")))
/// Convert One BF16 Data to One Single Float Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic does not correspond to a specific instruction.
///
/// \param __A
/// A bfloat data.
/// \returns A float data whose sign field and exponent field keep unchanged,
/// and fraction field is extended to 23 bits.
static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtsbh_ss(__bfloat16 __A) {
return __builtin_ia32_cvtsbf162ss_32(__A);
}
/// Convert Two Packed Single Data to One Packed BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNE2PS2BF16 </c> instructions.
///
/// \param __A
/// A 512-bit vector of [16 x float].
/// \param __B
/// A 512-bit vector of [16 x float].
/// \returns A 512-bit vector of [32 x bfloat] whose lower 256 bits come from
/// conversion of __B, and higher 256 bits come from conversion of __A.
static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_cvtne2ps_pbh(__m512 __A, __m512 __B) {
return (__m512bh)__builtin_ia32_cvtne2ps2bf16_512((__v16sf) __A,
(__v16sf) __B);
}
/// Convert Two Packed Single Data to One Packed BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNE2PS2BF16 </c> instructions.
///
/// \param __A
/// A 512-bit vector of [16 x float].
/// \param __B
/// A 512-bit vector of [16 x float].
/// \param __W
/// A 512-bit vector of [32 x bfloat].
/// \param __U
/// A 32-bit mask value specifying what is chosen for each element.
/// A 1 means conversion of __A or __B. A 0 means element from __W.
/// \returns A 512-bit vector of [32 x bfloat] whose lower 256 bits come from
/// conversion of __B, and higher 256 bits come from conversion of __A.
static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_cvtne2ps_pbh(__m512bh __W, __mmask32 __U, __m512 __A, __m512 __B) {
return (__m512bh)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_cvtne2ps_pbh(__A, __B),
(__v32hi)__W);
}
/// Convert Two Packed Single Data to One Packed BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNE2PS2BF16 </c> instructions.
///
/// \param __A
/// A 512-bit vector of [16 x float].
/// \param __B
/// A 512-bit vector of [16 x float].
/// \param __U
/// A 32-bit mask value specifying what is chosen for each element.
/// A 1 means conversion of __A or __B. A 0 means element is zero.
/// \returns A 512-bit vector of [32 x bfloat] whose lower 256 bits come from
/// conversion of __B, and higher 256 bits come from conversion of __A.
static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtne2ps_pbh(__mmask32 __U, __m512 __A, __m512 __B) {
return (__m512bh)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_cvtne2ps_pbh(__A, __B),
(__v32hi)_mm512_setzero_si512());
}
/// Convert Packed Single Data to Packed BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
///
/// \param __A
/// A 512-bit vector of [16 x float].
/// \returns A 256-bit vector of [16 x bfloat] come from conversion of __A.
static __inline__ __m256bh __DEFAULT_FN_ATTRS512
_mm512_cvtneps_pbh(__m512 __A) {
return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A,
(__v16hi)_mm256_undefined_si256(),
(__mmask16)-1);
}
/// Convert Packed Single Data to Packed BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
///
/// \param __A
/// A 512-bit vector of [16 x float].
/// \param __W
/// A 256-bit vector of [16 x bfloat].
/// \param __U
/// A 16-bit mask value specifying what is chosen for each element.
/// A 1 means conversion of __A. A 0 means element from __W.
/// \returns A 256-bit vector of [16 x bfloat] come from conversion of __A.
static __inline__ __m256bh __DEFAULT_FN_ATTRS512
_mm512_mask_cvtneps_pbh(__m256bh __W, __mmask16 __U, __m512 __A) {
return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A,
(__v16hi)__W,
(__mmask16)__U);
}
/// Convert Packed Single Data to Packed BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
///
/// \param __A
/// A 512-bit vector of [16 x float].
/// \param __U
/// A 16-bit mask value specifying what is chosen for each element.
/// A 1 means conversion of __A. A 0 means element is zero.
/// \returns A 256-bit vector of [16 x bfloat] come from conversion of __A.
static __inline__ __m256bh __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtneps_pbh(__mmask16 __U, __m512 __A) {
return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A,
(__v16hi)_mm256_setzero_si256(),
(__mmask16)__U);
}
/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VDPBF16PS </c> instructions.
///
/// \param __A
/// A 512-bit vector of [32 x bfloat].
/// \param __B
/// A 512-bit vector of [32 x bfloat].
/// \param __D
/// A 512-bit vector of [16 x float].
/// \returns A 512-bit vector of [16 x float] comes from Dot Product of
/// __A, __B and __D
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_dpbf16_ps(__m512 __D, __m512bh __A, __m512bh __B) {
return (__m512)__builtin_ia32_dpbf16ps_512((__v16sf) __D,
(__v16si) __A,
(__v16si) __B);
}
/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VDPBF16PS </c> instructions.
///
/// \param __A
/// A 512-bit vector of [32 x bfloat].
/// \param __B
/// A 512-bit vector of [32 x bfloat].
/// \param __D
/// A 512-bit vector of [16 x float].
/// \param __U
/// A 16-bit mask value specifying what is chosen for each element.
/// A 1 means __A and __B's dot product accumulated with __D. A 0 means __D.
/// \returns A 512-bit vector of [16 x float] comes from Dot Product of
/// __A, __B and __D
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_dpbf16_ps(__m512 __D, __mmask16 __U, __m512bh __A, __m512bh __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_dpbf16_ps(__D, __A, __B),
(__v16sf)__D);
}
/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VDPBF16PS </c> instructions.
///
/// \param __A
/// A 512-bit vector of [32 x bfloat].
/// \param __B
/// A 512-bit vector of [32 x bfloat].
/// \param __D
/// A 512-bit vector of [16 x float].
/// \param __U
/// A 16-bit mask value specifying what is chosen for each element.
/// A 1 means __A and __B's dot product accumulated with __D. A 0 means 0.
/// \returns A 512-bit vector of [16 x float] comes from Dot Product of
/// __A, __B and __D
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_dpbf16_ps(__mmask16 __U, __m512 __D, __m512bh __A, __m512bh __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_dpbf16_ps(__D, __A, __B),
(__v16sf)_mm512_setzero_si512());
}
/// Convert Packed BF16 Data to Packed float Data.
///
/// \headerfile <x86intrin.h>
///
/// \param __A
/// A 256-bit vector of [16 x bfloat].
/// \returns A 512-bit vector of [16 x float] come from convertion of __A
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpbh_ps(__m256bh __A) {
return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32(
(__m512i)_mm512_cvtepi16_epi32((__m256i)__A), 16));
}
/// Convert Packed BF16 Data to Packed float Data using zeroing mask.
///
/// \headerfile <x86intrin.h>
///
/// \param __U
/// A 16-bit mask. Elements are zeroed out when the corresponding mask
/// bit is not set.
/// \param __A
/// A 256-bit vector of [16 x bfloat].
/// \returns A 512-bit vector of [16 x float] come from convertion of __A
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpbh_ps(__mmask16 __U, __m256bh __A) {
return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32(
(__m512i)_mm512_maskz_cvtepi16_epi32((__mmask16)__U, (__m256i)__A), 16));
}
/// Convert Packed BF16 Data to Packed float Data using merging mask.
///
/// \headerfile <x86intrin.h>
///
/// \param __S
/// A 512-bit vector of [16 x float]. Elements are copied from __S when
/// the corresponding mask bit is not set.
/// \param __U
/// A 16-bit mask.
/// \param __A
/// A 256-bit vector of [16 x bfloat].
/// \returns A 512-bit vector of [16 x float] come from convertion of __A
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpbh_ps(__m512 __S, __mmask16 __U, __m256bh __A) {
return _mm512_castsi512_ps((__m512i)_mm512_mask_slli_epi32(
(__m512i)__S, (__mmask16)__U,
(__m512i)_mm512_cvtepi16_epi32((__m256i)__A), 16));
}
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS512
#endif

View File

@ -1,23 +1,9 @@
/*===------------- avx512bitalgintrin.h - BITALG intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,23 +1,9 @@
/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -719,11 +705,7 @@ _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_avg_epu8 (__m512i __A, __m512i __B)
{
typedef unsigned short __v64hu __attribute__((__vector_size__(128)));
return (__m512i)__builtin_convertvector(
((__builtin_convertvector((__v64qu) __A, __v64hu) +
__builtin_convertvector((__v64qu) __B, __v64hu)) + 1)
>> 1, __v64qu);
return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
@ -746,11 +728,7 @@ _mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_avg_epu16 (__m512i __A, __m512i __B)
{
typedef unsigned int __v32su __attribute__((__vector_size__(128)));
return (__m512i)__builtin_convertvector(
((__builtin_convertvector((__v32hu) __A, __v32su) +
__builtin_convertvector((__v32hu) __B, __v32su)) + 1)
>> 1, __v32hu);
return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
@ -1733,14 +1711,14 @@ _mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
(__v64qi) _mm512_setzero_si512());
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
{
return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
(__mmask64) __B);
}
static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
{
return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
@ -1751,7 +1729,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_epi16 (void const *__P)
{
struct __loadu_epi16 {
__m512i __v;
__m512i_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi16*)__P)->__v;
}
@ -1777,7 +1755,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_epi8 (void const *__P)
{
struct __loadu_epi8 {
__m512i __v;
__m512i_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi8*)__P)->__v;
}
@ -1803,7 +1781,7 @@ static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi16 (void *__P, __m512i __A)
{
struct __storeu_epi16 {
__m512i __v;
__m512i_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi16*)__P)->__v = __A;
}
@ -1820,7 +1798,7 @@ static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi8 (void *__P, __m512i __A)
{
struct __storeu_epi8 {
__m512i __v;
__m512i_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi8*)__P)->__v = __A;
}

View File

@ -1,23 +1,9 @@
/*===------------- avx512cdintrin.h - AVX512CD intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -34,49 +20,45 @@
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_conflict_epi64 (__m512i __A)
{
return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
(__v8di) _mm512_setzero_si512 (),
(__mmask8) -1);
return (__m512i) __builtin_ia32_vpconflictdi_512 ((__v8di) __A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
(__v8di) __W,
(__mmask8) __U);
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_conflict_epi64(__A),
(__v8di)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
(__v8di) _mm512_setzero_si512 (),
(__mmask8) __U);
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_conflict_epi64(__A),
(__v8di)_mm512_setzero_si512 ());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_conflict_epi32 (__m512i __A)
{
return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
(__v16si) _mm512_setzero_si512 (),
(__mmask16) -1);
return (__m512i) __builtin_ia32_vpconflictsi_512 ((__v16si) __A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
(__v16si) __W,
(__mmask16) __U);
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
(__v16si)_mm512_conflict_epi32(__A),
(__v16si)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
(__v16si) _mm512_setzero_si512 (),
(__mmask16) __U);
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
(__v16si)_mm512_conflict_epi32(__A),
(__v16si)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS

View File

@ -1,22 +1,8 @@
/*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -40,9 +26,13 @@ typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));
typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
@ -1991,12 +1981,12 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
#define _mm512_mask_add_round_pd(W, U, A, B, R) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_add_round_pd((A), (B), (R)), \
(__v8df)(__m512d)(W));
(__v8df)(__m512d)(W))
#define _mm512_maskz_add_round_pd(U, A, B, R) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_add_round_pd((A), (B), (R)), \
(__v8df)_mm512_setzero_pd());
(__v8df)_mm512_setzero_pd())
#define _mm512_add_round_ps(A, B, R) \
(__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
@ -2005,12 +1995,12 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
#define _mm512_mask_add_round_ps(W, U, A, B, R) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_add_round_ps((A), (B), (R)), \
(__v16sf)(__m512)(W));
(__v16sf)(__m512)(W))
#define _mm512_maskz_add_round_ps(U, A, B, R) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_add_round_ps((A), (B), (R)), \
(__v16sf)_mm512_setzero_ps());
(__v16sf)_mm512_setzero_ps())
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
@ -2106,12 +2096,12 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_sub_round_pd((A), (B), (R)), \
(__v8df)(__m512d)(W));
(__v8df)(__m512d)(W))
#define _mm512_maskz_sub_round_pd(U, A, B, R) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_sub_round_pd((A), (B), (R)), \
(__v8df)_mm512_setzero_pd());
(__v8df)_mm512_setzero_pd())
#define _mm512_sub_round_ps(A, B, R) \
(__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
@ -2120,12 +2110,12 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
(__v16sf)(__m512)(W));
(__v16sf)(__m512)(W))
#define _mm512_maskz_sub_round_ps(U, A, B, R) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
(__v16sf)_mm512_setzero_ps());
(__v16sf)_mm512_setzero_ps())
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
@ -2221,12 +2211,12 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_mul_round_pd((A), (B), (R)), \
(__v8df)(__m512d)(W));
(__v8df)(__m512d)(W))
#define _mm512_maskz_mul_round_pd(U, A, B, R) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_mul_round_pd((A), (B), (R)), \
(__v8df)_mm512_setzero_pd());
(__v8df)_mm512_setzero_pd())
#define _mm512_mul_round_ps(A, B, R) \
(__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
@ -2235,12 +2225,12 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
(__v16sf)(__m512)(W));
(__v16sf)(__m512)(W))
#define _mm512_maskz_mul_round_ps(U, A, B, R) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
(__v16sf)_mm512_setzero_ps());
(__v16sf)_mm512_setzero_ps())
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
@ -2349,12 +2339,12 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
#define _mm512_mask_div_round_pd(W, U, A, B, R) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_div_round_pd((A), (B), (R)), \
(__v8df)(__m512d)(W));
(__v8df)(__m512d)(W))
#define _mm512_maskz_div_round_pd(U, A, B, R) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_div_round_pd((A), (B), (R)), \
(__v8df)_mm512_setzero_pd());
(__v8df)_mm512_setzero_pd())
#define _mm512_div_round_ps(A, B, R) \
(__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
@ -2363,12 +2353,12 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
#define _mm512_mask_div_round_ps(W, U, A, B, R) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_div_round_ps((A), (B), (R)), \
(__v16sf)(__m512)(W));
(__v16sf)(__m512)(W))
#define _mm512_maskz_div_round_ps(U, A, B, R) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_div_round_ps((A), (B), (R)), \
(__v16sf)_mm512_setzero_ps());
(__v16sf)_mm512_setzero_ps())
#define _mm512_roundscale_ps(A, B) \
(__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
@ -3789,20 +3779,9 @@ _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
(__v16hi)_mm256_setzero_si256(), \
(__mmask16)(W))
#define _mm512_cvtps_ph(A, I) \
(__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
(__v16hi)_mm256_setzero_si256(), \
(__mmask16)-1)
#define _mm512_mask_cvtps_ph(U, W, A, I) \
(__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
(__v16hi)(__m256i)(U), \
(__mmask16)(W))
#define _mm512_maskz_cvtps_ph(W, A, I) \
(__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
(__v16hi)_mm256_setzero_si256(), \
(__mmask16)(W))
#define _mm512_cvtps_ph _mm512_cvt_roundps_ph
#define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph
#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
#define _mm512_cvt_roundph_ps(A, R) \
(__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
@ -4324,7 +4303,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_si512 (void const *__P)
{
struct __loadu_si512 {
__m512i __v;
__m512i_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_si512*)__P)->__v;
}
@ -4333,7 +4312,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_epi32 (void const *__P)
{
struct __loadu_epi32 {
__m512i __v;
__m512i_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi32*)__P)->__v;
}
@ -4360,7 +4339,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_epi64 (void const *__P)
{
struct __loadu_epi64 {
__m512i __v;
__m512i_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi64*)__P)->__v;
}
@ -4420,7 +4399,7 @@ static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_loadu_pd(void const *__p)
{
struct __loadu_pd {
__m512d __v;
__m512d_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_pd*)__p)->__v;
}
@ -4429,7 +4408,7 @@ static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_loadu_ps(void const *__p)
{
struct __loadu_ps {
__m512 __v;
__m512_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_ps*)__p)->__v;
}
@ -4504,7 +4483,7 @@ static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi64 (void *__P, __m512i __A)
{
struct __storeu_epi64 {
__m512i __v;
__m512i_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi64*)__P)->__v = __A;
}
@ -4520,7 +4499,7 @@ static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_si512 (void *__P, __m512i __A)
{
struct __storeu_si512 {
__m512i __v;
__m512i_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_si512*)__P)->__v = __A;
}
@ -4529,7 +4508,7 @@ static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi32 (void *__P, __m512i __A)
{
struct __storeu_epi32 {
__m512i __v;
__m512i_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi32*)__P)->__v = __A;
}
@ -4551,7 +4530,7 @@ static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_pd(void *__P, __m512d __A)
{
struct __storeu_pd {
__m512d __v;
__m512d_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_pd*)__P)->__v = __A;
}
@ -4567,7 +4546,7 @@ static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_ps(void *__P, __m512 __A)
{
struct __storeu_ps {
__m512 __v;
__m512_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_ps*)__P)->__v = __A;
}
@ -9329,7 +9308,7 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
__v2du __t6 = __t4 op __t5; \
__v2du __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
__v2du __t8 = __t6 op __t7; \
return __t8[0];
return __t8[0]
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) {
_mm512_mask_reduce_operator(+);
@ -9381,7 +9360,7 @@ _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
__m128d __t6 = __t4 op __t5; \
__m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
__m128d __t8 = __t6 op __t7; \
return __t8[0];
return __t8[0]
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
_mm512_mask_reduce_operator(+);
@ -9415,7 +9394,7 @@ _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
__v4su __t8 = __t6 op __t7; \
__v4su __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
__v4su __t10 = __t8 op __t9; \
return __t10[0];
return __t10[0]
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_reduce_add_epi32(__m512i __W) {
@ -9473,7 +9452,7 @@ _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
__m128 __t8 = __t6 op __t7; \
__m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
__m128 __t10 = __t8 op __t9; \
return __t10[0];
return __t10[0]
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_reduce_add_ps(__m512 __W) {
@ -9505,7 +9484,7 @@ _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
__m512i __t4 = _mm512_##op(__t2, __t3); \
__m512i __t5 = (__m512i)__builtin_shufflevector((__v8di)__t4, (__v8di)__t4, 1, 0, 3, 2, 5, 4, 7, 6); \
__v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \
return __t6[0];
return __t6[0]
static __inline__ long long __DEFAULT_FN_ATTRS512
_mm512_reduce_max_epi64(__m512i __V) {
@ -9563,7 +9542,7 @@ _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
__m128i __t8 = _mm_##op(__t6, __t7); \
__m128i __t9 = (__m128i)__builtin_shufflevector((__v4si)__t8, (__v4si)__t8, 1, 0, 3, 2); \
__v4si __t10 = (__v4si)_mm_##op(__t8, __t9); \
return __t10[0];
return __t10[0]
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_reduce_max_epi32(__m512i __V) {
@ -9619,7 +9598,7 @@ _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
__m128d __t6 = _mm_##op(__t4, __t5); \
__m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
__m128d __t8 = _mm_##op(__t6, __t7); \
return __t8[0];
return __t8[0]
static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_reduce_max_pd(__m512d __V) {
@ -9655,7 +9634,7 @@ _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
__m128 __t8 = _mm_##op(__t6, __t7); \
__m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
__m128 __t10 = _mm_##op(__t8, __t9); \
return __t10[0];
return __t10[0]
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_reduce_max_ps(__m512 __V) {

View File

@ -1,23 +1,9 @@
/*===------------- avx512ifmaintrin.h - IFMA intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,23 +1,9 @@
/*===------------- avx512ifmavlintrin.h - IFMA intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,23 +1,9 @@
/*===------------- avx512pfintrin.h - PF intrinsics ------------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,23 +1,9 @@
/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,23 +1,9 @@
/*===------------- avx512vbmiintrin.h - VBMI intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,23 +1,9 @@
/*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

474
lib/include/avx512vlbf16intrin.h vendored Normal file
View File

@ -0,0 +1,474 @@
/*===--------- avx512vlbf16intrin.h - AVX512_BF16 intrinsics ---------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512vlbf16intrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VLBF16INTRIN_H
#define __AVX512VLBF16INTRIN_H
typedef short __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl, avx512bf16"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl, avx512bf16"), __min_vector_width__(256)))
/// Convert Two Packed Single Data to One Packed BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNE2PS2BF16 </c> instructions.
///
/// \param __A
/// A 128-bit vector of [4 x float].
/// \param __B
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from
/// conversion of __B, and higher 64 bits come from conversion of __A.
static __inline__ __m128bh __DEFAULT_FN_ATTRS128
_mm_cvtne2ps_pbh(__m128 __A, __m128 __B) {
return (__m128bh)__builtin_ia32_cvtne2ps2bf16_128((__v4sf) __A,
(__v4sf) __B);
}
/// Convert Two Packed Single Data to One Packed BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNE2PS2BF16 </c> instructions.
///
/// \param __A
/// A 128-bit vector of [4 x float].
/// \param __B
/// A 128-bit vector of [4 x float].
/// \param __W
/// A 128-bit vector of [8 x bfloat].
/// \param __U
/// A 8-bit mask value specifying what is chosen for each element.
/// A 1 means conversion of __A or __B. A 0 means element from __W.
/// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from
/// conversion of __B, and higher 64 bits come from conversion of __A.
static __inline__ __m128bh __DEFAULT_FN_ATTRS128
_mm_mask_cvtne2ps_pbh(__m128bh __W, __mmask8 __U, __m128 __A, __m128 __B) {
return (__m128bh)__builtin_ia32_selectw_128((__mmask8)__U,
(__v8hi)_mm_cvtne2ps_pbh(__A, __B),
(__v8hi)__W);
}
/// Convert Two Packed Single Data to One Packed BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNE2PS2BF16 </c> instructions.
///
/// \param __A
/// A 128-bit vector of [4 x float].
/// \param __B
/// A 128-bit vector of [4 x float].
/// \param __U
/// A 8-bit mask value specifying what is chosen for each element.
/// A 1 means conversion of __A or __B. A 0 means element is zero.
/// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from
/// conversion of __B, and higher 64 bits come from conversion of __A.
static __inline__ __m128bh __DEFAULT_FN_ATTRS128
_mm_maskz_cvtne2ps_pbh(__mmask8 __U, __m128 __A, __m128 __B) {
return (__m128bh)__builtin_ia32_selectw_128((__mmask8)__U,
(__v8hi)_mm_cvtne2ps_pbh(__A, __B),
(__v8hi)_mm_setzero_si128());
}
/// Convert Two Packed Single Data to One Packed BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNE2PS2BF16 </c> instructions.
///
/// \param __A
/// A 256-bit vector of [8 x float].
/// \param __B
/// A 256-bit vector of [8 x float].
/// \returns A 256-bit vector of [16 x bfloat] whose lower 128 bits come from
/// conversion of __B, and higher 128 bits come from conversion of __A.
static __inline__ __m256bh __DEFAULT_FN_ATTRS256
_mm256_cvtne2ps_pbh(__m256 __A, __m256 __B) {
return (__m256bh)__builtin_ia32_cvtne2ps2bf16_256((__v8sf) __A,
(__v8sf) __B);
}
/// Convert Two Packed Single Data to One Packed BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNE2PS2BF16 </c> instructions.
///
/// \param __A
/// A 256-bit vector of [8 x float].
/// \param __B
/// A 256-bit vector of [8 x float].
/// \param __W
/// A 256-bit vector of [16 x bfloat].
/// \param __U
/// A 16-bit mask value specifying what is chosen for each element.
/// A 1 means conversion of __A or __B. A 0 means element from __W.
/// \returns A 256-bit vector of [16 x bfloat] whose lower 128 bits come from
/// conversion of __B, and higher 128 bits come from conversion of __A.
static __inline__ __m256bh __DEFAULT_FN_ATTRS256
_mm256_mask_cvtne2ps_pbh(__m256bh __W, __mmask16 __U, __m256 __A, __m256 __B) {
return (__m256bh)__builtin_ia32_selectw_256((__mmask16)__U,
(__v16hi)_mm256_cvtne2ps_pbh(__A, __B),
(__v16hi)__W);
}
/// Convert Two Packed Single Data to One Packed BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNE2PS2BF16 </c> instructions.
///
/// \param __A
/// A 256-bit vector of [8 x float].
/// \param __B
/// A 256-bit vector of [8 x float].
/// \param __U
/// A 16-bit mask value specifying what is chosen for each element.
/// A 1 means conversion of __A or __B. A 0 means element is zero.
/// \returns A 256-bit vector of [16 x bfloat] whose lower 128 bits come from
/// conversion of __B, and higher 128 bits come from conversion of __A.
static __inline__ __m256bh __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtne2ps_pbh(__mmask16 __U, __m256 __A, __m256 __B) {
return (__m256bh)__builtin_ia32_selectw_256((__mmask16)__U,
(__v16hi)_mm256_cvtne2ps_pbh(__A, __B),
(__v16hi)_mm256_setzero_si256());
}
/// Convert Packed Single Data to Packed BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
///
/// \param __A
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from
/// conversion of __A, and higher 64 bits are 0.
static __inline__ __m128bh __DEFAULT_FN_ATTRS128
_mm_cvtneps_pbh(__m128 __A) {
return (__m128bh)__builtin_ia32_cvtneps2bf16_128_mask((__v4sf) __A,
(__v8hi)_mm_undefined_si128(),
(__mmask8)-1);
}
/// Convert Packed Single Data to Packed BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
///
/// \param __A
/// A 128-bit vector of [4 x float].
/// \param __W
/// A 128-bit vector of [8 x bfloat].
/// \param __U
/// A 4-bit mask value specifying what is chosen for each element.
/// A 1 means conversion of __A. A 0 means element from __W.
/// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from
/// conversion of __A, and higher 64 bits are 0.
static __inline__ __m128bh __DEFAULT_FN_ATTRS128
_mm_mask_cvtneps_pbh(__m128bh __W, __mmask8 __U, __m128 __A) {
return (__m128bh)__builtin_ia32_cvtneps2bf16_128_mask((__v4sf) __A,
(__v8hi)__W,
(__mmask8)__U);
}
/// Convert Packed Single Data to Packed BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
///
/// \param __A
/// A 128-bit vector of [4 x float].
/// \param __U
/// A 4-bit mask value specifying what is chosen for each element.
/// A 1 means conversion of __A. A 0 means element is zero.
/// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from
/// conversion of __A, and higher 64 bits are 0.
static __inline__ __m128bh __DEFAULT_FN_ATTRS128
_mm_maskz_cvtneps_pbh(__mmask8 __U, __m128 __A) {
return (__m128bh)__builtin_ia32_cvtneps2bf16_128_mask((__v4sf) __A,
(__v8hi)_mm_setzero_si128(),
(__mmask8)__U);
}
/// Convert Packed Single Data to Packed BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
///
/// \param __A
/// A 256-bit vector of [8 x float].
/// \returns A 128-bit vector of [8 x bfloat] comes from conversion of __A.
static __inline__ __m128bh __DEFAULT_FN_ATTRS256
_mm256_cvtneps_pbh(__m256 __A) {
return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A,
(__v8hi)_mm_undefined_si128(),
(__mmask8)-1);
}
/// Convert Packed Single Data to Packed BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
///
/// \param __A
/// A 256-bit vector of [8 x float].
/// \param __W
/// A 256-bit vector of [8 x bfloat].
/// \param __U
/// A 8-bit mask value specifying what is chosen for each element.
/// A 1 means conversion of __A. A 0 means element from __W.
/// \returns A 128-bit vector of [8 x bfloat] comes from conversion of __A.
static __inline__ __m128bh __DEFAULT_FN_ATTRS256
_mm256_mask_cvtneps_pbh(__m128bh __W, __mmask8 __U, __m256 __A) {
return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A,
(__v8hi)__W,
(__mmask8)__U);
}
/// Convert Packed Single Data to Packed BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
///
/// \param __A
/// A 256-bit vector of [8 x float].
/// \param __U
/// A 8-bit mask value specifying what is chosen for each element.
/// A 1 means conversion of __A. A 0 means element is zero.
/// \returns A 128-bit vector of [8 x bfloat] comes from conversion of __A.
static __inline__ __m128bh __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtneps_pbh(__mmask8 __U, __m256 __A) {
return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A,
(__v8hi)_mm_setzero_si128(),
(__mmask8)__U);
}
/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VDPBF16PS </c> instructions.
///
/// \param __A
/// A 128-bit vector of [8 x bfloat].
/// \param __B
/// A 128-bit vector of [8 x bfloat].
/// \param __D
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] comes from Dot Product of
/// __A, __B and __D
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_dpbf16_ps(__m128 __D, __m128bh __A, __m128bh __B) {
return (__m128)__builtin_ia32_dpbf16ps_128((__v4sf)__D,
(__v4si)__A,
(__v4si)__B);
}
/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VDPBF16PS </c> instructions.
///
/// \param __A
/// A 128-bit vector of [8 x bfloat].
/// \param __B
/// A 128-bit vector of [8 x bfloat].
/// \param __D
/// A 128-bit vector of [4 x float].
/// \param __U
/// A 8-bit mask value specifying what is chosen for each element.
/// A 1 means __A and __B's dot product accumulated with __D. A 0 means __D.
/// \returns A 128-bit vector of [4 x float] comes from Dot Product of
/// __A, __B and __D
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_dpbf16_ps(__m128 __D, __mmask8 __U, __m128bh __A, __m128bh __B) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm_dpbf16_ps(__D, __A, __B),
(__v4sf)__D);
}
/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VDPBF16PS </c> instructions.
///
/// \param __A
/// A 128-bit vector of [8 x bfloat].
/// \param __B
/// A 128-bit vector of [8 x bfloat].
/// \param __D
/// A 128-bit vector of [4 x float].
/// \param __U
/// A 8-bit mask value specifying what is chosen for each element.
/// A 1 means __A and __B's dot product accumulated with __D. A 0 means 0.
/// \returns A 128-bit vector of [4 x float] comes from Dot Product of
/// __A, __B and __D
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_dpbf16_ps(__mmask8 __U, __m128 __D, __m128bh __A, __m128bh __B) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm_dpbf16_ps(__D, __A, __B),
(__v4sf)_mm_setzero_si128());
}
/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VDPBF16PS </c> instructions.
///
/// \param __A
/// A 256-bit vector of [16 x bfloat].
/// \param __B
/// A 256-bit vector of [16 x bfloat].
/// \param __D
/// A 256-bit vector of [8 x float].
/// \returns A 256-bit vector of [8 x float] comes from Dot Product of
/// __A, __B and __D
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_dpbf16_ps(__m256 __D, __m256bh __A, __m256bh __B) {
return (__m256)__builtin_ia32_dpbf16ps_256((__v8sf)__D,
(__v8si)__A,
(__v8si)__B);
}
/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VDPBF16PS </c> instructions.
///
/// \param __A
/// A 256-bit vector of [16 x bfloat].
/// \param __B
/// A 256-bit vector of [16 x bfloat].
/// \param __D
/// A 256-bit vector of [8 x float].
/// \param __U
/// A 16-bit mask value specifying what is chosen for each element.
/// A 1 means __A and __B's dot product accumulated with __D. A 0 means __D.
/// \returns A 256-bit vector of [8 x float] comes from Dot Product of
/// __A, __B and __D
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_dpbf16_ps(__m256 __D, __mmask8 __U, __m256bh __A, __m256bh __B) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
(__v8sf)_mm256_dpbf16_ps(__D, __A, __B),
(__v8sf)__D);
}
/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VDPBF16PS </c> instructions.
///
/// \param __A
/// A 256-bit vector of [16 x bfloat].
/// \param __B
/// A 256-bit vector of [16 x bfloat].
/// \param __D
/// A 256-bit vector of [8 x float].
/// \param __U
/// A 8-bit mask value specifying what is chosen for each element.
/// A 1 means __A and __B's dot product accumulated with __D. A 0 means 0.
/// \returns A 256-bit vector of [8 x float] comes from Dot Product of
/// __A, __B and __D
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maskz_dpbf16_ps(__mmask8 __U, __m256 __D, __m256bh __A, __m256bh __B) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
(__v8sf)_mm256_dpbf16_ps(__D, __A, __B),
(__v8sf)_mm256_setzero_si256());
}
/// Convert One Single float Data to One BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
///
/// \param __A
/// A float data.
/// \returns A bf16 data whose sign field and exponent field keep unchanged,
/// and fraction field is truncated to 7 bits.
static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) {
__v4sf __V = {__A, 0, 0, 0};
__v8hi __R = __builtin_ia32_cvtneps2bf16_128_mask(
(__v4sf)__V, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
return __R[0];
}
/// Convert Packed BF16 Data to Packed float Data.
///
/// \headerfile <x86intrin.h>
///
/// \param __A
/// A 128-bit vector of [8 x bfloat].
/// \returns A 256-bit vector of [8 x float] come from convertion of __A
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps(__m128bh __A) {
return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32(
(__m256i)_mm256_cvtepi16_epi32((__m128i)__A), 16));
}
/// Convert Packed BF16 Data to Packed float Data using zeroing mask.
///
/// \headerfile <x86intrin.h>
///
/// \param __U
/// A 8-bit mask. Elements are zeroed out when the corresponding mask
/// bit is not set.
/// \param __A
/// A 128-bit vector of [8 x bfloat].
/// \returns A 256-bit vector of [8 x float] come from convertion of __A
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32(
(__m256i)_mm256_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16));
}
/// Convert Packed BF16 Data to Packed float Data using merging mask.
///
/// \headerfile <x86intrin.h>
///
/// \param __S
/// A 256-bit vector of [8 x float]. Elements are copied from __S when
/// the corresponding mask bit is not set.
/// \param __U
/// A 8-bit mask. Elements are zeroed out when the corresponding mask
/// bit is not set.
/// \param __A
/// A 128-bit vector of [8 x bfloat].
/// \returns A 256-bit vector of [8 x float] come from convertion of __A
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_cvtpbh_ps(__m256 __S, __mmask8 __U, __m128bh __A) {
return _mm256_castsi256_ps((__m256i)_mm256_mask_slli_epi32(
(__m256i)__S, (__mmask8)__U, (__m256i)_mm256_cvtepi16_epi32((__m128i)__A),
16));
}
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#endif

View File

@ -1,23 +1,9 @@
/*===---- avx512vlbitalgintrin.h - BITALG intrinsics -----------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- avx512vlbwintrin.h - AVX512VL and AVX512BW intrinsics ------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -2301,7 +2287,7 @@ static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_loadu_epi16 (void const *__P)
{
struct __loadu_epi16 {
__m128i __v;
__m128i_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi16*)__P)->__v;
}
@ -2327,7 +2313,7 @@ static __inline __m256i __DEFAULT_FN_ATTRS256
_mm256_loadu_epi16 (void const *__P)
{
struct __loadu_epi16 {
__m256i __v;
__m256i_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi16*)__P)->__v;
}
@ -2353,7 +2339,7 @@ static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_loadu_epi8 (void const *__P)
{
struct __loadu_epi8 {
__m128i __v;
__m128i_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi8*)__P)->__v;
}
@ -2379,7 +2365,7 @@ static __inline __m256i __DEFAULT_FN_ATTRS256
_mm256_loadu_epi8 (void const *__P)
{
struct __loadu_epi8 {
__m256i __v;
__m256i_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi8*)__P)->__v;
}
@ -2405,7 +2391,7 @@ static __inline void __DEFAULT_FN_ATTRS128
_mm_storeu_epi16 (void *__P, __m128i __A)
{
struct __storeu_epi16 {
__m128i __v;
__m128i_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi16*)__P)->__v = __A;
}
@ -2422,7 +2408,7 @@ static __inline void __DEFAULT_FN_ATTRS256
_mm256_storeu_epi16 (void *__P, __m256i __A)
{
struct __storeu_epi16 {
__m256i __v;
__m256i_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi16*)__P)->__v = __A;
}
@ -2439,7 +2425,7 @@ static __inline void __DEFAULT_FN_ATTRS128
_mm_storeu_epi8 (void *__P, __m128i __A)
{
struct __storeu_epi8 {
__m128i __v;
__m128i_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi8*)__P)->__v = __A;
}
@ -2456,7 +2442,7 @@ static __inline void __DEFAULT_FN_ATTRS256
_mm256_storeu_epi8 (void *__P, __m256i __A)
{
struct __storeu_epi8 {
__m256i __v;
__m256i_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi8*)__P)->__v = __A;
}

View File

@ -1,22 +1,8 @@
/*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics ------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -60,99 +46,89 @@ _mm256_broadcastmw_epi32 (__mmask16 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_conflict_epi64 (__m128i __A)
{
return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
(__v2di) _mm_undefined_si128 (),
(__mmask8) -1);
return (__m128i) __builtin_ia32_vpconflictdi_128 ((__v2di) __A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
(__v2di) __W,
(__mmask8) __U);
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_conflict_epi64(__A),
(__v2di)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
(__v2di)
_mm_setzero_si128 (),
(__mmask8) __U);
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_conflict_epi64(__A),
(__v2di)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_conflict_epi64 (__m256i __A)
{
return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
(__v4di) _mm256_undefined_si256 (),
(__mmask8) -1);
return (__m256i) __builtin_ia32_vpconflictdi_256 ((__v4di) __A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
(__v4di) __W,
(__mmask8) __U);
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_conflict_epi64(__A),
(__v4di)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
(__v4di) _mm256_setzero_si256 (),
(__mmask8) __U);
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_conflict_epi64(__A),
(__v4di)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_conflict_epi32 (__m128i __A)
{
return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
(__v4si) _mm_undefined_si128 (),
(__mmask8) -1);
return (__m128i) __builtin_ia32_vpconflictsi_128 ((__v4si) __A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
(__v4si) __W,
(__mmask8) __U);
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_conflict_epi32(__A),
(__v4si)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
(__v4si) _mm_setzero_si128 (),
(__mmask8) __U);
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_conflict_epi32(__A),
(__v4si)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_conflict_epi32 (__m256i __A)
{
return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
(__v8si) _mm256_undefined_si256 (),
(__mmask8) -1);
return (__m256i) __builtin_ia32_vpconflictsi_256 ((__v8si) __A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
(__v8si) __W,
(__mmask8) __U);
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_conflict_epi32(__A),
(__v8si)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
(__v8si)
_mm256_setzero_si256 (),
(__mmask8) __U);
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_conflict_epi32(__A),
(__v8si)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128

View File

@ -1,22 +1,8 @@
/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -523,23 +509,21 @@ _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_cvtepi64_ps (__m256i __A) {
return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
(__v4sf) _mm_setzero_ps(),
(__mmask8) -1);
return (__m128)__builtin_convertvector((__v4di)__A, __v4sf);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
(__v4sf) __W,
(__mmask8) __U);
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm256_cvtepi64_ps(__A),
(__v4sf)__W);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
(__v4sf) _mm_setzero_ps(),
(__mmask8) __U);
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm256_cvtepi64_ps(__A),
(__v4sf)_mm_setzero_ps());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
@ -771,23 +755,21 @@ _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_cvtepu64_ps (__m256i __A) {
return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
(__v4sf) _mm_setzero_ps(),
(__mmask8) -1);
return (__m128)__builtin_convertvector((__v4du)__A, __v4sf);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
(__v4sf) __W,
(__mmask8) __U);
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm256_cvtepu64_ps(__A),
(__v4sf)__W);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
(__v4sf) _mm_setzero_ps(),
(__mmask8) __U);
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm256_cvtepu64_ps(__A),
(__v4sf)_mm_setzero_ps());
}
#define _mm_range_pd(A, B, C) \

View File

@ -1,22 +1,8 @@
/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -5513,7 +5499,7 @@ static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_loadu_epi64 (void const *__P)
{
struct __loadu_epi64 {
__m128i __v;
__m128i_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi64*)__P)->__v;
}
@ -5539,7 +5525,7 @@ static __inline __m256i __DEFAULT_FN_ATTRS256
_mm256_loadu_epi64 (void const *__P)
{
struct __loadu_epi64 {
__m256i __v;
__m256i_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi64*)__P)->__v;
}
@ -5565,7 +5551,7 @@ static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_loadu_epi32 (void const *__P)
{
struct __loadu_epi32 {
__m128i __v;
__m128i_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi32*)__P)->__v;
}
@ -5591,7 +5577,7 @@ static __inline __m256i __DEFAULT_FN_ATTRS256
_mm256_loadu_epi32 (void const *__P)
{
struct __loadu_epi32 {
__m256i __v;
__m256i_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi32*)__P)->__v;
}
@ -5717,7 +5703,7 @@ static __inline void __DEFAULT_FN_ATTRS128
_mm_storeu_epi64 (void *__P, __m128i __A)
{
struct __storeu_epi64 {
__m128i __v;
__m128i_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi64*)__P)->__v = __A;
}
@ -5734,7 +5720,7 @@ static __inline void __DEFAULT_FN_ATTRS256
_mm256_storeu_epi64 (void *__P, __m256i __A)
{
struct __storeu_epi64 {
__m256i __v;
__m256i_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi64*)__P)->__v = __A;
}
@ -5751,7 +5737,7 @@ static __inline void __DEFAULT_FN_ATTRS128
_mm_storeu_epi32 (void *__P, __m128i __A)
{
struct __storeu_epi32 {
__m128i __v;
__m128i_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi32*)__P)->__v = __A;
}
@ -5768,7 +5754,7 @@ static __inline void __DEFAULT_FN_ATTRS256
_mm256_storeu_epi32 (void *__P, __m256i __A)
{
struct __storeu_epi32 {
__m256i __v;
__m256i_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi32*)__P)->__v = __A;
}
@ -7000,7 +6986,7 @@ _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
__builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvtsepi32_epi8 (__m256i __A)
{
return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
@ -7023,7 +7009,7 @@ _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
__M);
}
static __inline__ void __DEFAULT_FN_ATTRS128
static __inline__ void __DEFAULT_FN_ATTRS256
_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
{
__builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
@ -7581,7 +7567,7 @@ _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
__M);
}
static __inline__ void __DEFAULT_FN_ATTRS256
static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
{
__builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
@ -8425,22 +8411,6 @@ _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
(__mmask8) __U);
}
static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A)
{
return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
(__v8hi) __W,
(__mmask8) __U);
}
static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A)
{
return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
(__v8hi) _mm_setzero_si128 (),
(__mmask8) __U);
}
#define _mm_mask_cvt_roundps_ph(W, U, A, I) \
(__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
(__v8hi)(__m128i)(W), \
@ -8451,21 +8421,9 @@ _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A)
(__v8hi)_mm_setzero_si128(), \
(__mmask8)(U))
static __inline __m128i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A)
{
return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
(__v8hi) __W,
(__mmask8) __U);
}
#define _mm_mask_cvtps_ph _mm_mask_cvt_roundps_ph
#define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph
static __inline __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A)
{
return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
(__v8hi) _mm_setzero_si128(),
(__mmask8) __U);
}
#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
(__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
(__v8hi)(__m128i)(W), \
@ -8476,6 +8434,9 @@ _mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A)
(__v8hi)_mm_setzero_si128(), \
(__mmask8)(U))
#define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph
#define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256

View File

@ -1,23 +1,9 @@
/*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,23 +1,9 @@
/*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

121
lib/include/avx512vlvp2intersectintrin.h vendored Normal file
View File

@ -0,0 +1,121 @@
/*===------ avx512vlvp2intersectintrin.h - VL VP2INTERSECT intrinsics ------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512vlvp2intersectintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512VLVP2INTERSECT_H
#define _AVX512VLVP2INTERSECT_H
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vp2intersect"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vp2intersect"), \
__min_vector_width__(256)))
/// Store, in an even/odd pair of mask registers, the indicators of the
/// locations of value matches between dwords in operands __a and __b.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VP2INTERSECTD </c> instruction.
///
/// \param __a
/// A 256-bit vector of [8 x i32].
/// \param __b
/// A 256-bit vector of [8 x i32]
/// \param __m0
/// A pointer point to 8-bit mask
/// \param __m1
/// A pointer point to 8-bit mask
static __inline__ void __DEFAULT_FN_ATTRS256
_mm256_2intersect_epi32(__m256i __a, __m256i __b, __mmask8 *__m0, __mmask8 *__m1) {
__builtin_ia32_vp2intersect_d_256((__v8si)__a, (__v8si)__b, __m0, __m1);
}
/// Store, in an even/odd pair of mask registers, the indicators of the
/// locations of value matches between quadwords in operands __a and __b.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VP2INTERSECTQ </c> instruction.
///
/// \param __a
/// A 256-bit vector of [4 x i64].
/// \param __b
/// A 256-bit vector of [4 x i64]
/// \param __m0
/// A pointer point to 8-bit mask
/// \param __m1
/// A pointer point to 8-bit mask
static __inline__ void __DEFAULT_FN_ATTRS256
_mm256_2intersect_epi64(__m256i __a, __m256i __b, __mmask8 *__m0, __mmask8 *__m1) {
__builtin_ia32_vp2intersect_q_256((__v4di)__a, (__v4di)__b, __m0, __m1);
}
/// Store, in an even/odd pair of mask registers, the indicators of the
/// locations of value matches between dwords in operands __a and __b.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VP2INTERSECTD </c> instruction.
///
/// \param __a
/// A 128-bit vector of [4 x i32].
/// \param __b
/// A 128-bit vector of [4 x i32]
/// \param __m0
/// A pointer point to 8-bit mask
/// \param __m1
/// A pointer point to 8-bit mask
static __inline__ void __DEFAULT_FN_ATTRS128
_mm_2intersect_epi32(__m128i __a, __m128i __b, __mmask8 *__m0, __mmask8 *__m1) {
__builtin_ia32_vp2intersect_d_128((__v4si)__a, (__v4si)__b, __m0, __m1);
}
/// Store, in an even/odd pair of mask registers, the indicators of the
/// locations of value matches between quadwords in operands __a and __b.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VP2INTERSECTQ </c> instruction.
///
/// \param __a
/// A 128-bit vector of [2 x i64].
/// \param __b
/// A 128-bit vector of [2 x i64]
/// \param __m0
/// A pointer point to 8-bit mask
/// \param __m1
/// A pointer point to 8-bit mask
static __inline__ void __DEFAULT_FN_ATTRS128
_mm_2intersect_epi64(__m128i __a, __m128i __b, __mmask8 *__m0, __mmask8 *__m1) {
__builtin_ia32_vp2intersect_q_128((__v2di)__a, (__v2di)__b, __m0, __m1);
}
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#endif

View File

@ -1,23 +1,9 @@
/*===------------- avx512vnniintrin.h - VNNI intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

77
lib/include/avx512vp2intersectintrin.h vendored Normal file
View File

@ -0,0 +1,77 @@
/*===------- avx512vpintersectintrin.h - VP2INTERSECT intrinsics ------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512vp2intersect.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512VP2INTERSECT_H
#define _AVX512VP2INTERSECT_H
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vp2intersect"), \
__min_vector_width__(512)))
/// Store, in an even/odd pair of mask registers, the indicators of the
/// locations of value matches between dwords in operands __a and __b.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VP2INTERSECTD </c> instruction.
///
/// \param __a
/// A 512-bit vector of [16 x i32].
/// \param __b
/// A 512-bit vector of [16 x i32]
/// \param __m0
/// A pointer point to 16-bit mask
/// \param __m1
/// A pointer point to 16-bit mask
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_2intersect_epi32(__m512i __a, __m512i __b, __mmask16 *__m0, __mmask16 *__m1) {
__builtin_ia32_vp2intersect_d_512((__v16si)__a, (__v16si)__b, __m0, __m1);
}
/// Store, in an even/odd pair of mask registers, the indicators of the
/// locations of value matches between quadwords in operands __a and __b.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VP2INTERSECTQ </c> instruction.
///
/// \param __a
/// A 512-bit vector of [8 x i64].
/// \param __b
/// A 512-bit vector of [8 x i64]
/// \param __m0
/// A pointer point to 8-bit mask
/// \param __m1
/// A pointer point to 8-bit mask
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_2intersect_epi64(__m512i __a, __m512i __b, __mmask8 *__m0, __mmask8 *__m1) {
__builtin_ia32_vp2intersect_q_512((__v8di)__a, (__v8di)__b, __m0, __m1);
}
#undef __DEFAULT_FN_ATTRS
#endif

View File

@ -1,23 +1,9 @@
/*===----- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics-------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,23 +1,9 @@
/*===---- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics -------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- avxintrin.h - AVX intrinsics -------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -45,9 +31,13 @@ typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32)));
* appear in the interface though. */
typedef signed char __v32qs __attribute__((__vector_size__(32)));
typedef float __m256 __attribute__ ((__vector_size__ (32)));
typedef double __m256d __attribute__((__vector_size__(32)));
typedef long long __m256i __attribute__((__vector_size__(32)));
typedef float __m256 __attribute__ ((__vector_size__ (32), __aligned__(32)));
typedef double __m256d __attribute__((__vector_size__(32), __aligned__(32)));
typedef long long __m256i __attribute__((__vector_size__(32), __aligned__(32)));
typedef float __m256_u __attribute__ ((__vector_size__ (32), __aligned__(1)));
typedef double __m256d_u __attribute__((__vector_size__(32), __aligned__(1)));
typedef long long __m256i_u __attribute__((__vector_size__(32), __aligned__(1)));
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256)))
@ -3113,7 +3103,7 @@ static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_loadu_pd(double const *__p)
{
struct __loadu_pd {
__m256d __v;
__m256d_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_pd*)__p)->__v;
}
@ -3133,7 +3123,7 @@ static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_loadu_ps(float const *__p)
{
struct __loadu_ps {
__m256 __v;
__m256_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_ps*)__p)->__v;
}
@ -3166,10 +3156,10 @@ _mm256_load_si256(__m256i const *__p)
/// A pointer to a 256-bit integer vector containing integer values.
/// \returns A 256-bit integer vector containing the moved values.
static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_loadu_si256(__m256i const *__p)
_mm256_loadu_si256(__m256i_u const *__p)
{
struct __loadu_si256 {
__m256i __v;
__m256i_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_si256*)__p)->__v;
}
@ -3246,7 +3236,7 @@ static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu_pd(double *__p, __m256d __a)
{
struct __storeu_pd {
__m256d __v;
__m256d_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_pd*)__p)->__v = __a;
}
@ -3266,7 +3256,7 @@ static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu_ps(float *__p, __m256 __a)
{
struct __storeu_ps {
__m256 __v;
__m256_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_ps*)__p)->__v = __a;
}
@ -3301,10 +3291,10 @@ _mm256_store_si256(__m256i *__p, __m256i __a)
/// \param __a
/// A 256-bit integer vector containing the values to be moved.
static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu_si256(__m256i *__p, __m256i __a)
_mm256_storeu_si256(__m256i_u *__p, __m256i __a)
{
struct __storeu_si256 {
__m256i __v;
__m256i_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_si256*)__p)->__v = __a;
}
@ -4834,7 +4824,7 @@ _mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)
/// address of the memory location does not have to be aligned.
/// \returns A 256-bit integer vector containing the concatenated result.
static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo)
_mm256_loadu2_m128i(__m128i_u const *__addr_hi, __m128i_u const *__addr_lo)
{
__m256i __v256 = _mm256_castsi128_si256(_mm_loadu_si128(__addr_lo));
return _mm256_insertf128_si256(__v256, _mm_loadu_si128(__addr_hi), 1);
@ -4918,7 +4908,7 @@ _mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)
/// \param __a
/// A 256-bit integer vector.
static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)
_mm256_storeu2_m128i(__m128i_u *__addr_hi, __m128i_u *__addr_lo, __m256i __a)
{
__m128i __v128;

View File

@ -1,22 +1,8 @@
/*===---- bmi2intrin.h - BMI2 intrinsics -----------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- bmiintrin.h - BMI intrinsics -------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- cetintrin.h - CET intrinsic --------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- cldemoteintrin.h - CLDEMOTE intrinsic ----------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- clflushoptintrin.h - CLFLUSHOPT intrinsic ------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- clwbintrin.h - CLWB intrinsic ------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===----------------------- clzerointrin.h - CLZERO ----------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

24
lib/include/cpuid.h vendored
View File

@ -1,22 +1,8 @@
/*===---- cpuid.h - X86 cpu model detection --------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -191,6 +177,7 @@
#define bit_CLDEMOTE 0x02000000
#define bit_MOVDIRI 0x08000000
#define bit_MOVDIR64B 0x10000000
#define bit_ENQCMD 0x20000000
/* Features in %edx for leaf 7 sub-leaf 0 */
#define bit_AVX5124VNNIW 0x00000004
@ -198,6 +185,9 @@
#define bit_PCONFIG 0x00040000
#define bit_IBT 0x00100000
/* Features in %eax for leaf 7 sub-leaf 1 */
#define bit_AVX512BF16 0x00000020
/* Features in %eax for leaf 13 sub-leaf 1 */
#define bit_XSAVEOPT 0x00000001
#define bit_XSAVEC 0x00000002

View File

@ -1,22 +1,8 @@
/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -26,8 +12,11 @@
#include <xmmintrin.h>
typedef double __m128d __attribute__((__vector_size__(16)));
typedef long long __m128i __attribute__((__vector_size__(16)));
typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));
typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1)));
typedef long long __m128i_u __attribute__((__vector_size__(16), __aligned__(1)));
/* Type defines. */
typedef double __v2df __attribute__ ((__vector_size__ (16)));
@ -1652,7 +1641,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_loadu_pd(double const *__dp)
{
struct __loadu_pd {
__m128d __v;
__m128d_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_pd*)__dp)->__v;
}
@ -2042,7 +2031,7 @@ static __inline__ void __DEFAULT_FN_ATTRS
_mm_storeu_pd(double *__dp, __m128d __a)
{
struct __storeu_pd {
__m128d __v;
__m128d_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_pd*)__dp)->__v = __a;
}
@ -2316,11 +2305,7 @@ _mm_adds_epu16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_avg_epu8(__m128i __a, __m128i __b)
{
typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));
return (__m128i)__builtin_convertvector(
((__builtin_convertvector((__v16qu)__a, __v16hu) +
__builtin_convertvector((__v16qu)__b, __v16hu)) + 1)
>> 1, __v16qu);
return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
}
/// Computes the rounded avarages of corresponding elements of two
@ -2340,11 +2325,7 @@ _mm_avg_epu8(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_avg_epu16(__m128i __a, __m128i __b)
{
typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));
return (__m128i)__builtin_convertvector(
((__builtin_convertvector((__v8hu)__a, __v8su) +
__builtin_convertvector((__v8hu)__b, __v8su)) + 1)
>> 1, __v8hu);
return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
}
/// Multiplies the corresponding elements of two 128-bit signed [8 x i16]
@ -3564,10 +3545,10 @@ _mm_load_si128(__m128i const *__p)
/// A pointer to a memory location containing integer values.
/// \returns A 128-bit integer vector containing the moved values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_loadu_si128(__m128i const *__p)
_mm_loadu_si128(__m128i_u const *__p)
{
struct __loadu_si128 {
__m128i __v;
__m128i_u __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_si128*)__p)->__v;
}
@ -3585,7 +3566,7 @@ _mm_loadu_si128(__m128i const *__p)
/// \returns A 128-bit vector of [2 x i64]. The lower order bits contain the
/// moved value. The higher order bits are cleared.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_loadl_epi64(__m128i const *__p)
_mm_loadl_epi64(__m128i_u const *__p)
{
struct __mm_loadl_epi64_struct {
long long __u;
@ -4027,10 +4008,10 @@ _mm_store_si128(__m128i *__p, __m128i __b)
/// \param __b
/// A 128-bit integer vector containing the values to be moved.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_storeu_si128(__m128i *__p, __m128i __b)
_mm_storeu_si128(__m128i_u *__p, __m128i __b)
{
struct __storeu_si128 {
__m128i __v;
__m128i_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_si128*)__p)->__v = __b;
}
@ -4139,7 +4120,7 @@ _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
/// A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the
/// value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_storel_epi64(__m128i *__p, __m128i __a)
_mm_storel_epi64(__m128i_u *__p, __m128i __a)
{
struct __mm_storel_epi64_struct {
long long __u;

63
lib/include/enqcmdintrin.h vendored Normal file
View File

@ -0,0 +1,63 @@
/*===------------------ enqcmdintrin.h - enqcmd intrinsics -----------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <enqcmdintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __ENQCMDINTRIN_H
#define __ENQCMDINTRIN_H
/* Define the default attributes for the functions in this file */
#define _DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("enqcmd")))
/// Reads 64-byte command pointed by \a __src, formats 64-byte enqueue store
/// data, and performs 64-byte enqueue store to memory pointed by \a __dst.
/// This intrinsics may only be used in User mode.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsics corresponds to the <c> ENQCMD </c> instruction.
///
/// \param __dst
/// Pointer to the destination of the enqueue store.
/// \param __src
/// Pointer to 64-byte command data.
/// \returns If the command data is successfully written to \a __dst then 0 is
/// returned. Otherwise 1 is returned.
static __inline__ int _DEFAULT_FN_ATTRS
_enqcmd (void *__dst, const void *__src)
{
return __builtin_ia32_enqcmd(__dst, __src);
}
/// Reads 64-byte command pointed by \a __src, formats 64-byte enqueue store
/// data, and performs 64-byte enqueue store to memory pointed by \a __dst
/// This intrinsic may only be used in Privileged mode.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsics corresponds to the <c> ENQCMDS </c> instruction.
///
/// \param __dst
/// Pointer to the destination of the enqueue store.
/// \param __src
/// Pointer to 64-byte command data.
/// \returns If the command data is successfully written to \a __dst then 0 is
/// returned. Otherwise 1 is returned.
static __inline__ int _DEFAULT_FN_ATTRS
_enqcmds (void *__dst, const void *__src)
{
return __builtin_ia32_enqcmds(__dst, __src);
}
#undef _DEFAULT_FN_ATTRS
#endif /* __ENQCMDINTRIN_H */

View File

@ -1,22 +1,8 @@
/*===---- f16cintrin.h - F16C intrinsics -----------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -52,9 +38,9 @@
static __inline float __DEFAULT_FN_ATTRS128
_cvtsh_ss(unsigned short __a)
{
__v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
__v4sf r = __builtin_ia32_vcvtph2ps(v);
return r[0];
__v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
__v4sf __r = __builtin_ia32_vcvtph2ps(__v);
return __r[0];
}
/// Converts a 32-bit single-precision float value to a 16-bit

28
lib/include/float.h vendored
View File

@ -1,22 +1,8 @@
/*===---- float.h - Characteristics of floating point types ----------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -51,7 +37,7 @@
# undef FLT_MANT_DIG
# undef DBL_MANT_DIG
# undef LDBL_MANT_DIG
# if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__)
# if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__) || __cplusplus >= 201103L
# undef DECIMAL_DIG
# endif
# undef FLT_DIG
@ -78,7 +64,7 @@
# undef FLT_MIN
# undef DBL_MIN
# undef LDBL_MIN
# if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__)
# if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__) || __cplusplus >= 201703L
# undef FLT_TRUE_MIN
# undef DBL_TRUE_MIN
# undef LDBL_TRUE_MIN
@ -101,7 +87,7 @@
#define DBL_MANT_DIG __DBL_MANT_DIG__
#define LDBL_MANT_DIG __LDBL_MANT_DIG__
#if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__)
#if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__) || __cplusplus >= 201103L
# define DECIMAL_DIG __DECIMAL_DIG__
#endif
@ -137,7 +123,7 @@
#define DBL_MIN __DBL_MIN__
#define LDBL_MIN __LDBL_MIN__
#if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__)
#if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__) || __cplusplus >= 201703L
# define FLT_TRUE_MIN __FLT_DENORM_MIN__
# define DBL_TRUE_MIN __DBL_DENORM_MIN__
# define LDBL_TRUE_MIN __LDBL_DENORM_MIN__

View File

@ -1,22 +1,8 @@
/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- fmaintrin.h - FMA intrinsics -------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- fxsrintrin.h - FXSR intrinsic ------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,23 +1,9 @@
/*===----------------- gfniintrin.h - GFNI intrinsics ----------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- htmintrin.h - Standard header for PowerPC HTM ---------------===*\
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
\*===----------------------------------------------------------------------===*/

View File

@ -1,22 +1,8 @@
/*===---- htmxlintrin.h - XL compiler HTM execution intrinsics-------------===*\
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
\*===----------------------------------------------------------------------===*/

View File

@ -1,22 +1,8 @@
/* ===-------- ia32intrin.h ---------------------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -28,6 +14,160 @@
#ifndef __IA32INTRIN_H
#define __IA32INTRIN_H
/** Find the first set bit starting from the lsb. Result is undefined if
* input is 0.
*
* \headerfile <x86intrin.h>
*
* This intrinsic corresponds to the <c> BSF </c> instruction or the
* <c> TZCNT </c> instruction.
*
* \param __A
* A 32-bit integer operand.
* \returns A 32-bit integer containing the bit number.
*/
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsfd(int __A) {
return __builtin_ctz(__A);
}
/** Find the first set bit starting from the msb. Result is undefined if
* input is 0.
*
* \headerfile <x86intrin.h>
*
* This intrinsic corresponds to the <c> BSR </c> instruction or the
* <c> LZCNT </c> instruction and an <c> XOR </c>.
*
* \param __A
* A 32-bit integer operand.
* \returns A 32-bit integer containing the bit number.
*/
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsrd(int __A) {
return 31 - __builtin_clz(__A);
}
/** Swaps the bytes in the input. Converting little endian to big endian or
* vice versa.
*
* \headerfile <x86intrin.h>
*
* This intrinsic corresponds to the <c> BSWAP </c> instruction.
*
* \param __A
* A 32-bit integer operand.
* \returns A 32-bit integer containing the swapped bytes.
*/
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bswapd(int __A) {
return __builtin_bswap32(__A);
}
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bswap(int __A) {
return __builtin_bswap32(__A);
}
#define _bit_scan_forward(A) __bsfd((A))
#define _bit_scan_reverse(A) __bsrd((A))
#ifdef __x86_64__
/** Find the first set bit starting from the lsb. Result is undefined if
* input is 0.
*
* \headerfile <x86intrin.h>
*
* This intrinsic corresponds to the <c> BSF </c> instruction or the
* <c> TZCNT </c> instruction.
*
* \param __A
* A 64-bit integer operand.
* \returns A 32-bit integer containing the bit number.
*/
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsfq(long long __A) {
return __builtin_ctzll(__A);
}
/** Find the first set bit starting from the msb. Result is undefined if
* input is 0.
*
* \headerfile <x86intrin.h>
*
* This intrinsic corresponds to the <c> BSR </c> instruction or the
* <c> LZCNT </c> instruction and an <c> XOR </c>.
*
* \param __A
* A 64-bit integer operand.
* \returns A 32-bit integer containing the bit number.
*/
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsrq(long long __A) {
return 63 - __builtin_clzll(__A);
}
/** Swaps the bytes in the input. Converting little endian to big endian or
* vice versa.
*
* \headerfile <x86intrin.h>
*
* This intrinsic corresponds to the <c> BSWAP </c> instruction.
*
* \param __A
* A 64-bit integer operand.
* \returns A 64-bit integer containing the swapped bytes.
*/
static __inline__ long long __attribute__((__always_inline__, __nodebug__))
__bswapq(long long __A) {
return __builtin_bswap64(__A);
}
#define _bswap64(A) __bswapq((A))
#endif
/** Counts the number of bits in the source operand having a value of 1.
*
* \headerfile <x86intrin.h>
*
* This intrinsic corresponds to the <c> POPCNT </c> instruction or a
* a sequence of arithmetic and logic ops to calculate it.
*
* \param __A
* An unsigned 32-bit integer operand.
* \returns A 32-bit integer containing the number of bits with value 1 in the
* source operand.
*/
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__popcntd(unsigned int __A)
{
return __builtin_popcount(__A);
}
#define _popcnt32(A) __popcntd((A))
#ifdef __x86_64__
/** Counts the number of bits in the source operand having a value of 1.
*
* \headerfile <x86intrin.h>
*
* This intrinsic corresponds to the <c> POPCNT </c> instruction or a
* a sequence of arithmetic and logic ops to calculate it.
*
* \param __A
* An unsigned 64-bit integer operand.
* \returns A 64-bit integer containing the number of bits with value 1 in the
* source operand.
*/
static __inline__ long long __attribute__((__always_inline__, __nodebug__))
__popcntq(unsigned long long __A)
{
return __builtin_popcountll(__A);
}
#define _popcnt64(A) __popcntq((A))
#endif /* __x86_64__ */
#ifdef __x86_64__
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__readeflags(void)
@ -55,6 +195,92 @@ __writeeflags(unsigned int __f)
}
#endif /* !__x86_64__ */
/** Adds the unsigned integer operand to the CRC-32C checksum of the
* unsigned char operand.
*
* \headerfile <x86intrin.h>
*
* This intrinsic corresponds to the <c> CRC32B </c> instruction.
*
* \param __C
* An unsigned integer operand to add to the CRC-32C checksum of operand
* \a __D.
* \param __D
* An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
* \returns The result of adding operand \a __C to the CRC-32C checksum of
* operand \a __D.
*/
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32b(unsigned int __C, unsigned char __D)
{
return __builtin_ia32_crc32qi(__C, __D);
}
/** Adds the unsigned integer operand to the CRC-32C checksum of the
* unsigned short operand.
*
* \headerfile <x86intrin.h>
*
* This intrinsic corresponds to the <c> CRC32W </c> instruction.
*
* \param __C
* An unsigned integer operand to add to the CRC-32C checksum of operand
* \a __D.
* \param __D
* An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
* \returns The result of adding operand \a __C to the CRC-32C checksum of
* operand \a __D.
*/
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32w(unsigned int __C, unsigned short __D)
{
return __builtin_ia32_crc32hi(__C, __D);
}
/** Adds the unsigned integer operand to the CRC-32C checksum of the
* second unsigned integer operand.
*
* \headerfile <x86intrin.h>
*
* This intrinsic corresponds to the <c> CRC32D </c> instruction.
*
* \param __C
* An unsigned integer operand to add to the CRC-32C checksum of operand
* \a __D.
* \param __D
* An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
* \returns The result of adding operand \a __C to the CRC-32C checksum of
* operand \a __D.
*/
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32d(unsigned int __C, unsigned int __D)
{
return __builtin_ia32_crc32si(__C, __D);
}
#ifdef __x86_64__
/** Adds the unsigned integer operand to the CRC-32C checksum of the
* unsigned 64-bit integer operand.
*
* \headerfile <x86intrin.h>
*
* This intrinsic corresponds to the <c> CRC32Q </c> instruction.
*
* \param __C
* An unsigned integer operand to add to the CRC-32C checksum of operand
* \a __D.
* \param __D
* An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
* \returns The result of adding operand \a __C to the CRC-32C checksum of
* operand \a __D.
*/
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32q(unsigned long long __C, unsigned long long __D)
{
return __builtin_ia32_crc32di(__C, __D);
}
#endif /* __x86_64__ */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__rdpmc(int __A) {
return __builtin_ia32_rdpmc(__A);
@ -75,4 +301,64 @@ _wbinvd(void) {
__builtin_ia32_wbinvd();
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
__rolb(unsigned char __X, int __C) {
return __builtin_rotateleft8(__X, __C);
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
__rorb(unsigned char __X, int __C) {
return __builtin_rotateright8(__X, __C);
}
static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
__rolw(unsigned short __X, int __C) {
return __builtin_rotateleft16(__X, __C);
}
static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
__rorw(unsigned short __X, int __C) {
return __builtin_rotateright16(__X, __C);
}
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__rold(unsigned int __X, int __C) {
return __builtin_rotateleft32(__X, __C);
}
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__rord(unsigned int __X, int __C) {
return __builtin_rotateright32(__X, __C);
}
#ifdef __x86_64__
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__rolq(unsigned long long __X, int __C) {
return __builtin_rotateleft64(__X, __C);
}
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__rorq(unsigned long long __X, int __C) {
return __builtin_rotateright64(__X, __C);
}
#endif /* __x86_64__ */
#ifndef _MSC_VER
/* These are already provided as builtins for MSVC. */
/* Select the correct function based on the size of long. */
#ifdef __LP64__
#define _lrotl(a,b) __rolq((a), (b))
#define _lrotr(a,b) __rorq((a), (b))
#else
#define _lrotl(a,b) __rold((a), (b))
#define _lrotr(a,b) __rord((a), (b))
#endif
#define _rotl(a,b) __rold((a), (b))
#define _rotr(a,b) __rord((a), (b))
#endif // _MSC_VER
/* These are not builtins so need to be provided in all modes. */
#define _rotwl(a,b) __rolw((a), (b))
#define _rotwr(a,b) __rorw((a), (b))
#endif /* __IA32INTRIN_H */

View File

@ -1,22 +1,8 @@
/*===---- immintrin.h - Intel intrinsics -----------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -195,6 +181,15 @@
#include <avx512pfintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BF16__)
#include <avx512bf16intrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512BF16__))
#include <avx512vlbf16intrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
#include <pkuintrin.h>
#endif
@ -241,18 +236,6 @@ _rdrand64_step(unsigned long long *__p)
#endif
#endif /* __RDRND__ */
/* __bit_scan_forward */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bit_scan_forward(int __A) {
return __builtin_ctz(__A);
}
/* __bit_scan_reverse */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bit_scan_reverse(int __A) {
return 31 - __builtin_clz(__A);
}
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
#ifdef __x86_64__
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
@ -378,9 +361,8 @@ _storebe_i64(void * __P, long long __D) {
#include <fxsrintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__)
/* No feature check desired due to internal MSC_VER checks */
#include <xsaveintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
#include <xsaveoptintrin.h>
@ -439,7 +421,21 @@ _storebe_i64(void * __P, long long __D) {
#include <invpcidintrin.h>
#endif
#ifdef _MSC_VER
#if !defined(_MSC_VER) || __has_feature(modules) || \
defined(__AVX512VP2INTERSECT__)
#include <avx512vp2intersectintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
#include <avx512vlvp2intersectintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__ENQCMD__)
#include <enqcmdintrin.h>
#endif
#if defined(_MSC_VER) && __has_extension(gnu_asm)
/* Define the default attributes for these intrinsics */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
#ifdef __cplusplus
@ -521,6 +517,6 @@ _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
#undef __DEFAULT_FN_ATTRS
#endif /* _MSC_VER */
#endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
#endif /* __IMMINTRIN_H */

42
lib/include/intrin.h vendored
View File

@ -1,22 +1,8 @@
/* ===-------- intrin.h ---------------------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -200,10 +186,6 @@ __attribute__((__deprecated__("use other intrinsics or C++11 atomics instead")))
_WriteBarrier(void);
unsigned __int32 xbegin(void);
void _xend(void);
static __inline__
#define _XCR_XFEATURE_ENABLED_MASK 0
unsigned __int64 __cdecl _xgetbv(unsigned int);
void __cdecl _xsetbv(unsigned int, unsigned __int64);
/* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */
#ifdef __x86_64__
@ -539,12 +521,6 @@ __cpuidex(int __info[4], int __level, int __ecx) {
__asm__ ("cpuid" : "=a"(__info[0]), "=b" (__info[1]), "=c"(__info[2]), "=d"(__info[3])
: "a"(__level), "c"(__ecx));
}
static __inline__ unsigned __int64 __cdecl __DEFAULT_FN_ATTRS
_xgetbv(unsigned int __xcr_no) {
unsigned int __eax, __edx;
__asm__ ("xgetbv" : "=a" (__eax), "=d" (__edx) : "c" (__xcr_no));
return ((unsigned __int64)__edx << 32) | __eax;
}
static __inline__ void __DEFAULT_FN_ATTRS
__halt(void) {
__asm__ volatile ("hlt");
@ -567,15 +543,9 @@ long _InterlockedAdd(long volatile *Addend, long Value);
__int64 _ReadStatusReg(int);
void _WriteStatusReg(int, __int64);
static inline unsigned short _byteswap_ushort (unsigned short val) {
return __builtin_bswap16(val);
}
static inline unsigned long _byteswap_ulong (unsigned long val) {
return __builtin_bswap32(val);
}
static inline unsigned __int64 _byteswap_uint64 (unsigned __int64 val) {
return __builtin_bswap64(val);
}
unsigned short __cdecl _byteswap_ushort(unsigned short val);
unsigned long __cdecl _byteswap_ulong (unsigned long val);
unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64 val);
#endif
/*----------------------------------------------------------------------------*\

View File

@ -1,27 +1,18 @@
/*===---- inttypes.h - Standard header for integer printf macros ----------===*\
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
\*===----------------------------------------------------------------------===*/
#ifndef __CLANG_INTTYPES_H
// AIX system headers need inttypes.h to be re-enterable while _STD_TYPES_T
// is defined until an inclusion of it without _STD_TYPES_T occurs, in which
// case the header guard macro is defined.
#if !defined(_AIX) || !defined(_STD_TYPES_T)
#define __CLANG_INTTYPES_H
#endif
#if defined(_MSC_VER) && _MSC_VER < 1800
#error MSVC does not have inttypes.h prior to Visual Studio 2013

View File

@ -1,22 +1,8 @@
/*===------------- invpcidintrin.h - INVPCID intrinsic ---------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

22
lib/include/iso646.h vendored
View File

@ -1,24 +1,8 @@
/*===---- iso646.h - Standard header for alternate spellings of operators---===
*
* Copyright (c) 2008 Eli Friedman
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

22
lib/include/limits.h vendored
View File

@ -1,24 +1,8 @@
/*===---- limits.h - Standard header for integer sizes --------------------===*\
*
* Copyright (c) 2009 Chris Lattner
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
\*===----------------------------------------------------------------------===*/

View File

@ -1,22 +1,8 @@
/*===---- lwpintrin.h - LWP intrinsics -------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- lzcntintrin.h - LZCNT intrinsics ---------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

20
lib/include/mm3dnow.h vendored
View File

@ -1,22 +1,8 @@
/*===---- mm3dnow.h - 3DNow! intrinsics ------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- mm_malloc.h - Allocating and Freeing Aligned Memory Blocks -------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -24,7 +10,7 @@
#ifndef __MMINTRIN_H
#define __MMINTRIN_H
typedef long long __m64 __attribute__((__vector_size__(8)));
typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8)));
typedef long long __v1di __attribute__((__vector_size__(8)));
typedef int __v2si __attribute__((__vector_size__(8)));

View File

@ -1,22 +1,8 @@
/*===---- module.modulemap - intrinsics module map -------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -168,4 +154,5 @@ module _Builtin_stddef_max_align_t [system] [extern_c] {
module opencl_c {
requires opencl
header "opencl-c.h"
header "opencl-c-base.h"
}

View File

@ -1,22 +1,8 @@
/*===------------------------- movdirintrin.h ------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

20
lib/include/msa.h vendored
View File

@ -1,22 +1,8 @@
/*===---- msa.h - MIPS MSA intrinsics --------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- mwaitxintrin.h - MONITORX/MWAITX intrinsics ----------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- nmmintrin.h - SSE4 intrinsics ------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

578
lib/include/opencl-c-base.h vendored Normal file
View File

@ -0,0 +1,578 @@
//===----- opencl-c-base.h - OpenCL C language base definitions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _OPENCL_BASE_H_
#define _OPENCL_BASE_H_
// built-in scalar data types:
/**
* An unsigned 8-bit integer.
*/
typedef unsigned char uchar;
/**
* An unsigned 16-bit integer.
*/
typedef unsigned short ushort;
/**
* An unsigned 32-bit integer.
*/
typedef unsigned int uint;
/**
* An unsigned 64-bit integer.
*/
typedef unsigned long ulong;
/**
* The unsigned integer type of the result of the sizeof operator. This
* is a 32-bit unsigned integer if CL_DEVICE_ADDRESS_BITS
* defined in table 4.3 is 32-bits and is a 64-bit unsigned integer if
* CL_DEVICE_ADDRESS_BITS is 64-bits.
*/
typedef __SIZE_TYPE__ size_t;
/**
* A signed integer type that is the result of subtracting two pointers.
* This is a 32-bit signed integer if CL_DEVICE_ADDRESS_BITS
* defined in table 4.3 is 32-bits and is a 64-bit signed integer if
* CL_DEVICE_ADDRESS_BITS is 64-bits.
*/
typedef __PTRDIFF_TYPE__ ptrdiff_t;
/**
* A signed integer type with the property that any valid pointer to
* void can be converted to this type, then converted back to pointer
* to void, and the result will compare equal to the original pointer.
*/
typedef __INTPTR_TYPE__ intptr_t;
/**
* An unsigned integer type with the property that any valid pointer to
* void can be converted to this type, then converted back to pointer
* to void, and the result will compare equal to the original pointer.
*/
typedef __UINTPTR_TYPE__ uintptr_t;
// built-in vector data types:
typedef char char2 __attribute__((ext_vector_type(2)));
typedef char char3 __attribute__((ext_vector_type(3)));
typedef char char4 __attribute__((ext_vector_type(4)));
typedef char char8 __attribute__((ext_vector_type(8)));
typedef char char16 __attribute__((ext_vector_type(16)));
typedef uchar uchar2 __attribute__((ext_vector_type(2)));
typedef uchar uchar3 __attribute__((ext_vector_type(3)));
typedef uchar uchar4 __attribute__((ext_vector_type(4)));
typedef uchar uchar8 __attribute__((ext_vector_type(8)));
typedef uchar uchar16 __attribute__((ext_vector_type(16)));
typedef short short2 __attribute__((ext_vector_type(2)));
typedef short short3 __attribute__((ext_vector_type(3)));
typedef short short4 __attribute__((ext_vector_type(4)));
typedef short short8 __attribute__((ext_vector_type(8)));
typedef short short16 __attribute__((ext_vector_type(16)));
typedef ushort ushort2 __attribute__((ext_vector_type(2)));
typedef ushort ushort3 __attribute__((ext_vector_type(3)));
typedef ushort ushort4 __attribute__((ext_vector_type(4)));
typedef ushort ushort8 __attribute__((ext_vector_type(8)));
typedef ushort ushort16 __attribute__((ext_vector_type(16)));
typedef int int2 __attribute__((ext_vector_type(2)));
typedef int int3 __attribute__((ext_vector_type(3)));
typedef int int4 __attribute__((ext_vector_type(4)));
typedef int int8 __attribute__((ext_vector_type(8)));
typedef int int16 __attribute__((ext_vector_type(16)));
typedef uint uint2 __attribute__((ext_vector_type(2)));
typedef uint uint3 __attribute__((ext_vector_type(3)));
typedef uint uint4 __attribute__((ext_vector_type(4)));
typedef uint uint8 __attribute__((ext_vector_type(8)));
typedef uint uint16 __attribute__((ext_vector_type(16)));
typedef long long2 __attribute__((ext_vector_type(2)));
typedef long long3 __attribute__((ext_vector_type(3)));
typedef long long4 __attribute__((ext_vector_type(4)));
typedef long long8 __attribute__((ext_vector_type(8)));
typedef long long16 __attribute__((ext_vector_type(16)));
typedef ulong ulong2 __attribute__((ext_vector_type(2)));
typedef ulong ulong3 __attribute__((ext_vector_type(3)));
typedef ulong ulong4 __attribute__((ext_vector_type(4)));
typedef ulong ulong8 __attribute__((ext_vector_type(8)));
typedef ulong ulong16 __attribute__((ext_vector_type(16)));
typedef float float2 __attribute__((ext_vector_type(2)));
typedef float float3 __attribute__((ext_vector_type(3)));
typedef float float4 __attribute__((ext_vector_type(4)));
typedef float float8 __attribute__((ext_vector_type(8)));
typedef float float16 __attribute__((ext_vector_type(16)));
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
typedef half half2 __attribute__((ext_vector_type(2)));
typedef half half3 __attribute__((ext_vector_type(3)));
typedef half half4 __attribute__((ext_vector_type(4)));
typedef half half8 __attribute__((ext_vector_type(8)));
typedef half half16 __attribute__((ext_vector_type(16)));
#endif
#ifdef cl_khr_fp64
#if __OPENCL_C_VERSION__ < CL_VERSION_1_2
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
typedef double double2 __attribute__((ext_vector_type(2)));
typedef double double3 __attribute__((ext_vector_type(3)));
typedef double double4 __attribute__((ext_vector_type(4)));
typedef double double8 __attribute__((ext_vector_type(8)));
typedef double double16 __attribute__((ext_vector_type(16)));
#endif
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
#define NULL ((void*)0)
#endif
/**
* Value of maximum non-infinite single-precision floating-point
* number.
*/
#define MAXFLOAT 0x1.fffffep127f
/**
* A positive float constant expression. HUGE_VALF evaluates
* to +infinity. Used as an error value returned by the built-in
* math functions.
*/
#define HUGE_VALF (__builtin_huge_valf())
/**
* A positive double constant expression. HUGE_VAL evaluates
* to +infinity. Used as an error value returned by the built-in
* math functions.
*/
#define HUGE_VAL (__builtin_huge_val())
/**
* A constant expression of type float representing positive or
* unsigned infinity.
*/
#define INFINITY (__builtin_inff())
/**
* A constant expression of type float representing a quiet NaN.
*/
#define NAN as_float(INT_MAX)
#define FP_ILOGB0 INT_MIN
#define FP_ILOGBNAN INT_MAX
#define FLT_DIG 6
#define FLT_MANT_DIG 24
#define FLT_MAX_10_EXP +38
#define FLT_MAX_EXP +128
#define FLT_MIN_10_EXP -37
#define FLT_MIN_EXP -125
#define FLT_RADIX 2
#define FLT_MAX 0x1.fffffep127f
#define FLT_MIN 0x1.0p-126f
#define FLT_EPSILON 0x1.0p-23f
#define M_E_F 2.71828182845904523536028747135266250f
#define M_LOG2E_F 1.44269504088896340735992468100189214f
#define M_LOG10E_F 0.434294481903251827651128918916605082f
#define M_LN2_F 0.693147180559945309417232121458176568f
#define M_LN10_F 2.30258509299404568401799145468436421f
#define M_PI_F 3.14159265358979323846264338327950288f
#define M_PI_2_F 1.57079632679489661923132169163975144f
#define M_PI_4_F 0.785398163397448309615660845819875721f
#define M_1_PI_F 0.318309886183790671537767526745028724f
#define M_2_PI_F 0.636619772367581343075535053490057448f
#define M_2_SQRTPI_F 1.12837916709551257389615890312154517f
#define M_SQRT2_F 1.41421356237309504880168872420969808f
#define M_SQRT1_2_F 0.707106781186547524400844362104849039f
#define DBL_DIG 15
#define DBL_MANT_DIG 53
#define DBL_MAX_10_EXP +308
#define DBL_MAX_EXP +1024
#define DBL_MIN_10_EXP -307
#define DBL_MIN_EXP -1021
#define DBL_RADIX 2
#define DBL_MAX 0x1.fffffffffffffp1023
#define DBL_MIN 0x1.0p-1022
#define DBL_EPSILON 0x1.0p-52
#define M_E 0x1.5bf0a8b145769p+1
#define M_LOG2E 0x1.71547652b82fep+0
#define M_LOG10E 0x1.bcb7b1526e50ep-2
#define M_LN2 0x1.62e42fefa39efp-1
#define M_LN10 0x1.26bb1bbb55516p+1
#define M_PI 0x1.921fb54442d18p+1
#define M_PI_2 0x1.921fb54442d18p+0
#define M_PI_4 0x1.921fb54442d18p-1
#define M_1_PI 0x1.45f306dc9c883p-2
#define M_2_PI 0x1.45f306dc9c883p-1
#define M_2_SQRTPI 0x1.20dd750429b6dp+0
#define M_SQRT2 0x1.6a09e667f3bcdp+0
#define M_SQRT1_2 0x1.6a09e667f3bcdp-1
#ifdef cl_khr_fp16
#define HALF_DIG 3
#define HALF_MANT_DIG 11
#define HALF_MAX_10_EXP +4
#define HALF_MAX_EXP +16
#define HALF_MIN_10_EXP -4
#define HALF_MIN_EXP -13
#define HALF_RADIX 2
#define HALF_MAX ((0x1.ffcp15h))
#define HALF_MIN ((0x1.0p-14h))
#define HALF_EPSILON ((0x1.0p-10h))
#define M_E_H 2.71828182845904523536028747135266250h
#define M_LOG2E_H 1.44269504088896340735992468100189214h
#define M_LOG10E_H 0.434294481903251827651128918916605082h
#define M_LN2_H 0.693147180559945309417232121458176568h
#define M_LN10_H 2.30258509299404568401799145468436421h
#define M_PI_H 3.14159265358979323846264338327950288h
#define M_PI_2_H 1.57079632679489661923132169163975144h
#define M_PI_4_H 0.785398163397448309615660845819875721h
#define M_1_PI_H 0.318309886183790671537767526745028724h
#define M_2_PI_H 0.636619772367581343075535053490057448h
#define M_2_SQRTPI_H 1.12837916709551257389615890312154517h
#define M_SQRT2_H 1.41421356237309504880168872420969808h
#define M_SQRT1_2_H 0.707106781186547524400844362104849039h
#endif //cl_khr_fp16
#define CHAR_BIT 8
#define SCHAR_MAX 127
#define SCHAR_MIN (-128)
#define UCHAR_MAX 255
#define CHAR_MAX SCHAR_MAX
#define CHAR_MIN SCHAR_MIN
#define USHRT_MAX 65535
#define SHRT_MAX 32767
#define SHRT_MIN (-32768)
#define UINT_MAX 0xffffffff
#define INT_MAX 2147483647
#define INT_MIN (-2147483647-1)
#define ULONG_MAX 0xffffffffffffffffUL
#define LONG_MAX 0x7fffffffffffffffL
#define LONG_MIN (-0x7fffffffffffffffL-1)
// OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions
// Flag type and values for barrier, mem_fence, read_mem_fence, write_mem_fence
typedef uint cl_mem_fence_flags;
/**
* Queue a memory fence to ensure correct
* ordering of memory operations to local memory
*/
#define CLK_LOCAL_MEM_FENCE 0x01
/**
* Queue a memory fence to ensure correct
* ordering of memory operations to global memory
*/
#define CLK_GLOBAL_MEM_FENCE 0x02
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
typedef enum memory_scope {
memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
#endif
} memory_scope;
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
/**
* Queue a memory fence to ensure correct ordering of memory
* operations between work-items of a work-group to
* image memory.
*/
#define CLK_IMAGE_MEM_FENCE 0x04
#ifndef ATOMIC_VAR_INIT
#define ATOMIC_VAR_INIT(x) (x)
#endif //ATOMIC_VAR_INIT
#define ATOMIC_FLAG_INIT 0
// enum values aligned with what clang uses in EmitAtomicExpr()
typedef enum memory_order
{
memory_order_relaxed = __ATOMIC_RELAXED,
memory_order_acquire = __ATOMIC_ACQUIRE,
memory_order_release = __ATOMIC_RELEASE,
memory_order_acq_rel = __ATOMIC_ACQ_REL,
memory_order_seq_cst = __ATOMIC_SEQ_CST
} memory_order;
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
// OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions
// These values need to match the runtime equivalent
//
// Addressing Mode.
//
#define CLK_ADDRESS_NONE 0
#define CLK_ADDRESS_CLAMP_TO_EDGE 2
#define CLK_ADDRESS_CLAMP 4
#define CLK_ADDRESS_REPEAT 6
#define CLK_ADDRESS_MIRRORED_REPEAT 8
//
// Coordination Normalization
//
#define CLK_NORMALIZED_COORDS_FALSE 0
#define CLK_NORMALIZED_COORDS_TRUE 1
//
// Filtering Mode.
//
#define CLK_FILTER_NEAREST 0x10
#define CLK_FILTER_LINEAR 0x20
#ifdef cl_khr_gl_msaa_sharing
#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable
#endif //cl_khr_gl_msaa_sharing
//
// Channel Datatype.
//
#define CLK_SNORM_INT8 0x10D0
#define CLK_SNORM_INT16 0x10D1
#define CLK_UNORM_INT8 0x10D2
#define CLK_UNORM_INT16 0x10D3
#define CLK_UNORM_SHORT_565 0x10D4
#define CLK_UNORM_SHORT_555 0x10D5
#define CLK_UNORM_INT_101010 0x10D6
#define CLK_SIGNED_INT8 0x10D7
#define CLK_SIGNED_INT16 0x10D8
#define CLK_SIGNED_INT32 0x10D9
#define CLK_UNSIGNED_INT8 0x10DA
#define CLK_UNSIGNED_INT16 0x10DB
#define CLK_UNSIGNED_INT32 0x10DC
#define CLK_HALF_FLOAT 0x10DD
#define CLK_FLOAT 0x10DE
#define CLK_UNORM_INT24 0x10DF
// Channel order, numbering must be aligned with cl_channel_order in cl.h
//
#define CLK_R 0x10B0
#define CLK_A 0x10B1
#define CLK_RG 0x10B2
#define CLK_RA 0x10B3
#define CLK_RGB 0x10B4
#define CLK_RGBA 0x10B5
#define CLK_BGRA 0x10B6
#define CLK_ARGB 0x10B7
#define CLK_INTENSITY 0x10B8
#define CLK_LUMINANCE 0x10B9
#define CLK_Rx 0x10BA
#define CLK_RGx 0x10BB
#define CLK_RGBx 0x10BC
#define CLK_DEPTH 0x10BD
#define CLK_DEPTH_STENCIL 0x10BE
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
#define CLK_sRGB 0x10BF
#define CLK_sRGBx 0x10C0
#define CLK_sRGBA 0x10C1
#define CLK_sBGRA 0x10C2
#define CLK_ABGR 0x10C3
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
// OpenCL v2.0 s6.13.16 - Pipe Functions
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
#define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t))
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
// OpenCL v2.0 s6.13.17 - Enqueue Kernels
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
#define CL_COMPLETE 0x0
#define CL_RUNNING 0x1
#define CL_SUBMITTED 0x2
#define CL_QUEUED 0x3
#define CLK_SUCCESS 0
#define CLK_ENQUEUE_FAILURE -101
#define CLK_INVALID_QUEUE -102
#define CLK_INVALID_NDRANGE -160
#define CLK_INVALID_EVENT_WAIT_LIST -57
#define CLK_DEVICE_QUEUE_FULL -161
#define CLK_INVALID_ARG_SIZE -51
#define CLK_EVENT_ALLOCATION_FAILURE -100
#define CLK_OUT_OF_RESOURCES -5
#define CLK_NULL_QUEUE 0
#define CLK_NULL_EVENT (__builtin_astype(((void*)(__SIZE_MAX__)), clk_event_t))
// execution model related definitions
#define CLK_ENQUEUE_FLAGS_NO_WAIT 0x0
#define CLK_ENQUEUE_FLAGS_WAIT_KERNEL 0x1
#define CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP 0x2
typedef int kernel_enqueue_flags_t;
typedef int clk_profiling_info;
// Profiling info name (see capture_event_profiling_info)
#define CLK_PROFILING_COMMAND_EXEC_TIME 0x1
#define MAX_WORK_DIM 3
typedef struct {
unsigned int workDimension;
size_t globalWorkOffset[MAX_WORK_DIM];
size_t globalWorkSize[MAX_WORK_DIM];
size_t localWorkSize[MAX_WORK_DIM];
} ndrange_t;
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
#ifdef cl_intel_device_side_avc_motion_estimation
#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin
#define CLK_AVC_ME_MAJOR_16x16_INTEL 0x0
#define CLK_AVC_ME_MAJOR_16x8_INTEL 0x1
#define CLK_AVC_ME_MAJOR_8x16_INTEL 0x2
#define CLK_AVC_ME_MAJOR_8x8_INTEL 0x3
#define CLK_AVC_ME_MINOR_8x8_INTEL 0x0
#define CLK_AVC_ME_MINOR_8x4_INTEL 0x1
#define CLK_AVC_ME_MINOR_4x8_INTEL 0x2
#define CLK_AVC_ME_MINOR_4x4_INTEL 0x3
#define CLK_AVC_ME_MAJOR_FORWARD_INTEL 0x0
#define CLK_AVC_ME_MAJOR_BACKWARD_INTEL 0x1
#define CLK_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2
#define CLK_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0
#define CLK_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E
#define CLK_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D
#define CLK_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B
#define CLK_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77
#define CLK_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F
#define CLK_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F
#define CLK_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F
#define CLK_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0
#define CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1
#define CLK_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2
#define CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0
#define CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1
#define CLK_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2
#define CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3
#define CLK_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4
#define CLK_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5
#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6
#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7
#define CLK_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8
#define CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
#define CLK_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2
#define CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
#define CLK_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
#define CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3
#define CLK_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0
#define CLK_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1
#define CLK_AVC_ME_COST_PRECISION_PEL_INTEL 0x2
#define CLK_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3
#define CLK_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10
#define CLK_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15
#define CLK_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20
#define CLK_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B
#define CLK_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30
#define CLK_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0
#define CLK_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2
#define CLK_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4
#define CLK_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8
#define CLK_AVC_ME_INTRA_16x16_INTEL 0x0
#define CLK_AVC_ME_INTRA_8x8_INTEL 0x1
#define CLK_AVC_ME_INTRA_4x4_INTEL 0x2
#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0
#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000
#define CLK_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL (0x1 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL (0x2 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL (0x3 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL (0x55 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL (0xAA << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL (0xFF << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL (0x1 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL (0x2 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL (0x1 << 26)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL (0x2 << 26)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL (0x1 << 28)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL (0x2 << 28)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL (0x1 << 30)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL (0x2 << 30)
#define CLK_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00
#define CLK_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_ALL_INTEL 0x0
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3
#define CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
#define CLK_AVC_ME_FRAME_FORWARD_INTEL 0x1
#define CLK_AVC_ME_FRAME_BACKWARD_INTEL 0x2
#define CLK_AVC_ME_FRAME_DUAL_INTEL 0x3
#define CLK_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0
#define CLK_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1
#define CLK_AVC_ME_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_PAYLOAD_INITIALIZE_INTEL 0x0
#define CLK_AVC_REF_PAYLOAD_INITIALIZE_INTEL 0x0
#define CLK_AVC_SIC_PAYLOAD_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_INITIALIZE_INTEL 0x0
#define CLK_AVC_REF_RESULT_INITIALIZE_INTEL 0x0
#define CLK_AVC_SIC_RESULT_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0
#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : end
#endif // cl_intel_device_side_avc_motion_estimation
#endif //_OPENCL_BASE_H_

698
lib/include/opencl-c.h vendored
View File

@ -1,15 +1,16 @@
//===--- opencl-c.h - OpenCL C language builtin function header -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _OPENCL_H_
#define _OPENCL_H_
#include "opencl-c-base.h"
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
#ifndef cl_khr_depth_images
#define cl_khr_depth_images
@ -23,9 +24,6 @@
#endif //__OPENCL_C_VERSION__ < CL_VERSION_2_0
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
#ifndef cl_intel_planar_yuv
#define cl_intel_planar_yuv
#endif // cl_intel_planar_yuv
#pragma OPENCL EXTENSION cl_intel_planar_yuv : begin
#pragma OPENCL EXTENSION cl_intel_planar_yuv : end
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2
@ -37,255 +35,6 @@
#define __purefn __attribute__((pure))
#define __cnfn __attribute__((const))
// built-in scalar data types:
/**
* An unsigned 8-bit integer.
*/
typedef unsigned char uchar;
/**
* An unsigned 16-bit integer.
*/
typedef unsigned short ushort;
/**
* An unsigned 32-bit integer.
*/
typedef unsigned int uint;
/**
* An unsigned 64-bit integer.
*/
typedef unsigned long ulong;
/**
* The unsigned integer type of the result of the sizeof operator. This
* is a 32-bit unsigned integer if CL_DEVICE_ADDRESS_BITS
* defined in table 4.3 is 32-bits and is a 64-bit unsigned integer if
* CL_DEVICE_ADDRESS_BITS is 64-bits.
*/
typedef __SIZE_TYPE__ size_t;
/**
* A signed integer type that is the result of subtracting two pointers.
* This is a 32-bit signed integer if CL_DEVICE_ADDRESS_BITS
* defined in table 4.3 is 32-bits and is a 64-bit signed integer if
* CL_DEVICE_ADDRESS_BITS is 64-bits.
*/
typedef __PTRDIFF_TYPE__ ptrdiff_t;
/**
* A signed integer type with the property that any valid pointer to
* void can be converted to this type, then converted back to pointer
* to void, and the result will compare equal to the original pointer.
*/
typedef __INTPTR_TYPE__ intptr_t;
/**
* An unsigned integer type with the property that any valid pointer to
* void can be converted to this type, then converted back to pointer
* to void, and the result will compare equal to the original pointer.
*/
typedef __UINTPTR_TYPE__ uintptr_t;
// built-in vector data types:
typedef char char2 __attribute__((ext_vector_type(2)));
typedef char char3 __attribute__((ext_vector_type(3)));
typedef char char4 __attribute__((ext_vector_type(4)));
typedef char char8 __attribute__((ext_vector_type(8)));
typedef char char16 __attribute__((ext_vector_type(16)));
typedef uchar uchar2 __attribute__((ext_vector_type(2)));
typedef uchar uchar3 __attribute__((ext_vector_type(3)));
typedef uchar uchar4 __attribute__((ext_vector_type(4)));
typedef uchar uchar8 __attribute__((ext_vector_type(8)));
typedef uchar uchar16 __attribute__((ext_vector_type(16)));
typedef short short2 __attribute__((ext_vector_type(2)));
typedef short short3 __attribute__((ext_vector_type(3)));
typedef short short4 __attribute__((ext_vector_type(4)));
typedef short short8 __attribute__((ext_vector_type(8)));
typedef short short16 __attribute__((ext_vector_type(16)));
typedef ushort ushort2 __attribute__((ext_vector_type(2)));
typedef ushort ushort3 __attribute__((ext_vector_type(3)));
typedef ushort ushort4 __attribute__((ext_vector_type(4)));
typedef ushort ushort8 __attribute__((ext_vector_type(8)));
typedef ushort ushort16 __attribute__((ext_vector_type(16)));
typedef int int2 __attribute__((ext_vector_type(2)));
typedef int int3 __attribute__((ext_vector_type(3)));
typedef int int4 __attribute__((ext_vector_type(4)));
typedef int int8 __attribute__((ext_vector_type(8)));
typedef int int16 __attribute__((ext_vector_type(16)));
typedef uint uint2 __attribute__((ext_vector_type(2)));
typedef uint uint3 __attribute__((ext_vector_type(3)));
typedef uint uint4 __attribute__((ext_vector_type(4)));
typedef uint uint8 __attribute__((ext_vector_type(8)));
typedef uint uint16 __attribute__((ext_vector_type(16)));
typedef long long2 __attribute__((ext_vector_type(2)));
typedef long long3 __attribute__((ext_vector_type(3)));
typedef long long4 __attribute__((ext_vector_type(4)));
typedef long long8 __attribute__((ext_vector_type(8)));
typedef long long16 __attribute__((ext_vector_type(16)));
typedef ulong ulong2 __attribute__((ext_vector_type(2)));
typedef ulong ulong3 __attribute__((ext_vector_type(3)));
typedef ulong ulong4 __attribute__((ext_vector_type(4)));
typedef ulong ulong8 __attribute__((ext_vector_type(8)));
typedef ulong ulong16 __attribute__((ext_vector_type(16)));
typedef float float2 __attribute__((ext_vector_type(2)));
typedef float float3 __attribute__((ext_vector_type(3)));
typedef float float4 __attribute__((ext_vector_type(4)));
typedef float float8 __attribute__((ext_vector_type(8)));
typedef float float16 __attribute__((ext_vector_type(16)));
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
typedef half half2 __attribute__((ext_vector_type(2)));
typedef half half3 __attribute__((ext_vector_type(3)));
typedef half half4 __attribute__((ext_vector_type(4)));
typedef half half8 __attribute__((ext_vector_type(8)));
typedef half half16 __attribute__((ext_vector_type(16)));
#endif
#ifdef cl_khr_fp64
#if __OPENCL_C_VERSION__ < CL_VERSION_1_2
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
typedef double double2 __attribute__((ext_vector_type(2)));
typedef double double3 __attribute__((ext_vector_type(3)));
typedef double double4 __attribute__((ext_vector_type(4)));
typedef double double8 __attribute__((ext_vector_type(8)));
typedef double double16 __attribute__((ext_vector_type(16)));
#endif
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
#define NULL ((void*)0)
#endif
/**
* Value of maximum non-infinite single-precision floating-point
* number.
*/
#define MAXFLOAT 0x1.fffffep127f
/**
* A positive float constant expression. HUGE_VALF evaluates
* to +infinity. Used as an error value returned by the built-in
* math functions.
*/
#define HUGE_VALF (__builtin_huge_valf())
/**
* A positive double constant expression. HUGE_VAL evaluates
* to +infinity. Used as an error value returned by the built-in
* math functions.
*/
#define HUGE_VAL (__builtin_huge_val())
/**
* A constant expression of type float representing positive or
* unsigned infinity.
*/
#define INFINITY (__builtin_inff())
/**
* A constant expression of type float representing a quiet NaN.
*/
#define NAN as_float(INT_MAX)
#define FP_ILOGB0 INT_MIN
#define FP_ILOGBNAN INT_MAX
#define FLT_DIG 6
#define FLT_MANT_DIG 24
#define FLT_MAX_10_EXP +38
#define FLT_MAX_EXP +128
#define FLT_MIN_10_EXP -37
#define FLT_MIN_EXP -125
#define FLT_RADIX 2
#define FLT_MAX 0x1.fffffep127f
#define FLT_MIN 0x1.0p-126f
#define FLT_EPSILON 0x1.0p-23f
#define M_E_F 2.71828182845904523536028747135266250f
#define M_LOG2E_F 1.44269504088896340735992468100189214f
#define M_LOG10E_F 0.434294481903251827651128918916605082f
#define M_LN2_F 0.693147180559945309417232121458176568f
#define M_LN10_F 2.30258509299404568401799145468436421f
#define M_PI_F 3.14159265358979323846264338327950288f
#define M_PI_2_F 1.57079632679489661923132169163975144f
#define M_PI_4_F 0.785398163397448309615660845819875721f
#define M_1_PI_F 0.318309886183790671537767526745028724f
#define M_2_PI_F 0.636619772367581343075535053490057448f
#define M_2_SQRTPI_F 1.12837916709551257389615890312154517f
#define M_SQRT2_F 1.41421356237309504880168872420969808f
#define M_SQRT1_2_F 0.707106781186547524400844362104849039f
#define DBL_DIG 15
#define DBL_MANT_DIG 53
#define DBL_MAX_10_EXP +308
#define DBL_MAX_EXP +1024
#define DBL_MIN_10_EXP -307
#define DBL_MIN_EXP -1021
#define DBL_RADIX 2
#define DBL_MAX 0x1.fffffffffffffp1023
#define DBL_MIN 0x1.0p-1022
#define DBL_EPSILON 0x1.0p-52
#define M_E 0x1.5bf0a8b145769p+1
#define M_LOG2E 0x1.71547652b82fep+0
#define M_LOG10E 0x1.bcb7b1526e50ep-2
#define M_LN2 0x1.62e42fefa39efp-1
#define M_LN10 0x1.26bb1bbb55516p+1
#define M_PI 0x1.921fb54442d18p+1
#define M_PI_2 0x1.921fb54442d18p+0
#define M_PI_4 0x1.921fb54442d18p-1
#define M_1_PI 0x1.45f306dc9c883p-2
#define M_2_PI 0x1.45f306dc9c883p-1
#define M_2_SQRTPI 0x1.20dd750429b6dp+0
#define M_SQRT2 0x1.6a09e667f3bcdp+0
#define M_SQRT1_2 0x1.6a09e667f3bcdp-1
#ifdef cl_khr_fp16
#define HALF_DIG 3
#define HALF_MANT_DIG 11
#define HALF_MAX_10_EXP +4
#define HALF_MAX_EXP +16
#define HALF_MIN_10_EXP -4
#define HALF_MIN_EXP -13
#define HALF_RADIX 2
#define HALF_MAX ((0x1.ffcp15h))
#define HALF_MIN ((0x1.0p-14h))
#define HALF_EPSILON ((0x1.0p-10h))
#define M_E_H 2.71828182845904523536028747135266250h
#define M_LOG2E_H 1.44269504088896340735992468100189214h
#define M_LOG10E_H 0.434294481903251827651128918916605082h
#define M_LN2_H 0.693147180559945309417232121458176568h
#define M_LN10_H 2.30258509299404568401799145468436421h
#define M_PI_H 3.14159265358979323846264338327950288h
#define M_PI_2_H 1.57079632679489661923132169163975144h
#define M_PI_4_H 0.785398163397448309615660845819875721h
#define M_1_PI_H 0.318309886183790671537767526745028724h
#define M_2_PI_H 0.636619772367581343075535053490057448h
#define M_2_SQRTPI_H 1.12837916709551257389615890312154517h
#define M_SQRT2_H 1.41421356237309504880168872420969808h
#define M_SQRT1_2_H 0.707106781186547524400844362104849039h
#endif //cl_khr_fp16
#define CHAR_BIT 8
#define SCHAR_MAX 127
#define SCHAR_MIN (-128)
#define UCHAR_MAX 255
#define CHAR_MAX SCHAR_MAX
#define CHAR_MIN SCHAR_MIN
#define USHRT_MAX 65535
#define SHRT_MAX 32767
#define SHRT_MIN (-32768)
#define UINT_MAX 0xffffffff
#define INT_MAX 2147483647
#define INT_MIN (-2147483647-1)
#define ULONG_MAX 0xffffffffffffffffUL
#define LONG_MAX 0x7fffffffffffffffL
#define LONG_MIN (-0x7fffffffffffffffL-1)
// OpenCL v1.1/1.2/2.0 s6.2.3 - Explicit conversions
@ -9598,8 +9347,6 @@ long8 __ovld __cnfn clamp(long8 x, long8 minval, long8 maxval);
ulong8 __ovld __cnfn clamp(ulong8 x, ulong8 minval, ulong8 maxval);
long16 __ovld __cnfn clamp(long16 x, long16 minval, long16 maxval);
ulong16 __ovld __cnfn clamp(ulong16 x, ulong16 minval, ulong16 maxval);
char __ovld __cnfn clamp(char x, char minval, char maxval);
uchar __ovld __cnfn clamp(uchar x, uchar minval, uchar maxval);
char2 __ovld __cnfn clamp(char2 x, char minval, char maxval);
uchar2 __ovld __cnfn clamp(uchar2 x, uchar minval, uchar maxval);
char3 __ovld __cnfn clamp(char3 x, char minval, char maxval);
@ -9610,8 +9357,6 @@ char8 __ovld __cnfn clamp(char8 x, char minval, char maxval);
uchar8 __ovld __cnfn clamp(uchar8 x, uchar minval, uchar maxval);
char16 __ovld __cnfn clamp(char16 x, char minval, char maxval);
uchar16 __ovld __cnfn clamp(uchar16 x, uchar minval, uchar maxval);
short __ovld __cnfn clamp(short x, short minval, short maxval);
ushort __ovld __cnfn clamp(ushort x, ushort minval, ushort maxval);
short2 __ovld __cnfn clamp(short2 x, short minval, short maxval);
ushort2 __ovld __cnfn clamp(ushort2 x, ushort minval, ushort maxval);
short3 __ovld __cnfn clamp(short3 x, short minval, short maxval);
@ -9622,8 +9367,6 @@ short8 __ovld __cnfn clamp(short8 x, short minval, short maxval);
ushort8 __ovld __cnfn clamp(ushort8 x, ushort minval, ushort maxval);
short16 __ovld __cnfn clamp(short16 x, short minval, short maxval);
ushort16 __ovld __cnfn clamp(ushort16 x, ushort minval, ushort maxval);
int __ovld __cnfn clamp(int x, int minval, int maxval);
uint __ovld __cnfn clamp(uint x, uint minval, uint maxval);
int2 __ovld __cnfn clamp(int2 x, int minval, int maxval);
uint2 __ovld __cnfn clamp(uint2 x, uint minval, uint maxval);
int3 __ovld __cnfn clamp(int3 x, int minval, int maxval);
@ -9634,8 +9377,6 @@ int8 __ovld __cnfn clamp(int8 x, int minval, int maxval);
uint8 __ovld __cnfn clamp(uint8 x, uint minval, uint maxval);
int16 __ovld __cnfn clamp(int16 x, int minval, int maxval);
uint16 __ovld __cnfn clamp(uint16 x, uint minval, uint maxval);
long __ovld __cnfn clamp(long x, long minval, long maxval);
ulong __ovld __cnfn clamp(ulong x, ulong minval, ulong maxval);
long2 __ovld __cnfn clamp(long2 x, long minval, long maxval);
ulong2 __ovld __cnfn clamp(ulong2 x, ulong minval, ulong maxval);
long3 __ovld __cnfn clamp(long3 x, long minval, long maxval);
@ -9911,8 +9652,6 @@ long8 __ovld __cnfn max(long8 x, long8 y);
ulong8 __ovld __cnfn max(ulong8 x, ulong8 y);
long16 __ovld __cnfn max(long16 x, long16 y);
ulong16 __ovld __cnfn max(ulong16 x, ulong16 y);
char __ovld __cnfn max(char x, char y);
uchar __ovld __cnfn max(uchar x, uchar y);
char2 __ovld __cnfn max(char2 x, char y);
uchar2 __ovld __cnfn max(uchar2 x, uchar y);
char3 __ovld __cnfn max(char3 x, char y);
@ -9923,8 +9662,6 @@ char8 __ovld __cnfn max(char8 x, char y);
uchar8 __ovld __cnfn max(uchar8 x, uchar y);
char16 __ovld __cnfn max(char16 x, char y);
uchar16 __ovld __cnfn max(uchar16 x, uchar y);
short __ovld __cnfn max(short x, short y);
ushort __ovld __cnfn max(ushort x, ushort y);
short2 __ovld __cnfn max(short2 x, short y);
ushort2 __ovld __cnfn max(ushort2 x, ushort y);
short3 __ovld __cnfn max(short3 x, short y);
@ -9935,8 +9672,6 @@ short8 __ovld __cnfn max(short8 x, short y);
ushort8 __ovld __cnfn max(ushort8 x, ushort y);
short16 __ovld __cnfn max(short16 x, short y);
ushort16 __ovld __cnfn max(ushort16 x, ushort y);
int __ovld __cnfn max(int x, int y);
uint __ovld __cnfn max(uint x, uint y);
int2 __ovld __cnfn max(int2 x, int y);
uint2 __ovld __cnfn max(uint2 x, uint y);
int3 __ovld __cnfn max(int3 x, int y);
@ -9947,8 +9682,6 @@ int8 __ovld __cnfn max(int8 x, int y);
uint8 __ovld __cnfn max(uint8 x, uint y);
int16 __ovld __cnfn max(int16 x, int y);
uint16 __ovld __cnfn max(uint16 x, uint y);
long __ovld __cnfn max(long x, long y);
ulong __ovld __cnfn max(ulong x, ulong y);
long2 __ovld __cnfn max(long2 x, long y);
ulong2 __ovld __cnfn max(ulong2 x, ulong y);
long3 __ovld __cnfn max(long3 x, long y);
@ -10011,8 +9744,6 @@ long8 __ovld __cnfn min(long8 x, long8 y);
ulong8 __ovld __cnfn min(ulong8 x, ulong8 y);
long16 __ovld __cnfn min(long16 x, long16 y);
ulong16 __ovld __cnfn min(ulong16 x, ulong16 y);
char __ovld __cnfn min(char x, char y);
uchar __ovld __cnfn min(uchar x, uchar y);
char2 __ovld __cnfn min(char2 x, char y);
uchar2 __ovld __cnfn min(uchar2 x, uchar y);
char3 __ovld __cnfn min(char3 x, char y);
@ -10023,8 +9754,6 @@ char8 __ovld __cnfn min(char8 x, char y);
uchar8 __ovld __cnfn min(uchar8 x, uchar y);
char16 __ovld __cnfn min(char16 x, char y);
uchar16 __ovld __cnfn min(uchar16 x, uchar y);
short __ovld __cnfn min(short x, short y);
ushort __ovld __cnfn min(ushort x, ushort y);
short2 __ovld __cnfn min(short2 x, short y);
ushort2 __ovld __cnfn min(ushort2 x, ushort y);
short3 __ovld __cnfn min(short3 x, short y);
@ -10035,8 +9764,6 @@ short8 __ovld __cnfn min(short8 x, short y);
ushort8 __ovld __cnfn min(ushort8 x, ushort y);
short16 __ovld __cnfn min(short16 x, short y);
ushort16 __ovld __cnfn min(ushort16 x, ushort y);
int __ovld __cnfn min(int x, int y);
uint __ovld __cnfn min(uint x, uint y);
int2 __ovld __cnfn min(int2 x, int y);
uint2 __ovld __cnfn min(uint2 x, uint y);
int3 __ovld __cnfn min(int3 x, int y);
@ -10047,8 +9774,6 @@ int8 __ovld __cnfn min(int8 x, int y);
uint8 __ovld __cnfn min(uint8 x, uint y);
int16 __ovld __cnfn min(int16 x, int y);
uint16 __ovld __cnfn min(uint16 x, uint y);
long __ovld __cnfn min(long x, long y);
ulong __ovld __cnfn min(ulong x, ulong y);
long2 __ovld __cnfn min(long2 x, long y);
ulong2 __ovld __cnfn min(ulong2 x, ulong y);
long3 __ovld __cnfn min(long3 x, long y);
@ -10627,7 +10352,6 @@ half3 __ovld __cnfn step(half3 edge, half3 x);
half4 __ovld __cnfn step(half4 edge, half4 x);
half8 __ovld __cnfn step(half8 edge, half8 x);
half16 __ovld __cnfn step(half16 edge, half16 x);
half __ovld __cnfn step(half edge, half x);
half2 __ovld __cnfn step(half edge, half2 x);
half3 __ovld __cnfn step(half edge, half3 x);
half4 __ovld __cnfn step(half edge, half4 x);
@ -10679,7 +10403,6 @@ half3 __ovld __cnfn smoothstep(half3 edge0, half3 edge1, half3 x);
half4 __ovld __cnfn smoothstep(half4 edge0, half4 edge1, half4 x);
half8 __ovld __cnfn smoothstep(half8 edge0, half8 edge1, half8 x);
half16 __ovld __cnfn smoothstep(half16 edge0, half16 edge1, half16 x);
half __ovld __cnfn smoothstep(half edge0, half edge1, half x);
half2 __ovld __cnfn smoothstep(half edge0, half edge1, half2 x);
half3 __ovld __cnfn smoothstep(half edge0, half edge1, half3 x);
half4 __ovld __cnfn smoothstep(half edge0, half edge1, half4 x);
@ -12777,30 +12500,6 @@ void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p);
// OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions
// Flag type and values for barrier, mem_fence, read_mem_fence, write_mem_fence
typedef uint cl_mem_fence_flags;
/**
* Queue a memory fence to ensure correct
* ordering of memory operations to local memory
*/
#define CLK_LOCAL_MEM_FENCE 0x01
/**
* Queue a memory fence to ensure correct
* ordering of memory operations to global memory
*/
#define CLK_GLOBAL_MEM_FENCE 0x02
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
/**
* Queue a memory fence to ensure correct ordering of memory
* operations between work-items of a work-group to
* image memory.
*/
#define CLK_IMAGE_MEM_FENCE 0x04
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
/**
* All work-items in a work-group executing the kernel
* on a processor must execute this function before any
@ -12834,17 +12533,6 @@ typedef uint cl_mem_fence_flags;
void __ovld __conv barrier(cl_mem_fence_flags flags);
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
typedef enum memory_scope {
memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
#endif
} memory_scope;
void __ovld __conv work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
void __ovld __conv work_group_barrier(cl_mem_fence_flags flags);
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
@ -13341,6 +13029,10 @@ int __ovld atomic_add(volatile __global int *p, int val);
unsigned int __ovld atomic_add(volatile __global unsigned int *p, unsigned int val);
int __ovld atomic_add(volatile __local int *p, int val);
unsigned int __ovld atomic_add(volatile __local unsigned int *p, unsigned int val);
#ifdef __OPENCL_CPP_VERSION__
int __ovld atomic_add(volatile int *p, int val);
unsigned int __ovld atomic_add(volatile unsigned int *p, unsigned int val);
#endif
#if defined(cl_khr_global_int32_base_atomics)
int __ovld atom_add(volatile __global int *p, int val);
@ -13367,6 +13059,10 @@ int __ovld atomic_sub(volatile __global int *p, int val);
unsigned int __ovld atomic_sub(volatile __global unsigned int *p, unsigned int val);
int __ovld atomic_sub(volatile __local int *p, int val);
unsigned int __ovld atomic_sub(volatile __local unsigned int *p, unsigned int val);
#ifdef __OPENCL_CPP_VERSION__
int __ovld atomic_sub(volatile int *p, int val);
unsigned int __ovld atomic_sub(volatile unsigned int *p, unsigned int val);
#endif
#if defined(cl_khr_global_int32_base_atomics)
int __ovld atom_sub(volatile __global int *p, int val);
@ -13395,6 +13091,11 @@ int __ovld atomic_xchg(volatile __local int *p, int val);
unsigned int __ovld atomic_xchg(volatile __local unsigned int *p, unsigned int val);
float __ovld atomic_xchg(volatile __global float *p, float val);
float __ovld atomic_xchg(volatile __local float *p, float val);
#ifdef __OPENCL_CPP_VERSION__
int __ovld atomic_xchg(volatile int *p, int val);
unsigned int __ovld atomic_xchg(volatile unsigned int *p, unsigned int val);
float __ovld atomic_xchg(volatile float *p, float val);
#endif
#if defined(cl_khr_global_int32_base_atomics)
int __ovld atom_xchg(volatile __global int *p, int val);
@ -13422,6 +13123,10 @@ int __ovld atomic_inc(volatile __global int *p);
unsigned int __ovld atomic_inc(volatile __global unsigned int *p);
int __ovld atomic_inc(volatile __local int *p);
unsigned int __ovld atomic_inc(volatile __local unsigned int *p);
#ifdef __OPENCL_CPP_VERSION__
int __ovld atomic_inc(volatile int *p);
unsigned int __ovld atomic_inc(volatile unsigned int *p);
#endif
#if defined(cl_khr_global_int32_base_atomics)
int __ovld atom_inc(volatile __global int *p);
@ -13449,6 +13154,10 @@ int __ovld atomic_dec(volatile __global int *p);
unsigned int __ovld atomic_dec(volatile __global unsigned int *p);
int __ovld atomic_dec(volatile __local int *p);
unsigned int __ovld atomic_dec(volatile __local unsigned int *p);
#ifdef __OPENCL_CPP_VERSION__
int __ovld atomic_dec(volatile int *p);
unsigned int __ovld atomic_dec(volatile unsigned int *p);
#endif
#if defined(cl_khr_global_int32_base_atomics)
int __ovld atom_dec(volatile __global int *p);
@ -13477,6 +13186,10 @@ int __ovld atomic_cmpxchg(volatile __global int *p, int cmp, int val);
unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val);
int __ovld atomic_cmpxchg(volatile __local int *p, int cmp, int val);
unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val);
#ifdef __OPENCL_CPP_VERSION__
int __ovld atomic_cmpxchg(volatile int *p, int cmp, int val);
unsigned int __ovld atomic_cmpxchg(volatile unsigned int *p, unsigned int cmp, unsigned int val);
#endif
#if defined(cl_khr_global_int32_base_atomics)
int __ovld atom_cmpxchg(volatile __global int *p, int cmp, int val);
@ -13505,6 +13218,10 @@ int __ovld atomic_min(volatile __global int *p, int val);
unsigned int __ovld atomic_min(volatile __global unsigned int *p, unsigned int val);
int __ovld atomic_min(volatile __local int *p, int val);
unsigned int __ovld atomic_min(volatile __local unsigned int *p, unsigned int val);
#ifdef __OPENCL_CPP_VERSION__
int __ovld atomic_min(volatile int *p, int val);
unsigned int __ovld atomic_min(volatile unsigned int *p, unsigned int val);
#endif
#if defined(cl_khr_global_int32_extended_atomics)
int __ovld atom_min(volatile __global int *p, int val);
@ -13533,6 +13250,10 @@ int __ovld atomic_max(volatile __global int *p, int val);
unsigned int __ovld atomic_max(volatile __global unsigned int *p, unsigned int val);
int __ovld atomic_max(volatile __local int *p, int val);
unsigned int __ovld atomic_max(volatile __local unsigned int *p, unsigned int val);
#ifdef __OPENCL_CPP_VERSION__
int __ovld atomic_max(volatile int *p, int val);
unsigned int __ovld atomic_max(volatile unsigned int *p, unsigned int val);
#endif
#if defined(cl_khr_global_int32_extended_atomics)
int __ovld atom_max(volatile __global int *p, int val);
@ -13560,6 +13281,10 @@ int __ovld atomic_and(volatile __global int *p, int val);
unsigned int __ovld atomic_and(volatile __global unsigned int *p, unsigned int val);
int __ovld atomic_and(volatile __local int *p, int val);
unsigned int __ovld atomic_and(volatile __local unsigned int *p, unsigned int val);
#ifdef __OPENCL_CPP_VERSION__
int __ovld atomic_and(volatile int *p, int val);
unsigned int __ovld atomic_and(volatile unsigned int *p, unsigned int val);
#endif
#if defined(cl_khr_global_int32_extended_atomics)
int __ovld atom_and(volatile __global int *p, int val);
@ -13587,6 +13312,10 @@ int __ovld atomic_or(volatile __global int *p, int val);
unsigned int __ovld atomic_or(volatile __global unsigned int *p, unsigned int val);
int __ovld atomic_or(volatile __local int *p, int val);
unsigned int __ovld atomic_or(volatile __local unsigned int *p, unsigned int val);
#ifdef __OPENCL_CPP_VERSION__
int __ovld atomic_or(volatile int *p, int val);
unsigned int __ovld atomic_or(volatile unsigned int *p, unsigned int val);
#endif
#if defined(cl_khr_global_int32_extended_atomics)
int __ovld atom_or(volatile __global int *p, int val);
@ -13614,6 +13343,10 @@ int __ovld atomic_xor(volatile __global int *p, int val);
unsigned int __ovld atomic_xor(volatile __global unsigned int *p, unsigned int val);
int __ovld atomic_xor(volatile __local int *p, int val);
unsigned int __ovld atomic_xor(volatile __local unsigned int *p, unsigned int val);
#ifdef __OPENCL_CPP_VERSION__
int __ovld atomic_xor(volatile int *p, int val);
unsigned int __ovld atomic_xor(volatile unsigned int *p, unsigned int val);
#endif
#if defined(cl_khr_global_int32_extended_atomics)
int __ovld atom_xor(volatile __global int *p, int val);
@ -13639,20 +13372,6 @@ unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long v
// OpenCL v2.0 s6.13.11 - Atomics Functions
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
#ifndef ATOMIC_VAR_INIT
#define ATOMIC_VAR_INIT(x) (x)
#endif //ATOMIC_VAR_INIT
#define ATOMIC_FLAG_INIT 0
// enum values aligned with what clang uses in EmitAtomicExpr()
typedef enum memory_order
{
memory_order_relaxed = __ATOMIC_RELAXED,
memory_order_acquire = __ATOMIC_ACQUIRE,
memory_order_release = __ATOMIC_RELEASE,
memory_order_acq_rel = __ATOMIC_ACQ_REL,
memory_order_seq_cst = __ATOMIC_SEQ_CST
} memory_order;
// double atomics support requires extensions cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics
#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
@ -14470,33 +14189,11 @@ half16 __ovld __cnfn shuffle2(half16 x, half16 y, ushort16 mask);
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
// OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf
int printf(__constant const char* st, ...);
int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2)));
#endif
// OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions
// These values need to match the runtime equivalent
//
// Addressing Mode.
//
#define CLK_ADDRESS_NONE 0
#define CLK_ADDRESS_CLAMP_TO_EDGE 2
#define CLK_ADDRESS_CLAMP 4
#define CLK_ADDRESS_REPEAT 6
#define CLK_ADDRESS_MIRRORED_REPEAT 8
//
// Coordination Normalization
//
#define CLK_NORMALIZED_COORDS_FALSE 0
#define CLK_NORMALIZED_COORDS_TRUE 1
//
// Filtering Mode.
//
#define CLK_FILTER_NEAREST 0x10
#define CLK_FILTER_LINEAR 0x20
#ifdef cl_khr_gl_msaa_sharing
#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable
#endif //cl_khr_gl_msaa_sharing
@ -14712,30 +14409,6 @@ float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler,
int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);
uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);
float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float lod);
int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord, float lod);
uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord, float lod);
float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);
int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);
uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);
float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);
int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);
uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);
float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord, float lod);
float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);
int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);
uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);
float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);
float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);
int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);
uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);
#endif //cl_khr_mipmap_image
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
@ -14895,29 +14568,6 @@ float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler
int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);
uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);
float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod);
int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod);
uint4 __purefn __ovld read_imageui(read_write image1d_t image, sampler_t sampler, float coord, float lod);
float4 __purefn __ovld read_imagef(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);
int4 __purefn __ovld read_imagei(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);
uint4 __purefn __ovld read_imageui(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);
float4 __purefn __ovld read_imagef(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);
int4 __purefn __ovld read_imagei(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);
uint4 __purefn __ovld read_imageui(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);
float __purefn __ovld read_imagef(read_write image2d_depth_t image, sampler_t sampler, float2 coord, float lod);
float4 __purefn __ovld read_imagef(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);
int4 __purefn __ovld read_imagei(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);
uint4 __purefn __ovld read_imageui(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);
float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);
float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);
int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);
uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);
#endif //cl_khr_mipmap_image
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
@ -15332,26 +14982,6 @@ int __ovld get_image_num_mip_levels(read_write image2d_depth_t image);
* CLK_FLOAT
*/
//
// Channel Datatype.
//
#define CLK_SNORM_INT8 0x10D0
#define CLK_SNORM_INT16 0x10D1
#define CLK_UNORM_INT8 0x10D2
#define CLK_UNORM_INT16 0x10D3
#define CLK_UNORM_SHORT_565 0x10D4
#define CLK_UNORM_SHORT_555 0x10D5
#define CLK_UNORM_INT_101010 0x10D6
#define CLK_SIGNED_INT8 0x10D7
#define CLK_SIGNED_INT16 0x10D8
#define CLK_SIGNED_INT32 0x10D9
#define CLK_UNSIGNED_INT8 0x10DA
#define CLK_UNSIGNED_INT16 0x10DB
#define CLK_UNSIGNED_INT32 0x10DC
#define CLK_HALF_FLOAT 0x10DD
#define CLK_FLOAT 0x10DE
#define CLK_UNORM_INT24 0x10DF
int __ovld __cnfn get_image_channel_data_type(read_only image1d_t image);
int __ovld __cnfn get_image_channel_data_type(read_only image1d_buffer_t image);
int __ovld __cnfn get_image_channel_data_type(read_only image2d_t image);
@ -15423,30 +15053,6 @@ int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_dept
* CLK_INTENSITY
* CLK_LUMINANCE
*/
// Channel order, numbering must be aligned with cl_channel_order in cl.h
//
#define CLK_R 0x10B0
#define CLK_A 0x10B1
#define CLK_RG 0x10B2
#define CLK_RA 0x10B3
#define CLK_RGB 0x10B4
#define CLK_RGBA 0x10B5
#define CLK_BGRA 0x10B6
#define CLK_ARGB 0x10B7
#define CLK_INTENSITY 0x10B8
#define CLK_LUMINANCE 0x10B9
#define CLK_Rx 0x10BA
#define CLK_RGx 0x10BB
#define CLK_RGBx 0x10BC
#define CLK_DEPTH 0x10BD
#define CLK_DEPTH_STENCIL 0x10BE
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
#define CLK_sRGB 0x10BF
#define CLK_sRGBx 0x10C0
#define CLK_sRGBA 0x10C1
#define CLK_sBGRA 0x10C2
#define CLK_ABGR 0x10C3
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
int __ovld __cnfn get_image_channel_order(read_only image1d_t image);
int __ovld __cnfn get_image_channel_order(read_only image1d_buffer_t image);
@ -15605,20 +15211,17 @@ size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t
#if defined(cl_khr_gl_msaa_sharing)
int __ovld get_image_num_samples(read_only image2d_msaa_t image);
int __ovld get_image_num_samples(read_only image2d_msaa_depth_t image);
int __ovld get_image_num_samples(read_only image2d_array_msaa_depth_t image);
int __ovld get_image_num_samples(read_only image2d_array_msaa_t image);
int __ovld get_image_num_samples(read_only image2d_array_msaa_depth_t image);
int __ovld get_image_num_samples(write_only image2d_msaa_t image);
int __ovld get_image_num_samples(write_only image2d_msaa_depth_t image);
int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image);
int __ovld get_image_num_samples(write_only image2d_array_msaa_t image);
int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image);
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
int __ovld get_image_num_samples(read_write image2d_msaa_t image);
int __ovld get_image_num_samples(read_write image2d_msaa_depth_t image);
int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);
int __ovld get_image_num_samples(read_write image2d_array_msaa_t image);
int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
@ -15728,7 +15331,6 @@ double __ovld __conv work_group_scan_inclusive_max(double x);
// OpenCL v2.0 s6.13.16 - Pipe Functions
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
#define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t))
bool __ovld is_valid_reserve_id(reserve_id_t reserve_id);
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
@ -15736,44 +15338,6 @@ bool __ovld is_valid_reserve_id(reserve_id_t reserve_id);
// OpenCL v2.0 s6.13.17 - Enqueue Kernels
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
#define CL_COMPLETE 0x0
#define CL_RUNNING 0x1
#define CL_SUBMITTED 0x2
#define CL_QUEUED 0x3
#define CLK_SUCCESS 0
#define CLK_ENQUEUE_FAILURE -101
#define CLK_INVALID_QUEUE -102
#define CLK_INVALID_NDRANGE -160
#define CLK_INVALID_EVENT_WAIT_LIST -57
#define CLK_DEVICE_QUEUE_FULL -161
#define CLK_INVALID_ARG_SIZE -51
#define CLK_EVENT_ALLOCATION_FAILURE -100
#define CLK_OUT_OF_RESOURCES -5
#define CLK_NULL_QUEUE 0
#define CLK_NULL_EVENT (__builtin_astype(((void*)(__SIZE_MAX__)), clk_event_t))
// execution model related definitions
#define CLK_ENQUEUE_FLAGS_NO_WAIT 0x0
#define CLK_ENQUEUE_FLAGS_WAIT_KERNEL 0x1
#define CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP 0x2
typedef int kernel_enqueue_flags_t;
typedef int clk_profiling_info;
// Profiling info name (see capture_event_profiling_info)
#define CLK_PROFILING_COMMAND_EXEC_TIME 0x1
#define MAX_WORK_DIM 3
typedef struct {
unsigned int workDimension;
size_t globalWorkOffset[MAX_WORK_DIM];
size_t globalWorkSize[MAX_WORK_DIM];
size_t localWorkSize[MAX_WORK_DIM];
} ndrange_t;
ndrange_t __ovld ndrange_1D(size_t);
ndrange_t __ovld ndrange_1D(size_t, size_t);
ndrange_t __ovld ndrange_1D(size_t, size_t, size_t);
@ -16216,138 +15780,6 @@ void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, u
#ifdef cl_intel_device_side_avc_motion_estimation
#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin
#define CLK_AVC_ME_MAJOR_16x16_INTEL 0x0
#define CLK_AVC_ME_MAJOR_16x8_INTEL 0x1
#define CLK_AVC_ME_MAJOR_8x16_INTEL 0x2
#define CLK_AVC_ME_MAJOR_8x8_INTEL 0x3
#define CLK_AVC_ME_MINOR_8x8_INTEL 0x0
#define CLK_AVC_ME_MINOR_8x4_INTEL 0x1
#define CLK_AVC_ME_MINOR_4x8_INTEL 0x2
#define CLK_AVC_ME_MINOR_4x4_INTEL 0x3
#define CLK_AVC_ME_MAJOR_FORWARD_INTEL 0x0
#define CLK_AVC_ME_MAJOR_BACKWARD_INTEL 0x1
#define CLK_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2
#define CLK_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0
#define CLK_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E
#define CLK_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D
#define CLK_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B
#define CLK_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77
#define CLK_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F
#define CLK_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F
#define CLK_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F
#define CLK_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0
#define CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1
#define CLK_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2
#define CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0
#define CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1
#define CLK_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2
#define CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3
#define CLK_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4
#define CLK_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5
#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6
#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7
#define CLK_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8
#define CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
#define CLK_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2
#define CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
#define CLK_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
#define CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3
#define CLK_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0
#define CLK_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1
#define CLK_AVC_ME_COST_PRECISION_PEL_INTEL 0x2
#define CLK_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3
#define CLK_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10
#define CLK_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15
#define CLK_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20
#define CLK_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B
#define CLK_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30
#define CLK_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0
#define CLK_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2
#define CLK_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4
#define CLK_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8
#define CLK_AVC_ME_INTRA_16x16_INTEL 0x0
#define CLK_AVC_ME_INTRA_8x8_INTEL 0x1
#define CLK_AVC_ME_INTRA_4x4_INTEL 0x2
#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0
#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000
#define CLK_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL (0x1 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL (0x2 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL (0x3 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL (0x55 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL (0xAA << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL (0xFF << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL (0x1 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL (0x2 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL (0x1 << 26)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL (0x2 << 26)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL (0x1 << 28)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL (0x2 << 28)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL (0x1 << 30)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL (0x2 << 30)
#define CLK_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00
#define CLK_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_ALL_INTEL 0x0
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3
#define CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
#define CLK_AVC_ME_FRAME_FORWARD_INTEL 0x1
#define CLK_AVC_ME_FRAME_BACKWARD_INTEL 0x2
#define CLK_AVC_ME_FRAME_DUAL_INTEL 0x3
#define CLK_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0
#define CLK_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1
#define CLK_AVC_ME_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_PAYLOAD_INITIALIZE_INTEL 0x0
#define CLK_AVC_REF_PAYLOAD_INITIALIZE_INTEL 0x0
#define CLK_AVC_SIC_PAYLOAD_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_INITIALIZE_INTEL 0x0
#define CLK_AVC_REF_RESULT_INITIALIZE_INTEL 0x0
#define CLK_AVC_SIC_RESULT_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0
// MCE built-in functions
uchar __ovld
intel_sub_group_avc_mce_get_default_inter_base_multi_reference_penalty(
@ -17034,6 +16466,34 @@ uint8 __ovld amd_sadw(uint8 src0, uint8 src1, uint8 src2);
uint16 __ovld amd_sadw(uint16 src0, uint16 src1, uint16 src2);
#endif // cl_amd_media_ops2
#if defined(cl_arm_integer_dot_product_int8)
#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : begin
uint __ovld arm_dot(uchar4 a, uchar4 b);
int __ovld arm_dot(char4 a, char4 b);
#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : end
#endif // defined(cl_arm_integer_dot_product_int8)
#if defined(cl_arm_integer_dot_product_accumulate_int8)
#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : begin
uint __ovld arm_dot_acc(uchar4 a, uchar4 b, uint c);
int __ovld arm_dot_acc(char4 a, char4 b, int c);
#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : end
#endif // defined(cl_arm_integer_dot_product_accumulate_int8)
#if defined(cl_arm_integer_dot_product_accumulate_int16)
#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int16 : begin
uint __ovld arm_dot_acc(ushort2 a, ushort2 b, uint c);
int __ovld arm_dot_acc(short2 a, short2 b, int c);
#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int16 : end
#endif // defined(cl_arm_integer_dot_product_accumulate_int16)
#if defined(cl_arm_integer_dot_product_accumulate_saturate_int8)
#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_saturate_int8 : begin
uint __ovld arm_dot_acc_sat(uchar4 a, uchar4 b, uint c);
int __ovld arm_dot_acc_sat(char4 a, char4 b, int c);
#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_saturate_int8 : end
#endif // defined(cl_arm_integer_dot_product_accumulate_saturate_int8)
// Disable any extensions we may have enabled previously.
#pragma OPENCL EXTENSION all : disable

View File

@ -0,0 +1,35 @@
/*===---- __clang_openmp_math.h - OpenMP target math support ---------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#if defined(__NVPTX__) && defined(_OPENMP)
/// TODO:
/// We are currently reusing the functionality of the Clang-CUDA code path
/// as an alternative to the host declarations provided by math.h and cmath.
/// This is suboptimal.
///
/// We should instead declare the device functions in a similar way, e.g.,
/// through OpenMP 5.0 variants, and afterwards populate the module with the
/// host declarations by unconditionally including the host math.h or cmath,
/// respectively. This is actually what the Clang-CUDA code path does, using
/// __device__ instead of variants to avoid redeclarations and get the desired
/// overload resolution.
#define __CUDA__
#if defined(__cplusplus)
#include <__clang_cuda_cmath.h>
#endif
#undef __CUDA__
/// Magic macro for stopping the math.h/cmath host header from being included.
#define __CLANG_NO_HOST_MATH__
#endif

View File

@ -0,0 +1,33 @@
/*===---- __clang_openmp_math_declares.h - OpenMP math declares ------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __CLANG_OPENMP_MATH_DECLARES_H__
#define __CLANG_OPENMP_MATH_DECLARES_H__
#ifndef _OPENMP
#error "This file is for OpenMP compilation only."
#endif
#if defined(__NVPTX__) && defined(_OPENMP)
#define __CUDA__
#if defined(__cplusplus)
#include <__clang_cuda_math_forward_declares.h>
#endif
/// Include declarations for libdevice functions.
#include <__clang_cuda_libdevice_declares.h>
/// Provide definitions for these functions.
#include <__clang_cuda_device_functions.h>
#undef __CUDA__
#endif
#endif

View File

@ -0,0 +1,16 @@
/*===-------------- cmath - Alternative cmath header -----------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#include <__clang_openmp_math.h>
#ifndef __CLANG_NO_HOST_MATH__
#include_next <cmath>
#else
#undef __CLANG_NO_HOST_MATH__
#endif

View File

@ -0,0 +1,17 @@
/*===------------- math.h - Alternative math.h header ----------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#include <__clang_openmp_math.h>
#ifndef __CLANG_NO_HOST_MATH__
#include_next <math.h>
#else
#undef __CLANG_NO_HOST_MATH__
#endif

View File

@ -1,22 +1,8 @@
/*===---- pconfigintrin.h - X86 platform configuration ---------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -30,6 +16,8 @@
#define __PCONFIG_KEY_PROGRAM 0x00000001
#if __has_extension(gnu_asm)
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("pconfig")))
@ -47,4 +35,6 @@ _pconfig_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])
#undef __DEFAULT_FN_ATTRS
#endif /* __has_extension(gnu_asm) */
#endif

View File

@ -1,23 +1,9 @@
/*===---- pkuintrin.h - PKU intrinsics -------------------------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- popcntintrin.h - POPCNT intrinsics -------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
@ -43,22 +29,6 @@ _mm_popcnt_u32(unsigned int __A)
return __builtin_popcount(__A);
}
/// Counts the number of bits in the source operand having a value of 1.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> POPCNT </c> instruction.
///
/// \param __A
/// A signed 32-bit integer operand.
/// \returns A 32-bit integer containing the number of bits with value 1 in the
/// source operand.
static __inline__ int __DEFAULT_FN_ATTRS
_popcnt32(int __A)
{
return __builtin_popcount(__A);
}
#ifdef __x86_64__
/// Counts the number of bits in the source operand having a value of 1.
///
@ -75,22 +45,6 @@ _mm_popcnt_u64(unsigned long long __A)
{
return __builtin_popcountll(__A);
}
/// Counts the number of bits in the source operand having a value of 1.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> POPCNT </c> instruction.
///
/// \param __A
/// A signed 64-bit integer operand.
/// \returns A 64-bit integer containing the number of bits with value 1 in the
/// source operand.
static __inline__ long long __DEFAULT_FN_ATTRS
_popcnt64(long long __A)
{
return __builtin_popcountll(__A);
}
#endif /* __x86_64__ */
#undef __DEFAULT_FN_ATTRS

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,44 @@
/*===---- mm_malloc.h - Implementation of _mm_malloc and _mm_free ----------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef _MM_MALLOC_H_INCLUDED
#define _MM_MALLOC_H_INCLUDED
#include <stdlib.h>
/* We can't depend on <stdlib.h> since the prototype of posix_memalign
may not be visible. */
#ifndef __cplusplus
extern int posix_memalign (void **, size_t, size_t);
#else
extern "C" int posix_memalign (void **, size_t, size_t) throw ();
#endif
static __inline void *
_mm_malloc (size_t size, size_t alignment)
{
/* PowerPC64 ELF V2 ABI requires quadword alignment. */
size_t vec_align = sizeof (__vector float);
void *ptr;
if (alignment < vec_align)
alignment = vec_align;
if (posix_memalign (&ptr, alignment, size) == 0)
return ptr;
else
return NULL;
}
static __inline void
_mm_free (void * ptr)
{
free (ptr);
}
#endif /* _MM_MALLOC_H_INCLUDED */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,22 +1,8 @@
/*===---- prfchwintrin.h - PREFETCHW intrinsic -----------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===------------ ptwriteintrin.h - PTWRITE intrinsic --------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- rdseedintrin.h - RDSEED intrinsics -------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- rtmintrin.h - RTM intrinsics -------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

View File

@ -1,22 +1,8 @@
/*===---- s390intrin.h - SystemZ intrinsics --------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

Some files were not shown because too many files have changed in this diff Show More