libobs: Update to SIMDe 0.7.1
c3d7abfaba
Simplify usage of the SIMDe header
This obviates the need for sse2neon as well and fixes compilation of all
plugins that referenced sse-intrin.h on all architectures, not just
arm*.
master
parent
fdd34c35fc
commit
1e96573328
|
@ -123,18 +123,14 @@ else ()
|
|||
endif ()
|
||||
|
||||
if(LOWERCASE_CMAKE_SYSTEM_PROCESSOR MATCHES "(i[3-6]86|x86|x64|x86_64|amd64|e2k)")
|
||||
set(NEEDS_SIMDE "0")
|
||||
if(NOT MSVC)
|
||||
set(ARCH_SIMD_FLAGS "-mmmx" "-msse" "-msse2")
|
||||
endif()
|
||||
elseif(LOWERCASE_CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64(le)?")
|
||||
set(NEEDS_SIMDE "0")
|
||||
set(ARCH_SIMD_DEFINES "-DNO_WARN_X86_INTRINSICS")
|
||||
set(ARCH_SIMD_FLAGS "-mvsx")
|
||||
add_compile_definitions(NO_WARN_X86_INTRINSICS)
|
||||
else()
|
||||
set(NEEDS_SIMDE "1")
|
||||
add_definitions(-DNEEDS_SIMDE=1)
|
||||
if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSIMDE_ENABLE_OPENMP")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DSIMDE_ENABLE_OPENMP")
|
||||
|
|
|
@ -188,20 +188,8 @@ elseif(UNIX)
|
|||
util/pipe-posix.c
|
||||
util/platform-nix.c)
|
||||
|
||||
if(NEEDS_SIMDE)
|
||||
set(libobs_PLATFORM_HEADERS
|
||||
util/simde/check.h
|
||||
util/simde/hedley.h
|
||||
util/simde/mmx.h
|
||||
util/simde/simde-arch.h
|
||||
util/simde/simde-common.h
|
||||
util/simde/sse.h
|
||||
util/simde/sse2.h
|
||||
util/threading-posix.h)
|
||||
else()
|
||||
set(libobs_PLATFORM_HEADERS
|
||||
util/threading-posix.h)
|
||||
endif()
|
||||
set(libobs_PLATFORM_HEADERS
|
||||
util/threading-posix.h)
|
||||
|
||||
if(HAVE_PULSEAUDIO)
|
||||
set(libobs_audio_monitoring_HEADERS
|
||||
|
@ -369,7 +357,6 @@ set(libobs_util_SOURCES
|
|||
set(libobs_util_HEADERS
|
||||
util/curl/curl-helper.h
|
||||
util/sse-intrin.h
|
||||
util/sse2neon.h
|
||||
util/array-serializer.h
|
||||
util/file-serializer.h
|
||||
util/utf8.h
|
||||
|
@ -419,6 +406,20 @@ set(libobs_libobs_SOURCES
|
|||
obs-video-gpu-encode.c
|
||||
obs-video.c)
|
||||
set(libobs_libobs_HEADERS
|
||||
util/simde/check.h
|
||||
util/simde/debug-trap.h
|
||||
util/simde/hedley.h
|
||||
util/simde/simde-align.h
|
||||
util/simde/simde-arch.h
|
||||
util/simde/simde-common.h
|
||||
util/simde/simde-constify.h
|
||||
util/simde/simde-detect-clang.h
|
||||
util/simde/simde-diagnostic.h
|
||||
util/simde/simde-features.h
|
||||
util/simde/simde-math.h
|
||||
util/simde/x86/mmx.h
|
||||
util/simde/x86/sse2.h
|
||||
util/simde/x86/sse.h
|
||||
${libobs_PLATFORM_HEADERS}
|
||||
obs-audio-controls.h
|
||||
obs-defs.h
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
#define HAVE_DBUS @HAVE_DBUS@
|
||||
#define HAVE_PULSEAUDIO @HAVE_PULSEAUDIO@
|
||||
#define USE_XINPUT @USE_XINPUT@
|
||||
#define NEEDS_SIMDE @NEEDS_SIMDE@
|
||||
#define LIBOBS_IMAGEMAGICK_DIR_STYLE_6L 6
|
||||
#define LIBOBS_IMAGEMAGICK_DIR_STYLE_7GE 7
|
||||
#define LIBOBS_IMAGEMAGICK_DIR_STYLE @LIBOBS_IMAGEMAGICK_DIR_STYLE@
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
This is a slightly modified version of https://github.com/nemequ/simde/commit/cafec4b952fa5a31a51a10326f97c2e7c9067771
|
||||
sse{,2}.h and mmx.h was moved down from the original "x86" subdirectory,
|
||||
subsequently the '#include "../simde-common.h"' line in mmx.h was changed to '#include "simde-common.h"'
|
||||
This is a slightly modified version of the simde directory in
|
||||
https://github.com/simd-everywhere/simde/commit/c3d7abfaba6729a8b11d09a314b34a4db628911d
|
||||
Unused files have been removed.
|
||||
|
||||
Then the code was reformatted using the "formatcode.sh" script in the root of this repository.
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#endif
|
||||
|
||||
#include "hedley.h"
|
||||
#include "simde-diagnostic.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
|
|
|
@ -10,11 +10,11 @@
|
|||
* SPDX-License-Identifier: CC0-1.0
|
||||
*/
|
||||
|
||||
#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 12)
|
||||
#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 14)
|
||||
#if defined(HEDLEY_VERSION)
|
||||
#undef HEDLEY_VERSION
|
||||
#endif
|
||||
#define HEDLEY_VERSION 12
|
||||
#define HEDLEY_VERSION 14
|
||||
|
||||
#if defined(HEDLEY_STRINGIFY_EX)
|
||||
#undef HEDLEY_STRINGIFY_EX
|
||||
|
@ -36,6 +36,16 @@
|
|||
#endif
|
||||
#define HEDLEY_CONCAT(a, b) HEDLEY_CONCAT_EX(a, b)
|
||||
|
||||
#if defined(HEDLEY_CONCAT3_EX)
|
||||
#undef HEDLEY_CONCAT3_EX
|
||||
#endif
|
||||
#define HEDLEY_CONCAT3_EX(a, b, c) a##b##c
|
||||
|
||||
#if defined(HEDLEY_CONCAT3)
|
||||
#undef HEDLEY_CONCAT3
|
||||
#endif
|
||||
#define HEDLEY_CONCAT3(a, b, c) HEDLEY_CONCAT3_EX(a, b, c)
|
||||
|
||||
#if defined(HEDLEY_VERSION_ENCODE)
|
||||
#undef HEDLEY_VERSION_ENCODE
|
||||
#endif
|
||||
|
@ -80,17 +90,17 @@
|
|||
#if defined(HEDLEY_MSVC_VERSION)
|
||||
#undef HEDLEY_MSVC_VERSION
|
||||
#endif
|
||||
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000)
|
||||
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL)
|
||||
#define HEDLEY_MSVC_VERSION \
|
||||
HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, \
|
||||
(_MSC_FULL_VER % 10000000) / 100000, \
|
||||
(_MSC_FULL_VER % 100000) / 100)
|
||||
#elif defined(_MSC_FULL_VER)
|
||||
#elif defined(_MSC_FULL_VER) && !defined(__ICL)
|
||||
#define HEDLEY_MSVC_VERSION \
|
||||
HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, \
|
||||
(_MSC_FULL_VER % 1000000) / 10000, \
|
||||
(_MSC_FULL_VER % 10000) / 10)
|
||||
#elif defined(_MSC_VER)
|
||||
#elif defined(_MSC_VER) && !defined(__ICL)
|
||||
#define HEDLEY_MSVC_VERSION \
|
||||
HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0)
|
||||
#endif
|
||||
|
@ -98,7 +108,7 @@
|
|||
#if defined(HEDLEY_MSVC_VERSION_CHECK)
|
||||
#undef HEDLEY_MSVC_VERSION_CHECK
|
||||
#endif
|
||||
#if !defined(_MSC_VER)
|
||||
#if !defined(HEDLEY_MSVC_VERSION)
|
||||
#define HEDLEY_MSVC_VERSION_CHECK(major, minor, patch) (0)
|
||||
#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
|
||||
#define HEDLEY_MSVC_VERSION_CHECK(major, minor, patch) \
|
||||
|
@ -114,11 +124,12 @@
|
|||
#if defined(HEDLEY_INTEL_VERSION)
|
||||
#undef HEDLEY_INTEL_VERSION
|
||||
#endif
|
||||
#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE)
|
||||
#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && \
|
||||
!defined(__ICL)
|
||||
#define HEDLEY_INTEL_VERSION \
|
||||
HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, \
|
||||
__INTEL_COMPILER_UPDATE)
|
||||
#elif defined(__INTEL_COMPILER)
|
||||
#elif defined(__INTEL_COMPILER) && !defined(__ICL)
|
||||
#define HEDLEY_INTEL_VERSION \
|
||||
HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0)
|
||||
#endif
|
||||
|
@ -133,6 +144,25 @@
|
|||
#define HEDLEY_INTEL_VERSION_CHECK(major, minor, patch) (0)
|
||||
#endif
|
||||
|
||||
#if defined(HEDLEY_INTEL_CL_VERSION)
|
||||
#undef HEDLEY_INTEL_CL_VERSION
|
||||
#endif
|
||||
#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && \
|
||||
defined(__ICL)
|
||||
#define HEDLEY_INTEL_CL_VERSION \
|
||||
HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0)
|
||||
#endif
|
||||
|
||||
#if defined(HEDLEY_INTEL_CL_VERSION_CHECK)
|
||||
#undef HEDLEY_INTEL_CL_VERSION_CHECK
|
||||
#endif
|
||||
#if defined(HEDLEY_INTEL_CL_VERSION)
|
||||
#define HEDLEY_INTEL_CL_VERSION_CHECK(major, minor, patch) \
|
||||
(HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
|
||||
#else
|
||||
#define HEDLEY_INTEL_CL_VERSION_CHECK(major, minor, patch) (0)
|
||||
#endif
|
||||
|
||||
#if defined(HEDLEY_PGI_VERSION)
|
||||
#undef HEDLEY_PGI_VERSION
|
||||
#endif
|
||||
|
@ -788,6 +818,68 @@
|
|||
HEDLEY_GCC_VERSION_CHECK(major, minor, patch)
|
||||
#endif
|
||||
|
||||
#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \
|
||||
defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3, 0, 0) || \
|
||||
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
|
||||
HEDLEY_IAR_VERSION_CHECK(8, 0, 0) || \
|
||||
HEDLEY_PGI_VERSION_CHECK(18, 4, 0) || \
|
||||
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
|
||||
HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \
|
||||
HEDLEY_TI_ARMCL_VERSION_CHECK(4, 7, 0) || \
|
||||
HEDLEY_TI_CL430_VERSION_CHECK(2, 0, 1) || \
|
||||
HEDLEY_TI_CL2000_VERSION_CHECK(6, 1, 0) || \
|
||||
HEDLEY_TI_CL6X_VERSION_CHECK(7, 0, 0) || \
|
||||
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
|
||||
HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) || \
|
||||
HEDLEY_CRAY_VERSION_CHECK(5, 0, 0) || \
|
||||
HEDLEY_TINYC_VERSION_CHECK(0, 9, 17) || \
|
||||
HEDLEY_SUNPRO_VERSION_CHECK(8, 0, 0) || \
|
||||
(HEDLEY_IBM_VERSION_CHECK(10, 1, 0) && defined(__C99_PRAGMA_OPERATOR))
|
||||
#define HEDLEY_PRAGMA(value) _Pragma(#value)
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0)
|
||||
#define HEDLEY_PRAGMA(value) __pragma(value)
|
||||
#else
|
||||
#define HEDLEY_PRAGMA(value)
|
||||
#endif
|
||||
|
||||
#if defined(HEDLEY_DIAGNOSTIC_PUSH)
|
||||
#undef HEDLEY_DIAGNOSTIC_PUSH
|
||||
#endif
|
||||
#if defined(HEDLEY_DIAGNOSTIC_POP)
|
||||
#undef HEDLEY_DIAGNOSTIC_POP
|
||||
#endif
|
||||
#if defined(__clang__)
|
||||
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push")
|
||||
#define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop")
|
||||
#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)")
|
||||
#define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)")
|
||||
#elif HEDLEY_GCC_VERSION_CHECK(4, 6, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push")
|
||||
#define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop")
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0) || \
|
||||
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push))
|
||||
#define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop))
|
||||
#elif HEDLEY_ARM_VERSION_CHECK(5, 6, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push")
|
||||
#define HEDLEY_DIAGNOSTIC_POP _Pragma("pop")
|
||||
#elif HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \
|
||||
HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) || \
|
||||
HEDLEY_TI_CL430_VERSION_CHECK(4, 4, 0) || \
|
||||
HEDLEY_TI_CL6X_VERSION_CHECK(8, 1, 0) || \
|
||||
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
|
||||
HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push")
|
||||
#define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop")
|
||||
#elif HEDLEY_PELLES_VERSION_CHECK(2, 90, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)")
|
||||
#define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)")
|
||||
#else
|
||||
#define HEDLEY_DIAGNOSTIC_PUSH
|
||||
#define HEDLEY_DIAGNOSTIC_POP
|
||||
#endif
|
||||
|
||||
/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for
|
||||
HEDLEY INTERNAL USE ONLY. API subject to change without notice. */
|
||||
#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_)
|
||||
|
@ -796,11 +888,20 @@
|
|||
#if defined(__cplusplus)
|
||||
#if HEDLEY_HAS_WARNING("-Wc++98-compat")
|
||||
#if HEDLEY_HAS_WARNING("-Wc++17-extensions")
|
||||
#if HEDLEY_HAS_WARNING("-Wc++1z-extensions")
|
||||
#define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \
|
||||
HEDLEY_DIAGNOSTIC_PUSH \
|
||||
_Pragma("clang diagnostic ignored \"-Wc++98-compat\"") _Pragma( \
|
||||
"clang diagnostic ignored \"-Wc++17-extensions\"") \
|
||||
_Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \
|
||||
xpr HEDLEY_DIAGNOSTIC_POP
|
||||
#else
|
||||
#define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \
|
||||
HEDLEY_DIAGNOSTIC_PUSH \
|
||||
_Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \
|
||||
_Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \
|
||||
xpr HEDLEY_DIAGNOSTIC_POP
|
||||
#endif
|
||||
#else
|
||||
#define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \
|
||||
HEDLEY_DIAGNOSTIC_PUSH \
|
||||
|
@ -861,74 +962,14 @@
|
|||
#elif HEDLEY_IAR_VERSION_CHECK(8, 3, 0)
|
||||
#define HEDLEY_CPP_CAST(T, expr) \
|
||||
HEDLEY_DIAGNOSTIC_PUSH \
|
||||
_Pragma("diag_suppress=Pe137") HEDLEY_DIAGNOSTIC_POP #else
|
||||
_Pragma("diag_suppress=Pe137") HEDLEY_DIAGNOSTIC_POP
|
||||
#else
|
||||
#define HEDLEY_CPP_CAST(T, expr) ((T)(expr))
|
||||
#endif
|
||||
#else
|
||||
#define HEDLEY_CPP_CAST(T, expr) (expr)
|
||||
#endif
|
||||
|
||||
#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \
|
||||
defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3, 0, 0) || \
|
||||
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
|
||||
HEDLEY_IAR_VERSION_CHECK(8, 0, 0) || \
|
||||
HEDLEY_PGI_VERSION_CHECK(18, 4, 0) || \
|
||||
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
|
||||
HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \
|
||||
HEDLEY_TI_ARMCL_VERSION_CHECK(4, 7, 0) || \
|
||||
HEDLEY_TI_CL430_VERSION_CHECK(2, 0, 1) || \
|
||||
HEDLEY_TI_CL2000_VERSION_CHECK(6, 1, 0) || \
|
||||
HEDLEY_TI_CL6X_VERSION_CHECK(7, 0, 0) || \
|
||||
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
|
||||
HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) || \
|
||||
HEDLEY_CRAY_VERSION_CHECK(5, 0, 0) || \
|
||||
HEDLEY_TINYC_VERSION_CHECK(0, 9, 17) || \
|
||||
HEDLEY_SUNPRO_VERSION_CHECK(8, 0, 0) || \
|
||||
(HEDLEY_IBM_VERSION_CHECK(10, 1, 0) && defined(__C99_PRAGMA_OPERATOR))
|
||||
#define HEDLEY_PRAGMA(value) _Pragma(#value)
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0)
|
||||
#define HEDLEY_PRAGMA(value) __pragma(value)
|
||||
#else
|
||||
#define HEDLEY_PRAGMA(value)
|
||||
#endif
|
||||
|
||||
#if defined(HEDLEY_DIAGNOSTIC_PUSH)
|
||||
#undef HEDLEY_DIAGNOSTIC_PUSH
|
||||
#endif
|
||||
#if defined(HEDLEY_DIAGNOSTIC_POP)
|
||||
#undef HEDLEY_DIAGNOSTIC_POP
|
||||
#endif
|
||||
#if defined(__clang__)
|
||||
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push")
|
||||
#define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop")
|
||||
#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)")
|
||||
#define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)")
|
||||
#elif HEDLEY_GCC_VERSION_CHECK(4, 6, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push")
|
||||
#define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop")
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push))
|
||||
#define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop))
|
||||
#elif HEDLEY_ARM_VERSION_CHECK(5, 6, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push")
|
||||
#define HEDLEY_DIAGNOSTIC_POP _Pragma("pop")
|
||||
#elif HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \
|
||||
HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) || \
|
||||
HEDLEY_TI_CL430_VERSION_CHECK(4, 4, 0) || \
|
||||
HEDLEY_TI_CL6X_VERSION_CHECK(8, 1, 0) || \
|
||||
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
|
||||
HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push")
|
||||
#define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop")
|
||||
#elif HEDLEY_PELLES_VERSION_CHECK(2, 90, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)")
|
||||
#define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)")
|
||||
#else
|
||||
#define HEDLEY_DIAGNOSTIC_PUSH
|
||||
#define HEDLEY_DIAGNOSTIC_POP
|
||||
#endif
|
||||
|
||||
#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED)
|
||||
#undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED
|
||||
#endif
|
||||
|
@ -938,6 +979,12 @@
|
|||
#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \
|
||||
_Pragma("warning(disable:1478 1786)")
|
||||
#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \
|
||||
__pragma(warning(disable : 1478 1786))
|
||||
#elif HEDLEY_PGI_VERSION_CHECK(20, 7, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \
|
||||
_Pragma("diag_suppress 1215,1216,1444,1445")
|
||||
#elif HEDLEY_PGI_VERSION_CHECK(17, 10, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444")
|
||||
#elif HEDLEY_GCC_VERSION_CHECK(4, 3, 0)
|
||||
|
@ -985,6 +1032,9 @@
|
|||
#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \
|
||||
_Pragma("warning(disable:161)")
|
||||
#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \
|
||||
__pragma(warning(disable : 161))
|
||||
#elif HEDLEY_PGI_VERSION_CHECK(17, 10, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675")
|
||||
#elif HEDLEY_GCC_VERSION_CHECK(4, 3, 0)
|
||||
|
@ -1018,9 +1068,15 @@
|
|||
#elif HEDLEY_INTEL_VERSION_CHECK(17, 0, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
|
||||
_Pragma("warning(disable:1292)")
|
||||
#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
|
||||
__pragma(warning(disable : 1292))
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(19, 0, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
|
||||
__pragma(warning(disable : 5030))
|
||||
#elif HEDLEY_PGI_VERSION_CHECK(20, 7, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
|
||||
_Pragma("diag_suppress 1097,1098")
|
||||
#elif HEDLEY_PGI_VERSION_CHECK(17, 10, 0)
|
||||
#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
|
||||
_Pragma("diag_suppress 1097")
|
||||
|
@ -1061,13 +1117,11 @@
|
|||
#if defined(HEDLEY_DEPRECATED_FOR)
|
||||
#undef HEDLEY_DEPRECATED_FOR
|
||||
#endif
|
||||
#if defined(__cplusplus) && (__cplusplus >= 201402L)
|
||||
#define HEDLEY_DEPRECATED(since) \
|
||||
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \
|
||||
[[deprecated("Since " #since)]])
|
||||
#define HEDLEY_DEPRECATED_FOR(since, replacement) \
|
||||
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \
|
||||
[[deprecated("Since " #since "; use " #replacement)]])
|
||||
#if HEDLEY_MSVC_VERSION_CHECK(14, 0, 0) || \
|
||||
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
|
||||
#define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " #since))
|
||||
#define HEDLEY_DEPRECATED_FOR(since, replacement) \
|
||||
__declspec(deprecated("Since " #since "; use " #replacement))
|
||||
#elif HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) || \
|
||||
HEDLEY_GCC_VERSION_CHECK(4, 5, 0) || \
|
||||
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
|
||||
|
@ -1083,6 +1137,13 @@
|
|||
__attribute__((__deprecated__("Since " #since)))
|
||||
#define HEDLEY_DEPRECATED_FOR(since, replacement) \
|
||||
__attribute__((__deprecated__("Since " #since "; use " #replacement)))
|
||||
#elif defined(__cplusplus) && (__cplusplus >= 201402L)
|
||||
#define HEDLEY_DEPRECATED(since) \
|
||||
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \
|
||||
[[deprecated("Since " #since)]])
|
||||
#define HEDLEY_DEPRECATED_FOR(since, replacement) \
|
||||
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \
|
||||
[[deprecated("Since " #since "; use " #replacement)]])
|
||||
#elif HEDLEY_HAS_ATTRIBUTE(deprecated) || HEDLEY_GCC_VERSION_CHECK(3, 1, 0) || \
|
||||
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
|
||||
HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \
|
||||
|
@ -1103,12 +1164,9 @@
|
|||
#define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__))
|
||||
#define HEDLEY_DEPRECATED_FOR(since, replacement) \
|
||||
__attribute__((__deprecated__))
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(14, 0, 0)
|
||||
#define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " #since))
|
||||
#define HEDLEY_DEPRECATED_FOR(since, replacement) \
|
||||
__declspec(deprecated("Since " #since "; use " #replacement))
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) || \
|
||||
HEDLEY_PELLES_VERSION_CHECK(6, 50, 0)
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) || \
|
||||
HEDLEY_PELLES_VERSION_CHECK(6, 50, 0) || \
|
||||
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
|
||||
#define HEDLEY_DEPRECATED(since) __declspec(deprecated)
|
||||
#define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated)
|
||||
#elif HEDLEY_IAR_VERSION_CHECK(8, 0, 0)
|
||||
|
@ -1136,17 +1194,7 @@
|
|||
#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG)
|
||||
#undef HEDLEY_WARN_UNUSED_RESULT_MSG
|
||||
#endif
|
||||
#if (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L)
|
||||
#define HEDLEY_WARN_UNUSED_RESULT \
|
||||
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])
|
||||
#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) \
|
||||
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]])
|
||||
#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard)
|
||||
#define HEDLEY_WARN_UNUSED_RESULT \
|
||||
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])
|
||||
#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) \
|
||||
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])
|
||||
#elif HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \
|
||||
#if HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \
|
||||
HEDLEY_GCC_VERSION_CHECK(3, 4, 0) || \
|
||||
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
|
||||
HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \
|
||||
|
@ -1169,6 +1217,16 @@
|
|||
#define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
|
||||
#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) \
|
||||
__attribute__((__warn_unused_result__))
|
||||
#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L)
|
||||
#define HEDLEY_WARN_UNUSED_RESULT \
|
||||
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])
|
||||
#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) \
|
||||
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]])
|
||||
#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard)
|
||||
#define HEDLEY_WARN_UNUSED_RESULT \
|
||||
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])
|
||||
#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) \
|
||||
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])
|
||||
#elif defined(_Check_return_) /* SAL */
|
||||
#define HEDLEY_WARN_UNUSED_RESULT _Check_return_
|
||||
#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_
|
||||
|
@ -1222,7 +1280,8 @@
|
|||
#define HEDLEY_NO_RETURN __attribute__((__noreturn__))
|
||||
#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 10, 0)
|
||||
#define HEDLEY_NO_RETURN _Pragma("does_not_return")
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0)
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) || \
|
||||
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
|
||||
#define HEDLEY_NO_RETURN __declspec(noreturn)
|
||||
#elif HEDLEY_TI_CL6X_VERSION_CHECK(6, 0, 0) && defined(__cplusplus)
|
||||
#define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;")
|
||||
|
@ -1252,7 +1311,9 @@
|
|||
#if defined(HEDLEY_ASSUME)
|
||||
#undef HEDLEY_ASSUME
|
||||
#endif
|
||||
#if HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) || HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
|
||||
#if HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) || \
|
||||
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
|
||||
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
|
||||
#define HEDLEY_ASSUME(expr) __assume(expr)
|
||||
#elif HEDLEY_HAS_BUILTIN(__builtin_assume)
|
||||
#define HEDLEY_ASSUME(expr) __builtin_assume(expr)
|
||||
|
@ -1389,7 +1450,8 @@ HEDLEY_DIAGNOSTIC_POP
|
|||
#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable)
|
||||
#define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr))
|
||||
#endif
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) || \
|
||||
#if (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && \
|
||||
!defined(HEDLEY_PGI_VERSION)) || \
|
||||
HEDLEY_GCC_VERSION_CHECK(9, 0, 0)
|
||||
#define HEDLEY_PREDICT(expr, value, probability) \
|
||||
__builtin_expect_with_probability((expr), (value), (probability))
|
||||
|
@ -1399,7 +1461,8 @@ HEDLEY_DIAGNOSTIC_POP
|
|||
__builtin_expect_with_probability(!!(expr), 0, (probability))
|
||||
#define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1)
|
||||
#define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
|
||||
#elif HEDLEY_HAS_BUILTIN(__builtin_expect) || \
|
||||
#elif (HEDLEY_HAS_BUILTIN(__builtin_expect) && \
|
||||
!defined(HEDLEY_INTEL_CL_VERSION)) || \
|
||||
HEDLEY_GCC_VERSION_CHECK(3, 0, 0) || \
|
||||
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
|
||||
(HEDLEY_SUNPRO_VERSION_CHECK(5, 15, 0) && defined(__cplusplus)) || \
|
||||
|
@ -1476,7 +1539,8 @@ HEDLEY_DIAGNOSTIC_POP
|
|||
#define HEDLEY_MALLOC __attribute__((__malloc__))
|
||||
#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 10, 0)
|
||||
#define HEDLEY_MALLOC _Pragma("returns_new_memory")
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(14, 0, 0)
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(14, 0, 0) || \
|
||||
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
|
||||
#define HEDLEY_MALLOC __declspec(restrict)
|
||||
#else
|
||||
#define HEDLEY_MALLOC
|
||||
|
@ -1557,6 +1621,7 @@ HEDLEY_DIAGNOSTIC_POP
|
|||
#elif HEDLEY_GCC_VERSION_CHECK(3, 1, 0) || \
|
||||
HEDLEY_MSVC_VERSION_CHECK(14, 0, 0) || \
|
||||
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
|
||||
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) || \
|
||||
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
|
||||
HEDLEY_IBM_VERSION_CHECK(10, 1, 0) || \
|
||||
HEDLEY_PGI_VERSION_CHECK(17, 10, 0) || \
|
||||
|
@ -1581,13 +1646,14 @@ HEDLEY_DIAGNOSTIC_POP
|
|||
#define HEDLEY_INLINE inline
|
||||
#elif defined(HEDLEY_GCC_VERSION) || HEDLEY_ARM_VERSION_CHECK(6, 2, 0)
|
||||
#define HEDLEY_INLINE __inline__
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(12, 0, 0) || \
|
||||
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
|
||||
HEDLEY_TI_ARMCL_VERSION_CHECK(5, 1, 0) || \
|
||||
HEDLEY_TI_CL430_VERSION_CHECK(3, 1, 0) || \
|
||||
HEDLEY_TI_CL2000_VERSION_CHECK(6, 2, 0) || \
|
||||
HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0) || \
|
||||
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(12, 0, 0) || \
|
||||
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) || \
|
||||
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
|
||||
HEDLEY_TI_ARMCL_VERSION_CHECK(5, 1, 0) || \
|
||||
HEDLEY_TI_CL430_VERSION_CHECK(3, 1, 0) || \
|
||||
HEDLEY_TI_CL2000_VERSION_CHECK(6, 2, 0) || \
|
||||
HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0) || \
|
||||
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
|
||||
HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)
|
||||
#define HEDLEY_INLINE __inline
|
||||
#else
|
||||
|
@ -1619,7 +1685,8 @@ HEDLEY_DIAGNOSTIC_POP
|
|||
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
|
||||
HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)
|
||||
#define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(12, 0, 0)
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(12, 0, 0) || \
|
||||
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
|
||||
#define HEDLEY_ALWAYS_INLINE __forceinline
|
||||
#elif defined(__cplusplus) && (HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) || \
|
||||
HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) || \
|
||||
|
@ -1658,7 +1725,8 @@ HEDLEY_DIAGNOSTIC_POP
|
|||
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
|
||||
HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)
|
||||
#define HEDLEY_NEVER_INLINE __attribute__((__noinline__))
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0)
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) || \
|
||||
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
|
||||
#define HEDLEY_NEVER_INLINE __declspec(noinline)
|
||||
#elif HEDLEY_PGI_VERSION_CHECK(10, 2, 0)
|
||||
#define HEDLEY_NEVER_INLINE _Pragma("noinline")
|
||||
|
@ -1711,7 +1779,9 @@ HEDLEY_DIAGNOSTIC_POP
|
|||
#if HEDLEY_HAS_ATTRIBUTE(nothrow) || HEDLEY_GCC_VERSION_CHECK(3, 3, 0) || \
|
||||
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
|
||||
#define HEDLEY_NO_THROW __attribute__((__nothrow__))
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(13, 1, 0) || HEDLEY_ARM_VERSION_CHECK(4, 1, 0)
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(13, 1, 0) || \
|
||||
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) || \
|
||||
HEDLEY_ARM_VERSION_CHECK(4, 1, 0)
|
||||
#define HEDLEY_NO_THROW __declspec(nothrow)
|
||||
#else
|
||||
#define HEDLEY_NO_THROW
|
||||
|
@ -1720,8 +1790,7 @@ HEDLEY_DIAGNOSTIC_POP
|
|||
#if defined(HEDLEY_FALL_THROUGH)
|
||||
#undef HEDLEY_FALL_THROUGH
|
||||
#endif
|
||||
#if HEDLEY_GNUC_HAS_ATTRIBUTE(fallthrough, 7, 0, 0) && \
|
||||
!defined(HEDLEY_PGI_VERSION)
|
||||
#if HEDLEY_HAS_ATTRIBUTE(fallthrough) || HEDLEY_GCC_VERSION_CHECK(7, 0, 0)
|
||||
#define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__))
|
||||
#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang, fallthrough)
|
||||
#define HEDLEY_FALL_THROUGH \
|
||||
|
@ -1866,12 +1935,14 @@ HEDLEY_DIAGNOSTIC_POP
|
|||
#endif
|
||||
#if !defined(__cplusplus) && \
|
||||
((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \
|
||||
HEDLEY_HAS_FEATURE(c_static_assert) || \
|
||||
(HEDLEY_HAS_FEATURE(c_static_assert) && \
|
||||
!defined(HEDLEY_INTEL_CL_VERSION)) || \
|
||||
HEDLEY_GCC_VERSION_CHECK(6, 0, 0) || \
|
||||
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || defined(_Static_assert))
|
||||
#define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message)
|
||||
#elif (defined(__cplusplus) && (__cplusplus >= 201103L)) || \
|
||||
HEDLEY_MSVC_VERSION_CHECK(16, 0, 0)
|
||||
HEDLEY_MSVC_VERSION_CHECK(16, 0, 0) || \
|
||||
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
|
||||
#define HEDLEY_STATIC_ASSERT(expr, message) \
|
||||
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \
|
||||
static_assert(expr, message))
|
||||
|
@ -1930,7 +2001,8 @@ HEDLEY_DIAGNOSTIC_POP
|
|||
HEDLEY_PGI_VERSION_CHECK(18, 4, 0) || \
|
||||
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
|
||||
#define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg)
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0)
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0) || \
|
||||
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
|
||||
#define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg))
|
||||
#else
|
||||
#define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg)
|
||||
|
@ -1970,6 +2042,8 @@ HEDLEY_DIAGNOSTIC_POP
|
|||
#endif
|
||||
#if HEDLEY_HAS_ATTRIBUTE(flag_enum)
|
||||
#define HEDLEY_FLAGS __attribute__((__flag_enum__))
|
||||
#else
|
||||
#define HEDLEY_FLAGS
|
||||
#endif
|
||||
|
||||
#if defined(HEDLEY_FLAGS_CAST)
|
||||
|
@ -1989,8 +2063,9 @@ HEDLEY_DIAGNOSTIC_POP
|
|||
#if defined(HEDLEY_EMPTY_BASES)
|
||||
#undef HEDLEY_EMPTY_BASES
|
||||
#endif
|
||||
#if HEDLEY_MSVC_VERSION_CHECK(19, 0, 23918) && \
|
||||
!HEDLEY_MSVC_VERSION_CHECK(20, 0, 0)
|
||||
#if (HEDLEY_MSVC_VERSION_CHECK(19, 0, 23918) && \
|
||||
!HEDLEY_MSVC_VERSION_CHECK(20, 0, 0)) || \
|
||||
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
|
||||
#define HEDLEY_EMPTY_BASES __declspec(empty_bases)
|
||||
#else
|
||||
#define HEDLEY_EMPTY_BASES
|
||||
|
|
|
@ -0,0 +1,481 @@
|
|||
/* Alignment
|
||||
* Created by Evan Nemerson <evan@nemerson.com>
|
||||
*
|
||||
* To the extent possible under law, the authors have waived all
|
||||
* copyright and related or neighboring rights to this code. For
|
||||
* details, see the Creative Commons Zero 1.0 Universal license at
|
||||
* <https://creativecommons.org/publicdomain/zero/1.0/>
|
||||
*
|
||||
* SPDX-License-Identifier: CC0-1.0
|
||||
*
|
||||
**********************************************************************
|
||||
*
|
||||
* This is a portability layer which should help iron out some
|
||||
* differences across various compilers, as well as various versions of
|
||||
* C and C++.
|
||||
*
|
||||
* It was originally developed for SIMD Everywhere
|
||||
* (<https://github.com/simd-everywhere/simde>), but since its only
|
||||
* dependency is Hedley (<https://nemequ.github.io/hedley>, also CC0)
|
||||
* it can easily be used in other projects, so please feel free to do
|
||||
* so.
|
||||
*
|
||||
* If you do use this in your project, please keep a link to SIMDe in
|
||||
* your code to remind you where to report any bugs and/or check for
|
||||
* updated versions.
|
||||
*
|
||||
* # API Overview
|
||||
*
|
||||
* The API has several parts, and most macros have a few variations.
|
||||
* There are APIs for declaring aligned fields/variables, optimization
|
||||
* hints, and run-time alignment checks.
|
||||
*
|
||||
* Briefly, macros ending with "_TO" take numeric values and are great
|
||||
* when you know the value you would like to use. Macros ending with
|
||||
* "_LIKE", on the other hand, accept a type and are used when you want
|
||||
* to use the alignment of a type instead of hardcoding a value.
|
||||
*
|
||||
* Documentation for each section of the API is inline.
|
||||
*
|
||||
* True to form, MSVC is the main problem and imposes several
|
||||
* limitations on the effectiveness of the APIs. Detailed descriptions
|
||||
* of the limitations of each macro are inline, but in general:
|
||||
*
|
||||
* * On C11+ or C++11+ code written using this API will work. The
|
||||
* ASSUME macros may or may not generate a hint to the compiler, but
|
||||
* that is only an optimization issue and will not actually cause
|
||||
* failures.
|
||||
* * If you're using pretty much any compiler other than MSVC,
|
||||
* everything should basically work as well as in C11/C++11.
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_ALIGN_H)
|
||||
#define SIMDE_ALIGN_H
|
||||
|
||||
#include "hedley.h"
|
||||
|
||||
/* I know this seems a little silly, but some non-hosted compilers
 * don't have stddef.h, so we try to accommodate them.
 *
 * SIMDE_ALIGN_SIZE_T_ expands to the type this header uses where it
 * would otherwise spell size_t.  It may be defined before including
 * this header to override detection.  Otherwise it resolves, in order,
 * to:
 *
 *   1. __SIZE_TYPE__    if the compiler predefines it,
 *   2. __SIZE_T_TYPE__  if the compiler predefines that spelling
 *                       instead,
 *   3. size_t           from <cstddef> (C++) or <stddef.h> (C).
 */
#if !defined(SIMDE_ALIGN_SIZE_T_)
#if defined(__SIZE_TYPE__)
#define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__
#elif defined(__SIZE_T_TYPE__)
/* Use the macro that was actually tested for; __SIZE_TYPE__ is known
 * to be undefined on this branch. */
#define SIMDE_ALIGN_SIZE_T_ __SIZE_T_TYPE__
#elif defined(__cplusplus)
#include <cstddef>
#define SIMDE_ALIGN_SIZE_T_ size_t
#else
#include <stddef.h>
#define SIMDE_ALIGN_SIZE_T_ size_t
#endif
#endif
|
||||
|
||||
#if !defined(SIMDE_ALIGN_INTPTR_T_)
|
||||
#if defined(__INTPTR_TYPE__)
|
||||
#define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__
|
||||
#elif defined(__PTRDIFF_TYPE__)
|
||||
#define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__
|
||||
#elif defined(__PTRDIFF_T_TYPE__)
|
||||
#define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__
|
||||
#elif defined(__cplusplus)
|
||||
#include <cstddef>
|
||||
#define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t
|
||||
#else
|
||||
#include <stddef.h>
|
||||
#define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(SIMDE_ALIGN_DEBUG)
|
||||
#if defined(__cplusplus)
|
||||
#include <cstdio>
|
||||
#else
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* SIMDE_ALIGN_OF(Type)
|
||||
*
|
||||
* The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or
|
||||
* __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler.
|
||||
* It isn't defined everywhere (only when the compiler has some alignof-
|
||||
* like feature we can use to implement it), but it should work in most
|
||||
* modern compilers, as well as C11 and C++11.
|
||||
*
|
||||
* If we can't find an implementation for SIMDE_ALIGN_OF then the macro
|
||||
* will not be defined, so if you can handle that situation sensibly
|
||||
* you may need to sprinkle some ifdefs into your code.
|
||||
*/
|
||||
#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \
|
||||
(0 && HEDLEY_HAS_FEATURE(c_alignof))
|
||||
#define SIMDE_ALIGN_OF(Type) _Alignof(Type)
|
||||
#elif (defined(__cplusplus) && (__cplusplus >= 201103L)) || \
|
||||
(0 && HEDLEY_HAS_FEATURE(cxx_alignof))
|
||||
#define SIMDE_ALIGN_OF(Type) alignof(Type)
|
||||
#elif HEDLEY_GCC_VERSION_CHECK(2, 95, 0) || \
|
||||
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
|
||||
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
|
||||
HEDLEY_SUNPRO_VERSION_CHECK(5, 13, 0) || \
|
||||
HEDLEY_TINYC_VERSION_CHECK(0, 9, 24) || \
|
||||
HEDLEY_PGI_VERSION_CHECK(19, 10, 0) || \
|
||||
HEDLEY_CRAY_VERSION_CHECK(10, 0, 0) || \
|
||||
HEDLEY_TI_ARMCL_VERSION_CHECK(16, 9, 0) || \
|
||||
HEDLEY_TI_CL2000_VERSION_CHECK(16, 9, 0) || \
|
||||
HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0) || \
|
||||
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
|
||||
HEDLEY_TI_CL430_VERSION_CHECK(16, 9, 0) || \
|
||||
HEDLEY_TI_CLPRU_VERSION_CHECK(2, 3, 2) || defined(__IBM__ALIGNOF__) || \
|
||||
defined(__clang__)
|
||||
#define SIMDE_ALIGN_OF(Type) __alignof__(Type)
|
||||
#elif HEDLEY_IAR_VERSION_CHECK(8, 40, 0)
|
||||
#define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type)
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(19, 0, 0)
|
||||
/* Probably goes back much further, but MS takes down their old docs.
|
||||
* If you can verify that this works in earlier versions please let
|
||||
* me know! */
|
||||
#define SIMDE_ALIGN_OF(Type) __alignof(Type)
|
||||
#endif
|
||||
|
||||
/* SIMDE_ALIGN_MAXIMUM:
|
||||
*
|
||||
* This is the maximum alignment that the compiler supports. You can
|
||||
* define the value prior to including SIMDe if necessary, but in that
|
||||
* case *please* submit an issue so we can add the platform to the
|
||||
* detection code.
|
||||
*
|
||||
* Most compilers are okay with types which are aligned beyond what
|
||||
* they think is the maximum, as long as the alignment is a power
|
||||
* of two. MSVC is the exception (of course), so we need to cap the
|
||||
* alignment requests at values that the implementation supports.
|
||||
*
|
||||
* XL C/C++ will accept values larger than 16 (which is the alignment
|
||||
* of an AltiVec vector), but will not reliably align to the larger
|
||||
* value, so we cap the value at 16 there.
|
||||
*
|
||||
* If the compiler accepts any power-of-two value within reason then
|
||||
* this macro should be left undefined, and the SIMDE_ALIGN_CAP
|
||||
* macro will just return the value passed to it. */
|
||||
#if !defined(SIMDE_ALIGN_MAXIMUM)
|
||||
#if defined(HEDLEY_MSVC_VERSION)
|
||||
#if defined(_M_IX86) || defined(_M_AMD64)
|
||||
#if HEDLEY_MSVC_VERSION_CHECK(19, 14, 0)
|
||||
#define SIMDE_ALIGN_PLATFORM_MAXIMUM 64
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(16, 0, 0)
|
||||
/* VS 2010 is really a guess based on Wikipedia; if anyone can
|
||||
* test with old VS versions I'd really appreciate it. */
|
||||
#define SIMDE_ALIGN_PLATFORM_MAXIMUM 32
|
||||
#else
|
||||
#define SIMDE_ALIGN_PLATFORM_MAXIMUM 16
|
||||
#endif
|
||||
#elif defined(_M_ARM) || defined(_M_ARM64)
|
||||
#define SIMDE_ALIGN_PLATFORM_MAXIMUM 8
|
||||
#endif
|
||||
#elif defined(HEDLEY_IBM_VERSION)
|
||||
#define SIMDE_ALIGN_PLATFORM_MAXIMUM 16
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* You can mostly ignore these; they're intended for internal use.
|
||||
* If you do need to use them please let me know; if they fulfill
|
||||
* a common use case I'll probably drop the trailing underscore
|
||||
* and make them part of the public API. */
|
||||
#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM)
|
||||
#if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64
|
||||
#define SIMDE_ALIGN_64_ 64
|
||||
#define SIMDE_ALIGN_32_ 32
|
||||
#define SIMDE_ALIGN_16_ 16
|
||||
#define SIMDE_ALIGN_8_ 8
|
||||
#elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32
|
||||
#define SIMDE_ALIGN_64_ 32
|
||||
#define SIMDE_ALIGN_32_ 32
|
||||
#define SIMDE_ALIGN_16_ 16
|
||||
#define SIMDE_ALIGN_8_ 8
|
||||
#elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16
|
||||
#define SIMDE_ALIGN_64_ 16
|
||||
#define SIMDE_ALIGN_32_ 16
|
||||
#define SIMDE_ALIGN_16_ 16
|
||||
#define SIMDE_ALIGN_8_ 8
|
||||
#elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8
|
||||
#define SIMDE_ALIGN_64_ 8
|
||||
#define SIMDE_ALIGN_32_ 8
|
||||
#define SIMDE_ALIGN_16_ 8
|
||||
#define SIMDE_ALIGN_8_ 8
|
||||
#else
|
||||
#error Max alignment expected to be >= 8
|
||||
#endif
|
||||
#else
|
||||
#define SIMDE_ALIGN_64_ 64
|
||||
#define SIMDE_ALIGN_32_ 32
|
||||
#define SIMDE_ALIGN_16_ 16
|
||||
#define SIMDE_ALIGN_8_ 8
|
||||
#endif
|
||||
|
||||
/**
 * SIMDE_ALIGN_CAP(Alignment)
 *
 * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM.
 *
 * When SIMDE_ALIGN_MAXIMUM is not defined the macro expands to
 * Alignment unchanged.
 *
 * Note that the cap must be expressed in terms of SIMDE_ALIGN_MAXIMUM
 * itself: SIMDE_ALIGN_PLATFORM_MAXIMUM is only ever detected when
 * SIMDE_ALIGN_MAXIMUM is *not* defined, so it is never available on
 * the branch below.
 */
#if defined(SIMDE_ALIGN_MAXIMUM)
#define SIMDE_ALIGN_CAP(Alignment)                           \
	(((Alignment) < (SIMDE_ALIGN_MAXIMUM)) ? (Alignment) \
					       : (SIMDE_ALIGN_MAXIMUM))
#else
#define SIMDE_ALIGN_CAP(Alignment) (Alignment)
#endif
|
||||
|
||||
/* SIMDE_ALIGN_TO(Alignment)
|
||||
*
|
||||
* SIMDE_ALIGN_TO is used to declare types or variables. It basically
|
||||
* maps to the align attribute in most compilers, the align declspec
|
||||
* in MSVC, or _Alignas/alignas in C11/C++11.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* struct i32x4 {
|
||||
* SIMDE_ALIGN_TO(16) int32_t values[4];
|
||||
* }
|
||||
*
|
||||
* Limitations:
|
||||
*
|
||||
* MSVC requires that the Alignment parameter be numeric; you can't do
|
||||
* something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is
|
||||
* unfortunate because that's really how the LIKE macros are
|
||||
* implemented, and I am not aware of a way to get anything like this
|
||||
* to work without using the C11/C++11 keywords.
|
||||
*
|
||||
* It also means that we can't use SIMDE_ALIGN_CAP to limit the
|
||||
* alignment to the value specified, which MSVC also requires, so on
|
||||
* MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead.
|
||||
* They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would,
|
||||
* but should be safe to use on MSVC.
|
||||
*
|
||||
* All this is to say that, if you want your code to work on MSVC, you
|
||||
* should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of
|
||||
* SIMDE_ALIGN_TO(8/16/32/64).
|
||||
*/
|
||||
#if HEDLEY_HAS_ATTRIBUTE(aligned) || HEDLEY_GCC_VERSION_CHECK(2, 95, 0) || \
|
||||
HEDLEY_CRAY_VERSION_CHECK(8, 4, 0) || \
|
||||
HEDLEY_IBM_VERSION_CHECK(11, 1, 0) || \
|
||||
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
|
||||
HEDLEY_PGI_VERSION_CHECK(19, 4, 0) || \
|
||||
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
|
||||
HEDLEY_TINYC_VERSION_CHECK(0, 9, 24) || \
|
||||
HEDLEY_TI_ARMCL_VERSION_CHECK(16, 9, 0) || \
|
||||
HEDLEY_TI_CL2000_VERSION_CHECK(16, 9, 0) || \
|
||||
HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0) || \
|
||||
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
|
||||
HEDLEY_TI_CL430_VERSION_CHECK(16, 9, 0) || \
|
||||
HEDLEY_TI_CLPRU_VERSION_CHECK(2, 3, 2)
|
||||
#define SIMDE_ALIGN_TO(Alignment) \
|
||||
__attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment))))
|
||||
#elif (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))
|
||||
#define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment))
|
||||
#elif (defined(__cplusplus) && (__cplusplus >= 201103L))
|
||||
#define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment))
|
||||
#elif defined(HEDLEY_MSVC_VERSION)
|
||||
#define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment))
|
||||
/* Unfortunately MSVC can't handle __declspec(align(__alignof(Type)));
|
||||
* the alignment passed to the declspec has to be an integer. */
|
||||
#define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE
|
||||
#endif
|
||||
#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_)
|
||||
#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_)
|
||||
#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_)
|
||||
#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_)
|
||||
|
||||
/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment)
|
||||
*
|
||||
* SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's
|
||||
* std::assume_aligned, or __builtin_assume_aligned. It tells the
|
||||
* compiler to assume that the provided pointer is aligned to an
|
||||
* `Alignment`-byte boundary.
|
||||
*
|
||||
* If you define SIMDE_ALIGN_DEBUG prior to including this header then
|
||||
* SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. We don't
|
||||
* integrate with NDEBUG in this header, but it may be a good idea to
|
||||
* put something like this in your code:
|
||||
*
|
||||
* #if !defined(NDEBUG)
|
||||
* #define SIMDE_ALIGN_DEBUG
|
||||
* #endif
|
||||
* #include <.../simde-align.h>
|
||||
*/
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \
|
||||
HEDLEY_GCC_VERSION_CHECK(4, 7, 0)
|
||||
#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \
|
||||
HEDLEY_REINTERPRET_CAST( \
|
||||
__typeof__(Pointer), \
|
||||
__builtin_assume_aligned( \
|
||||
HEDLEY_CONST_CAST( \
|
||||
void *, HEDLEY_REINTERPRET_CAST(const void *, \
|
||||
Pointer)), \
|
||||
Alignment))
|
||||
#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
|
||||
#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \
|
||||
(__extension__({ \
|
||||
__typeof__(v) simde_assume_aligned_t_ = (Pointer); \
|
||||
__assume_aligned(simde_assume_aligned_t_, Alignment); \
|
||||
simde_assume_aligned_t_; \
|
||||
}))
|
||||
#elif defined(__cplusplus) && (__cplusplus > 201703L)
|
||||
#include <memory>
|
||||
#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \
|
||||
std::assume_aligned<Alignment>(Pointer)
|
||||
#else
|
||||
#if defined(__cplusplus)
|
||||
template<typename T>
|
||||
HEDLEY_ALWAYS_INLINE static T *
|
||||
simde_align_assume_to_unchecked(T *ptr, const size_t alignment)
|
||||
#else
|
||||
HEDLEY_ALWAYS_INLINE static void *
|
||||
simde_align_assume_to_unchecked(void *ptr, const size_t alignment)
|
||||
#endif
|
||||
{
|
||||
HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) %
|
||||
SIMDE_ALIGN_CAP(alignment)) == 0);
|
||||
return ptr;
|
||||
}
|
||||
#if defined(__cplusplus)
|
||||
#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \
|
||||
simde_align_assume_to_unchecked((Pointer), (Alignment))
|
||||
#else
|
||||
#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \
|
||||
simde_align_assume_to_unchecked( \
|
||||
HEDLEY_CONST_CAST(void *, HEDLEY_REINTERPRET_CAST( \
|
||||
const void *, Pointer)), \
|
||||
(Alignment))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(SIMDE_ALIGN_DEBUG)
|
||||
#define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) \
|
||||
SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment)
|
||||
#else
|
||||
#include <stdio.h>
|
||||
#if defined(__cplusplus)
|
||||
template<typename T>
|
||||
static HEDLEY_ALWAYS_INLINE T *
|
||||
simde_align_assume_to_checked_uncapped(T *ptr, const size_t alignment,
|
||||
const char *file, int line,
|
||||
const char *ptrname)
|
||||
#else
|
||||
static HEDLEY_ALWAYS_INLINE void *
|
||||
simde_align_assume_to_checked_uncapped(void *ptr, const size_t alignment,
|
||||
const char *file, int line,
|
||||
const char *ptrname)
|
||||
#endif
|
||||
{
|
||||
if (HEDLEY_UNLIKELY(
|
||||
(HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) %
|
||||
HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_,
|
||||
SIMDE_ALIGN_CAP(alignment))) != 0)) {
|
||||
fprintf(stderr,
|
||||
"%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n",
|
||||
file, line, ptrname,
|
||||
HEDLEY_REINTERPRET_CAST(const void *, ptr),
|
||||
HEDLEY_STATIC_CAST(unsigned int,
|
||||
SIMDE_ALIGN_CAP(alignment)),
|
||||
HEDLEY_STATIC_CAST(
|
||||
unsigned int,
|
||||
HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_,
|
||||
(ptr)) %
|
||||
HEDLEY_STATIC_CAST(
|
||||
SIMDE_ALIGN_INTPTR_T_,
|
||||
SIMDE_ALIGN_CAP(alignment))));
|
||||
}
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
#if defined(__cplusplus)
|
||||
#define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) \
|
||||
simde_align_assume_to_checked_uncapped((Pointer), (Alignment), \
|
||||
__FILE__, __LINE__, #Pointer)
|
||||
#else
|
||||
#define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) \
|
||||
simde_align_assume_to_checked_uncapped( \
|
||||
HEDLEY_CONST_CAST(void *, HEDLEY_REINTERPRET_CAST( \
|
||||
const void *, Pointer)), \
|
||||
(Alignment), __FILE__, __LINE__, #Pointer)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* SIMDE_ALIGN_LIKE(Type)
|
||||
* SIMDE_ALIGN_LIKE_#(Type)
|
||||
*
|
||||
* The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros
|
||||
* except instead of an integer they take a type; basically, it's just
|
||||
* a more convenient way to do something like:
|
||||
*
|
||||
* SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type))
|
||||
*
|
||||
* The versions with a numeric suffix will fall back on using a numeric
|
||||
* value in the event we can't use SIMDE_ALIGN_OF(Type). This is
|
||||
* mainly for MSVC, where __declspec(align()) can't handle anything
|
||||
* other than hard-coded numeric values.
|
||||
*/
|
||||
#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && \
|
||||
!defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE)
|
||||
#define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type))
|
||||
#define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type)
|
||||
#define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type)
|
||||
#define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type)
|
||||
#define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type)
|
||||
#else
|
||||
#define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64
|
||||
#define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32
|
||||
#define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16
|
||||
#define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8
|
||||
#endif
|
||||
|
||||
/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type)
|
||||
*
|
||||
* This is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a
|
||||
* type instead of a numeric value. */
|
||||
#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO)
|
||||
#define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) \
|
||||
SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type))
|
||||
#endif
|
||||
|
||||
/* SIMDE_ALIGN_CAST(Type, Pointer)
|
||||
*
|
||||
* SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try
|
||||
* to silence warnings that some compilers may produce if you try
|
||||
* to assign to a type with increased alignment requirements.
|
||||
*
|
||||
* Note that it does *not* actually attempt to tell the compiler that
|
||||
* the pointer is aligned like the destination should be; that's the
|
||||
* job of the next macro. This macro is necessary for stupid APIs
|
||||
* like _mm_loadu_si128 where the input is a __m128i* but the function
|
||||
* is specifically for data which isn't necessarily aligned to
|
||||
* _Alignof(__m128i).
|
||||
*/
|
||||
#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || \
|
||||
HEDLEY_GCC_VERSION_CHECK(3, 4, 0)
|
||||
#define SIMDE_ALIGN_CAST(Type, Pointer) \
|
||||
(__extension__({ \
|
||||
HEDLEY_DIAGNOSTIC_PUSH \
|
||||
_Pragma("GCC diagnostic ignored \"-Wcast-align\"") \
|
||||
Type simde_r_ = \
|
||||
HEDLEY_REINTERPRET_CAST(Type, Pointer); \
|
||||
HEDLEY_DIAGNOSTIC_POP \
|
||||
simde_r_; \
|
||||
}))
|
||||
#else
|
||||
#define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer)
|
||||
#endif
|
||||
|
||||
/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer)
|
||||
*
|
||||
* This is sort of like a combination of a reinterpret_cast and a
|
||||
* SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell
|
||||
* the compiler that the pointer is aligned like the specified type
|
||||
* and casts the pointer to the specified type while suppressing any
|
||||
* warnings from the compiler about casting to a type with greater
|
||||
* alignment requirements.
|
||||
*/
|
||||
#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) \
|
||||
SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type)
|
||||
|
||||
#endif /* !defined(SIMDE_ALIGN_H) */
|
|
@ -27,14 +27,14 @@
|
|||
* an undefined macro being used (e.g., GCC with -Wundef).
|
||||
*
|
||||
* This was originally created for SIMDe
|
||||
* <https://github.com/nemequ/simde> (hence the prefix), but this
|
||||
* <https://github.com/simd-everywhere/simde> (hence the prefix), but this
|
||||
* header has no dependencies and may be used anywhere. It is
|
||||
* originally based on information from
|
||||
* <https://sourceforge.net/p/predef/wiki/Architectures/>, though it
|
||||
* has been enhanced with additional information.
|
||||
*
|
||||
* If you improve this file, or find a bug, please file the issue at
|
||||
* <https://github.com/nemequ/simde/issues>. If you copy this into
|
||||
* <https://github.com/simd-everywhere/simde/issues>. If you copy this into
|
||||
* your project, even if you change the prefix, please keep the links
|
||||
* to SIMDe intact so others know where to report issues, submit
|
||||
* enhancements, and find the latest version. */
|
||||
|
@ -70,7 +70,7 @@
|
|||
/* AMD64 / x86_64
|
||||
<https://en.wikipedia.org/wiki/X86-64> */
|
||||
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || \
|
||||
defined(__x86_64) || defined(_M_X66) || defined(_M_AMD64)
|
||||
defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
|
||||
#define SIMDE_ARCH_AMD64 1000
|
||||
#endif
|
||||
|
||||
|
@ -125,6 +125,9 @@
|
|||
#define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM
|
||||
#endif
|
||||
#endif
|
||||
#if defined(__ARM_FEATURE_SVE)
|
||||
#define SIMDE_ARCH_ARM_SVE
|
||||
#endif
|
||||
|
||||
/* Blackfin
|
||||
<https://en.wikipedia.org/wiki/Blackfin> */
|
||||
|
@ -276,6 +279,12 @@
|
|||
#define SIMDE_ARCH_X86_AVX 1
|
||||
#endif
|
||||
#endif
|
||||
#if defined(__AVX512VP2INTERSECT__)
|
||||
#define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1
|
||||
#endif
|
||||
#if defined(__AVX512VBMI__)
|
||||
#define SIMDE_ARCH_X86_AVX512VBMI 1
|
||||
#endif
|
||||
#if defined(__AVX512BW__)
|
||||
#define SIMDE_ARCH_X86_AVX512BW 1
|
||||
#endif
|
||||
|
@ -294,6 +303,12 @@
|
|||
#if defined(__GFNI__)
|
||||
#define SIMDE_ARCH_X86_GFNI 1
|
||||
#endif
|
||||
#if defined(__PCLMUL__)
|
||||
#define SIMDE_ARCH_X86_PCLMUL 1
|
||||
#endif
|
||||
#if defined(__VPCLMULQDQ__)
|
||||
#define SIMDE_ARCH_X86_VPCLMULQDQ 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Itanium
|
||||
|
@ -363,6 +378,10 @@
|
|||
#define SIMDE_ARCH_MIPS_CHECK(version) (0)
|
||||
#endif
|
||||
|
||||
#if defined(__mips_loongson_mmi)
|
||||
#define SIMDE_ARCH_MIPS_LOONGSON_MMI 1
|
||||
#endif
|
||||
|
||||
/* Matsushita MN10300
|
||||
<https://en.wikipedia.org/wiki/MN103> */
|
||||
#if defined(__MN10300__) || defined(__mn10300__)
|
||||
|
|
|
@ -30,62 +30,103 @@
|
|||
#include "hedley.h"
|
||||
|
||||
#define SIMDE_VERSION_MAJOR 0
|
||||
#define SIMDE_VERSION_MINOR 5
|
||||
#define SIMDE_VERSION_MICRO 0
|
||||
#define SIMDE_VERSION_MINOR 7
|
||||
#define SIMDE_VERSION_MICRO 1
|
||||
#define SIMDE_VERSION \
|
||||
HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, \
|
||||
SIMDE_VERSION_MICRO)
|
||||
|
||||
#include "simde-arch.h"
|
||||
#include "simde-features.h"
|
||||
#include "simde-diagnostic.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if HEDLEY_HAS_ATTRIBUTE(aligned) || HEDLEY_GCC_VERSION_CHECK(2, 95, 0) || \
|
||||
HEDLEY_CRAY_VERSION_CHECK(8, 4, 0) || \
|
||||
HEDLEY_IBM_VERSION_CHECK(11, 1, 0) || \
|
||||
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
|
||||
HEDLEY_PGI_VERSION_CHECK(19, 4, 0) || \
|
||||
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
|
||||
HEDLEY_TINYC_VERSION_CHECK(0, 9, 24) || \
|
||||
HEDLEY_TI_VERSION_CHECK(8, 1, 0)
|
||||
#define SIMDE_ALIGN(alignment) __attribute__((aligned(alignment)))
|
||||
#elif defined(_MSC_VER) && !(defined(_M_ARM) && !defined(_M_ARM64))
|
||||
#define SIMDE_ALIGN(alignment) __declspec(align(alignment))
|
||||
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
|
||||
#define SIMDE_ALIGN(alignment) _Alignas(alignment)
|
||||
#elif defined(__cplusplus) && (__cplusplus >= 201103L)
|
||||
#define SIMDE_ALIGN(alignment) alignas(alignment)
|
||||
#else
|
||||
#define SIMDE_ALIGN(alignment)
|
||||
#include "simde-detect-clang.h"
|
||||
#include "simde-arch.h"
|
||||
#include "simde-features.h"
|
||||
#include "simde-diagnostic.h"
|
||||
#include "simde-math.h"
|
||||
#include "simde-constify.h"
|
||||
#include "simde-align.h"
|
||||
|
||||
/* In some situations, SIMDe has to make large performance sacrifices
|
||||
* for small increases in how faithfully it reproduces an API, but
|
||||
* only a relatively small number of users will actually need the API
|
||||
* to be completely accurate. The SIMDE_FAST_* options can be used to
|
||||
* disable these trade-offs.
|
||||
*
|
||||
* They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or
|
||||
* the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to
|
||||
* enable some optimizations. Using -ffast-math and/or
|
||||
* -ffinite-math-only will also enable the relevant options. If you
|
||||
* don't want that you can pass -DSIMDE_NO_FAST_* to disable them. */
|
||||
|
||||
/* Most programs avoid NaNs by never passing values which can result in
|
||||
* a NaN; for example, if you only pass non-negative values to the sqrt
|
||||
* functions, it won't generate a NaN. On some platforms, similar
|
||||
* functions handle NaNs differently; for example, the _mm_min_ps SSE
|
||||
* function will return 0.0 if you pass it (0.0, NaN), but the NEON
|
||||
* vminq_f32 function will return NaN. Making them behave like one
|
||||
* another is expensive; it requires generating a mask of all lanes
|
||||
* with NaNs, then performing the operation (e.g., vminq_f32), then
|
||||
* blending together the result with another vector using the mask.
|
||||
*
|
||||
* If you don't want SIMDe to worry about the differences between how
|
||||
* NaNs are handled on the two platforms, define this (or pass
|
||||
* -ffinite-math-only) */
|
||||
#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && \
|
||||
defined(__FAST_MATH__)
|
||||
#define SIMDE_FAST_MATH
|
||||
#endif
|
||||
|
||||
#if HEDLEY_GNUC_VERSION_CHECK(2, 95, 0) || \
|
||||
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
|
||||
HEDLEY_IBM_VERSION_CHECK(11, 1, 0)
|
||||
#define SIMDE_ALIGN_OF(T) (__alignof__(T))
|
||||
#elif (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \
|
||||
HEDLEY_HAS_FEATURE(c11_alignof)
|
||||
#define SIMDE_ALIGN_OF(T) (_Alignof(T))
|
||||
#elif (defined(__cplusplus) && (__cplusplus >= 201103L)) || \
|
||||
HEDLEY_HAS_FEATURE(cxx_alignof)
|
||||
#define SIMDE_ALIGN_OF(T) (alignof(T))
|
||||
#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS)
|
||||
#if defined(SIMDE_FAST_MATH)
|
||||
#define SIMDE_FAST_NANS
|
||||
#elif defined(__FINITE_MATH_ONLY__)
|
||||
#if __FINITE_MATH_ONLY__
|
||||
#define SIMDE_FAST_NANS
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(SIMDE_ALIGN_OF)
|
||||
#define SIMDE_ALIGN_AS(N, T) SIMDE_ALIGN(SIMDE_ALIGN_OF(T))
|
||||
#else
|
||||
#define SIMDE_ALIGN_AS(N, T) SIMDE_ALIGN(N)
|
||||
/* Many functions are defined as using the current rounding mode
|
||||
* (i.e., the SIMD version of fegetround()) when converting to
|
||||
* an integer. For example, _mm_cvtpd_epi32. Unfortunately,
|
||||
* on some platforms (such as ARMv8+ where round-to-nearest is
|
||||
* always used, regardless of the FPSCR register) this means we
|
||||
* have to first query the current rounding mode, then choose
|
||||
* the proper function (round, ceil, floor, etc.) */
|
||||
#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && \
|
||||
defined(SIMDE_FAST_MATH)
|
||||
#define SIMDE_FAST_ROUND_MODE
|
||||
#endif
|
||||
|
||||
#define simde_assert_aligned(alignment, val) \
|
||||
simde_assert_int(HEDLEY_REINTERPRET_CAST( \
|
||||
uintptr_t, HEDLEY_REINTERPRET_CAST( \
|
||||
const void *, (val))) % \
|
||||
(alignment), \
|
||||
==, 0)
|
||||
/* This controls how ties are rounded. For example, does 10.5 round to
|
||||
* 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for
|
||||
* example) doesn't support it and it must be emulated (which is rather
|
||||
* slow). If you're okay with just using the default for whatever arch
|
||||
* you're on, you should definitely define this.
|
||||
*
|
||||
* Note that we don't use this macro to avoid correct implementations
|
||||
* in functions which are explicitly about rounding (such as vrnd* on
|
||||
* NEON, _mm_round_* on x86, etc.); it is only used for code where
|
||||
* rounding is a component in another function, and even then it isn't
|
||||
* usually a problem since such functions will use the current rounding
|
||||
* mode. */
|
||||
#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && \
|
||||
defined(SIMDE_FAST_MATH)
|
||||
#define SIMDE_FAST_ROUND_TIES
|
||||
#endif
|
||||
|
||||
/* For functions which convert from one type to another (mostly from
|
||||
* floating point to integer types), sometimes we need to do a range
|
||||
* check and potentially return a different result if the value
|
||||
* falls outside that range. Skipping this check can provide a
|
||||
* performance boost, at the expense of faithfulness to the API we're
|
||||
* emulating. */
|
||||
#if !defined(SIMDE_FAST_CONVERSION_RANGE) && \
|
||||
!defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH)
|
||||
#define SIMDE_FAST_CONVERSION_RANGE
|
||||
#endif
|
||||
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \
|
||||
HEDLEY_GCC_VERSION_CHECK(3, 4, 0) || \
|
||||
|
@ -102,15 +143,21 @@
|
|||
#define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated())
|
||||
#endif
|
||||
|
||||
/* diagnose_if + __builtin_constant_p was broken until clang 9,
|
||||
* which is when __FILE_NAME__ was added. */
|
||||
#if defined(SIMDE_CHECK_CONSTANT_) && defined(__FILE_NAME__)
|
||||
#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT)
|
||||
#if defined(SIMDE_CHECK_CONSTANT_) && \
|
||||
SIMDE_DETECT_CLANG_VERSION_CHECK(9, 0, 0) && \
|
||||
(!defined(__apple_build_version__) || \
|
||||
((__apple_build_version__ < 11000000) || \
|
||||
(__apple_build_version__ >= 12000000)))
|
||||
#define SIMDE_REQUIRE_CONSTANT(arg) \
|
||||
HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), \
|
||||
"`" #arg "' must be constant")
|
||||
#else
|
||||
#define SIMDE_REQUIRE_CONSTANT(arg)
|
||||
#endif
|
||||
#else
|
||||
#define SIMDE_REQUIRE_CONSTANT(arg)
|
||||
#endif
|
||||
|
||||
#define SIMDE_REQUIRE_RANGE(arg, min, max) \
|
||||
HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), \
|
||||
|
@ -120,39 +167,20 @@
|
|||
SIMDE_REQUIRE_CONSTANT(arg) \
|
||||
SIMDE_REQUIRE_RANGE(arg, min, max)
|
||||
|
||||
/* SIMDE_ASSUME_ALIGNED allows you to (try to) tell the compiler
|
||||
* that a pointer is aligned to an `alignment`-byte boundary. */
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \
|
||||
HEDLEY_GCC_VERSION_CHECK(4, 7, 0)
|
||||
#define SIMDE_ASSUME_ALIGNED(alignment, v) \
|
||||
HEDLEY_REINTERPRET_CAST(__typeof__(v), \
|
||||
__builtin_assume_aligned(v, alignment))
|
||||
#elif defined(__cplusplus) && (__cplusplus > 201703L)
|
||||
#define SIMDE_ASSUME_ALIGNED(alignment, v) std::assume_aligned<alignment>(v)
|
||||
#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
|
||||
#define SIMDE_ASSUME_ALIGNED(alignment, v) \
|
||||
(__extension__({ \
|
||||
__typeof__(v) simde_assume_aligned_t_ = (v); \
|
||||
__assume_aligned(simde_assume_aligned_t_, alignment); \
|
||||
simde_assume_aligned_t_; \
|
||||
}))
|
||||
#else
|
||||
#define SIMDE_ASSUME_ALIGNED(alignment, v) (v)
|
||||
#endif
|
||||
|
||||
/* SIMDE_ALIGN_CAST allows you to convert to a type with greater
|
||||
* aligment requirements without triggering a warning. */
|
||||
#if HEDLEY_HAS_WARNING("-Wcast-align")
|
||||
#define SIMDE_ALIGN_CAST(T, v) \
|
||||
(__extension__({ \
|
||||
HEDLEY_DIAGNOSTIC_PUSH \
|
||||
_Pragma("clang diagnostic ignored \"-Wcast-align\"") \
|
||||
T simde_r_ = HEDLEY_REINTERPRET_CAST(T, v); \
|
||||
HEDLEY_DIAGNOSTIC_POP \
|
||||
simde_r_; \
|
||||
}))
|
||||
#else
|
||||
#define SIMDE_ALIGN_CAST(T, v) HEDLEY_REINTERPRET_CAST(T, v)
|
||||
/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty
|
||||
* fallback if we can't find an implementation; instead we have to
|
||||
* check if SIMDE_STATIC_ASSERT is defined before using it. */
|
||||
#if !defined(__cplusplus) && \
|
||||
((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \
|
||||
HEDLEY_HAS_FEATURE(c_static_assert) || \
|
||||
HEDLEY_GCC_VERSION_CHECK(6, 0, 0) || \
|
||||
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || defined(_Static_assert))
|
||||
#define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message)
|
||||
#elif (defined(__cplusplus) && (__cplusplus >= 201103L)) || \
|
||||
HEDLEY_MSVC_VERSION_CHECK(16, 0, 0)
|
||||
#define SIMDE_STATIC_ASSERT(expr, message) \
|
||||
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \
|
||||
static_assert(expr, message))
|
||||
#endif
|
||||
|
||||
#if (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \
|
||||
|
@ -170,6 +198,7 @@
|
|||
|
||||
* SIMDE_VECTOR - Declaring a vector.
|
||||
* SIMDE_VECTOR_OPS - basic operations (binary and unary).
|
||||
* SIMDE_VECTOR_NEGATE - negating a vector
|
||||
* SIMDE_VECTOR_SCALAR - For binary operators, the second argument
|
||||
can be a scalar, in which case the result is as if that scalar
|
||||
had been broadcast to all lanes of a vector.
|
||||
|
@ -182,11 +211,13 @@
|
|||
#if HEDLEY_GCC_VERSION_CHECK(4, 8, 0)
|
||||
#define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))
|
||||
#define SIMDE_VECTOR_OPS
|
||||
#define SIMDE_VECTOR_NEGATE
|
||||
#define SIMDE_VECTOR_SCALAR
|
||||
#define SIMDE_VECTOR_SUBSCRIPT
|
||||
#elif HEDLEY_INTEL_VERSION_CHECK(16, 0, 0)
|
||||
#define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))
|
||||
#define SIMDE_VECTOR_OPS
|
||||
#define SIMDE_VECTOR_NEGATE
|
||||
/* ICC only supports SIMDE_VECTOR_SCALAR for constants */
|
||||
#define SIMDE_VECTOR_SUBSCRIPT
|
||||
#elif HEDLEY_GCC_VERSION_CHECK(4, 1, 0) || HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
|
||||
|
@ -197,8 +228,9 @@
|
|||
#elif HEDLEY_HAS_ATTRIBUTE(vector_size)
|
||||
#define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))
|
||||
#define SIMDE_VECTOR_OPS
|
||||
#define SIMDE_VECTOR_NEGATE
|
||||
#define SIMDE_VECTOR_SUBSCRIPT
|
||||
#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) /* clang 4.0 */
|
||||
#if SIMDE_DETECT_CLANG_VERSION_CHECK(5, 0, 0)
|
||||
#define SIMDE_VECTOR_SCALAR
|
||||
#endif
|
||||
#endif
|
||||
|
@ -281,27 +313,34 @@ HEDLEY_DIAGNOSTIC_POP
|
|||
#endif
|
||||
|
||||
#if defined(SIMDE_ENABLE_OPENMP)
|
||||
#define SIMDE_VECTORIZE _Pragma("omp simd")
|
||||
#define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd)
|
||||
#define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l))
|
||||
#if defined(__clang__)
|
||||
#define SIMDE_VECTORIZE_REDUCTION(r) \
|
||||
HEDLEY_DIAGNOSTIC_PUSH \
|
||||
_Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \
|
||||
HEDLEY_PRAGMA(omp simd reduction(r)) HEDLEY_DIAGNOSTIC_POP
|
||||
#else
|
||||
#define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r))
|
||||
#endif
|
||||
#define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a))
|
||||
#elif defined(SIMDE_ENABLE_CILKPLUS)
|
||||
#define SIMDE_VECTORIZE _Pragma("simd")
|
||||
#define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd)
|
||||
#define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l))
|
||||
#define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r))
|
||||
#define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a))
|
||||
#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION)
|
||||
#define SIMDE_VECTORIZE _Pragma("clang loop vectorize(enable)")
|
||||
#define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable))
|
||||
#define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l))
|
||||
#define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE
|
||||
#define SIMDE_VECTORIZE_ALIGNED(a)
|
||||
#elif HEDLEY_GCC_VERSION_CHECK(4, 9, 0)
|
||||
#define SIMDE_VECTORIZE _Pragma("GCC ivdep")
|
||||
#define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep)
|
||||
#define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE
|
||||
#define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE
|
||||
#define SIMDE_VECTORIZE_ALIGNED(a)
|
||||
#elif HEDLEY_CRAY_VERSION_CHECK(5, 0, 0)
|
||||
#define SIMDE_VECTORIZE _Pragma("_CRI ivdep")
|
||||
#define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep)
|
||||
#define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE
|
||||
#define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE
|
||||
#define SIMDE_VECTORIZE_ALIGNED(a)
|
||||
|
@ -350,20 +389,10 @@ HEDLEY_DIAGNOSTIC_POP
|
|||
HEDLEY_DIAGNOSTIC_POP
|
||||
#endif
|
||||
|
||||
#if HEDLEY_HAS_WARNING("-Wpedantic")
|
||||
#define SIMDE_DIAGNOSTIC_DISABLE_INT128 \
|
||||
_Pragma("clang diagnostic ignored \"-Wpedantic\"")
|
||||
#elif defined(HEDLEY_GCC_VERSION)
|
||||
#define SIMDE_DIAGNOSTIC_DISABLE_INT128 \
|
||||
_Pragma("GCC diagnostic ignored \"-Wpedantic\"")
|
||||
#else
|
||||
#define SIMDE_DIAGNOSTIC_DISABLE_INT128
|
||||
#endif
|
||||
|
||||
#if defined(__SIZEOF_INT128__)
|
||||
#define SIMDE_HAVE_INT128_
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DIAGNOSTIC_DISABLE_INT128
|
||||
SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_
|
||||
typedef __int128 simde_int128;
|
||||
typedef unsigned __int128 simde_uint128;
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
@ -488,39 +517,6 @@ typedef SIMDE_FLOAT32_TYPE simde_float32;
|
|||
#endif
|
||||
typedef SIMDE_FLOAT64_TYPE simde_float64;
|
||||
|
||||
/* Whether to assume that the compiler can auto-vectorize reasonably
|
||||
well. This will cause SIMDe to attempt to compose vector
|
||||
operations using simpler vector operations instead of minimizing
|
||||
serial work.
|
||||
|
||||
As an example, consider the _mm_add_ss(a, b) function from SSE,
|
||||
which returns { a0 + b0, a1, a2, a3 }. This pattern is repeated
|
||||
for other operations (sub, mul, etc.).
|
||||
|
||||
The naïve implementation would result in loading a0 and b0, adding
|
||||
them into a temporary variable, then splicing that value into a new
|
||||
vector with the remaining elements from a.
|
||||
|
||||
On platforms which support vectorization, it's generally faster to
|
||||
simply perform the operation on the entire vector to avoid having
|
||||
to move data between SIMD registers and non-SIMD registers.
|
||||
Basically, instead of the temporary variable being (a0 + b0) it
|
||||
would be a vector of (a + b), which is then combined with a to form
|
||||
the result.
|
||||
|
||||
By default, SIMDe will prefer the pure-vector versions if we detect
|
||||
a vector ISA extension, but this can be overridden by defining
|
||||
SIMDE_NO_ASSUME_VECTORIZATION. You can also define
|
||||
SIMDE_ASSUME_VECTORIZATION if you want to force SIMDe to use the
|
||||
vectorized version. */
|
||||
#if !defined(SIMDE_NO_ASSUME_VECTORIZATION) && \
|
||||
!defined(SIMDE_ASSUME_VECTORIZATION)
|
||||
#if defined(__SSE__) || defined(__ARM_NEON) || defined(__mips_msa) || \
|
||||
defined(__ALTIVEC__) || defined(__wasm_simd128__)
|
||||
#define SIMDE_ASSUME_VECTORIZATION
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if HEDLEY_HAS_WARNING("-Wbad-function-cast")
|
||||
#define SIMDE_CONVERT_FTOI(T, v) \
|
||||
HEDLEY_DIAGNOSTIC_PUSH \
|
||||
|
@ -530,11 +526,18 @@ typedef SIMDE_FLOAT64_TYPE simde_float64;
|
|||
#define SIMDE_CONVERT_FTOI(T, v) ((T)(v))
|
||||
#endif
|
||||
|
||||
/* TODO: detect compilers which support this outside of C11 mode */
|
||||
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
|
||||
#define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) \
|
||||
(_Generic((value), to : (value), from : ((to)(value))))
|
||||
_Generic((value), to \
|
||||
: (value), default \
|
||||
: (_Generic((value), from \
|
||||
: ((to)(value)))))
|
||||
#define SIMDE_CHECKED_STATIC_CAST(to, from, value) \
|
||||
(_Generic((value), to : (value), from : ((to)(value))))
|
||||
_Generic((value), to \
|
||||
: (value), default \
|
||||
: (_Generic((value), from \
|
||||
: ((to)(value)))))
|
||||
#else
|
||||
#define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) \
|
||||
HEDLEY_REINTERPRET_CAST(to, value)
|
||||
|
@ -564,7 +567,7 @@ typedef SIMDE_FLOAT64_TYPE simde_float64;
|
|||
#if defined(__STDC_HOSTED__)
|
||||
#define SIMDE_STDC_HOSTED __STDC_HOSTED__
|
||||
#else
|
||||
#if defined(HEDLEY_PGI_VERSION_CHECK) || defined(HEDLEY_MSVC_VERSION_CHECK)
|
||||
#if defined(HEDLEY_PGI_VERSION) || defined(HEDLEY_MSVC_VERSION)
|
||||
#define SIMDE_STDC_HOSTED 1
|
||||
#else
|
||||
#define SIMDE_STDC_HOSTED 0
|
||||
|
@ -572,23 +575,34 @@ typedef SIMDE_FLOAT64_TYPE simde_float64;
|
|||
#endif
|
||||
|
||||
/* Try to deal with environments without a standard library. */
|
||||
#if !defined(simde_memcpy) || !defined(simde_memset)
|
||||
#if !defined(SIMDE_NO_STRING_H) && defined(__has_include)
|
||||
#if __has_include(<string.h>)
|
||||
#include <string.h>
|
||||
#if !defined(simde_memcpy)
|
||||
#define simde_memcpy(dest, src, n) memcpy(dest, src, n)
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_memcpy)
|
||||
#define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n)
|
||||
#endif
|
||||
#endif
|
||||
#if !defined(simde_memset)
|
||||
#define simde_memset(s, c, n) memset(s, c, n)
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_memset)
|
||||
#define simde_memset(s, c, n) __builtin_memset(s, c, n)
|
||||
#endif
|
||||
#else
|
||||
#endif
|
||||
#if !defined(simde_memcmp)
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_memcmp)
|
||||
#define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp)
|
||||
#if !defined(SIMDE_NO_STRING_H)
|
||||
#if defined(__has_include)
|
||||
#if !__has_include(<string.h>)
|
||||
#define SIMDE_NO_STRING_H
|
||||
#endif
|
||||
#elif (SIMDE_STDC_HOSTED == 0)
|
||||
#define SIMDE_NO_STRING_H
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#if !defined(simde_memcpy) || !defined(simde_memset)
|
||||
#if !defined(SIMDE_NO_STRING_H) && (SIMDE_STDC_HOSTED == 1)
|
||||
|
||||
#if !defined(SIMDE_NO_STRING_H)
|
||||
#include <string.h>
|
||||
#if !defined(simde_memcpy)
|
||||
#define simde_memcpy(dest, src, n) memcpy(dest, src, n)
|
||||
|
@ -596,14 +610,8 @@ typedef SIMDE_FLOAT64_TYPE simde_float64;
|
|||
#if !defined(simde_memset)
|
||||
#define simde_memset(s, c, n) memset(s, c, n)
|
||||
#endif
|
||||
#elif (HEDLEY_HAS_BUILTIN(__builtin_memcpy) && \
|
||||
HEDLEY_HAS_BUILTIN(__builtin_memset)) || \
|
||||
HEDLEY_GCC_VERSION_CHECK(4, 2, 0)
|
||||
#if !defined(simde_memcpy)
|
||||
#define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n)
|
||||
#endif
|
||||
#if !defined(simde_memset)
|
||||
#define simde_memset(s, c, n) __builtin_memset(s, c, n)
|
||||
#if !defined(simde_memcmp)
|
||||
#define simde_memcmp(s1, s2, n) memcmp(s1, s2, n)
|
||||
#endif
|
||||
#else
|
||||
/* These are meant to be portable, not fast. If you're hitting them you
|
||||
|
@ -637,10 +645,24 @@ void simde_memset_(void *s, int c, size_t len)
|
|||
}
|
||||
#define simde_memset(s, c, n) simde_memset_(s, c, n)
|
||||
#endif
|
||||
#endif /* !defined(SIMDE_NO_STRING_H) && (SIMDE_STDC_HOSTED == 1) */
|
||||
#endif /* !defined(simde_memcpy) || !defined(simde_memset) */
|
||||
|
||||
#include "simde-math.h"
|
||||
#if !defined(simde_memcmp)
|
||||
SIMDE_FUCTION_ATTRIBUTES
|
||||
int simde_memcmp_(const void *s1, const void *s2, size_t n)
|
||||
{
|
||||
unsigned char *s1_ = HEDLEY_STATIC_CAST(unsigned char *, s1);
|
||||
unsigned char *s2_ = HEDLEY_STATIC_CAST(unsigned char *, s2);
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
if (s1_[i] != s2_[i]) {
|
||||
return (int)(s1_[i] - s2_[i]);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n)
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(FE_ALL_EXCEPT)
|
||||
#define SIMDE_HAVE_FENV_H
|
||||
|
@ -682,6 +704,105 @@ void simde_memset_(void *s, int c, size_t len)
|
|||
|
||||
#include "check.h"
|
||||
|
||||
/* GCC/clang have a bunch of functionality in builtins which we would
|
||||
* like to access, but the suffixes indicate whether they operate on
|
||||
* int, long, or long long, not fixed width types (e.g., int32_t).
|
||||
* We use these macros to attempt to map from fixed-width to the
|
||||
* names GCC uses. Note that you should still cast the input(s) and
|
||||
* return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if
|
||||
* types are the same size they may not be compatible according to the
|
||||
* compiler. For example, on x86 long and long long are generally
|
||||
* both 64 bits, but platforms vary on whether an int64_t is mapped
|
||||
* to a long or long long. */
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_
|
||||
|
||||
#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN)
|
||||
#define SIMDE_BUILTIN_SUFFIX_8_
|
||||
#define SIMDE_BUILTIN_TYPE_8_ int
|
||||
#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN)
|
||||
#define SIMDE_BUILTIN_SUFFIX_8_ l
|
||||
#define SIMDE_BUILTIN_TYPE_8_ long
|
||||
#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN)
|
||||
#define SIMDE_BUILTIN_SUFFIX_8_ ll
|
||||
#define SIMDE_BUILTIN_TYPE_8_ long long
|
||||
#endif
|
||||
|
||||
#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN)
|
||||
#define SIMDE_BUILTIN_SUFFIX_16_
|
||||
#define SIMDE_BUILTIN_TYPE_16_ int
|
||||
#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN)
|
||||
#define SIMDE_BUILTIN_SUFFIX_16_ l
|
||||
#define SIMDE_BUILTIN_TYPE_16_ long
|
||||
#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN)
|
||||
#define SIMDE_BUILTIN_SUFFIX_16_ ll
|
||||
#define SIMDE_BUILTIN_TYPE_16_ long long
|
||||
#endif
|
||||
|
||||
#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN)
|
||||
#define SIMDE_BUILTIN_SUFFIX_32_
|
||||
#define SIMDE_BUILTIN_TYPE_32_ int
|
||||
#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN)
|
||||
#define SIMDE_BUILTIN_SUFFIX_32_ l
|
||||
#define SIMDE_BUILTIN_TYPE_32_ long
|
||||
#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN)
|
||||
#define SIMDE_BUILTIN_SUFFIX_32_ ll
|
||||
#define SIMDE_BUILTIN_TYPE_32_ long long
|
||||
#endif
|
||||
|
||||
#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN)
|
||||
#define SIMDE_BUILTIN_SUFFIX_64_
|
||||
#define SIMDE_BUILTIN_TYPE_64_ int
|
||||
#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN)
|
||||
#define SIMDE_BUILTIN_SUFFIX_64_ l
|
||||
#define SIMDE_BUILTIN_TYPE_64_ long
|
||||
#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN)
|
||||
#define SIMDE_BUILTIN_SUFFIX_64_ ll
|
||||
#define SIMDE_BUILTIN_TYPE_64_ long long
|
||||
#endif
|
||||
|
||||
#if defined(SIMDE_BUILTIN_SUFFIX_8_)
|
||||
#define SIMDE_BUILTIN_8_(name) \
|
||||
HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)
|
||||
#define SIMDE_BUILTIN_HAS_8_(name) \
|
||||
HEDLEY_HAS_BUILTIN( \
|
||||
HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_))
|
||||
#else
|
||||
#define SIMDE_BUILTIN_HAS_8_(name) 0
|
||||
#endif
|
||||
#if defined(SIMDE_BUILTIN_SUFFIX_16_)
|
||||
#define SIMDE_BUILTIN_16_(name) \
|
||||
HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)
|
||||
#define SIMDE_BUILTIN_HAS_16_(name) \
|
||||
HEDLEY_HAS_BUILTIN( \
|
||||
HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_))
|
||||
#else
|
||||
#define SIMDE_BUILTIN_HAS_16_(name) 0
|
||||
#endif
|
||||
#if defined(SIMDE_BUILTIN_SUFFIX_32_)
|
||||
#define SIMDE_BUILTIN_32_(name) \
|
||||
HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)
|
||||
#define SIMDE_BUILTIN_HAS_32_(name) \
|
||||
HEDLEY_HAS_BUILTIN( \
|
||||
HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_))
|
||||
#else
|
||||
#define SIMDE_BUILTIN_HAS_32_(name) 0
|
||||
#endif
|
||||
#if defined(SIMDE_BUILTIN_SUFFIX_64_)
|
||||
#define SIMDE_BUILTIN_64_(name) \
|
||||
HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)
|
||||
#define SIMDE_BUILTIN_HAS_64_(name) \
|
||||
HEDLEY_HAS_BUILTIN( \
|
||||
HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_))
|
||||
#else
|
||||
#define SIMDE_BUILTIN_HAS_64_(name) 0
|
||||
#endif
|
||||
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
/* Sometimes we run into problems with specific versions of compilers
|
||||
which make the native versions unusable for us. Often this is due
|
||||
to missing functions, sometimes buggy implementations, etc. These
|
||||
|
@ -712,29 +833,75 @@ void simde_memset_(void *s, int c, size_t len)
|
|||
#if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)
|
||||
#define SIMDE_BUG_GCC_94482
|
||||
#endif
|
||||
#if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || \
|
||||
defined(SIMDE_ARCH_SYSTEMZ)
|
||||
#define SIMDE_BUG_GCC_53784
|
||||
#endif
|
||||
#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)
|
||||
#if HEDLEY_GCC_VERSION_CHECK(4, 3, 0) /* -Wsign-conversion */
|
||||
#define SIMDE_BUG_GCC_95144
|
||||
#endif
|
||||
#endif
|
||||
#if !HEDLEY_GCC_VERSION_CHECK(9, 4, 0) && defined(SIMDE_ARCH_AARCH64)
|
||||
#define SIMDE_BUG_GCC_94488
|
||||
#endif
|
||||
#if defined(SIMDE_ARCH_POWER)
|
||||
#if defined(SIMDE_ARCH_ARM)
|
||||
#define SIMDE_BUG_GCC_95399
|
||||
#define SIMDE_BUG_GCC_95471
|
||||
#elif defined(SIMDE_ARCH_POWER)
|
||||
#define SIMDE_BUG_GCC_95227
|
||||
#define SIMDE_BUG_GCC_95782
|
||||
#elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)
|
||||
#if !HEDLEY_GCC_VERSION_CHECK(10, 2, 0) && !defined(__OPTIMIZE__)
|
||||
#define SIMDE_BUG_GCC_96174
|
||||
#endif
|
||||
#endif
|
||||
#define SIMDE_BUG_GCC_95399
|
||||
#elif defined(__clang__)
|
||||
#if defined(SIMDE_ARCH_AARCH64)
|
||||
#define SIMDE_BUG_CLANG_45541
|
||||
#define SIMDE_BUG_CLANG_46844
|
||||
#define SIMDE_BUG_CLANG_48257
|
||||
#if SIMDE_DETECT_CLANG_VERSION_CHECK(10, 0, 0) && \
|
||||
SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0)
|
||||
#define SIMDE_BUG_CLANG_BAD_VI64_OPS
|
||||
#endif
|
||||
#endif
|
||||
#if defined(HEDLEY_EMSCRIPTEN_VERSION)
|
||||
#define SIMDE_BUG_EMSCRIPTEN_MISSING_IMPL /* Placeholder for (as yet) unfiled issues. */
|
||||
#define SIMDE_BUG_EMSCRIPTEN_5242
|
||||
#if defined(SIMDE_ARCH_POWER)
|
||||
#define SIMDE_BUG_CLANG_46770
|
||||
#endif
|
||||
#if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0) && \
|
||||
!defined(__OPTIMIZE__)
|
||||
#define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT
|
||||
#endif
|
||||
#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)
|
||||
#if HEDLEY_HAS_WARNING("-Wsign-conversion") && \
|
||||
SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0)
|
||||
#define SIMDE_BUG_CLANG_45931
|
||||
#endif
|
||||
#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
|
||||
SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0)
|
||||
#define SIMDE_BUG_CLANG_44589
|
||||
#endif
|
||||
#endif
|
||||
#define SIMDE_BUG_CLANG_45959
|
||||
#elif defined(HEDLEY_MSVC_VERSION)
|
||||
#if defined(SIMDE_ARCH_X86)
|
||||
#define SIMDE_BUG_MSVC_ROUND_EXTRACT
|
||||
#endif
|
||||
#elif defined(HEDLEY_INTEL_VERSION)
|
||||
#define SIMDE_BUG_INTEL_857088
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* GCC and Clang both have the same issue:
|
||||
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144
|
||||
* https://bugs.llvm.org/show_bug.cgi?id=45931
|
||||
* This is just an easy way to work around it.
|
||||
*/
|
||||
#if HEDLEY_HAS_WARNING("-Wsign-conversion") || HEDLEY_GCC_VERSION_CHECK(4, 3, 0)
|
||||
#if (HEDLEY_HAS_WARNING("-Wsign-conversion") && \
|
||||
SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0)) || \
|
||||
HEDLEY_GCC_VERSION_CHECK(4, 3, 0)
|
||||
#define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) \
|
||||
(__extension__({ \
|
||||
HEDLEY_DIAGNOSTIC_PUSH \
|
||||
|
|
|
@ -0,0 +1,925 @@
|
|||
/* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Copyright:
|
||||
* 2020 Evan Nemerson <evan@nemerson.com>
|
||||
*/
|
||||
|
||||
/* Constify macros. For internal use only.
|
||||
*
|
||||
* These are used to make it possible to call a function which takes
|
||||
* an Integer Constant Expression (ICE) using a compile time constant.
|
||||
* Technically it would also be possible to use a value not trivially
|
||||
* known by the compiler, but there would be a significant performance
|
||||
* hit (a switch statement is used).
|
||||
*
|
||||
* The basic idea is pretty simple; we just emit a do while loop which
|
||||
* contains a switch with a case for every possible value of the
|
||||
* constant.
|
||||
*
|
||||
* As long as the value you pass to the function is constant, pretty
|
||||
* much any compiler shouldn't have a problem generating exactly the
|
||||
* same code as if you had used an ICE.
|
||||
*
|
||||
* This is intended to be used in the SIMDe implementations of
|
||||
* functions the compilers require to be an ICE, but the other benefit
|
||||
* is that if we also disable the warnings from
|
||||
* SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests
|
||||
* to use non-ICE parameters.
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_CONSTIFY_H)
|
||||
#define SIMDE_CONSTIFY_H
|
||||
|
||||
#include "simde-diagnostic.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_
|
||||
SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_
|
||||
|
||||
#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \
|
||||
do { \
|
||||
switch (imm) { \
|
||||
case 0: \
|
||||
result = func_name(__VA_ARGS__, 0); \
|
||||
break; \
|
||||
case 1: \
|
||||
result = func_name(__VA_ARGS__, 1); \
|
||||
break; \
|
||||
default: \
|
||||
result = default_case; \
|
||||
break; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \
|
||||
do { \
|
||||
switch (imm) { \
|
||||
case 0: \
|
||||
result = func_name(__VA_ARGS__, 0); \
|
||||
break; \
|
||||
case 1: \
|
||||
result = func_name(__VA_ARGS__, 1); \
|
||||
break; \
|
||||
case 2: \
|
||||
result = func_name(__VA_ARGS__, 2); \
|
||||
break; \
|
||||
case 3: \
|
||||
result = func_name(__VA_ARGS__, 3); \
|
||||
break; \
|
||||
default: \
|
||||
result = default_case; \
|
||||
break; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \
|
||||
do { \
|
||||
switch (imm) { \
|
||||
case 0: \
|
||||
result = func_name(__VA_ARGS__, 0); \
|
||||
break; \
|
||||
case 1: \
|
||||
result = func_name(__VA_ARGS__, 1); \
|
||||
break; \
|
||||
case 2: \
|
||||
result = func_name(__VA_ARGS__, 2); \
|
||||
break; \
|
||||
case 3: \
|
||||
result = func_name(__VA_ARGS__, 3); \
|
||||
break; \
|
||||
case 4: \
|
||||
result = func_name(__VA_ARGS__, 4); \
|
||||
break; \
|
||||
case 5: \
|
||||
result = func_name(__VA_ARGS__, 5); \
|
||||
break; \
|
||||
case 6: \
|
||||
result = func_name(__VA_ARGS__, 6); \
|
||||
break; \
|
||||
case 7: \
|
||||
result = func_name(__VA_ARGS__, 7); \
|
||||
break; \
|
||||
default: \
|
||||
result = default_case; \
|
||||
break; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) \
|
||||
do { \
|
||||
switch (imm) { \
|
||||
case 0: \
|
||||
result = func_name(__VA_ARGS__, 0); \
|
||||
break; \
|
||||
case 1: \
|
||||
result = func_name(__VA_ARGS__, 1); \
|
||||
break; \
|
||||
case 2: \
|
||||
result = func_name(__VA_ARGS__, 2); \
|
||||
break; \
|
||||
case 3: \
|
||||
result = func_name(__VA_ARGS__, 3); \
|
||||
break; \
|
||||
case 4: \
|
||||
result = func_name(__VA_ARGS__, 4); \
|
||||
break; \
|
||||
case 5: \
|
||||
result = func_name(__VA_ARGS__, 5); \
|
||||
break; \
|
||||
case 6: \
|
||||
result = func_name(__VA_ARGS__, 6); \
|
||||
break; \
|
||||
case 7: \
|
||||
result = func_name(__VA_ARGS__, 7); \
|
||||
break; \
|
||||
case 8: \
|
||||
result = func_name(__VA_ARGS__, 8); \
|
||||
break; \
|
||||
case 9: \
|
||||
result = func_name(__VA_ARGS__, 9); \
|
||||
break; \
|
||||
case 10: \
|
||||
result = func_name(__VA_ARGS__, 10); \
|
||||
break; \
|
||||
case 11: \
|
||||
result = func_name(__VA_ARGS__, 11); \
|
||||
break; \
|
||||
case 12: \
|
||||
result = func_name(__VA_ARGS__, 12); \
|
||||
break; \
|
||||
case 13: \
|
||||
result = func_name(__VA_ARGS__, 13); \
|
||||
break; \
|
||||
case 14: \
|
||||
result = func_name(__VA_ARGS__, 14); \
|
||||
break; \
|
||||
case 15: \
|
||||
result = func_name(__VA_ARGS__, 15); \
|
||||
break; \
|
||||
default: \
|
||||
result = default_case; \
|
||||
break; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \
|
||||
do { \
|
||||
switch (imm) { \
|
||||
case 0: \
|
||||
result = func_name(__VA_ARGS__, 0); \
|
||||
break; \
|
||||
case 1: \
|
||||
result = func_name(__VA_ARGS__, 1); \
|
||||
break; \
|
||||
case 2: \
|
||||
result = func_name(__VA_ARGS__, 2); \
|
||||
break; \
|
||||
case 3: \
|
||||
result = func_name(__VA_ARGS__, 3); \
|
||||
break; \
|
||||
case 4: \
|
||||
result = func_name(__VA_ARGS__, 4); \
|
||||
break; \
|
||||
case 5: \
|
||||
result = func_name(__VA_ARGS__, 5); \
|
||||
break; \
|
||||
case 6: \
|
||||
result = func_name(__VA_ARGS__, 6); \
|
||||
break; \
|
||||
case 7: \
|
||||
result = func_name(__VA_ARGS__, 7); \
|
||||
break; \
|
||||
case 8: \
|
||||
result = func_name(__VA_ARGS__, 8); \
|
||||
break; \
|
||||
case 9: \
|
||||
result = func_name(__VA_ARGS__, 9); \
|
||||
break; \
|
||||
case 10: \
|
||||
result = func_name(__VA_ARGS__, 10); \
|
||||
break; \
|
||||
case 11: \
|
||||
result = func_name(__VA_ARGS__, 11); \
|
||||
break; \
|
||||
case 12: \
|
||||
result = func_name(__VA_ARGS__, 12); \
|
||||
break; \
|
||||
case 13: \
|
||||
result = func_name(__VA_ARGS__, 13); \
|
||||
break; \
|
||||
case 14: \
|
||||
result = func_name(__VA_ARGS__, 14); \
|
||||
break; \
|
||||
case 15: \
|
||||
result = func_name(__VA_ARGS__, 15); \
|
||||
break; \
|
||||
case 16: \
|
||||
result = func_name(__VA_ARGS__, 16); \
|
||||
break; \
|
||||
case 17: \
|
||||
result = func_name(__VA_ARGS__, 17); \
|
||||
break; \
|
||||
case 18: \
|
||||
result = func_name(__VA_ARGS__, 18); \
|
||||
break; \
|
||||
case 19: \
|
||||
result = func_name(__VA_ARGS__, 19); \
|
||||
break; \
|
||||
case 20: \
|
||||
result = func_name(__VA_ARGS__, 20); \
|
||||
break; \
|
||||
case 21: \
|
||||
result = func_name(__VA_ARGS__, 21); \
|
||||
break; \
|
||||
case 22: \
|
||||
result = func_name(__VA_ARGS__, 22); \
|
||||
break; \
|
||||
case 23: \
|
||||
result = func_name(__VA_ARGS__, 23); \
|
||||
break; \
|
||||
case 24: \
|
||||
result = func_name(__VA_ARGS__, 24); \
|
||||
break; \
|
||||
case 25: \
|
||||
result = func_name(__VA_ARGS__, 25); \
|
||||
break; \
|
||||
case 26: \
|
||||
result = func_name(__VA_ARGS__, 26); \
|
||||
break; \
|
||||
case 27: \
|
||||
result = func_name(__VA_ARGS__, 27); \
|
||||
break; \
|
||||
case 28: \
|
||||
result = func_name(__VA_ARGS__, 28); \
|
||||
break; \
|
||||
case 29: \
|
||||
result = func_name(__VA_ARGS__, 29); \
|
||||
break; \
|
||||
case 30: \
|
||||
result = func_name(__VA_ARGS__, 30); \
|
||||
break; \
|
||||
case 31: \
|
||||
result = func_name(__VA_ARGS__, 31); \
|
||||
break; \
|
||||
default: \
|
||||
result = default_case; \
|
||||
break; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) \
|
||||
do { \
|
||||
switch (imm) { \
|
||||
case 0: \
|
||||
result = func_name(__VA_ARGS__, 0); \
|
||||
break; \
|
||||
case 1: \
|
||||
result = func_name(__VA_ARGS__, 1); \
|
||||
break; \
|
||||
case 2: \
|
||||
result = func_name(__VA_ARGS__, 2); \
|
||||
break; \
|
||||
case 3: \
|
||||
result = func_name(__VA_ARGS__, 3); \
|
||||
break; \
|
||||
case 4: \
|
||||
result = func_name(__VA_ARGS__, 4); \
|
||||
break; \
|
||||
case 5: \
|
||||
result = func_name(__VA_ARGS__, 5); \
|
||||
break; \
|
||||
case 6: \
|
||||
result = func_name(__VA_ARGS__, 6); \
|
||||
break; \
|
||||
case 7: \
|
||||
result = func_name(__VA_ARGS__, 7); \
|
||||
break; \
|
||||
case 8: \
|
||||
result = func_name(__VA_ARGS__, 8); \
|
||||
break; \
|
||||
case 9: \
|
||||
result = func_name(__VA_ARGS__, 9); \
|
||||
break; \
|
||||
case 10: \
|
||||
result = func_name(__VA_ARGS__, 10); \
|
||||
break; \
|
||||
case 11: \
|
||||
result = func_name(__VA_ARGS__, 11); \
|
||||
break; \
|
||||
case 12: \
|
||||
result = func_name(__VA_ARGS__, 12); \
|
||||
break; \
|
||||
case 13: \
|
||||
result = func_name(__VA_ARGS__, 13); \
|
||||
break; \
|
||||
case 14: \
|
||||
result = func_name(__VA_ARGS__, 14); \
|
||||
break; \
|
||||
case 15: \
|
||||
result = func_name(__VA_ARGS__, 15); \
|
||||
break; \
|
||||
case 16: \
|
||||
result = func_name(__VA_ARGS__, 16); \
|
||||
break; \
|
||||
case 17: \
|
||||
result = func_name(__VA_ARGS__, 17); \
|
||||
break; \
|
||||
case 18: \
|
||||
result = func_name(__VA_ARGS__, 18); \
|
||||
break; \
|
||||
case 19: \
|
||||
result = func_name(__VA_ARGS__, 19); \
|
||||
break; \
|
||||
case 20: \
|
||||
result = func_name(__VA_ARGS__, 20); \
|
||||
break; \
|
||||
case 21: \
|
||||
result = func_name(__VA_ARGS__, 21); \
|
||||
break; \
|
||||
case 22: \
|
||||
result = func_name(__VA_ARGS__, 22); \
|
||||
break; \
|
||||
case 23: \
|
||||
result = func_name(__VA_ARGS__, 23); \
|
||||
break; \
|
||||
case 24: \
|
||||
result = func_name(__VA_ARGS__, 24); \
|
||||
break; \
|
||||
case 25: \
|
||||
result = func_name(__VA_ARGS__, 25); \
|
||||
break; \
|
||||
case 26: \
|
||||
result = func_name(__VA_ARGS__, 26); \
|
||||
break; \
|
||||
case 27: \
|
||||
result = func_name(__VA_ARGS__, 27); \
|
||||
break; \
|
||||
case 28: \
|
||||
result = func_name(__VA_ARGS__, 28); \
|
||||
break; \
|
||||
case 29: \
|
||||
result = func_name(__VA_ARGS__, 29); \
|
||||
break; \
|
||||
case 30: \
|
||||
result = func_name(__VA_ARGS__, 30); \
|
||||
break; \
|
||||
case 31: \
|
||||
result = func_name(__VA_ARGS__, 31); \
|
||||
break; \
|
||||
case 32: \
|
||||
result = func_name(__VA_ARGS__, 32); \
|
||||
break; \
|
||||
case 33: \
|
||||
result = func_name(__VA_ARGS__, 33); \
|
||||
break; \
|
||||
case 34: \
|
||||
result = func_name(__VA_ARGS__, 34); \
|
||||
break; \
|
||||
case 35: \
|
||||
result = func_name(__VA_ARGS__, 35); \
|
||||
break; \
|
||||
case 36: \
|
||||
result = func_name(__VA_ARGS__, 36); \
|
||||
break; \
|
||||
case 37: \
|
||||
result = func_name(__VA_ARGS__, 37); \
|
||||
break; \
|
||||
case 38: \
|
||||
result = func_name(__VA_ARGS__, 38); \
|
||||
break; \
|
||||
case 39: \
|
||||
result = func_name(__VA_ARGS__, 39); \
|
||||
break; \
|
||||
case 40: \
|
||||
result = func_name(__VA_ARGS__, 40); \
|
||||
break; \
|
||||
case 41: \
|
||||
result = func_name(__VA_ARGS__, 41); \
|
||||
break; \
|
||||
case 42: \
|
||||
result = func_name(__VA_ARGS__, 42); \
|
||||
break; \
|
||||
case 43: \
|
||||
result = func_name(__VA_ARGS__, 43); \
|
||||
break; \
|
||||
case 44: \
|
||||
result = func_name(__VA_ARGS__, 44); \
|
||||
break; \
|
||||
case 45: \
|
||||
result = func_name(__VA_ARGS__, 45); \
|
||||
break; \
|
||||
case 46: \
|
||||
result = func_name(__VA_ARGS__, 46); \
|
||||
break; \
|
||||
case 47: \
|
||||
result = func_name(__VA_ARGS__, 47); \
|
||||
break; \
|
||||
case 48: \
|
||||
result = func_name(__VA_ARGS__, 48); \
|
||||
break; \
|
||||
case 49: \
|
||||
result = func_name(__VA_ARGS__, 49); \
|
||||
break; \
|
||||
case 50: \
|
||||
result = func_name(__VA_ARGS__, 50); \
|
||||
break; \
|
||||
case 51: \
|
||||
result = func_name(__VA_ARGS__, 51); \
|
||||
break; \
|
||||
case 52: \
|
||||
result = func_name(__VA_ARGS__, 52); \
|
||||
break; \
|
||||
case 53: \
|
||||
result = func_name(__VA_ARGS__, 53); \
|
||||
break; \
|
||||
case 54: \
|
||||
result = func_name(__VA_ARGS__, 54); \
|
||||
break; \
|
||||
case 55: \
|
||||
result = func_name(__VA_ARGS__, 55); \
|
||||
break; \
|
||||
case 56: \
|
||||
result = func_name(__VA_ARGS__, 56); \
|
||||
break; \
|
||||
case 57: \
|
||||
result = func_name(__VA_ARGS__, 57); \
|
||||
break; \
|
||||
case 58: \
|
||||
result = func_name(__VA_ARGS__, 58); \
|
||||
break; \
|
||||
case 59: \
|
||||
result = func_name(__VA_ARGS__, 59); \
|
||||
break; \
|
||||
case 60: \
|
||||
result = func_name(__VA_ARGS__, 60); \
|
||||
break; \
|
||||
case 61: \
|
||||
result = func_name(__VA_ARGS__, 61); \
|
||||
break; \
|
||||
case 62: \
|
||||
result = func_name(__VA_ARGS__, 62); \
|
||||
break; \
|
||||
case 63: \
|
||||
result = func_name(__VA_ARGS__, 63); \
|
||||
break; \
|
||||
default: \
|
||||
result = default_case; \
|
||||
break; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* Statement form of the 2-way constify dispatch: a run-time `imm` of 0 or 1
 * is turned into a call func_name(__VA_ARGS__, <literal>) whose value is
 * discarded.  Any other `imm` executes `default_case` as a statement. */
#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \
	do { \
		switch (imm) { \
		case 0: func_name(__VA_ARGS__, 0); break; \
		case 1: func_name(__VA_ARGS__, 1); break; \
		default: default_case; break; \
		} \
	} while (0)
|
||||
|
||||
/* Statement form of the 4-way constify dispatch: a run-time `imm` in [0, 3]
 * is turned into a call func_name(__VA_ARGS__, <literal>) whose value is
 * discarded.  Any other `imm` executes `default_case` as a statement. */
#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \
	do { \
		switch (imm) { \
		case 0: func_name(__VA_ARGS__, 0); break; \
		case 1: func_name(__VA_ARGS__, 1); break; \
		case 2: func_name(__VA_ARGS__, 2); break; \
		case 3: func_name(__VA_ARGS__, 3); break; \
		default: default_case; break; \
		} \
	} while (0)
|
||||
|
||||
/* Statement form of the 8-way constify dispatch: a run-time `imm` in [0, 7]
 * is turned into a call func_name(__VA_ARGS__, <literal>) whose value is
 * discarded.  Any other `imm` executes `default_case` as a statement. */
#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \
	do { \
		switch (imm) { \
		case 0: func_name(__VA_ARGS__, 0); break; \
		case 1: func_name(__VA_ARGS__, 1); break; \
		case 2: func_name(__VA_ARGS__, 2); break; \
		case 3: func_name(__VA_ARGS__, 3); break; \
		case 4: func_name(__VA_ARGS__, 4); break; \
		case 5: func_name(__VA_ARGS__, 5); break; \
		case 6: func_name(__VA_ARGS__, 6); break; \
		case 7: func_name(__VA_ARGS__, 7); break; \
		default: default_case; break; \
		} \
	} while (0)
|
||||
|
||||
/* Statement form of the 16-way constify dispatch: a run-time `imm` in
 * [0, 15] is turned into a call func_name(__VA_ARGS__, <literal>) whose
 * value is discarded.  Any other `imm` executes `default_case` as a
 * statement. */
#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \
	do { \
		switch (imm) { \
		case 0: func_name(__VA_ARGS__, 0); break; \
		case 1: func_name(__VA_ARGS__, 1); break; \
		case 2: func_name(__VA_ARGS__, 2); break; \
		case 3: func_name(__VA_ARGS__, 3); break; \
		case 4: func_name(__VA_ARGS__, 4); break; \
		case 5: func_name(__VA_ARGS__, 5); break; \
		case 6: func_name(__VA_ARGS__, 6); break; \
		case 7: func_name(__VA_ARGS__, 7); break; \
		case 8: func_name(__VA_ARGS__, 8); break; \
		case 9: func_name(__VA_ARGS__, 9); break; \
		case 10: func_name(__VA_ARGS__, 10); break; \
		case 11: func_name(__VA_ARGS__, 11); break; \
		case 12: func_name(__VA_ARGS__, 12); break; \
		case 13: func_name(__VA_ARGS__, 13); break; \
		case 14: func_name(__VA_ARGS__, 14); break; \
		case 15: func_name(__VA_ARGS__, 15); break; \
		default: default_case; break; \
		} \
	} while (0)
|
||||
|
||||
/* Statement form of the 32-way constify dispatch: a run-time `imm` in
 * [0, 31] is turned into a call func_name(__VA_ARGS__, <literal>) whose
 * value is discarded.  Any other `imm` executes `default_case` as a
 * statement. */
#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) \
	do { \
		switch (imm) { \
		case 0: func_name(__VA_ARGS__, 0); break; \
		case 1: func_name(__VA_ARGS__, 1); break; \
		case 2: func_name(__VA_ARGS__, 2); break; \
		case 3: func_name(__VA_ARGS__, 3); break; \
		case 4: func_name(__VA_ARGS__, 4); break; \
		case 5: func_name(__VA_ARGS__, 5); break; \
		case 6: func_name(__VA_ARGS__, 6); break; \
		case 7: func_name(__VA_ARGS__, 7); break; \
		case 8: func_name(__VA_ARGS__, 8); break; \
		case 9: func_name(__VA_ARGS__, 9); break; \
		case 10: func_name(__VA_ARGS__, 10); break; \
		case 11: func_name(__VA_ARGS__, 11); break; \
		case 12: func_name(__VA_ARGS__, 12); break; \
		case 13: func_name(__VA_ARGS__, 13); break; \
		case 14: func_name(__VA_ARGS__, 14); break; \
		case 15: func_name(__VA_ARGS__, 15); break; \
		case 16: func_name(__VA_ARGS__, 16); break; \
		case 17: func_name(__VA_ARGS__, 17); break; \
		case 18: func_name(__VA_ARGS__, 18); break; \
		case 19: func_name(__VA_ARGS__, 19); break; \
		case 20: func_name(__VA_ARGS__, 20); break; \
		case 21: func_name(__VA_ARGS__, 21); break; \
		case 22: func_name(__VA_ARGS__, 22); break; \
		case 23: func_name(__VA_ARGS__, 23); break; \
		case 24: func_name(__VA_ARGS__, 24); break; \
		case 25: func_name(__VA_ARGS__, 25); break; \
		case 26: func_name(__VA_ARGS__, 26); break; \
		case 27: func_name(__VA_ARGS__, 27); break; \
		case 28: func_name(__VA_ARGS__, 28); break; \
		case 29: func_name(__VA_ARGS__, 29); break; \
		case 30: func_name(__VA_ARGS__, 30); break; \
		case 31: func_name(__VA_ARGS__, 31); break; \
		default: default_case; break; \
		} \
	} while (0)
|
||||
|
||||
/* Statement form of the 64-way constify dispatch: a run-time `imm` in
 * [0, 63] is turned into a call func_name(__VA_ARGS__, <literal>) whose
 * value is discarded.  Any other `imm` executes `default_case` as a
 * statement. */
#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \
	do { \
		switch (imm) { \
		case 0: func_name(__VA_ARGS__, 0); break; \
		case 1: func_name(__VA_ARGS__, 1); break; \
		case 2: func_name(__VA_ARGS__, 2); break; \
		case 3: func_name(__VA_ARGS__, 3); break; \
		case 4: func_name(__VA_ARGS__, 4); break; \
		case 5: func_name(__VA_ARGS__, 5); break; \
		case 6: func_name(__VA_ARGS__, 6); break; \
		case 7: func_name(__VA_ARGS__, 7); break; \
		case 8: func_name(__VA_ARGS__, 8); break; \
		case 9: func_name(__VA_ARGS__, 9); break; \
		case 10: func_name(__VA_ARGS__, 10); break; \
		case 11: func_name(__VA_ARGS__, 11); break; \
		case 12: func_name(__VA_ARGS__, 12); break; \
		case 13: func_name(__VA_ARGS__, 13); break; \
		case 14: func_name(__VA_ARGS__, 14); break; \
		case 15: func_name(__VA_ARGS__, 15); break; \
		case 16: func_name(__VA_ARGS__, 16); break; \
		case 17: func_name(__VA_ARGS__, 17); break; \
		case 18: func_name(__VA_ARGS__, 18); break; \
		case 19: func_name(__VA_ARGS__, 19); break; \
		case 20: func_name(__VA_ARGS__, 20); break; \
		case 21: func_name(__VA_ARGS__, 21); break; \
		case 22: func_name(__VA_ARGS__, 22); break; \
		case 23: func_name(__VA_ARGS__, 23); break; \
		case 24: func_name(__VA_ARGS__, 24); break; \
		case 25: func_name(__VA_ARGS__, 25); break; \
		case 26: func_name(__VA_ARGS__, 26); break; \
		case 27: func_name(__VA_ARGS__, 27); break; \
		case 28: func_name(__VA_ARGS__, 28); break; \
		case 29: func_name(__VA_ARGS__, 29); break; \
		case 30: func_name(__VA_ARGS__, 30); break; \
		case 31: func_name(__VA_ARGS__, 31); break; \
		case 32: func_name(__VA_ARGS__, 32); break; \
		case 33: func_name(__VA_ARGS__, 33); break; \
		case 34: func_name(__VA_ARGS__, 34); break; \
		case 35: func_name(__VA_ARGS__, 35); break; \
		case 36: func_name(__VA_ARGS__, 36); break; \
		case 37: func_name(__VA_ARGS__, 37); break; \
		case 38: func_name(__VA_ARGS__, 38); break; \
		case 39: func_name(__VA_ARGS__, 39); break; \
		case 40: func_name(__VA_ARGS__, 40); break; \
		case 41: func_name(__VA_ARGS__, 41); break; \
		case 42: func_name(__VA_ARGS__, 42); break; \
		case 43: func_name(__VA_ARGS__, 43); break; \
		case 44: func_name(__VA_ARGS__, 44); break; \
		case 45: func_name(__VA_ARGS__, 45); break; \
		case 46: func_name(__VA_ARGS__, 46); break; \
		case 47: func_name(__VA_ARGS__, 47); break; \
		case 48: func_name(__VA_ARGS__, 48); break; \
		case 49: func_name(__VA_ARGS__, 49); break; \
		case 50: func_name(__VA_ARGS__, 50); break; \
		case 51: func_name(__VA_ARGS__, 51); break; \
		case 52: func_name(__VA_ARGS__, 52); break; \
		case 53: func_name(__VA_ARGS__, 53); break; \
		case 54: func_name(__VA_ARGS__, 54); break; \
		case 55: func_name(__VA_ARGS__, 55); break; \
		case 56: func_name(__VA_ARGS__, 56); break; \
		case 57: func_name(__VA_ARGS__, 57); break; \
		case 58: func_name(__VA_ARGS__, 58); break; \
		case 59: func_name(__VA_ARGS__, 59); break; \
		case 60: func_name(__VA_ARGS__, 60); break; \
		case 61: func_name(__VA_ARGS__, 61); break; \
		case 62: func_name(__VA_ARGS__, 62); break; \
		case 63: func_name(__VA_ARGS__, 63); break; \
		default: default_case; break; \
		} \
	} while (0)
|
||||
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif
|
|
@ -0,0 +1,114 @@
|
|||
/* Detect Clang Version
|
||||
* Created by Evan Nemerson <evan@nemerson.com>
|
||||
*
|
||||
* To the extent possible under law, the author(s) have dedicated all
|
||||
* copyright and related and neighboring rights to this software to
|
||||
* the public domain worldwide. This software is distributed without
|
||||
* any warranty.
|
||||
*
|
||||
* For details, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
* SPDX-License-Identifier: CC0-1.0
|
||||
*/
|
||||
|
||||
/* This file was originally part of SIMDe
|
||||
* (<https://github.com/simd-everywhere/simde>). You're free to do with it as
|
||||
* you please, but I do have a few small requests:
|
||||
*
|
||||
* * If you make improvements, please submit them back to SIMDe
|
||||
* (at <https://github.com/simd-everywhere/simde/issues>) so others can
|
||||
* benefit from them.
|
||||
* * Please keep a link to SIMDe intact so people know where to submit
|
||||
* improvements.
|
||||
* * If you expose it publicly, please change the SIMDE_ prefix to
|
||||
* something specific to your project.
|
||||
*
|
||||
* The version numbers clang exposes (in the __clang_major__,
|
||||
* __clang_minor__, and __clang_patchlevel__ macros) are unreliable.
|
||||
* Vendors such as Apple will define these values to their version
|
||||
* numbers; for example, "Apple Clang 4.0" is really clang 3.1, but
|
||||
* __clang_major__ and __clang_minor__ are defined to 4 and 0
|
||||
* respectively, instead of 3 and 1.
|
||||
*
|
||||
* The solution is *usually* to use clang's feature detection macros
|
||||
* (<https://clang.llvm.org/docs/LanguageExtensions.html#feature-checking-macros>)
|
||||
* to determine if the feature you're interested in is available. This
|
||||
* generally works well, and it should probably be the first thing you
|
||||
* try. Unfortunately, it's not possible to check for everything. In
|
||||
* particular, compiler bugs.
|
||||
*
|
||||
* This file just uses the feature checking macros to detect features
|
||||
* added in specific versions of clang to identify which version of
|
||||
* clang the compiler is based on.
|
||||
*
|
||||
* Right now it only goes back to 3.6, but I'm happy to accept patches
|
||||
* to go back further. And, of course, newer versions are welcome if
|
||||
* they're not already present, and if you find a way to detect a point
|
||||
* release that would be great, too!
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_DETECT_CLANG_H)
|
||||
#define SIMDE_DETECT_CLANG_H 1
|
||||
|
||||
/* Best-effort detection of the upstream clang version.  Only major
 * versions are distinguished from 4.0 onwards; patches that can tell minor
 * releases apart are welcome, but most minor releases add nothing that can
 * be probed for.
 *
 * Each branch tests a warning, builtin, attribute or predefined macro and
 * maps it to a version number; the branches are ordered newest first, so
 * the first match wins.  A clang that matches none of them gets the
 * placeholder value 1.  A value supplied by the user is left untouched. */

#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION)
#  if __has_warning("-Wformat-insufficient-args")
#    define SIMDE_DETECT_CLANG_VERSION 120000
#  elif __has_warning("-Wimplicit-const-int-float-conversion")
#    define SIMDE_DETECT_CLANG_VERSION 110000
#  elif __has_warning("-Wmisleading-indentation")
#    define SIMDE_DETECT_CLANG_VERSION 100000
#  elif defined(__FILE_NAME__)
#    define SIMDE_DETECT_CLANG_VERSION 90000
#  elif __has_warning("-Wextra-semi-stmt") || \
	__has_builtin(__builtin_rotateleft32)
#    define SIMDE_DETECT_CLANG_VERSION 80000
#  elif __has_warning("-Wc++98-compat-extra-semi")
#    define SIMDE_DETECT_CLANG_VERSION 70000
#  elif __has_warning("-Wpragma-pack")
#    define SIMDE_DETECT_CLANG_VERSION 60000
#  elif __has_warning("-Wbitfield-enum-conversion")
#    define SIMDE_DETECT_CLANG_VERSION 50000
#  elif __has_attribute(diagnose_if)
#    define SIMDE_DETECT_CLANG_VERSION 40000
#  elif __has_warning("-Wcast-calling-convention")
#    define SIMDE_DETECT_CLANG_VERSION 30900
#  elif __has_warning("-WCL4")
#    define SIMDE_DETECT_CLANG_VERSION 30800
#  elif __has_warning("-WIndependentClass-attribute")
#    define SIMDE_DETECT_CLANG_VERSION 30700
#  elif __has_warning("-Wambiguous-ellipsis")
#    define SIMDE_DETECT_CLANG_VERSION 30600
#  else
#    define SIMDE_DETECT_CLANG_VERSION 1
#  endif
#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */
|
||||
|
||||
/* SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) is true when
 * the compiler is a derivative of clang >= the specified version.
 *
 * Since this file is often (primarily?) useful for working around bugs,
 * SIMDE_DETECT_CLANG_VERSION_NOT is true if and only if the compiler is
 * *older* than the specified version, which makes it easier to ifdef
 * regions that add code for older versions, such as pragmas to disable a
 * specific warning.
 *
 * SIMDE_DETECT_CLANG_VERSION is encoded as
 * (major * 10000) + (minor * 100) + revision -- e.g. 30900 for 3.9 and
 * 120000 for 12.0 -- so the requested version must be packed with the
 * same weights.  Arguments are parenthesized so that expressions can be
 * passed safely.
 *
 * When no clang version was detected, CHECK is always false and NOT is
 * always true. */

#if defined(SIMDE_DETECT_CLANG_VERSION)
#define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) \
	(SIMDE_DETECT_CLANG_VERSION >=                           \
	 (((major)*10000) + ((minor)*100) + (revision)))
#define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) \
	(SIMDE_DETECT_CLANG_VERSION <                          \
	 (((major)*10000) + ((minor)*100) + (revision)))
#else
#define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0)
#define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (1)
#endif
|
||||
|
||||
#endif /* !defined(SIMDE_DETECT_CLANG_H) */
|
|
@ -45,8 +45,10 @@
|
|||
*/
|
||||
|
||||
#if !defined(SIMDE_DIAGNOSTIC_H)
|
||||
#define SIMDE_DIAGNOSTIC_H
|
||||
|
||||
#include "hedley.h"
|
||||
#include "simde-detect-clang.h"
|
||||
|
||||
/* This is only to help us implement functions like _mm_undefined_ps. */
|
||||
#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
|
||||
|
@ -119,6 +121,9 @@
|
|||
#define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_
|
||||
#endif
|
||||
|
||||
/* MSVC emits a diagnostic when we call a function (like
|
||||
* simde_mm_set_epi32) while initializing a struct. We currently do
|
||||
* this a *lot* in the tests. */
|
||||
#if defined(HEDLEY_MSVC_VERSION)
|
||||
#define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \
|
||||
__pragma(warning(disable : 4204))
|
||||
|
@ -183,6 +188,32 @@
|
|||
#define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_
|
||||
#endif
|
||||
|
||||
/* emscripten requires us to define a __wasm_unimplemented_simd128__ macro
 * before we can access certain SIMD intrinsics, but clang's
 * -Wreserved-id-macro diagnostic warns about it being a reserved name.
 * It is a reserved name, but it's reserved for the compiler and we are
 * using it to convey information to the compiler.
 *
 * This is also used when enabling native aliases since we don't get to
 * choose the macro names.
 *
 * The guard must probe for the same warning the pragma disables;
 * otherwise the pragma could name a warning the compiler does not know,
 * or be skipped on a compiler that does emit it. */
#if HEDLEY_HAS_WARNING("-Wreserved-id-macro")
#define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ \
	_Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"")
#else
#define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_
#endif
|
||||
|
||||
/* clang 3.8 reports the packed attribute used in the _mm_loadu_*
 * functions as unnecessary.  That *may* hold for 3.8, but later versions
 * need the attribute for unaligned access to be safe. */
#if !HEDLEY_HAS_WARNING("-Wpacked")
#  define SIMDE_DIAGNOSTIC_DISABLE_PACKED_
#else
#  define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ \
	_Pragma("clang diagnostic ignored \"-Wpacked\"")
#endif
|
||||
|
||||
/* Triggered when assigning a float to a double implicitly. We use
|
||||
* explicit casts in SIMDe, this is only used in the test suite. */
|
||||
#if HEDLEY_HAS_WARNING("-Wdouble-promotion")
|
||||
|
@ -194,7 +225,7 @@
|
|||
|
||||
/* Several compilers treat conformant array parameters as VLAs. We
|
||||
* test to make sure we're in C mode (C++ doesn't support CAPs), and
|
||||
* that the version of the standard supports CAPs. We also blacklist
|
||||
* that the version of the standard supports CAPs. We also reject
|
||||
* some buggy compilers like MSVC (the logic is in Hedley if you want
|
||||
* to take a look), but with certain warnings enabled some compilers
|
||||
* still like to emit a diagnostic. */
|
||||
|
@ -221,6 +252,9 @@
|
|||
#elif HEDLEY_GCC_VERSION_CHECK(3, 4, 0)
|
||||
#define SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ \
|
||||
_Pragma("GCC diagnostic ignored \"-Wunused-function\"")
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(19, 0, 0) /* Likely goes back further */
|
||||
#define SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ \
|
||||
__pragma(warning(disable : 4505))
|
||||
#else
|
||||
#define SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_
|
||||
#endif
|
||||
|
@ -232,13 +266,63 @@
|
|||
#define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_
|
||||
#endif
|
||||
|
||||
/* https://github.com/nemequ/simde/issues/277 */
|
||||
/* Expands to a pragma ignoring -Wpadded where that warning exists, to a
 * pragma disabling warning 4324 on MSVC 19.0+, and to nothing elsewhere. */
#if HEDLEY_HAS_WARNING("-Wpadded")
#  define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ \
	_Pragma("clang diagnostic ignored \"-Wpadded\"")
#elif HEDLEY_MSVC_VERSION_CHECK(19, 0, 0) /* Likely goes back further */
#  define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable : 4324))
#else
#  define SIMDE_DIAGNOSTIC_DISABLE_PADDED_
#endif
|
||||
|
||||
/* Expands to a pragma ignoring -Wzero-as-null-pointer-constant where that
 * warning exists, and to nothing elsewhere. */
#if !HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant")
#  define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_
#else
#  define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ \
	_Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"")
#endif
|
||||
|
||||
/* Expands to a pragma ignoring -Wold-style-cast where that warning
 * exists, and to nothing elsewhere. */
#if !HEDLEY_HAS_WARNING("-Wold-style-cast")
#  define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_
#else
#  define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ \
	_Pragma("clang diagnostic ignored \"-Wold-style-cast\"")
#endif
|
||||
|
||||
/* Expands to a pragma ignoring -Wcast-function-type when the warning is
 * reported as available or the compiler is GCC 8.0+; the "GCC diagnostic"
 * pragma spelling is used in both cases.  Expands to nothing elsewhere. */
#if !(HEDLEY_HAS_WARNING("-Wcast-function-type") || \
      HEDLEY_GCC_VERSION_CHECK(8, 0, 0))
#  define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_
#else
#  define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ \
	_Pragma("GCC diagnostic ignored \"-Wcast-function-type\"")
#endif
|
||||
|
||||
/* clang will emit this warning when we use C99 extensions when not in
 * C99 mode, even though it does support them.  In such cases we check
 * the compiler and version first, so we know it's not a problem. */
#if !HEDLEY_HAS_WARNING("-Wc99-extensions")
#  define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_
#else
#  define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ \
	_Pragma("clang diagnostic ignored \"-Wc99-extensions\"")
#endif
|
||||
|
||||
/* https://github.com/simd-everywhere/simde/issues/277
 *
 * In C++ mode, for GCC versions from 4.6.0 up to (but not including)
 * 6.4.0, this expands to a pragma ignoring -Wunused-but-set-variable; it
 * expands to nothing elsewhere.
 *
 * Only the post-change lines of the hunk are kept here (upper bound
 * 6.4.0, macro name with a trailing underscore); the superseded lines
 * left a dangling continuation and a second, conflicting definition. */
#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4, 6, 0) && \
	!HEDLEY_GCC_VERSION_CHECK(6, 4, 0) && defined(__cplusplus)
#define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \
	_Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"")
#else
#define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_
#endif
|
||||
|
||||
/* This is the warning one would normally silence by defining
 * _CRT_SECURE_NO_WARNINGS, but that has to happen before anything is
 * included, which would require reordering includes. */
#if !defined(_MSC_VER)
#  define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_
#else
#  define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable : 4996))
#endif
|
||||
|
||||
/* Some compilers, such as clang, may use `long long` for 64-bit
|
||||
|
@ -246,13 +330,104 @@
|
|||
* -Wc++98-compat-pedantic which says 'long long' is incompatible with
|
||||
* C++98. */
|
||||
/* Expands to a pragma ignoring -Wc++98-compat-pedantic where that warning
 * exists, and to nothing elsewhere.
 *
 * Only the post-change lines of the hunk are kept here (macro name with a
 * trailing underscore); the superseded #define lines left a dangling
 * continuation in front of the real definition. */
#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic")
#define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \
	_Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"")
#else
#define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_
#endif
|
||||
|
||||
/* Same problem as above */
#if !HEDLEY_HAS_WARNING("-Wc++11-long-long")
#  define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_
#else
#  define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \
	_Pragma("clang diagnostic ignored \"-Wc++11-long-long\"")
#endif
|
||||
|
||||
/* emscripten emits this diagnostic whenever stdin/stdout/stderr is used
 * inside a macro. */
#if !HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion")
#  define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_
#else
#  define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ \
	_Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"")
#endif
|
||||
|
||||
/* Clang implements some AltiVec functions with C11 generic selections,
 * which triggers this diagnostic when not compiling in C11 mode. */
#if !HEDLEY_HAS_WARNING("-Wc11-extensions")
#  define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_
#else
#  define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ \
	_Pragma("clang diagnostic ignored \"-Wc11-extensions\"")
#endif
|
||||
|
||||
/* Clang sometimes triggers this warning in macros in the AltiVec and
 * NEON headers, or due to missing functions. */
#if HEDLEY_HAS_WARNING("-Wvector-conversion")
#  define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ \
	_Pragma("clang diagnostic ignored \"-Wvector-conversion\"")
/* On ARM with clang older than 10 the -Wvector-conversion situation for
 * NEON is bad enough that the warning is disabled altogether. */
#  if defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
#    define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \
	SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_
#  endif
#else
#  define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_
#endif
/* Every other configuration gets an empty "buggy" variant. */
#ifndef SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_
#  define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_
#endif
|
||||
|
||||
/* The SLEEF headers trigger this diagnostic a *lot*.  The clang pragma
 * spelling is used where -Wignored-qualifiers is reported as available,
 * the GCC spelling on GCC 4.3.0+, and nothing elsewhere. */
#if HEDLEY_HAS_WARNING("-Wignored-qualifiers")
#  define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ \
	_Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"")
#elif HEDLEY_GCC_VERSION_CHECK(4, 3, 0)
#  define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ \
	_Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"")
#else
#  define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_
#endif
|
||||
|
||||
/* GCC emits -Wpedantic under some circumstances when __int128 is used;
 * the pragma is only provided for GCC 4.8.0+. */
#if !HEDLEY_GCC_VERSION_CHECK(4, 8, 0)
#  define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_
#else
#  define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ \
	_Pragma("GCC diagnostic ignored \"-Wpedantic\"")
#endif
|
||||
|
||||
/* MSVC doesn't like (__assume(0), code) and warns that the code is
 * unreachable.  The code has to stay because not every compiler
 * understands the unreachable macro, and those complain when it is
 * missing.  A new Hedley macro is planned to handle this more elegantly;
 * until then the warning is disabled on MSVC. */
#if !defined(HEDLEY_MSVC_VERSION)
#  define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_
#else
#  define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable : 4702))
#endif
|
||||
|
||||
/* GCC reports -Wmaybe-uninitialized as a false positive in a few places;
 * the pragma is only provided for GCC 4.7.0+. */
#if !HEDLEY_GCC_VERSION_CHECK(4, 7, 0)
#  define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_
#else
#  define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ \
	_Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"")
#endif
|
||||
|
||||
/* Extra diagnostics to disable when native aliases are enabled: the
 * reserved-identifier macro warning in that case, nothing otherwise. */
#ifndef SIMDE_ENABLE_NATIVE_ALIASES
#  define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_
#else
#  define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \
	SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_
#endif
|
||||
|
||||
#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \
|
||||
SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \
|
||||
SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \
|
||||
SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \
|
||||
|
@ -264,7 +439,9 @@
|
|||
SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \
|
||||
SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ \
|
||||
SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \
|
||||
SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC \
|
||||
SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE
|
||||
SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \
|
||||
SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \
|
||||
SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \
|
||||
SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_
|
||||
|
||||
#endif
|
||||
#endif /* !defined(SIMDE_DIAGNOSTIC_H) */
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
#define SIMDE_FEATURES_H
|
||||
|
||||
#include "simde-arch.h"
|
||||
#include "simde-diagnostic.h"
|
||||
|
||||
#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && \
|
||||
!defined(SIMDE_NO_NATIVE)
|
||||
|
@ -43,6 +44,28 @@
|
|||
#define SIMDE_X86_AVX512F_NATIVE
|
||||
#endif
|
||||
|
||||
/* Mark AVX-512 VP2INTERSECT as native when the architecture macro is set,
 * unless it is already marked or the user opted out (per extension or via
 * SIMDE_NO_NATIVE). */
#if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) && \
	!defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && \
	!defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && \
	!defined(SIMDE_NO_NATIVE)
#  define SIMDE_X86_AVX512VP2INTERSECT_NATIVE
#endif
/* Whenever VP2INTERSECT is native, AVX-512F is marked native as well. */
#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && \
	!defined(SIMDE_X86_AVX512F_NATIVE)
#  define SIMDE_X86_AVX512F_NATIVE
#endif
|
||||
|
||||
/* Mark AVX-512 VBMI as native when the architecture macro is set, unless
 * it is already marked or the user opted out (per extension or via
 * SIMDE_NO_NATIVE). */
#if defined(SIMDE_ARCH_X86_AVX512VBMI) && \
	!defined(SIMDE_X86_AVX512VBMI_NATIVE) && \
	!defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
#  define SIMDE_X86_AVX512VBMI_NATIVE
#endif
/* Whenever VBMI is native, AVX-512F is marked native as well. */
#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)
#  define SIMDE_X86_AVX512F_NATIVE
#endif
|
||||
|
||||
#if !defined(SIMDE_X86_AVX512CD_NATIVE) && \
|
||||
!defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
|
||||
#if defined(SIMDE_ARCH_X86_AVX512CD)
|
||||
|
@ -194,6 +217,20 @@
|
|||
#endif
|
||||
#endif
|
||||
|
||||
/* Mark PCLMUL as native when the architecture macro is set, unless it is
 * already marked or the user opted out (per extension or via
 * SIMDE_NO_NATIVE). */
#if defined(SIMDE_ARCH_X86_PCLMUL) && !defined(SIMDE_X86_PCLMUL_NATIVE) && \
	!defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
#  define SIMDE_X86_PCLMUL_NATIVE
#endif
|
||||
|
||||
/* Mark VPCLMULQDQ as native when the architecture macro is set, unless it
 * is already marked or the user opted out (per extension or via
 * SIMDE_NO_NATIVE). */
#if defined(SIMDE_ARCH_X86_VPCLMULQDQ) && \
	!defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && \
	!defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
#  define SIMDE_X86_VPCLMULQDQ_NATIVE
#endif
|
||||
|
||||
#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && \
|
||||
!defined(SIMDE_NO_NATIVE)
|
||||
#if defined(__INTEL_COMPILER)
|
||||
|
@ -206,8 +243,7 @@
|
|||
#pragma warning(disable : 4799)
|
||||
#endif
|
||||
|
||||
#if defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) || \
|
||||
defined(SIMDE_X86_SVML_NATIVE)
|
||||
#if defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE)
|
||||
#include <immintrin.h>
|
||||
#elif defined(SIMDE_X86_SSE4_2_NATIVE)
|
||||
#include <nmmintrin.h>
|
||||
|
@ -243,7 +279,8 @@
|
|||
|
||||
#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \
|
||||
!defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
|
||||
#if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(80)
|
||||
#if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(80) && \
|
||||
(__ARM_NEON_FP & 0x02)
|
||||
#define SIMDE_ARM_NEON_A32V8_NATIVE
|
||||
#endif
|
||||
#endif
|
||||
|
@ -262,6 +299,14 @@
|
|||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
/* Mark ARM SVE as native and pull in <arm_sve.h> when the architecture
 * macro is set, unless SVE is already marked native or the user opted out
 * (per extension or via SIMDE_NO_NATIVE). */
#if defined(SIMDE_ARCH_ARM_SVE) && !defined(SIMDE_ARM_SVE_NATIVE) && \
	!defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
#  define SIMDE_ARM_SVE_NATIVE
#  include <arm_sve.h>
#endif
|
||||
|
||||
#if !defined(SIMDE_WASM_SIMD128_NATIVE) && \
|
||||
!defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
|
||||
#if defined(SIMDE_ARCH_WASM_SIMD128)
|
||||
|
@ -270,7 +315,10 @@
|
|||
#endif
|
||||
/* With native WASM SIMD128, define __wasm_unimplemented_simd128__ (if it
 * is not defined yet) before including <wasm_simd128.h>.  The #define is
 * wrapped in a diagnostic push/pop that disables the reserved-identifier
 * macro warning. */
#ifdef SIMDE_WASM_SIMD128_NATIVE
#  ifndef __wasm_unimplemented_simd128__
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_
#    define __wasm_unimplemented_simd128__
HEDLEY_DIAGNOSTIC_POP
#  endif
#  include <wasm_simd128.h>
#endif
|
||||
|
@ -326,15 +374,28 @@
|
|||
#define SIMDE_POWER_ALTIVEC_P5_NATIVE
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_POWER_ALTIVEC_P5_NATIVE)
|
||||
/* stdbool.h conflicts with the bool in altivec.h */
|
||||
#if defined(bool) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF_BOOL_)
|
||||
|
||||
#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
|
||||
/* AltiVec conflicts with lots of stuff. The bool keyword conflicts
|
||||
* with the bool keyword in C++ and the bool macro in C99+ (defined
|
||||
* in stdbool.h). The vector keyword conflicts with std::vector in
|
||||
* C++ if you are `using std;`.
|
||||
*
|
||||
* Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel`
|
||||
* instead, but altivec.h will unconditionally define
|
||||
* `vector`/`bool`/`pixel` so we need to work around that.
|
||||
*
|
||||
* Unfortunately this means that if your code uses AltiVec directly
|
||||
* it may break. If this is the case you'll want to define
|
||||
* `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even
|
||||
* better, port your code to use the double-underscore versions. */
|
||||
#if defined(bool)
|
||||
#undef bool
|
||||
#endif
|
||||
|
||||
#include <altivec.h>
|
||||
/* GCC allows you to undefine these macros to prevent conflicts with
|
||||
* standard types as they become context-sensitive keywords. */
|
||||
#if defined(__cplusplus)
|
||||
|
||||
#if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF)
|
||||
#if defined(vector)
|
||||
#undef vector
|
||||
#endif
|
||||
|
@ -344,14 +405,146 @@
|
|||
#if defined(bool)
|
||||
#undef bool
|
||||
#endif
|
||||
#define SIMDE_POWER_ALTIVEC_VECTOR(T) vector T
|
||||
#define SIMDE_POWER_ALTIVEC_PIXEL pixel
|
||||
#define SIMDE_POWER_ALTIVEC_BOOL bool
|
||||
#else
|
||||
#endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */
|
||||
|
||||
/* Use these instead of vector/pixel/bool in SIMDe. */
|
||||
#define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T
|
||||
#define SIMDE_POWER_ALTIVEC_PIXEL __pixel
|
||||
#define SIMDE_POWER_ALTIVEC_BOOL __bool
|
||||
#endif /* defined(__cplusplus) */
|
||||
|
||||
/* Re-define bool if we're using stdbool.h */
|
||||
#if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && \
|
||||
!defined(SIMDE_POWER_ALTIVEC_NO_UNDEF)
|
||||
#define bool _Bool
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && \
|
||||
!defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && \
|
||||
!defined(SIMDE_NO_NATIVE)
|
||||
#if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI)
|
||||
#define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1
|
||||
#endif
|
||||
#endif
|
||||
#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
#include <loongson-mmiintrin.h>
|
||||
#endif
|
||||
|
||||
/* This is used to determine whether or not to fall back on a vector
|
||||
* function in an earlier ISA extension, as well as whether
|
||||
* we expected any attempts at vectorization to be fruitful or if we
|
||||
* expect to always be running serial code. */
|
||||
|
||||
#if !defined(SIMDE_NATURAL_VECTOR_SIZE)
|
||||
#if defined(SIMDE_X86_AVX512F_NATIVE)
|
||||
#define SIMDE_NATURAL_VECTOR_SIZE (512)
|
||||
#elif defined(SIMDE_X86_AVX_NATIVE)
|
||||
#define SIMDE_NATURAL_VECTOR_SIZE (256)
|
||||
#elif defined(SIMDE_X86_SSE_NATIVE) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \
|
||||
defined(SIMDE_WASM_SIMD128_NATIVE) || \
|
||||
defined(SIMDE_POWER_ALTIVEC_P5_NATIVE)
|
||||
#define SIMDE_NATURAL_VECTOR_SIZE (128)
|
||||
#endif
|
||||
|
||||
#if !defined(SIMDE_NATURAL_VECTOR_SIZE)
|
||||
#define SIMDE_NATURAL_VECTOR_SIZE (0)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) \
|
||||
((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x)))
|
||||
#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) \
|
||||
((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x)))
|
||||
|
||||
/* Native aliases */
|
||||
#if defined(SIMDE_ENABLE_NATIVE_ALIASES)
|
||||
#if !defined(SIMDE_X86_MMX_NATIVE)
|
||||
#define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#if !defined(SIMDE_X86_SSE_NATIVE)
|
||||
#define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#if !defined(SIMDE_X86_SSE2_NATIVE)
|
||||
#define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#if !defined(SIMDE_X86_SSE3_NATIVE)
|
||||
#define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#if !defined(SIMDE_X86_SSSE3_NATIVE)
|
||||
#define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#if !defined(SIMDE_X86_SSE4_1_NATIVE)
|
||||
#define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#if !defined(SIMDE_X86_SSE4_2_NATIVE)
|
||||
#define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#if !defined(SIMDE_X86_AVX_NATIVE)
|
||||
#define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#if !defined(SIMDE_X86_AVX2_NATIVE)
|
||||
#define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#if !defined(SIMDE_X86_FMA_NATIVE)
|
||||
#define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#if !defined(SIMDE_X86_AVX512F_NATIVE)
|
||||
#define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#if !defined(SIMDE_X86_AVX512VL_NATIVE)
|
||||
#define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#if !defined(SIMDE_X86_AVX512BW_NATIVE)
|
||||
#define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#if !defined(SIMDE_X86_AVX512DQ_NATIVE)
|
||||
#define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#if !defined(SIMDE_X86_AVX512CD_NATIVE)
|
||||
#define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#if !defined(SIMDE_X86_GFNI_NATIVE)
|
||||
#define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#if !defined(SIMDE_X86_PCLMUL_NATIVE)
|
||||
#define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE)
|
||||
#define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
|
||||
#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE)
|
||||
#define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
#define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Are floating point values stored using IEEE 754?  Knowing
 * this during preprocessing is a bit tricky, mostly because what
 * we're curious about is how values are stored and not whether the
 * implementation is fully conformant in terms of rounding, NaN
 * handling, etc.
 *
 * For example, if you use -ffast-math or -Ofast on
 * GCC or clang IEEE 754 isn't strictly followed, therefore IEEE 754
 * support is not advertised (by defining __STDC_IEC_559__).
 *
 * However, what we care about is whether it is safe to assume that
 * floating point values are stored in IEEE 754 format, in which case
 * we can provide faster implementations of some functions.
 *
 * Luckily every vaguely modern architecture I'm aware of uses IEEE 754,
 * so we just assume IEEE 754 for now.  There is a test which verifies
 * this; if that test fails somewhere please let us know and we'll add
 * an exception for that platform.  Meanwhile, you can define
 * SIMDE_NO_IEEE754_STORAGE.
 *
 * The misspelled SIMDE_NO_IEE754_STORAGE is honored as well, so that
 * builds which relied on the name this guard originally tested keep
 * their behavior. */
#if !defined(SIMDE_IEEE754_STORAGE) && \
	!defined(SIMDE_NO_IEEE754_STORAGE) && \
	!defined(SIMDE_NO_IEE754_STORAGE)
#define SIMDE_IEEE754_STORAGE
#endif
|
||||
|
||||
#endif /* !defined(SIMDE_FEATURES_H) */
|
||||
|
|
|
@ -34,6 +34,58 @@
|
|||
#include "hedley.h"
|
||||
#include "simde-features.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
|
||||
/* SLEEF support
|
||||
* https://sleef.org/
|
||||
*
|
||||
* If you include <sleef.h> prior to including SIMDe, SIMDe will use
|
||||
* SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to
|
||||
* including SIMDe to force the issue.
|
||||
*
|
||||
* Note that SLEEF does require linking to libsleef.
|
||||
*
|
||||
* By default, SIMDe will use the 1 ULP functions, but if you use
|
||||
* SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. This is
|
||||
* only the case for the simde_math_* functions; for code in other
|
||||
* SIMDe headers which calls SLEEF directly we may use functions with
|
||||
* greater error if the API we're implementing is less precise (for
|
||||
* example, SVML guarantees 4 ULP, so we will generally use the 3.5
|
||||
* ULP functions from SLEEF). */
|
||||
#if !defined(SIMDE_MATH_SLEEF_DISABLE)
|
||||
#if defined(__SLEEF_H__)
|
||||
#define SIMDE_MATH_SLEEF_ENABLE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__)
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_
|
||||
#include <sleef.h>
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
#endif
|
||||
|
||||
#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__)
|
||||
#if defined(SLEEF_VERSION_MAJOR)
|
||||
#define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) \
|
||||
(HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, \
|
||||
SLEEF_VERSION_PATCHLEVEL) >= \
|
||||
HEDLEY_VERSION_ENCODE(major, minor, patch))
|
||||
#else
|
||||
#define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) \
|
||||
(HEDLEY_VERSION_ENCODE(3, 0, 0) >= \
|
||||
HEDLEY_VERSION_ENCODE(major, minor, patch))
|
||||
#endif
|
||||
#else
|
||||
#define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0)
|
||||
#endif
|
||||
|
||||
#if defined(__has_builtin)
|
||||
#define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func)
|
||||
#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
|
||||
|
@ -82,11 +134,35 @@ HEDLEY_DIAGNOSTIC_POP
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(__cplusplus)
|
||||
/* If this is a problem we *might* be able to avoid including
|
||||
* <complex.h> on some compilers (gcc, clang, and others which
|
||||
* implement builtins like __builtin_cexpf). If you don't have
|
||||
* a <complex.h> please file an issue and we'll take a look. */
|
||||
/* Try to avoid including <complex> since it pulls in a *lot* of code. */
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_creal) || \
|
||||
HEDLEY_GCC_VERSION_CHECK(4, 7, 0) || \
|
||||
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_
|
||||
typedef __complex__ float simde_cfloat32;
|
||||
typedef __complex__ double simde_cfloat64;
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
#define SIMDE_MATH_CMPLX(x, y) \
|
||||
(HEDLEY_STATIC_CAST(double, x) + \
|
||||
HEDLEY_STATIC_CAST(double, y) * (__extension__ 1.0j))
|
||||
#define SIMDE_MATH_CMPLXF(x, y) \
|
||||
(HEDLEY_STATIC_CAST(float, x) + \
|
||||
HEDLEY_STATIC_CAST(float, y) * (__extension__ 1.0fj))
|
||||
|
||||
/* Real / imaginary part accessors implemented with compiler builtins.
 *
 * Each macro is defined only when a macro of that SAME name has not
 * already been supplied, so overriding one accessor (for example
 * simde_math_creal) neither suppresses its float sibling nor gets
 * redefined here. */
#if !defined(simde_math_creal)
#define simde_math_creal(z) __builtin_creal(z)
#endif
#if !defined(simde_math_crealf)
#define simde_math_crealf(z) __builtin_crealf(z)
#endif
#if !defined(simde_math_cimag)
#define simde_math_cimag(z) __builtin_cimag(z)
#endif
#if !defined(simde_math_cimagf)
#define simde_math_cimagf(z) __builtin_cimagf(z)
#endif
|
||||
#elif !defined(__cplusplus)
|
||||
#include <complex.h>
|
||||
|
||||
#if !defined(HEDLEY_MSVC_VERSION)
|
||||
|
@ -96,20 +172,14 @@ typedef double _Complex simde_cfloat64;
|
|||
typedef _Fcomplex simde_cfloat32;
|
||||
typedef _Dcomplex simde_cfloat64;
|
||||
#endif
|
||||
#if HEDLEY_HAS_BUILTIN(__builtin_complex) || \
|
||||
HEDLEY_GCC_VERSION_CHECK(4, 7, 0) || \
|
||||
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
|
||||
#define SIMDE_MATH_CMPLX(x, y) __builtin_complex((double)(x), (double)(y))
|
||||
#define SIMDE_MATH_CMPLXF(x, y) __builtin_complex((float)(x), (float)(y))
|
||||
#elif defined(HEDLEY_MSVC_VERSION)
|
||||
|
||||
#if defined(HEDLEY_MSVC_VERSION)
|
||||
#define SIMDE_MATH_CMPLX(x, y) ((simde_cfloat64){(x), (y)})
|
||||
#define SIMDE_MATH_CMPLXF(x, y) ((simde_cfloat32){(x), (y)})
|
||||
#elif defined(CMPLX) && defined(CMPLXF)
|
||||
#define SIMDE_MATH_CMPLX(x, y) CMPLX(x, y)
|
||||
#define SIMDE_MATH_CMPLXF(x, y) CMPLXF(x, y)
|
||||
#else
|
||||
/* CMPLX / CMPLXF are in C99, but these seem to be necessary in
|
||||
* some compilers that aren't even MSVC. */
|
||||
#define SIMDE_MATH_CMPLX(x, y) \
|
||||
(HEDLEY_STATIC_CAST(double, x) + HEDLEY_STATIC_CAST(double, y) * I)
|
||||
#define SIMDE_MATH_CMPLXF(x, y) \
|
||||
|
@ -117,38 +187,18 @@ typedef _Dcomplex simde_cfloat64;
|
|||
#endif
|
||||
|
||||
#if !defined(simde_math_creal)
|
||||
#if SIMDE_MATH_BUILTIN_LIBM(creal)
|
||||
#define simde_math_creal(z) __builtin_creal(z)
|
||||
#else
|
||||
#define simde_math_creal(z) creal(z)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(simde_math_crealf)
|
||||
#if SIMDE_MATH_BUILTIN_LIBM(crealf)
|
||||
#define simde_math_crealf(z) __builtin_crealf(z)
|
||||
#else
|
||||
#define simde_math_crealf(z) crealf(z)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(simde_math_cimag)
|
||||
#if SIMDE_MATH_BUILTIN_LIBM(cimag)
|
||||
#define simde_math_cimag(z) __builtin_cimag(z)
|
||||
#else
|
||||
#define simde_math_cimag(z) cimag(z)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(simde_math_cimagf)
|
||||
#if SIMDE_MATH_BUILTIN_LIBM(cimagf)
|
||||
#define simde_math_cimagf(z) __builtin_cimagf(z)
|
||||
#else
|
||||
#define simde_math_cimagf(z) cimagf(z)
|
||||
#endif
|
||||
#endif
|
||||
#else
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
#if defined(HEDLEY_MSVC_VERSION)
|
||||
#pragma warning(disable : 4530)
|
||||
|
@ -240,6 +290,26 @@ typedef std::complex<double> simde_cfloat64;
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(SIMDE_MATH_PI_OVER_180)
|
||||
#define SIMDE_MATH_PI_OVER_180 \
|
||||
0.0174532925199432957692369076848861271344287188854172545609719144
|
||||
#endif
|
||||
|
||||
#if !defined(SIMDE_MATH_PI_OVER_180F)
|
||||
#define SIMDE_MATH_PI_OVER_180F \
|
||||
0.0174532925199432957692369076848861271344287188854172545609719144f
|
||||
#endif
|
||||
|
||||
#if !defined(SIMDE_MATH_180_OVER_PI)
|
||||
#define SIMDE_MATH_180_OVER_PI \
|
||||
57.295779513082320876798154814105170332405472466564321549160243861
|
||||
#endif
|
||||
|
||||
#if !defined(SIMDE_MATH_180_OVER_PIF)
|
||||
#define SIMDE_MATH_180_OVER_PIF \
|
||||
57.295779513082320876798154814105170332405472466564321549160243861f
|
||||
#endif
|
||||
|
||||
#if !defined(SIMDE_MATH_FLT_MIN)
|
||||
#if defined(FLT_MIN)
|
||||
#define SIMDE_MATH_FLT_MIN FLT_MIN
|
||||
|
@ -341,6 +411,36 @@ typedef std::complex<double> simde_cfloat64;
|
|||
#endif
|
||||
#endif
|
||||
|
||||
/*** Manipulation functions ***/
|
||||
|
||||
#if !defined(simde_math_nextafter)
|
||||
#if (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && \
|
||||
!defined(HEDLEY_IBM_VERSION)) || \
|
||||
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
|
||||
HEDLEY_GCC_VERSION_CHECK(3, 4, 0) || \
|
||||
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
|
||||
#define simde_math_nextafter(x, y) __builtin_nextafter(x, y)
|
||||
#elif defined(SIMDE_MATH_HAVE_CMATH)
|
||||
#define simde_math_nextafter(x, y) std::nextafter(x, y)
|
||||
#elif defined(SIMDE_MATH_HAVE_MATH_H)
|
||||
#define simde_math_nextafter(x, y) nextafter(x, y)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(simde_math_nextafterf)
|
||||
#if (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && \
|
||||
!defined(HEDLEY_IBM_VERSION)) || \
|
||||
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
|
||||
HEDLEY_GCC_VERSION_CHECK(3, 4, 0) || \
|
||||
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
|
||||
#define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y)
|
||||
#elif defined(SIMDE_MATH_HAVE_CMATH)
|
||||
#define simde_math_nextafterf(x, y) std::nextafter(x, y)
|
||||
#elif defined(SIMDE_MATH_HAVE_MATH_H)
|
||||
#define simde_math_nextafterf(x, y) nextafterf(x, y)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*** Functions from C99 ***/
|
||||
|
||||
#if !defined(simde_math_abs)
|
||||
|
@ -353,13 +453,13 @@ typedef std::complex<double> simde_cfloat64;
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(simde_math_absf)
|
||||
#if SIMDE_MATH_BUILTIN_LIBM(absf)
|
||||
#define simde_math_absf(v) __builtin_absf(v)
|
||||
#if !defined(simde_math_fabsf)
|
||||
#if SIMDE_MATH_BUILTIN_LIBM(fabsf)
|
||||
#define simde_math_fabsf(v) __builtin_fabsf(v)
|
||||
#elif defined(SIMDE_MATH_HAVE_CMATH)
|
||||
#define simde_math_absf(v) std::abs(v)
|
||||
#define simde_math_fabsf(v) std::abs(v)
|
||||
#elif defined(SIMDE_MATH_HAVE_MATH_H)
|
||||
#define simde_math_absf(v) absf(v)
|
||||
#define simde_math_fabsf(v) fabsf(v)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -574,7 +674,13 @@ typedef std::complex<double> simde_cfloat64;
|
|||
#endif
|
||||
|
||||
#if !defined(simde_math_cosf)
|
||||
#if SIMDE_MATH_BUILTIN_LIBM(cosf)
|
||||
#if defined(SIMDE_MATH_SLEEF_ENABLE)
|
||||
#if SIMDE_ACCURACY_PREFERENCE < 1
|
||||
#define simde_math_cosf(v) Sleef_cosf_u35(v)
|
||||
#else
|
||||
#define simde_math_cosf(v) Sleef_cosf_u10(v)
|
||||
#endif
|
||||
#elif SIMDE_MATH_BUILTIN_LIBM(cosf)
|
||||
#define simde_math_cosf(v) __builtin_cosf(v)
|
||||
#elif defined(SIMDE_MATH_HAVE_CMATH)
|
||||
#define simde_math_cosf(v) std::cos(v)
|
||||
|
@ -755,6 +861,46 @@ typedef std::complex<double> simde_cfloat64;
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(simde_math_fma)
|
||||
#if SIMDE_MATH_BUILTIN_LIBM(fma)
|
||||
#define simde_math_fma(x, y, z) __builtin_fma(x, y, z)
|
||||
#elif defined(SIMDE_MATH_HAVE_CMATH)
|
||||
#define simde_math_fma(x, y, z) std::fma(x, y, z)
|
||||
#elif defined(SIMDE_MATH_HAVE_MATH_H)
|
||||
#define simde_math_fma(x, y, z) fma(x, y, z)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(simde_math_fmaf)
|
||||
#if SIMDE_MATH_BUILTIN_LIBM(fmaf)
|
||||
#define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z)
|
||||
#elif defined(SIMDE_MATH_HAVE_CMATH)
|
||||
#define simde_math_fmaf(x, y, z) std::fma(x, y, z)
|
||||
#elif defined(SIMDE_MATH_HAVE_MATH_H)
|
||||
#define simde_math_fmaf(x, y, z) fmaf(x, y, z)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* simde_math_fmax(x, y): forwards to the double-precision fmax of the
 * first available provider (compiler builtin, <cmath>, then <math.h>).
 * fmax takes exactly two operands; the macro is left undefined when no
 * provider is available or when the user already defined it. */
#if !defined(simde_math_fmax)
#if SIMDE_MATH_BUILTIN_LIBM(fmax)
#define simde_math_fmax(x, y) __builtin_fmax(x, y)
#elif defined(SIMDE_MATH_HAVE_CMATH)
#define simde_math_fmax(x, y) std::fmax(x, y)
#elif defined(SIMDE_MATH_HAVE_MATH_H)
#define simde_math_fmax(x, y) fmax(x, y)
#endif
#endif

/* simde_math_fmaxf(x, y): single-precision counterpart of
 * simde_math_fmax, selected with the same provider order. */
#if !defined(simde_math_fmaxf)
#if SIMDE_MATH_BUILTIN_LIBM(fmaxf)
#define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y)
#elif defined(SIMDE_MATH_HAVE_CMATH)
#define simde_math_fmaxf(x, y) std::fmax(x, y)
#elif defined(SIMDE_MATH_HAVE_MATH_H)
#define simde_math_fmaxf(x, y) fmaxf(x, y)
#endif
#endif
|
||||
|
||||
#if !defined(simde_math_hypot)
|
||||
#if SIMDE_MATH_BUILTIN_LIBM(hypot)
|
||||
#define simde_math_hypot(y, x) __builtin_hypot(y, x)
|
||||
|
@ -875,6 +1021,26 @@ typedef std::complex<double> simde_cfloat64;
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(simde_math_modf)
|
||||
#if SIMDE_MATH_BUILTIN_LIBM(modf)
|
||||
#define simde_math_modf(x, iptr) __builtin_modf(x, iptr)
|
||||
#elif defined(SIMDE_MATH_HAVE_CMATH)
|
||||
#define simde_math_modf(x, iptr) std::modf(x, iptr)
|
||||
#elif defined(SIMDE_MATH_HAVE_MATH_H)
|
||||
#define simde_math_modf(x, iptr) modf(x, iptr)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(simde_math_modff)
|
||||
#if SIMDE_MATH_BUILTIN_LIBM(modff)
|
||||
#define simde_math_modff(x, iptr) __builtin_modff(x, iptr)
|
||||
#elif defined(SIMDE_MATH_HAVE_CMATH)
|
||||
#define simde_math_modff(x, iptr) std::modf(x, iptr)
|
||||
#elif defined(SIMDE_MATH_HAVE_MATH_H)
|
||||
#define simde_math_modff(x, iptr) modff(x, iptr)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(simde_math_nearbyint)
|
||||
#if SIMDE_MATH_BUILTIN_LIBM(nearbyint)
|
||||
#define simde_math_nearbyint(v) __builtin_nearbyint(v)
|
||||
|
@ -955,6 +1121,44 @@ typedef std::complex<double> simde_cfloat64;
|
|||
#endif
|
||||
#endif
|
||||
|
||||
/* simde_math_roundeven(v): round a double to the nearest integer value,
 * resolving exact halfway cases toward the even neighbour.  Uses the
 * compiler builtin when it exists; otherwise falls back to a helper
 * built on simde_math_round and simde_math_fabs (when both are
 * available). */
#if !defined(simde_math_roundeven)
#if HEDLEY_HAS_BUILTIN(__builtin_roundeven) || \
	HEDLEY_GCC_VERSION_CHECK(10, 0, 0)
#define simde_math_roundeven(v) __builtin_roundeven(v)
#elif defined(simde_math_round) && defined(simde_math_fabs)
static HEDLEY_INLINE double simde_math_roundeven(double v)
{
	const double nearest = simde_math_round(v);
	const double delta = nearest - v;

	/* Only an exact tie whose rounded result is odd needs fixing:
	 * step back across v to the neighbour on the other side. */
	if (HEDLEY_UNLIKELY(simde_math_fabs(delta) == 0.5) &&
	    (HEDLEY_STATIC_CAST(int64_t, nearest) & 1)) {
		return v - delta;
	}

	return nearest;
}
#define simde_math_roundeven simde_math_roundeven
#endif
#endif
|
||||
|
||||
/* simde_math_roundevenf(v): single-precision round-to-nearest with
 * exact halfway cases resolved toward the even neighbour.  Uses the
 * compiler builtin when it exists; otherwise falls back to a helper
 * built on simde_math_roundf and simde_math_fabsf (when both are
 * available). */
#if !defined(simde_math_roundevenf)
#if HEDLEY_HAS_BUILTIN(__builtin_roundevenf) || \
	HEDLEY_GCC_VERSION_CHECK(10, 0, 0)
#define simde_math_roundevenf(v) __builtin_roundevenf(v)
#elif defined(simde_math_roundf) && defined(simde_math_fabsf)
static HEDLEY_INLINE float simde_math_roundevenf(float v)
{
	const float nearest = simde_math_roundf(v);
	const float delta = nearest - v;

	/* Only an exact tie whose rounded result is odd needs fixing:
	 * step back across v to the neighbour on the other side. */
	if (HEDLEY_UNLIKELY(simde_math_fabsf(delta) == 0.5f) &&
	    (HEDLEY_STATIC_CAST(int32_t, nearest) & 1)) {
		return v - delta;
	}

	return nearest;
}
#define simde_math_roundevenf simde_math_roundevenf
#endif
#endif
|
||||
|
||||
#if !defined(simde_math_sin)
|
||||
#if SIMDE_MATH_BUILTIN_LIBM(sin)
|
||||
#define simde_math_sin(v) __builtin_sin(v)
|
||||
|
@ -1078,20 +1282,20 @@ typedef std::complex<double> simde_cfloat64;
|
|||
/*** Complex functions ***/
|
||||
|
||||
#if !defined(simde_math_cexp)
|
||||
#if defined(__cplusplus)
|
||||
#define simde_math_cexp(v) std::cexp(v)
|
||||
#elif SIMDE_MATH_BUILTIN_LIBM(cexp)
|
||||
#if SIMDE_MATH_BUILTIN_LIBM(cexp)
|
||||
#define simde_math_cexp(v) __builtin_cexp(v)
|
||||
#elif defined(__cplusplus)
|
||||
#define simde_math_cexp(v) std::cexp(v)
|
||||
#elif defined(SIMDE_MATH_HAVE_MATH_H)
|
||||
#define simde_math_cexp(v) cexp(v)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(simde_math_cexpf)
|
||||
#if defined(__cplusplus)
|
||||
#define simde_math_cexpf(v) std::exp(v)
|
||||
#elif SIMDE_MATH_BUILTIN_LIBM(cexpf)
|
||||
#if SIMDE_MATH_BUILTIN_LIBM(cexpf)
|
||||
#define simde_math_cexpf(v) __builtin_cexpf(v)
|
||||
#elif defined(__cplusplus)
|
||||
#define simde_math_cexpf(v) std::exp(v)
|
||||
#elif defined(SIMDE_MATH_HAVE_MATH_H)
|
||||
#define simde_math_cexpf(v) cexpf(v)
|
||||
#endif
|
||||
|
@ -1393,22 +1597,262 @@ HEDLEY_DIAGNOSTIC_POP
|
|||
|
||||
static HEDLEY_INLINE double simde_math_rad2deg(double radians)
|
||||
{
|
||||
return radians * (180.0 / SIMDE_MATH_PI);
|
||||
return radians * SIMDE_MATH_180_OVER_PI;
|
||||
}
|
||||
|
||||
static HEDLEY_INLINE float simde_math_rad2degf(float radians)
|
||||
{
|
||||
return radians * (180.0f / SIMDE_MATH_PIF);
|
||||
return radians * SIMDE_MATH_180_OVER_PIF;
|
||||
}
|
||||
|
||||
static HEDLEY_INLINE double simde_math_deg2rad(double degrees)
|
||||
{
|
||||
return degrees * (SIMDE_MATH_PI / 180.0);
|
||||
return degrees * SIMDE_MATH_PI_OVER_180;
|
||||
}
|
||||
|
||||
static HEDLEY_INLINE float simde_math_deg2radf(float degrees)
|
||||
{
|
||||
return degrees * (SIMDE_MATH_PIF / 180.0f);
|
||||
return degrees * (SIMDE_MATH_PI_OVER_180F);
|
||||
}
|
||||
|
||||
/*** Saturated arithmetic ***/
|
||||
|
||||
/* Signed 8-bit addition that saturates instead of wrapping. */
static HEDLEY_INLINE int8_t simde_math_adds_i8(int8_t a, int8_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqaddb_s8(a, b);
#else
	uint8_t ua = HEDLEY_STATIC_CAST(uint8_t, a);
	uint8_t ub = HEDLEY_STATIC_CAST(uint8_t, b);
	uint8_t sum = ua + ub;

	/* Value to clamp to: INT8_MAX when a's sign bit is clear,
	 * INT8_MAX + 1 (the INT8_MIN bit pattern) when it is set. */
	uint8_t limit = (ua >> ((8 * sizeof(sum)) - 1)) + INT8_MAX;

	/* The sign bit of this expression is clear exactly when the
	 * addition overflowed. */
	if (HEDLEY_STATIC_CAST(int8_t, ((limit ^ ub) | ~(ub ^ sum))) >= 0) {
		sum = limit;
	}

	return HEDLEY_STATIC_CAST(int8_t, sum);
#endif
}
|
||||
|
||||
/* Signed 16-bit addition that saturates instead of wrapping. */
static HEDLEY_INLINE int16_t simde_math_adds_i16(int16_t a, int16_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqaddh_s16(a, b);
#else
	uint16_t ua = HEDLEY_STATIC_CAST(uint16_t, a);
	uint16_t ub = HEDLEY_STATIC_CAST(uint16_t, b);
	uint16_t sum = ua + ub;

	/* Value to clamp to: INT16_MAX when a's sign bit is clear,
	 * INT16_MAX + 1 (the INT16_MIN bit pattern) when it is set. */
	uint16_t limit = (ua >> ((8 * sizeof(sum)) - 1)) + INT16_MAX;

	/* The sign bit of this expression is clear exactly when the
	 * addition overflowed. */
	if (HEDLEY_STATIC_CAST(int16_t, ((limit ^ ub) | ~(ub ^ sum))) >= 0) {
		sum = limit;
	}

	return HEDLEY_STATIC_CAST(int16_t, sum);
#endif
}
|
||||
|
||||
/* Signed 32-bit addition that saturates instead of wrapping. */
static HEDLEY_INLINE int32_t simde_math_adds_i32(int32_t a, int32_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqadds_s32(a, b);
#else
	uint32_t ua = HEDLEY_STATIC_CAST(uint32_t, a);
	uint32_t ub = HEDLEY_STATIC_CAST(uint32_t, b);
	uint32_t sum = ua + ub;

	/* Value to clamp to: INT32_MAX when a's sign bit is clear,
	 * INT32_MAX + 1 (the INT32_MIN bit pattern) when it is set. */
	uint32_t limit = (ua >> ((8 * sizeof(sum)) - 1)) + INT32_MAX;

	/* The sign bit of this expression is clear exactly when the
	 * addition overflowed. */
	if (HEDLEY_STATIC_CAST(int32_t, ((limit ^ ub) | ~(ub ^ sum))) >= 0) {
		sum = limit;
	}

	return HEDLEY_STATIC_CAST(int32_t, sum);
#endif
}
|
||||
|
||||
/* Signed 64-bit addition that saturates instead of wrapping. */
static HEDLEY_INLINE int64_t simde_math_adds_i64(int64_t a, int64_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqaddd_s64(a, b);
#else
	uint64_t ua = HEDLEY_STATIC_CAST(uint64_t, a);
	uint64_t ub = HEDLEY_STATIC_CAST(uint64_t, b);
	uint64_t sum = ua + ub;

	/* Value to clamp to: INT64_MAX when a's sign bit is clear,
	 * INT64_MAX + 1 (the INT64_MIN bit pattern) when it is set. */
	uint64_t limit = (ua >> ((8 * sizeof(sum)) - 1)) + INT64_MAX;

	/* The sign bit of this expression is clear exactly when the
	 * addition overflowed. */
	if (HEDLEY_STATIC_CAST(int64_t, ((limit ^ ub) | ~(ub ^ sum))) >= 0) {
		sum = limit;
	}

	return HEDLEY_STATIC_CAST(int64_t, sum);
#endif
}
|
||||
|
||||
/* Unsigned 8-bit addition that clamps to UINT8_MAX instead of wrapping. */
static HEDLEY_INLINE uint8_t simde_math_adds_u8(uint8_t a, uint8_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqaddb_u8(a, b);
#else
	uint8_t sum = a + b;

	/* A truncated sum smaller than an operand means it wrapped. */
	return (sum < a) ? UINT8_MAX : sum;
#endif
}
|
||||
|
||||
/* Unsigned 16-bit addition that clamps to UINT16_MAX instead of wrapping. */
static HEDLEY_INLINE uint16_t simde_math_adds_u16(uint16_t a, uint16_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqaddh_u16(a, b);
#else
	uint16_t sum = a + b;

	/* A truncated sum smaller than an operand means it wrapped. */
	return (sum < a) ? UINT16_MAX : sum;
#endif
}
|
||||
|
||||
/* Unsigned 32-bit addition that clamps to UINT32_MAX instead of wrapping. */
static HEDLEY_INLINE uint32_t simde_math_adds_u32(uint32_t a, uint32_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqadds_u32(a, b);
#else
	uint32_t sum = a + b;

	/* A sum smaller than an operand means it wrapped. */
	return (sum < a) ? UINT32_MAX : sum;
#endif
}
|
||||
|
||||
/* Unsigned 64-bit addition that clamps to UINT64_MAX instead of wrapping. */
static HEDLEY_INLINE uint64_t simde_math_adds_u64(uint64_t a, uint64_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqaddd_u64(a, b);
#else
	uint64_t sum = a + b;

	/* A sum smaller than an operand means it wrapped. */
	return (sum < a) ? UINT64_MAX : sum;
#endif
}
|
||||
|
||||
/* Signed 8-bit subtraction (a - b) that saturates instead of wrapping. */
static HEDLEY_INLINE int8_t simde_math_subs_i8(int8_t a, int8_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqsubb_s8(a, b);
#else
	uint8_t ua = HEDLEY_STATIC_CAST(uint8_t, a);
	uint8_t ub = HEDLEY_STATIC_CAST(uint8_t, b);
	uint8_t diff = ua - ub;

	/* Value to clamp to: INT8_MAX when a's sign bit is clear,
	 * INT8_MAX + 1 (the INT8_MIN bit pattern) when it is set. */
	uint8_t limit = (ua >> 7) + INT8_MAX;

	/* The sign bit of this expression is set exactly when the
	 * subtraction overflowed. */
	if (HEDLEY_STATIC_CAST(int8_t, (limit ^ ub) & (limit ^ diff)) < 0) {
		diff = limit;
	}

	return HEDLEY_STATIC_CAST(int8_t, diff);
#endif
}
|
||||
|
||||
/* Signed 16-bit subtraction (a - b) that saturates instead of wrapping. */
static HEDLEY_INLINE int16_t simde_math_subs_i16(int16_t a, int16_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqsubh_s16(a, b);
#else
	uint16_t ua = HEDLEY_STATIC_CAST(uint16_t, a);
	uint16_t ub = HEDLEY_STATIC_CAST(uint16_t, b);
	uint16_t diff = ua - ub;

	/* Value to clamp to: INT16_MAX when a's sign bit is clear,
	 * INT16_MAX + 1 (the INT16_MIN bit pattern) when it is set. */
	uint16_t limit = (ua >> 15) + INT16_MAX;

	/* The sign bit of this expression is set exactly when the
	 * subtraction overflowed. */
	if (HEDLEY_STATIC_CAST(int16_t, (limit ^ ub) & (limit ^ diff)) < 0) {
		diff = limit;
	}

	return HEDLEY_STATIC_CAST(int16_t, diff);
#endif
}
|
||||
|
||||
/* Signed 32-bit subtraction (a - b) that saturates instead of wrapping. */
static HEDLEY_INLINE int32_t simde_math_subs_i32(int32_t a, int32_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqsubs_s32(a, b);
#else
	uint32_t ua = HEDLEY_STATIC_CAST(uint32_t, a);
	uint32_t ub = HEDLEY_STATIC_CAST(uint32_t, b);
	uint32_t diff = ua - ub;

	/* Value to clamp to: INT32_MAX when a's sign bit is clear,
	 * INT32_MAX + 1 (the INT32_MIN bit pattern) when it is set. */
	uint32_t limit = (ua >> 31) + INT32_MAX;

	/* The sign bit of this expression is set exactly when the
	 * subtraction overflowed. */
	if (HEDLEY_STATIC_CAST(int32_t, (limit ^ ub) & (limit ^ diff)) < 0) {
		diff = limit;
	}

	return HEDLEY_STATIC_CAST(int32_t, diff);
#endif
}
|
||||
|
||||
/* Signed 64-bit subtraction (a - b) that saturates instead of wrapping. */
static HEDLEY_INLINE int64_t simde_math_subs_i64(int64_t a, int64_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqsubd_s64(a, b);
#else
	uint64_t ua = HEDLEY_STATIC_CAST(uint64_t, a);
	uint64_t ub = HEDLEY_STATIC_CAST(uint64_t, b);
	uint64_t diff = ua - ub;

	/* Value to clamp to: INT64_MAX when a's sign bit is clear,
	 * INT64_MAX + 1 (the INT64_MIN bit pattern) when it is set. */
	uint64_t limit = (ua >> 63) + INT64_MAX;

	/* The sign bit of this expression is set exactly when the
	 * subtraction overflowed. */
	if (HEDLEY_STATIC_CAST(int64_t, (limit ^ ub) & (limit ^ diff)) < 0) {
		diff = limit;
	}

	return HEDLEY_STATIC_CAST(int64_t, diff);
#endif
}
|
||||
|
||||
/* Unsigned 8-bit subtraction (a - b) that clamps to 0 instead of wrapping. */
static HEDLEY_INLINE uint8_t simde_math_subs_u8(uint8_t a, uint8_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqsubb_u8(a, b);
#else
	uint8_t diff = a - b;

	/* A truncated difference larger than a means it wrapped below zero. */
	return (diff <= a) ? diff : 0;
#endif
}
|
||||
|
||||
/* Unsigned 16-bit subtraction (a - b) that clamps to 0 instead of wrapping. */
static HEDLEY_INLINE uint16_t simde_math_subs_u16(uint16_t a, uint16_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqsubh_u16(a, b);
#else
	uint16_t diff = a - b;

	/* A truncated difference larger than a means it wrapped below zero. */
	return (diff <= a) ? diff : 0;
#endif
}
|
||||
|
||||
/* Unsigned 32-bit subtraction (a - b) that clamps to 0 instead of wrapping. */
static HEDLEY_INLINE uint32_t simde_math_subs_u32(uint32_t a, uint32_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqsubs_u32(a, b);
#else
	uint32_t diff = a - b;

	/* A difference larger than a means it wrapped below zero. */
	return (diff <= a) ? diff : 0;
#endif
}
|
||||
|
||||
/* Unsigned 64-bit subtraction (a - b) that clamps to 0 instead of wrapping. */
static HEDLEY_INLINE uint64_t simde_math_subs_u64(uint64_t a, uint64_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqsubd_u64(a, b);
#else
	uint64_t diff = a - b;

	/* A difference larger than a means it wrapped below zero. */
	return (diff <= a) ? diff : 0;
#endif
}
|
||||
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
|
||||
#endif /* !defined(SIMDE_MATH_H) */
|
||||
|
|
|
@ -27,11 +27,7 @@
|
|||
#if !defined(SIMDE_X86_MMX_H)
|
||||
#define SIMDE_X86_MMX_H
|
||||
|
||||
#include "simde-common.h"
|
||||
|
||||
#if !defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
|
||||
#define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES
|
||||
#endif
|
||||
#include "../simde-common.h"
|
||||
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
||||
|
@ -46,6 +42,8 @@ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
|
|||
#include <mmintrin.h>
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
#include <arm_neon.h>
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
#include <loongson-mmiintrin.h>
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
@ -55,29 +53,29 @@ SIMDE_BEGIN_DECLS_
|
|||
|
||||
typedef union {
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT)
|
||||
SIMDE_ALIGN(8) int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN(8) int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN(8) int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN(8) int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN(8) uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN(8) uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN(8) uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN(8) uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN(8) simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN(8) int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN(8) uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
#else
|
||||
SIMDE_ALIGN(8) int8_t i8[8];
|
||||
SIMDE_ALIGN(8) int16_t i16[4];
|
||||
SIMDE_ALIGN(8) int32_t i32[2];
|
||||
SIMDE_ALIGN(8) int64_t i64[1];
|
||||
SIMDE_ALIGN(8) uint8_t u8[8];
|
||||
SIMDE_ALIGN(8) uint16_t u16[4];
|
||||
SIMDE_ALIGN(8) uint32_t u32[2];
|
||||
SIMDE_ALIGN(8) uint64_t u64[1];
|
||||
SIMDE_ALIGN(8) simde_float32 f32[2];
|
||||
SIMDE_ALIGN(8) int_fast32_t i32f[8 / sizeof(int_fast32_t)];
|
||||
SIMDE_ALIGN(8) uint_fast32_t u32f[8 / sizeof(uint_fast32_t)];
|
||||
SIMDE_ALIGN_TO_8 int8_t i8[8];
|
||||
SIMDE_ALIGN_TO_8 int16_t i16[4];
|
||||
SIMDE_ALIGN_TO_8 int32_t i32[2];
|
||||
SIMDE_ALIGN_TO_8 int64_t i64[1];
|
||||
SIMDE_ALIGN_TO_8 uint8_t u8[8];
|
||||
SIMDE_ALIGN_TO_8 uint16_t u16[4];
|
||||
SIMDE_ALIGN_TO_8 uint32_t u32[2];
|
||||
SIMDE_ALIGN_TO_8 uint64_t u64[1];
|
||||
SIMDE_ALIGN_TO_8 simde_float32 f32[2];
|
||||
SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)];
|
||||
SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)];
|
||||
#endif
|
||||
|
||||
#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
|
||||
|
@ -94,14 +92,26 @@ typedef union {
|
|||
uint64x1_t neon_u64;
|
||||
float32x2_t neon_f32;
|
||||
#endif
|
||||
#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
int8x8_t mmi_i8;
|
||||
int16x4_t mmi_i16;
|
||||
int32x2_t mmi_i32;
|
||||
int64_t mmi_i64;
|
||||
uint8x8_t mmi_u8;
|
||||
uint16x4_t mmi_u16;
|
||||
uint32x2_t mmi_u32;
|
||||
uint64_t mmi_u64;
|
||||
#endif
|
||||
} simde__m64_private;
|
||||
|
||||
#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
|
||||
typedef __m64 simde__m64;
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
typedef int32x2_t simde__m64;
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
typedef int32x2_t simde__m64;
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT)
|
||||
typedef int32_t simde__m64 SIMDE_ALIGN(8) SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
|
||||
#else
|
||||
typedef simde__m64_private simde__m64;
|
||||
#endif
|
||||
|
@ -169,6 +179,17 @@ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64)
|
|||
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32)
|
||||
#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */
|
||||
|
||||
#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8)
|
||||
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16)
|
||||
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32)
|
||||
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64)
|
||||
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8)
|
||||
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16)
|
||||
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32)
|
||||
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64)
|
||||
#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde__m64 simde_mm_add_pi8(simde__m64 a, simde__m64 b)
|
||||
{
|
||||
|
@ -181,6 +202,8 @@ simde__m64 simde_mm_add_pi8(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.i8 = a_.i8 + b_.i8;
|
||||
#else
|
||||
|
@ -211,6 +234,8 @@ simde__m64 simde_mm_add_pi16(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.i16 = a_.i16 + b_.i16;
|
||||
#else
|
||||
|
@ -226,7 +251,7 @@ simde__m64 simde_mm_add_pi16(simde__m64 a, simde__m64 b)
|
|||
#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b)
|
||||
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
||||
#define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b)
|
||||
#define _m_add_paddw(a, b) simde_mm_add_pi16(a, b)
|
||||
#define _m_paddw(a, b) simde_mm_add_pi16(a, b)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
|
@ -241,6 +266,8 @@ simde__m64 simde_mm_add_pi32(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.i32 = a_.i32 + b_.i32;
|
||||
#else
|
||||
|
@ -256,7 +283,7 @@ simde__m64 simde_mm_add_pi32(simde__m64 a, simde__m64 b)
|
|||
#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b)
|
||||
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
||||
#define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b)
|
||||
#define _m_add_paddd(a, b) simde_mm_add_pi32(a, b)
|
||||
#define _m_paddd(a, b) simde_mm_add_pi32(a, b)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
|
@ -270,6 +297,8 @@ simde__m64 simde_mm_adds_pi8(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
|
||||
|
@ -291,7 +320,7 @@ simde__m64 simde_mm_adds_pi8(simde__m64 a, simde__m64 b)
|
|||
#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b)
|
||||
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
||||
#define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b)
|
||||
#define _m_add_paddsb(a, b) simde_mm_adds_pi8(a, b)
|
||||
#define _m_paddsb(a, b) simde_mm_adds_pi8(a, b)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
|
@ -306,6 +335,8 @@ simde__m64 simde_mm_adds_pu8(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
|
||||
|
@ -340,6 +371,8 @@ simde__m64 simde_mm_adds_pi16(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
||||
|
@ -376,6 +409,8 @@ simde__m64 simde_mm_adds_pu16(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
||||
|
@ -435,6 +470,8 @@ simde__m64 simde_mm_andnot_si64(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.i32f = ~a_.i32f & b_.i32f;
|
||||
#else
|
||||
|
@ -461,7 +498,9 @@ simde__m64 simde_mm_cmpeq_pi8(simde__m64 a, simde__m64 b)
|
|||
simde__m64_private b_ = simde__m64_to_private(b);
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i8 = vreinterpret_s8_u8(vceq_s8(a_.neon_i8, b_.neon_i8));
|
||||
r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
|
||||
|
@ -489,7 +528,9 @@ simde__m64 simde_mm_cmpeq_pi16(simde__m64 a, simde__m64 b)
|
|||
simde__m64_private b_ = simde__m64_to_private(b);
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i16 = vreinterpret_s16_u16(vceq_s16(a_.neon_i16, b_.neon_i16));
|
||||
r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
||||
|
@ -517,7 +558,9 @@ simde__m64 simde_mm_cmpeq_pi32(simde__m64 a, simde__m64 b)
|
|||
simde__m64_private b_ = simde__m64_to_private(b);
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i32 = vreinterpret_s32_u32(vceq_s32(a_.neon_i32, b_.neon_i32));
|
||||
r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
|
||||
|
@ -545,7 +588,9 @@ simde__m64 simde_mm_cmpgt_pi8(simde__m64 a, simde__m64 b)
|
|||
simde__m64_private b_ = simde__m64_to_private(b);
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i8 = vreinterpret_s8_u8(vcgt_s8(a_.neon_i8, b_.neon_i8));
|
||||
r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
|
||||
|
@ -573,7 +618,9 @@ simde__m64 simde_mm_cmpgt_pi16(simde__m64 a, simde__m64 b)
|
|||
simde__m64_private b_ = simde__m64_to_private(b);
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i16 = vreinterpret_s16_u16(vcgt_s16(a_.neon_i16, b_.neon_i16));
|
||||
r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
||||
|
@ -601,7 +648,9 @@ simde__m64 simde_mm_cmpgt_pi32(simde__m64 a, simde__m64 b)
|
|||
simde__m64_private b_ = simde__m64_to_private(b);
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i32 = vreinterpret_s32_u32(vcgt_s32(a_.neon_i32, b_.neon_i32));
|
||||
r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
|
||||
|
@ -628,7 +677,13 @@ int64_t simde_mm_cvtm64_si64(simde__m64 a)
|
|||
simde__m64_private a_ = simde__m64_to_private(a);
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
|
||||
SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
|
||||
#pragma clang diagnostic ignored "-Wvector-conversion"
|
||||
#endif
|
||||
return vget_lane_s64(a_.neon_i64, 0);
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
#else
|
||||
return a_.i64[0];
|
||||
#endif
|
||||
|
@ -698,7 +753,13 @@ int32_t simde_mm_cvtsi64_si32(simde__m64 a)
|
|||
simde__m64_private a_ = simde__m64_to_private(a);
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
|
||||
SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
|
||||
#pragma clang diagnostic ignored "-Wvector-conversion"
|
||||
#endif
|
||||
return vget_lane_s32(a_.neon_i32, 0);
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
#else
|
||||
return a_.i32[0];
|
||||
#endif
|
||||
|
@ -714,6 +775,7 @@ void simde_mm_empty(void)
|
|||
#if defined(SIMDE_X86_MMX_NATIVE)
|
||||
_mm_empty();
|
||||
#else
|
||||
/* noop */
|
||||
#endif
|
||||
}
|
||||
#define simde_m_empty() simde_mm_empty()
|
||||
|
@ -735,6 +797,8 @@ simde__m64 simde_mm_madd_pi16(simde__m64 a, simde__m64 b)
|
|||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16);
|
||||
r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1));
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i += 2) {
|
||||
|
@ -766,7 +830,9 @@ simde__m64 simde_mm_mulhi_pi16(simde__m64 a, simde__m64 b)
|
|||
const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16);
|
||||
const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16);
|
||||
const uint16x4_t t3 = vmovn_u32(t2);
|
||||
r_.neon_i16 = vreinterpret_s16_u16(t3);
|
||||
r_.neon_u16 = t3;
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
||||
|
@ -797,7 +863,9 @@ simde__m64 simde_mm_mullo_pi16(simde__m64 a, simde__m64 b)
|
|||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16);
|
||||
const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1));
|
||||
r_.neon_i16 = vreinterpret_s16_u16(t2);
|
||||
r_.neon_u16 = t2;
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
||||
|
@ -854,6 +922,8 @@ simde__m64 simde_mm_packs_pi16(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16));
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
||||
|
@ -884,7 +954,7 @@ simde__m64 simde_mm_packs_pi16(simde__m64 a, simde__m64 b)
|
|||
#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b)
|
||||
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
||||
#define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b)
|
||||
#define _m_packsswb(a, b) mm_packs_pi16(a, b)
|
||||
#define _m_packsswb(a, b) simde_mm_packs_pi16(a, b)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
|
@ -899,6 +969,8 @@ simde__m64 simde_mm_packs_pi32(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32));
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (8 / sizeof(a_.i32[0])); i++) {
|
||||
|
@ -950,7 +1022,8 @@ simde__m64 simde_mm_packs_pu16(simde__m64 a, simde__m64 b)
|
|||
vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1)));
|
||||
|
||||
/* Vector with all s16 elements set to UINT8_MAX */
|
||||
const int16x8_t vmax = vmovq_n_s16((int16_t)UINT8_MAX);
|
||||
const int16x8_t vmax =
|
||||
vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX));
|
||||
|
||||
/* Elements which are within the acceptable range */
|
||||
const int16x8_t le_max =
|
||||
|
@ -962,6 +1035,8 @@ simde__m64 simde_mm_packs_pu16(simde__m64 a, simde__m64 b)
|
|||
const int16x8_t values = vorrq_s16(le_max, gt_max);
|
||||
|
||||
r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values));
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
||||
|
@ -1074,6 +1149,7 @@ simde__m64 simde_mm_set_pi16(int16_t e3, int16_t e2, int16_t e1, int16_t e0)
|
|||
r_.i16[2] = e2;
|
||||
r_.i16[3] = e3;
|
||||
#endif
|
||||
|
||||
return simde__m64_from_private(r_);
|
||||
#endif
|
||||
}
|
||||
|
@ -1285,6 +1361,36 @@ simde__m64 simde_mm_setzero_si64(void)
|
|||
#define _mm_setzero_si64() simde_mm_setzero_si64()
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde__m64 simde_x_mm_load_si64(const void *mem_addr)
|
||||
{
|
||||
simde__m64 r;
|
||||
simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64),
|
||||
sizeof(r));
|
||||
return r;
|
||||
}
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde__m64 simde_x_mm_loadu_si64(const void *mem_addr)
|
||||
{
|
||||
simde__m64 r;
|
||||
simde_memcpy(&r, mem_addr, sizeof(r));
|
||||
return r;
|
||||
}
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
void simde_x_mm_store_si64(void *mem_addr, simde__m64 value)
|
||||
{
|
||||
simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value,
|
||||
sizeof(value));
|
||||
}
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
void simde_x_mm_storeu_si64(void *mem_addr, simde__m64 value)
|
||||
{
|
||||
simde_memcpy(mem_addr, &value, sizeof(value));
|
||||
}
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
simde__m64 simde_x_mm_setone_si64(void)
|
||||
{
|
||||
|
@ -1302,8 +1408,22 @@ simde__m64 simde_mm_sll_pi16(simde__m64 a, simde__m64 count)
|
|||
simde__m64_private count_ = simde__m64_to_private(count);
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t)vget_lane_u64(
|
||||
count_.neon_u64, 0)));
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
|
||||
SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
|
||||
#pragma clang diagnostic ignored "-Wvector-conversion"
|
||||
#endif
|
||||
r_.neon_i16 =
|
||||
vshl_s16(a_.neon_i16,
|
||||
vmov_n_s16(HEDLEY_STATIC_CAST(
|
||||
int16_t, vget_lane_u64(count_.neon_u64, 0))));
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
|
||||
defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT)
|
||||
if (HEDLEY_UNLIKELY(count_.u64[0] > 15))
|
||||
return simde_mm_setzero_si64();
|
||||
|
||||
r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
r_.i16 = a_.i16 << count_.u64[0];
|
||||
#else
|
||||
|
@ -1339,8 +1459,16 @@ simde__m64 simde_mm_sll_pi32(simde__m64 a, simde__m64 count)
|
|||
simde__m64_private count_ = simde__m64_to_private(count);
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t)vget_lane_u64(
|
||||
count_.neon_u64, 0)));
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
|
||||
SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
|
||||
#pragma clang diagnostic ignored "-Wvector-conversion"
|
||||
#endif
|
||||
r_.neon_i32 =
|
||||
vshl_s32(a_.neon_i32,
|
||||
vmov_n_s32(HEDLEY_STATIC_CAST(
|
||||
int32_t, vget_lane_u64(count_.neon_u64, 0))));
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
r_.i32 = a_.i32 << count_.u64[0];
|
||||
#else
|
||||
|
@ -1373,10 +1501,19 @@ simde__m64 simde_mm_slli_pi16(simde__m64 a, int count)
|
|||
simde__m64_private r_;
|
||||
simde__m64_private a_ = simde__m64_to_private(a);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
|
||||
defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT)
|
||||
if (HEDLEY_UNLIKELY(count > 15))
|
||||
return simde_mm_setzero_si64();
|
||||
|
||||
r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
r_.i16 = a_.i16 << count;
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t)count));
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i16 = psllh_s(a_.mmi_i16, b_.mmi_i16);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
|
||||
|
@ -1406,6 +1543,8 @@ simde__m64 simde_mm_slli_pi32(simde__m64 a, int count)
|
|||
r_.i32 = a_.i32 << count;
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t)count));
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
|
||||
|
@ -1490,7 +1629,13 @@ simde__m64 simde_mm_srl_pi16(simde__m64 a, simde__m64 count)
|
|||
simde__m64_private a_ = simde__m64_to_private(a);
|
||||
simde__m64_private count_ = simde__m64_to_private(count);
|
||||
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
|
||||
defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT)
|
||||
if (HEDLEY_UNLIKELY(count_.u64[0] > 15))
|
||||
return simde_mm_setzero_si64();
|
||||
|
||||
r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, count_.u64[0]);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
r_.u16 = a_.u16 >> count_.u64[0];
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_u16 = vshl_u16(
|
||||
|
@ -1567,6 +1712,8 @@ simde__m64 simde_mm_srli_pi16(simde__m64 a, int count)
|
|||
r_.u16 = a_.u16 >> count;
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t)count)));
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
|
||||
|
@ -1596,6 +1743,8 @@ simde__m64 simde_mm_srli_pi32(simde__m64 a, int count)
|
|||
r_.u32 = a_.u32 >> count;
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t)count)));
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
|
||||
|
@ -1682,7 +1831,10 @@ simde__m64 simde_mm_srai_pi16(simde__m64 a, int count)
|
|||
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
|
||||
r_.i16 = a_.i16 >> (count & 0xff);
|
||||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count));
|
||||
r_.neon_i16 = vshl_s16(a_.neon_i16,
|
||||
vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count)));
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i16 = psrah_s(a_.mmi_i16, count);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
||||
|
@ -1713,6 +1865,8 @@ simde__m64 simde_mm_srai_pi32(simde__m64 a, int count)
|
|||
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i32 = vshl_s32(a_.neon_i32,
|
||||
vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count)));
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i32 = psraw_s(a_.mmi_i32, count);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
|
||||
|
@ -1726,7 +1880,7 @@ simde__m64 simde_mm_srai_pi32(simde__m64 a, int count)
|
|||
#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count)
|
||||
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
|
||||
#define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count)
|
||||
#define _m_srai_pi32(a, count) simde_mm_srai_pi32(a, count)
|
||||
#define _m_psradi(a, count) simde_mm_srai_pi32(a, count)
|
||||
#endif
|
||||
|
||||
SIMDE_FUNCTION_ATTRIBUTES
|
||||
|
@ -1813,6 +1967,8 @@ simde__m64 simde_mm_sub_pi8(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.i8 = a_.i8 - b_.i8;
|
||||
#else
|
||||
|
@ -1843,6 +1999,8 @@ simde__m64 simde_mm_sub_pi16(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.i16 = a_.i16 - b_.i16;
|
||||
#else
|
||||
|
@ -1873,6 +2031,8 @@ simde__m64 simde_mm_sub_pi32(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32);
|
||||
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
|
||||
r_.i32 = a_.i32 - b_.i32;
|
||||
#else
|
||||
|
@ -1903,6 +2063,8 @@ simde__m64 simde_mm_subs_pi8(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
|
||||
|
@ -1938,6 +2100,8 @@ simde__m64 simde_mm_subs_pu8(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
|
||||
|
@ -1973,6 +2137,8 @@ simde__m64 simde_mm_subs_pi16(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
|
||||
|
@ -2008,6 +2174,8 @@ simde__m64 simde_mm_subs_pu16(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16);
|
||||
#else
|
||||
SIMDE_VECTORIZE
|
||||
for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
|
||||
|
@ -2046,6 +2214,8 @@ simde__m64 simde_mm_unpackhi_pi8(simde__m64 a, simde__m64 b)
|
|||
#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14,
|
||||
7, 15);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8);
|
||||
#else
|
||||
r_.i8[0] = a_.i8[4];
|
||||
r_.i8[1] = b_.i8[4];
|
||||
|
@ -2078,6 +2248,8 @@ simde__m64 simde_mm_unpackhi_pi16(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16);
|
||||
#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7);
|
||||
#else
|
||||
|
@ -2108,6 +2280,8 @@ simde__m64 simde_mm_unpackhi_pi32(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32);
|
||||
#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3);
|
||||
#else
|
||||
|
@ -2136,6 +2310,8 @@ simde__m64 simde_mm_unpacklo_pi8(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8);
|
||||
#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3,
|
||||
11);
|
||||
|
@ -2171,6 +2347,8 @@ simde__m64 simde_mm_unpacklo_pi16(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16);
|
||||
#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5);
|
||||
#else
|
||||
|
@ -2201,6 +2379,8 @@ simde__m64 simde_mm_unpacklo_pi32(simde__m64 a, simde__m64 b)
|
|||
|
||||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
|
||||
r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32);
|
||||
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
|
||||
r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32);
|
||||
#elif defined(SIMDE_SHUFFLE_VECTOR_)
|
||||
r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2);
|
||||
#else
|
||||
|
@ -2253,7 +2433,13 @@ int32_t simde_m_to_int(simde__m64 a)
|
|||
simde__m64_private a_ = simde__m64_to_private(a);
|
||||
|
||||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
|
||||
HEDLEY_DIAGNOSTIC_PUSH
|
||||
#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
|
||||
SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
|
||||
#pragma clang diagnostic ignored "-Wvector-conversion"
|
||||
#endif
|
||||
return vget_lane_s32(a_.neon_i32, 0);
|
||||
HEDLEY_DIAGNOSTIC_POP
|
||||
#else
|
||||
return a_.i32[0];
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -17,55 +17,9 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#if NEEDS_SIMDE
|
||||
|
||||
#include "simde/sse2.h"
|
||||
|
||||
#define __m128 simde__m128
|
||||
#define _mm_setzero_ps simde_mm_setzero_ps
|
||||
#define _mm_set_ps simde_mm_set_ps
|
||||
#define _mm_add_ps simde_mm_add_ps
|
||||
#define _mm_sub_ps simde_mm_sub_ps
|
||||
#define _mm_mul_ps simde_mm_mul_ps
|
||||
#define _mm_div_ps simde_mm_div_ps
|
||||
#define _mm_set1_ps simde_mm_set1_ps
|
||||
#define _mm_movehl_ps simde_mm_movehl_ps
|
||||
#define _mm_shuffle_ps simde_mm_shuffle_ps
|
||||
#define _mm_min_ps simde_mm_min_ps
|
||||
#define _mm_max_ps simde_mm_max_ps
|
||||
#define _mm_movelh_ps simde_mm_movelh_ps
|
||||
#define _mm_unpacklo_ps simde_mm_unpacklo_ps
|
||||
#define _mm_unpackhi_ps simde_mm_unpackhi_ps
|
||||
#define _mm_load_ps simde_mm_load_ps
|
||||
#define _mm_andnot_ps simde_mm_andnot_ps
|
||||
#define _mm_storeu_ps simde_mm_storeu_ps
|
||||
#define _mm_loadu_ps simde_mm_loadu_ps
|
||||
|
||||
#define __m128i simde__m128i
|
||||
#define _mm_set1_epi32 simde_mm_set1_epi32
|
||||
#define _mm_set1_epi16 simde_mm_set1_epi16
|
||||
#define _mm_load_si128 simde_mm_load_si128
|
||||
#define _mm_packs_epi32 simde_mm_packs_epi32
|
||||
#define _mm_srli_si128 simde_mm_srli_si128
|
||||
#define _mm_and_si128 simde_mm_and_si128
|
||||
#define _mm_packus_epi16 simde_mm_packus_epi16
|
||||
#define _mm_add_epi64 simde_mm_add_epi64
|
||||
#define _mm_shuffle_epi32 simde_mm_shuffle_epi32
|
||||
#define _mm_srai_epi16 simde_mm_srai_epi16
|
||||
#define _mm_shufflelo_epi16 simde_mm_shufflelo_epi16
|
||||
#define _mm_storeu_si128 simde_mm_storeu_si128
|
||||
|
||||
#define _MM_SHUFFLE SIMDE_MM_SHUFFLE
|
||||
#define _MM_TRANSPOSE4_PS SIMDE_MM_TRANSPOSE4_PS
|
||||
|
||||
#else
|
||||
|
||||
#if defined(__aarch64__) || defined(__arm__)
|
||||
#include <arm_neon.h>
|
||||
#include "sse2neon.h"
|
||||
#else
|
||||
#include <xmmintrin.h>
|
||||
#if defined(_MSC_VER)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define SIMDE_ENABLE_NATIVE_ALIASES
|
||||
#include "simde/x86/sse2.h"
|
||||
#endif
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue