libobs: Update to SIMDe 0.7.1

c3d7abfaba

Simplify usage of the SIMDe header

This obviates the need for sse2neon as well and fixes compilation of all
plugins that referenced sse-intrin.h on all architectures, not just
arm*.
master
Michael R. Crusoe 2020-12-31 13:52:09 +01:00 committed by Jim
parent fdd34c35fc
commit 1e96573328
19 changed files with 6340 additions and 5719 deletions

View File

@ -123,18 +123,14 @@ else ()
endif ()
if(LOWERCASE_CMAKE_SYSTEM_PROCESSOR MATCHES "(i[3-6]86|x86|x64|x86_64|amd64|e2k)")
set(NEEDS_SIMDE "0")
if(NOT MSVC)
set(ARCH_SIMD_FLAGS "-mmmx" "-msse" "-msse2")
endif()
elseif(LOWERCASE_CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64(le)?")
set(NEEDS_SIMDE "0")
set(ARCH_SIMD_DEFINES "-DNO_WARN_X86_INTRINSICS")
set(ARCH_SIMD_FLAGS "-mvsx")
add_compile_definitions(NO_WARN_X86_INTRINSICS)
else()
set(NEEDS_SIMDE "1")
add_definitions(-DNEEDS_SIMDE=1)
if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSIMDE_ENABLE_OPENMP")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DSIMDE_ENABLE_OPENMP")

View File

@ -188,20 +188,8 @@ elseif(UNIX)
util/pipe-posix.c
util/platform-nix.c)
if(NEEDS_SIMDE)
set(libobs_PLATFORM_HEADERS
util/simde/check.h
util/simde/hedley.h
util/simde/mmx.h
util/simde/simde-arch.h
util/simde/simde-common.h
util/simde/sse.h
util/simde/sse2.h
util/threading-posix.h)
else()
set(libobs_PLATFORM_HEADERS
util/threading-posix.h)
endif()
set(libobs_PLATFORM_HEADERS
util/threading-posix.h)
if(HAVE_PULSEAUDIO)
set(libobs_audio_monitoring_HEADERS
@ -369,7 +357,6 @@ set(libobs_util_SOURCES
set(libobs_util_HEADERS
util/curl/curl-helper.h
util/sse-intrin.h
util/sse2neon.h
util/array-serializer.h
util/file-serializer.h
util/utf8.h
@ -419,6 +406,20 @@ set(libobs_libobs_SOURCES
obs-video-gpu-encode.c
obs-video.c)
set(libobs_libobs_HEADERS
util/simde/check.h
util/simde/debug-trap.h
util/simde/hedley.h
util/simde/simde-align.h
util/simde/simde-arch.h
util/simde/simde-common.h
util/simde/simde-constify.h
util/simde/simde-detect-clang.h
util/simde/simde-diagnostic.h
util/simde/simde-features.h
util/simde/simde-math.h
util/simde/x86/mmx.h
util/simde/x86/sse2.h
util/simde/x86/sse.h
${libobs_PLATFORM_HEADERS}
obs-audio-controls.h
obs-defs.h

View File

@ -18,7 +18,6 @@
#define HAVE_DBUS @HAVE_DBUS@
#define HAVE_PULSEAUDIO @HAVE_PULSEAUDIO@
#define USE_XINPUT @USE_XINPUT@
#define NEEDS_SIMDE @NEEDS_SIMDE@
#define LIBOBS_IMAGEMAGICK_DIR_STYLE_6L 6
#define LIBOBS_IMAGEMAGICK_DIR_STYLE_7GE 7
#define LIBOBS_IMAGEMAGICK_DIR_STYLE @LIBOBS_IMAGEMAGICK_DIR_STYLE@

View File

@ -1,5 +1,5 @@
This is a slightly modified version of https://github.com/nemequ/simde/commit/cafec4b952fa5a31a51a10326f97c2e7c9067771
sse{,2}.h and mmx.h was moved down from the original "x86" subdirectory,
subsequently the '#include "../simde-common.h"' line in mmx.h was changed to '#include "simde-common.h"'
This is a slightly modified version of the simde directory in
https://github.com/simd-everywhere/simde/commit/c3d7abfaba6729a8b11d09a314b34a4db628911d
Unused files have been removed.
Then the code was reformatted using the "formatcode.sh" script in the root of this repository.

View File

@ -18,6 +18,7 @@
#endif
#include "hedley.h"
#include "simde-diagnostic.h"
#include <stdint.h>
#if !defined(_WIN32)

View File

@ -10,11 +10,11 @@
* SPDX-License-Identifier: CC0-1.0
*/
#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 12)
#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 14)
#if defined(HEDLEY_VERSION)
#undef HEDLEY_VERSION
#endif
#define HEDLEY_VERSION 12
#define HEDLEY_VERSION 14
#if defined(HEDLEY_STRINGIFY_EX)
#undef HEDLEY_STRINGIFY_EX
@ -36,6 +36,16 @@
#endif
#define HEDLEY_CONCAT(a, b) HEDLEY_CONCAT_EX(a, b)
#if defined(HEDLEY_CONCAT3_EX)
#undef HEDLEY_CONCAT3_EX
#endif
#define HEDLEY_CONCAT3_EX(a, b, c) a##b##c
#if defined(HEDLEY_CONCAT3)
#undef HEDLEY_CONCAT3
#endif
#define HEDLEY_CONCAT3(a, b, c) HEDLEY_CONCAT3_EX(a, b, c)
#if defined(HEDLEY_VERSION_ENCODE)
#undef HEDLEY_VERSION_ENCODE
#endif
@ -80,17 +90,17 @@
#if defined(HEDLEY_MSVC_VERSION)
#undef HEDLEY_MSVC_VERSION
#endif
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000)
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL)
#define HEDLEY_MSVC_VERSION \
HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, \
(_MSC_FULL_VER % 10000000) / 100000, \
(_MSC_FULL_VER % 100000) / 100)
#elif defined(_MSC_FULL_VER)
#elif defined(_MSC_FULL_VER) && !defined(__ICL)
#define HEDLEY_MSVC_VERSION \
HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, \
(_MSC_FULL_VER % 1000000) / 10000, \
(_MSC_FULL_VER % 10000) / 10)
#elif defined(_MSC_VER)
#elif defined(_MSC_VER) && !defined(__ICL)
#define HEDLEY_MSVC_VERSION \
HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0)
#endif
@ -98,7 +108,7 @@
#if defined(HEDLEY_MSVC_VERSION_CHECK)
#undef HEDLEY_MSVC_VERSION_CHECK
#endif
#if !defined(_MSC_VER)
#if !defined(HEDLEY_MSVC_VERSION)
#define HEDLEY_MSVC_VERSION_CHECK(major, minor, patch) (0)
#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
#define HEDLEY_MSVC_VERSION_CHECK(major, minor, patch) \
@ -114,11 +124,12 @@
#if defined(HEDLEY_INTEL_VERSION)
#undef HEDLEY_INTEL_VERSION
#endif
#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE)
#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && \
!defined(__ICL)
#define HEDLEY_INTEL_VERSION \
HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, \
__INTEL_COMPILER_UPDATE)
#elif defined(__INTEL_COMPILER)
#elif defined(__INTEL_COMPILER) && !defined(__ICL)
#define HEDLEY_INTEL_VERSION \
HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0)
#endif
@ -133,6 +144,25 @@
#define HEDLEY_INTEL_VERSION_CHECK(major, minor, patch) (0)
#endif
#if defined(HEDLEY_INTEL_CL_VERSION)
#undef HEDLEY_INTEL_CL_VERSION
#endif
#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && \
defined(__ICL)
#define HEDLEY_INTEL_CL_VERSION \
HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0)
#endif
#if defined(HEDLEY_INTEL_CL_VERSION_CHECK)
#undef HEDLEY_INTEL_CL_VERSION_CHECK
#endif
#if defined(HEDLEY_INTEL_CL_VERSION)
#define HEDLEY_INTEL_CL_VERSION_CHECK(major, minor, patch) \
(HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch))
#else
#define HEDLEY_INTEL_CL_VERSION_CHECK(major, minor, patch) (0)
#endif
#if defined(HEDLEY_PGI_VERSION)
#undef HEDLEY_PGI_VERSION
#endif
@ -788,6 +818,68 @@
HEDLEY_GCC_VERSION_CHECK(major, minor, patch)
#endif
#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \
defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3, 0, 0) || \
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
HEDLEY_IAR_VERSION_CHECK(8, 0, 0) || \
HEDLEY_PGI_VERSION_CHECK(18, 4, 0) || \
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \
HEDLEY_TI_ARMCL_VERSION_CHECK(4, 7, 0) || \
HEDLEY_TI_CL430_VERSION_CHECK(2, 0, 1) || \
HEDLEY_TI_CL2000_VERSION_CHECK(6, 1, 0) || \
HEDLEY_TI_CL6X_VERSION_CHECK(7, 0, 0) || \
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) || \
HEDLEY_CRAY_VERSION_CHECK(5, 0, 0) || \
HEDLEY_TINYC_VERSION_CHECK(0, 9, 17) || \
HEDLEY_SUNPRO_VERSION_CHECK(8, 0, 0) || \
(HEDLEY_IBM_VERSION_CHECK(10, 1, 0) && defined(__C99_PRAGMA_OPERATOR))
#define HEDLEY_PRAGMA(value) _Pragma(#value)
#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0)
#define HEDLEY_PRAGMA(value) __pragma(value)
#else
#define HEDLEY_PRAGMA(value)
#endif
#if defined(HEDLEY_DIAGNOSTIC_PUSH)
#undef HEDLEY_DIAGNOSTIC_PUSH
#endif
#if defined(HEDLEY_DIAGNOSTIC_POP)
#undef HEDLEY_DIAGNOSTIC_POP
#endif
#if defined(__clang__)
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push")
#define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop")
#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)")
#define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)")
#elif HEDLEY_GCC_VERSION_CHECK(4, 6, 0)
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push")
#define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop")
#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0) || \
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
#define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push))
#define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop))
#elif HEDLEY_ARM_VERSION_CHECK(5, 6, 0)
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push")
#define HEDLEY_DIAGNOSTIC_POP _Pragma("pop")
#elif HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \
HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) || \
HEDLEY_TI_CL430_VERSION_CHECK(4, 4, 0) || \
HEDLEY_TI_CL6X_VERSION_CHECK(8, 1, 0) || \
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push")
#define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop")
#elif HEDLEY_PELLES_VERSION_CHECK(2, 90, 0)
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)")
#define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)")
#else
#define HEDLEY_DIAGNOSTIC_PUSH
#define HEDLEY_DIAGNOSTIC_POP
#endif
/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for
HEDLEY INTERNAL USE ONLY. API subject to change without notice. */
#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_)
@ -796,11 +888,20 @@
#if defined(__cplusplus)
#if HEDLEY_HAS_WARNING("-Wc++98-compat")
#if HEDLEY_HAS_WARNING("-Wc++17-extensions")
#if HEDLEY_HAS_WARNING("-Wc++1z-extensions")
#define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \
HEDLEY_DIAGNOSTIC_PUSH \
_Pragma("clang diagnostic ignored \"-Wc++98-compat\"") _Pragma( \
"clang diagnostic ignored \"-Wc++17-extensions\"") \
_Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \
xpr HEDLEY_DIAGNOSTIC_POP
#else
#define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \
HEDLEY_DIAGNOSTIC_PUSH \
_Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \
_Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \
xpr HEDLEY_DIAGNOSTIC_POP
#endif
#else
#define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \
HEDLEY_DIAGNOSTIC_PUSH \
@ -861,74 +962,14 @@
#elif HEDLEY_IAR_VERSION_CHECK(8, 3, 0)
#define HEDLEY_CPP_CAST(T, expr) \
HEDLEY_DIAGNOSTIC_PUSH \
_Pragma("diag_suppress=Pe137") HEDLEY_DIAGNOSTIC_POP #else
_Pragma("diag_suppress=Pe137") HEDLEY_DIAGNOSTIC_POP
#else
#define HEDLEY_CPP_CAST(T, expr) ((T)(expr))
#endif
#else
#define HEDLEY_CPP_CAST(T, expr) (expr)
#endif
#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \
defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3, 0, 0) || \
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
HEDLEY_IAR_VERSION_CHECK(8, 0, 0) || \
HEDLEY_PGI_VERSION_CHECK(18, 4, 0) || \
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \
HEDLEY_TI_ARMCL_VERSION_CHECK(4, 7, 0) || \
HEDLEY_TI_CL430_VERSION_CHECK(2, 0, 1) || \
HEDLEY_TI_CL2000_VERSION_CHECK(6, 1, 0) || \
HEDLEY_TI_CL6X_VERSION_CHECK(7, 0, 0) || \
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) || \
HEDLEY_CRAY_VERSION_CHECK(5, 0, 0) || \
HEDLEY_TINYC_VERSION_CHECK(0, 9, 17) || \
HEDLEY_SUNPRO_VERSION_CHECK(8, 0, 0) || \
(HEDLEY_IBM_VERSION_CHECK(10, 1, 0) && defined(__C99_PRAGMA_OPERATOR))
#define HEDLEY_PRAGMA(value) _Pragma(#value)
#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0)
#define HEDLEY_PRAGMA(value) __pragma(value)
#else
#define HEDLEY_PRAGMA(value)
#endif
#if defined(HEDLEY_DIAGNOSTIC_PUSH)
#undef HEDLEY_DIAGNOSTIC_PUSH
#endif
#if defined(HEDLEY_DIAGNOSTIC_POP)
#undef HEDLEY_DIAGNOSTIC_POP
#endif
#if defined(__clang__)
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push")
#define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop")
#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)")
#define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)")
#elif HEDLEY_GCC_VERSION_CHECK(4, 6, 0)
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push")
#define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop")
#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0)
#define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push))
#define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop))
#elif HEDLEY_ARM_VERSION_CHECK(5, 6, 0)
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push")
#define HEDLEY_DIAGNOSTIC_POP _Pragma("pop")
#elif HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \
HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) || \
HEDLEY_TI_CL430_VERSION_CHECK(4, 4, 0) || \
HEDLEY_TI_CL6X_VERSION_CHECK(8, 1, 0) || \
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push")
#define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop")
#elif HEDLEY_PELLES_VERSION_CHECK(2, 90, 0)
#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)")
#define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)")
#else
#define HEDLEY_DIAGNOSTIC_PUSH
#define HEDLEY_DIAGNOSTIC_POP
#endif
#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED)
#undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED
#endif
@ -938,6 +979,12 @@
#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \
_Pragma("warning(disable:1478 1786)")
#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \
__pragma(warning(disable : 1478 1786))
#elif HEDLEY_PGI_VERSION_CHECK(20, 7, 0)
#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \
_Pragma("diag_suppress 1215,1216,1444,1445")
#elif HEDLEY_PGI_VERSION_CHECK(17, 10, 0)
#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444")
#elif HEDLEY_GCC_VERSION_CHECK(4, 3, 0)
@ -985,6 +1032,9 @@
#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \
_Pragma("warning(disable:161)")
#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \
__pragma(warning(disable : 161))
#elif HEDLEY_PGI_VERSION_CHECK(17, 10, 0)
#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675")
#elif HEDLEY_GCC_VERSION_CHECK(4, 3, 0)
@ -1018,9 +1068,15 @@
#elif HEDLEY_INTEL_VERSION_CHECK(17, 0, 0)
#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
_Pragma("warning(disable:1292)")
#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
__pragma(warning(disable : 1292))
#elif HEDLEY_MSVC_VERSION_CHECK(19, 0, 0)
#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
__pragma(warning(disable : 5030))
#elif HEDLEY_PGI_VERSION_CHECK(20, 7, 0)
#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
_Pragma("diag_suppress 1097,1098")
#elif HEDLEY_PGI_VERSION_CHECK(17, 10, 0)
#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \
_Pragma("diag_suppress 1097")
@ -1061,13 +1117,11 @@
#if defined(HEDLEY_DEPRECATED_FOR)
#undef HEDLEY_DEPRECATED_FOR
#endif
#if defined(__cplusplus) && (__cplusplus >= 201402L)
#define HEDLEY_DEPRECATED(since) \
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \
[[deprecated("Since " #since)]])
#define HEDLEY_DEPRECATED_FOR(since, replacement) \
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \
[[deprecated("Since " #since "; use " #replacement)]])
#if HEDLEY_MSVC_VERSION_CHECK(14, 0, 0) || \
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
#define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " #since))
#define HEDLEY_DEPRECATED_FOR(since, replacement) \
__declspec(deprecated("Since " #since "; use " #replacement))
#elif HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) || \
HEDLEY_GCC_VERSION_CHECK(4, 5, 0) || \
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
@ -1083,6 +1137,13 @@
__attribute__((__deprecated__("Since " #since)))
#define HEDLEY_DEPRECATED_FOR(since, replacement) \
__attribute__((__deprecated__("Since " #since "; use " #replacement)))
#elif defined(__cplusplus) && (__cplusplus >= 201402L)
#define HEDLEY_DEPRECATED(since) \
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \
[[deprecated("Since " #since)]])
#define HEDLEY_DEPRECATED_FOR(since, replacement) \
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \
[[deprecated("Since " #since "; use " #replacement)]])
#elif HEDLEY_HAS_ATTRIBUTE(deprecated) || HEDLEY_GCC_VERSION_CHECK(3, 1, 0) || \
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \
@ -1103,12 +1164,9 @@
#define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__))
#define HEDLEY_DEPRECATED_FOR(since, replacement) \
__attribute__((__deprecated__))
#elif HEDLEY_MSVC_VERSION_CHECK(14, 0, 0)
#define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " #since))
#define HEDLEY_DEPRECATED_FOR(since, replacement) \
__declspec(deprecated("Since " #since "; use " #replacement))
#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) || \
HEDLEY_PELLES_VERSION_CHECK(6, 50, 0)
#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) || \
HEDLEY_PELLES_VERSION_CHECK(6, 50, 0) || \
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
#define HEDLEY_DEPRECATED(since) __declspec(deprecated)
#define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated)
#elif HEDLEY_IAR_VERSION_CHECK(8, 0, 0)
@ -1136,17 +1194,7 @@
#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG)
#undef HEDLEY_WARN_UNUSED_RESULT_MSG
#endif
#if (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L)
#define HEDLEY_WARN_UNUSED_RESULT \
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])
#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) \
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]])
#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard)
#define HEDLEY_WARN_UNUSED_RESULT \
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])
#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) \
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])
#elif HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \
#if HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \
HEDLEY_GCC_VERSION_CHECK(3, 4, 0) || \
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \
@ -1169,6 +1217,16 @@
#define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) \
__attribute__((__warn_unused_result__))
#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L)
#define HEDLEY_WARN_UNUSED_RESULT \
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])
#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) \
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]])
#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard)
#define HEDLEY_WARN_UNUSED_RESULT \
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])
#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) \
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]])
#elif defined(_Check_return_) /* SAL */
#define HEDLEY_WARN_UNUSED_RESULT _Check_return_
#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_
@ -1222,7 +1280,8 @@
#define HEDLEY_NO_RETURN __attribute__((__noreturn__))
#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 10, 0)
#define HEDLEY_NO_RETURN _Pragma("does_not_return")
#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0)
#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) || \
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
#define HEDLEY_NO_RETURN __declspec(noreturn)
#elif HEDLEY_TI_CL6X_VERSION_CHECK(6, 0, 0) && defined(__cplusplus)
#define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;")
@ -1252,7 +1311,9 @@
#if defined(HEDLEY_ASSUME)
#undef HEDLEY_ASSUME
#endif
#if HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) || HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
#if HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) || \
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
#define HEDLEY_ASSUME(expr) __assume(expr)
#elif HEDLEY_HAS_BUILTIN(__builtin_assume)
#define HEDLEY_ASSUME(expr) __builtin_assume(expr)
@ -1389,7 +1450,8 @@ HEDLEY_DIAGNOSTIC_POP
#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable)
#define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr))
#endif
#if HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) || \
#if (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && \
!defined(HEDLEY_PGI_VERSION)) || \
HEDLEY_GCC_VERSION_CHECK(9, 0, 0)
#define HEDLEY_PREDICT(expr, value, probability) \
__builtin_expect_with_probability((expr), (value), (probability))
@ -1399,7 +1461,8 @@ HEDLEY_DIAGNOSTIC_POP
__builtin_expect_with_probability(!!(expr), 0, (probability))
#define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1)
#define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
#elif HEDLEY_HAS_BUILTIN(__builtin_expect) || \
#elif (HEDLEY_HAS_BUILTIN(__builtin_expect) && \
!defined(HEDLEY_INTEL_CL_VERSION)) || \
HEDLEY_GCC_VERSION_CHECK(3, 0, 0) || \
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
(HEDLEY_SUNPRO_VERSION_CHECK(5, 15, 0) && defined(__cplusplus)) || \
@ -1476,7 +1539,8 @@ HEDLEY_DIAGNOSTIC_POP
#define HEDLEY_MALLOC __attribute__((__malloc__))
#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 10, 0)
#define HEDLEY_MALLOC _Pragma("returns_new_memory")
#elif HEDLEY_MSVC_VERSION_CHECK(14, 0, 0)
#elif HEDLEY_MSVC_VERSION_CHECK(14, 0, 0) || \
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
#define HEDLEY_MALLOC __declspec(restrict)
#else
#define HEDLEY_MALLOC
@ -1557,6 +1621,7 @@ HEDLEY_DIAGNOSTIC_POP
#elif HEDLEY_GCC_VERSION_CHECK(3, 1, 0) || \
HEDLEY_MSVC_VERSION_CHECK(14, 0, 0) || \
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) || \
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
HEDLEY_IBM_VERSION_CHECK(10, 1, 0) || \
HEDLEY_PGI_VERSION_CHECK(17, 10, 0) || \
@ -1581,13 +1646,14 @@ HEDLEY_DIAGNOSTIC_POP
#define HEDLEY_INLINE inline
#elif defined(HEDLEY_GCC_VERSION) || HEDLEY_ARM_VERSION_CHECK(6, 2, 0)
#define HEDLEY_INLINE __inline__
#elif HEDLEY_MSVC_VERSION_CHECK(12, 0, 0) || \
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
HEDLEY_TI_ARMCL_VERSION_CHECK(5, 1, 0) || \
HEDLEY_TI_CL430_VERSION_CHECK(3, 1, 0) || \
HEDLEY_TI_CL2000_VERSION_CHECK(6, 2, 0) || \
HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0) || \
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
#elif HEDLEY_MSVC_VERSION_CHECK(12, 0, 0) || \
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) || \
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
HEDLEY_TI_ARMCL_VERSION_CHECK(5, 1, 0) || \
HEDLEY_TI_CL430_VERSION_CHECK(3, 1, 0) || \
HEDLEY_TI_CL2000_VERSION_CHECK(6, 2, 0) || \
HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0) || \
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)
#define HEDLEY_INLINE __inline
#else
@ -1619,7 +1685,8 @@ HEDLEY_DIAGNOSTIC_POP
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)
#define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE
#elif HEDLEY_MSVC_VERSION_CHECK(12, 0, 0)
#elif HEDLEY_MSVC_VERSION_CHECK(12, 0, 0) || \
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
#define HEDLEY_ALWAYS_INLINE __forceinline
#elif defined(__cplusplus) && (HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) || \
HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) || \
@ -1658,7 +1725,8 @@ HEDLEY_DIAGNOSTIC_POP
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)
#define HEDLEY_NEVER_INLINE __attribute__((__noinline__))
#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0)
#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) || \
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
#define HEDLEY_NEVER_INLINE __declspec(noinline)
#elif HEDLEY_PGI_VERSION_CHECK(10, 2, 0)
#define HEDLEY_NEVER_INLINE _Pragma("noinline")
@ -1711,7 +1779,9 @@ HEDLEY_DIAGNOSTIC_POP
#if HEDLEY_HAS_ATTRIBUTE(nothrow) || HEDLEY_GCC_VERSION_CHECK(3, 3, 0) || \
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
#define HEDLEY_NO_THROW __attribute__((__nothrow__))
#elif HEDLEY_MSVC_VERSION_CHECK(13, 1, 0) || HEDLEY_ARM_VERSION_CHECK(4, 1, 0)
#elif HEDLEY_MSVC_VERSION_CHECK(13, 1, 0) || \
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) || \
HEDLEY_ARM_VERSION_CHECK(4, 1, 0)
#define HEDLEY_NO_THROW __declspec(nothrow)
#else
#define HEDLEY_NO_THROW
@ -1720,8 +1790,7 @@ HEDLEY_DIAGNOSTIC_POP
#if defined(HEDLEY_FALL_THROUGH)
#undef HEDLEY_FALL_THROUGH
#endif
#if HEDLEY_GNUC_HAS_ATTRIBUTE(fallthrough, 7, 0, 0) && \
!defined(HEDLEY_PGI_VERSION)
#if HEDLEY_HAS_ATTRIBUTE(fallthrough) || HEDLEY_GCC_VERSION_CHECK(7, 0, 0)
#define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__))
#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang, fallthrough)
#define HEDLEY_FALL_THROUGH \
@ -1866,12 +1935,14 @@ HEDLEY_DIAGNOSTIC_POP
#endif
#if !defined(__cplusplus) && \
((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \
HEDLEY_HAS_FEATURE(c_static_assert) || \
(HEDLEY_HAS_FEATURE(c_static_assert) && \
!defined(HEDLEY_INTEL_CL_VERSION)) || \
HEDLEY_GCC_VERSION_CHECK(6, 0, 0) || \
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || defined(_Static_assert))
#define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message)
#elif (defined(__cplusplus) && (__cplusplus >= 201103L)) || \
HEDLEY_MSVC_VERSION_CHECK(16, 0, 0)
HEDLEY_MSVC_VERSION_CHECK(16, 0, 0) || \
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
#define HEDLEY_STATIC_ASSERT(expr, message) \
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \
static_assert(expr, message))
@ -1930,7 +2001,8 @@ HEDLEY_DIAGNOSTIC_POP
HEDLEY_PGI_VERSION_CHECK(18, 4, 0) || \
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
#define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg)
#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0)
#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0) || \
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
#define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg))
#else
#define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg)
@ -1970,6 +2042,8 @@ HEDLEY_DIAGNOSTIC_POP
#endif
#if HEDLEY_HAS_ATTRIBUTE(flag_enum)
#define HEDLEY_FLAGS __attribute__((__flag_enum__))
#else
#define HEDLEY_FLAGS
#endif
#if defined(HEDLEY_FLAGS_CAST)
@ -1989,8 +2063,9 @@ HEDLEY_DIAGNOSTIC_POP
#if defined(HEDLEY_EMPTY_BASES)
#undef HEDLEY_EMPTY_BASES
#endif
#if HEDLEY_MSVC_VERSION_CHECK(19, 0, 23918) && \
!HEDLEY_MSVC_VERSION_CHECK(20, 0, 0)
#if (HEDLEY_MSVC_VERSION_CHECK(19, 0, 23918) && \
!HEDLEY_MSVC_VERSION_CHECK(20, 0, 0)) || \
HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0)
#define HEDLEY_EMPTY_BASES __declspec(empty_bases)
#else
#define HEDLEY_EMPTY_BASES

View File

@ -0,0 +1,481 @@
/* Alignment
* Created by Evan Nemerson <evan@nemerson.com>
*
* To the extent possible under law, the authors have waived all
* copyright and related or neighboring rights to this code. For
* details, see the Creative Commons Zero 1.0 Universal license at
* <https://creativecommons.org/publicdomain/zero/1.0/>
*
* SPDX-License-Identifier: CC0-1.0
*
**********************************************************************
*
* This is a portability layer which should help iron out some
* differences across various compilers, as well as various versions of
* C and C++.
*
* It was originally developed for SIMD Everywhere
* (<https://github.com/simd-everywhere/simde>), but since its only
* dependency is Hedley (<https://nemequ.github.io/hedley>, also CC0)
* it can easily be used in other projects, so please feel free to do
* so.
*
* If you do use this in your project, please keep a link to SIMDe in
* your code to remind you where to report any bugs and/or check for
* updated versions.
*
* # API Overview
*
* The API has several parts, and most macros have a few variations.
* There are APIs for declaring aligned fields/variables, optimization
* hints, and run-time alignment checks.
*
* Briefly, macros ending with "_TO" take numeric values and are great
* when you know the value you would like to use. Macros ending with
* "_LIKE", on the other hand, accept a type and are used when you want
* to use the alignment of a type instead of hardcoding a value.
*
* Documentation for each section of the API is inline.
*
* True to form, MSVC is the main problem and imposes several
* limitations on the effectiveness of the APIs. Detailed descriptions
* of the limitations of each macro are inline, but in general:
*
* * On C11+ or C++11+ code written using this API will work. The
* ASSUME macros may or may not generate a hint to the compiler, but
* that is only an optimization issue and will not actually cause
* failures.
* * If you're using pretty much any compiler other than MSVC,
* everything should basically work as well as in C11/C++11.
*/
#if !defined(SIMDE_ALIGN_H)
#define SIMDE_ALIGN_H
#include "hedley.h"
/* I know this seems a little silly, but some non-hosted compilers
* don't have stddef.h, so we try to accommodate them. */
/* Select the type behind SIMDE_ALIGN_SIZE_T_ unless the user already
* defined it.  Compiler-predefined type macros are tried first because
* they need no header; only when none is available is <cstddef> /
* <stddef.h> included to obtain size_t. */
#if !defined(SIMDE_ALIGN_SIZE_T_)
#if defined(__SIZE_TYPE__)
#define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__
#elif defined(__SIZE_T_TYPE__)
/* Use the macro this branch tested for: __SIZE_TYPE__ is known to be
* undefined here, so expanding to it would leave an unknown identifier. */
#define SIMDE_ALIGN_SIZE_T_ __SIZE_T_TYPE__
#elif defined(__cplusplus)
#include <cstddef>
#define SIMDE_ALIGN_SIZE_T_ size_t
#else
#include <stddef.h>
#define SIMDE_ALIGN_SIZE_T_ size_t
#endif
#endif
/* Select the type behind SIMDE_ALIGN_INTPTR_T_ unless the user already
* defined it.  Compiler-predefined type macros are tried in order
* (__INTPTR_TYPE__, then __PTRDIFF_TYPE__, then __PTRDIFF_T_TYPE__);
* if none exists, the standard header is included and ptrdiff_t is
* used instead. */
#if !defined(SIMDE_ALIGN_INTPTR_T_)
#if defined(__INTPTR_TYPE__)
#define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__
#elif defined(__PTRDIFF_TYPE__)
#define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__
#elif defined(__PTRDIFF_T_TYPE__)
#define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__
#elif defined(__cplusplus)
#include <cstddef>
#define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t
#else
#include <stddef.h>
#define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t
#endif
#endif
/* The stdio header (<cstdio> in C++, <stdio.h> in C) is only included
* when the user defines SIMDE_ALIGN_DEBUG. */
#if defined(SIMDE_ALIGN_DEBUG)
#if defined(__cplusplus)
#include <cstdio>
#else
#include <stdio.h>
#endif
#endif
/* SIMDE_ALIGN_OF(Type)
*
* The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or
* __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler.
* It isn't defined everywhere (only when the compiler has some alignof-
* like feature we can use to implement it), but it should work in most
* modern compilers, as well as C11 and C++11.
*
* If we can't find an implementation for SIMDE_ALIGN_OF then the macro
* will not be defined, so to handle that situation sensibly you may
* need to sprinkle some ifdefs into your code.
*/
/* Branch order: C11 _Alignof, then C++11 alignof, then the
* __alignof__ extension, then IAR's __ALIGNOF__, then MSVC's __alignof.
* The `0 &&` in front of the two HEDLEY_HAS_FEATURE() terms makes them
* always false, so only the language-version checks can select the
* first two branches.  If no branch matches, SIMDE_ALIGN_OF is left
* undefined. */
#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \
(0 && HEDLEY_HAS_FEATURE(c_alignof))
#define SIMDE_ALIGN_OF(Type) _Alignof(Type)
#elif (defined(__cplusplus) && (__cplusplus >= 201103L)) || \
(0 && HEDLEY_HAS_FEATURE(cxx_alignof))
#define SIMDE_ALIGN_OF(Type) alignof(Type)
#elif HEDLEY_GCC_VERSION_CHECK(2, 95, 0) || \
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
HEDLEY_SUNPRO_VERSION_CHECK(5, 13, 0) || \
HEDLEY_TINYC_VERSION_CHECK(0, 9, 24) || \
HEDLEY_PGI_VERSION_CHECK(19, 10, 0) || \
HEDLEY_CRAY_VERSION_CHECK(10, 0, 0) || \
HEDLEY_TI_ARMCL_VERSION_CHECK(16, 9, 0) || \
HEDLEY_TI_CL2000_VERSION_CHECK(16, 9, 0) || \
HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0) || \
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
HEDLEY_TI_CL430_VERSION_CHECK(16, 9, 0) || \
HEDLEY_TI_CLPRU_VERSION_CHECK(2, 3, 2) || defined(__IBM__ALIGNOF__) || \
defined(__clang__)
#define SIMDE_ALIGN_OF(Type) __alignof__(Type)
#elif HEDLEY_IAR_VERSION_CHECK(8, 40, 0)
#define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type)
#elif HEDLEY_MSVC_VERSION_CHECK(19, 0, 0)
/* Probably goes back much further, but MS takes down their old docs.
* If you can verify that this works in earlier versions please let
* me know! */
#define SIMDE_ALIGN_OF(Type) __alignof(Type)
#endif
/* SIMDE_ALIGN_MAXIMUM:
*
* This is the maximum alignment that the compiler supports. You can
* define the value prior to including SIMDe if necessary, but in that
* case *please* submit an issue so we can add the platform to the
* detection code.
*
* Most compilers are okay with types which are aligned beyond what
* they think is the maximum, as long as the alignment is a power
* of two. MSVC is the exception (of course), so we need to cap the
* alignment requests at values that the implementation supports.
*
* XL C/C++ will accept values larger than 16 (which is the alignment
* of an AltiVec vector), but will not reliably align to the larger
* value, so we cap the value at 16 there.
*
* If the compiler accepts any power-of-two value within reason then
* this macro should be left undefined, and the SIMDE_ALIGN_CAP
* macro will just return the value passed to it. */
/* Platform detection, skipped entirely when the user supplies
* SIMDE_ALIGN_MAXIMUM.  The detected limit is stored in
* SIMDE_ALIGN_PLATFORM_MAXIMUM (not SIMDE_ALIGN_MAXIMUM):
*   - MSVC on x86/x64: 64, 32 or 16 depending on the compiler version
*   - MSVC on ARM/ARM64: 8
*   - IBM compilers (HEDLEY_IBM_VERSION defined): 16
* For every other compiler, and for MSVC targets not listed above,
* SIMDE_ALIGN_PLATFORM_MAXIMUM stays undefined. */
#if !defined(SIMDE_ALIGN_MAXIMUM)
#if defined(HEDLEY_MSVC_VERSION)
#if defined(_M_IX86) || defined(_M_AMD64)
#if HEDLEY_MSVC_VERSION_CHECK(19, 14, 0)
#define SIMDE_ALIGN_PLATFORM_MAXIMUM 64
#elif HEDLEY_MSVC_VERSION_CHECK(16, 0, 0)
/* VS 2010 is really a guess based on Wikipedia; if anyone can
* test with old VS versions I'd really appreciate it. */
#define SIMDE_ALIGN_PLATFORM_MAXIMUM 32
#else
#define SIMDE_ALIGN_PLATFORM_MAXIMUM 16
#endif
#elif defined(_M_ARM) || defined(_M_ARM64)
#define SIMDE_ALIGN_PLATFORM_MAXIMUM 8
#endif
#elif defined(HEDLEY_IBM_VERSION)
#define SIMDE_ALIGN_PLATFORM_MAXIMUM 16
#endif
#endif
/* You can mostly ignore these; they're intended for internal use.
* If you do need to use them please let me know; if they fulfill
* a common use case I'll probably drop the trailing underscore
* and make them part of the public API. */
/* SIMDE_ALIGN_<N>_ is the numeric alignment actually used when <N> bytes
 * are requested.  When SIMDE_ALIGN_PLATFORM_MAXIMUM is defined, requests
 * larger than it are lowered to the biggest of 8/16/32/64 that does not
 * exceed it; otherwise every request maps to itself. */
#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM)
#if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64
#define SIMDE_ALIGN_64_ 64
#define SIMDE_ALIGN_32_ 32
#define SIMDE_ALIGN_16_ 16
#define SIMDE_ALIGN_8_ 8
#elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32
#define SIMDE_ALIGN_64_ 32
#define SIMDE_ALIGN_32_ 32
#define SIMDE_ALIGN_16_ 16
#define SIMDE_ALIGN_8_ 8
#elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16
#define SIMDE_ALIGN_64_ 16
#define SIMDE_ALIGN_32_ 16
#define SIMDE_ALIGN_16_ 16
#define SIMDE_ALIGN_8_ 8
#elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8
#define SIMDE_ALIGN_64_ 8
#define SIMDE_ALIGN_32_ 8
#define SIMDE_ALIGN_16_ 8
#define SIMDE_ALIGN_8_ 8
#else
/* A platform maximum below 8 is rejected at compile time. */
#error Max alignment expected to be >= 8
#endif
#else
/* No platform maximum known: requests are used unchanged. */
#define SIMDE_ALIGN_64_ 64
#define SIMDE_ALIGN_32_ 32
#define SIMDE_ALIGN_16_ 16
#define SIMDE_ALIGN_8_ 8
#endif
/**
* SIMDE_ALIGN_CAP(Alignment)
*
* Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM.
*/
#if defined(SIMDE_ALIGN_MAXIMUM)
/* A user-supplied SIMDE_ALIGN_MAXIMUM disables the detection of
 * SIMDE_ALIGN_PLATFORM_MAXIMUM (that detection is wrapped in
 * `#if !defined(SIMDE_ALIGN_MAXIMUM)`), so the cap has to be expressed
 * in terms of SIMDE_ALIGN_MAXIMUM itself; referring to the platform
 * macro here would expand to an undefined identifier. */
#define SIMDE_ALIGN_CAP(Alignment) \
	(((Alignment) < (SIMDE_ALIGN_MAXIMUM)) ? (Alignment) \
					       : (SIMDE_ALIGN_MAXIMUM))
#else
/* No user-supplied maximum: the requested alignment passes through. */
#define SIMDE_ALIGN_CAP(Alignment) (Alignment)
#endif
/* SIMDE_ALIGN_TO(Alignment)
*
* SIMDE_ALIGN_TO is used to declare types or variables. It basically
* maps to the align attribute in most compilers, the align declspec
* in MSVC, or _Alignas/alignas in C11/C++11.
*
* Example:
*
* struct i32x4 {
* SIMDE_ALIGN_TO(16) int32_t values[4];
* }
*
* Limitations:
*
* MSVC requires that the Alignment parameter be numeric; you can't do
* something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is
* unfortunate because that's really how the LIKE macros are
* implemented, and I am not aware of a way to get anything like this
* to work without using the C11/C++11 keywords.
*
* It also means that we can't use SIMDE_ALIGN_CAP to limit the
* alignment to the value specified, which MSVC also requires, so on
* MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead.
* They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would,
* but should be safe to use on MSVC.
*
* All this is to say that, if you want your code to work on MSVC, you
* should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of
* SIMDE_ALIGN_TO(8/16/32/64).
*/
/* Pick the alignment specifier for this compiler.  Branches are tried in
 * order: GNU-style attribute, C11 _Alignas, C++11 alignas, and finally the
 * MSVC declspec.  If none matches, SIMDE_ALIGN_TO is left undefined. */
#if HEDLEY_HAS_ATTRIBUTE(aligned) || HEDLEY_GCC_VERSION_CHECK(2, 95, 0) || \
HEDLEY_CRAY_VERSION_CHECK(8, 4, 0) || \
HEDLEY_IBM_VERSION_CHECK(11, 1, 0) || \
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
HEDLEY_PGI_VERSION_CHECK(19, 4, 0) || \
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
HEDLEY_TINYC_VERSION_CHECK(0, 9, 24) || \
HEDLEY_TI_ARMCL_VERSION_CHECK(16, 9, 0) || \
HEDLEY_TI_CL2000_VERSION_CHECK(16, 9, 0) || \
HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0) || \
HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \
HEDLEY_TI_CL430_VERSION_CHECK(16, 9, 0) || \
HEDLEY_TI_CLPRU_VERSION_CHECK(2, 3, 2)
#define SIMDE_ALIGN_TO(Alignment) \
__attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment))))
#elif (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))
#define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment))
#elif (defined(__cplusplus) && (__cplusplus >= 201103L))
#define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment))
#elif defined(HEDLEY_MSVC_VERSION)
/* The declspec form receives Alignment as written, without
 * SIMDE_ALIGN_CAP. */
#define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment))
/* Unfortunately MSVC can't handle __declspec(align(__alignof(Type)));
* the alignment passed to the declspec has to be an integer. */
#define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE
#endif
/* Fixed-size variants: these pass the SIMDE_ALIGN_<N>_ constants, which
 * are plain integer literals, to SIMDE_ALIGN_TO. */
#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_)
#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_)
#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_)
#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_)
/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment)
*
* SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's
* std::assume_aligned, or __builtin_assume_aligned. It tells the
* compiler to assume that the provided pointer is aligned to an
* `Alignment`-byte boundary.
*
* If you define SIMDE_ALIGN_DEBUG prior to including this header then
* SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. We don't
* integrate with NDEBUG in this header, but it may be a good idea to
* put something like this in your code:
*
* #if !defined(NDEBUG)
* #define SIMDE_ALIGN_DEBUG
* #endif
* #include <.../simde-align.h>
*/
/* SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment)
 *
 * Evaluates to Pointer while telling the compiler it may assume the
 * pointer is aligned to Alignment bytes.  No runtime check is made.
 * The implementation is chosen in this order: __builtin_assume_aligned,
 * Intel's __assume_aligned, std::assume_aligned (after C++17), and
 * finally a helper built on HEDLEY_ASSUME. */
#if HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \
	HEDLEY_GCC_VERSION_CHECK(4, 7, 0)
/* The pointer is converted to a non-const `void *` for the builtin and
 * the result is cast back to the pointer's own type. */
#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \
	HEDLEY_REINTERPRET_CAST( \
		__typeof__(Pointer), \
		__builtin_assume_aligned( \
			HEDLEY_CONST_CAST( \
				void *, HEDLEY_REINTERPRET_CAST(const void *, \
								Pointer)), \
			Alignment))
#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
/* The temporary must be declared with the type of the macro's own
 * `Pointer` argument; this macro has no parameter named `v`. */
#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \
	(__extension__({ \
		__typeof__(Pointer) simde_assume_aligned_t_ = (Pointer); \
		__assume_aligned(simde_assume_aligned_t_, Alignment); \
		simde_assume_aligned_t_; \
	}))
#elif defined(__cplusplus) && (__cplusplus > 201703L)
#include <memory>
#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \
	std::assume_aligned<Alignment>(Pointer)
#else
/* Generic fallback: state the alignment through HEDLEY_ASSUME and return
 * the pointer unchanged.  The C++ variant is a template so the pointer
 * type is preserved; the C variant works on `void *`. */
#if defined(__cplusplus)
template<typename T>
HEDLEY_ALWAYS_INLINE static T *
simde_align_assume_to_unchecked(T *ptr, const size_t alignment)
#else
HEDLEY_ALWAYS_INLINE static void *
simde_align_assume_to_unchecked(void *ptr, const size_t alignment)
#endif
{
	HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) %
		       SIMDE_ALIGN_CAP(alignment)) == 0);
	return ptr;
}
#if defined(__cplusplus)
#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \
	simde_align_assume_to_unchecked((Pointer), (Alignment))
#else
/* In C the pointer is first stripped of const and converted to `void *`
 * so it matches the helper's parameter type. */
#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \
	simde_align_assume_to_unchecked( \
		HEDLEY_CONST_CAST(void *, HEDLEY_REINTERPRET_CAST( \
						  const void *, Pointer)), \
		(Alignment))
#endif
#endif
#if !defined(SIMDE_ALIGN_DEBUG)
/* Default (SIMDE_ALIGN_DEBUG not defined): forward to the unchecked
 * compiler hint. */
#define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) \
SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment)
#else
/* Debug variant: instead of giving the compiler a hint, test the address
 * at run time.  If it is not a multiple of SIMDE_ALIGN_CAP(alignment), a
 * diagnostic naming the file, line and pointer expression is written to
 * stderr.  The pointer is returned unchanged in both cases; nothing here
 * aborts the program. */
#include <stdio.h>
#if defined(__cplusplus)
template<typename T>
static HEDLEY_ALWAYS_INLINE T *
simde_align_assume_to_checked_uncapped(T *ptr, const size_t alignment,
const char *file, int line,
const char *ptrname)
#else
static HEDLEY_ALWAYS_INLINE void *
simde_align_assume_to_checked_uncapped(void *ptr, const size_t alignment,
const char *file, int line,
const char *ptrname)
#endif
{
/* Misaligned when the address modulo the (capped) alignment is non-zero. */
if (HEDLEY_UNLIKELY(
(HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) %
HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_,
SIMDE_ALIGN_CAP(alignment))) != 0)) {
/* Report: pointer value, the alignment tested, and the remainder. */
fprintf(stderr,
"%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n",
file, line, ptrname,
HEDLEY_REINTERPRET_CAST(const void *, ptr),
HEDLEY_STATIC_CAST(unsigned int,
SIMDE_ALIGN_CAP(alignment)),
HEDLEY_STATIC_CAST(
unsigned int,
HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_,
(ptr)) %
HEDLEY_STATIC_CAST(
SIMDE_ALIGN_INTPTR_T_,
SIMDE_ALIGN_CAP(alignment))));
}
return ptr;
}
/* __FILE__, __LINE__ and the stringified Pointer expression are captured
 * at the point where the macro is used. */
#if defined(__cplusplus)
#define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) \
simde_align_assume_to_checked_uncapped((Pointer), (Alignment), \
__FILE__, __LINE__, #Pointer)
#else
#define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) \
simde_align_assume_to_checked_uncapped( \
HEDLEY_CONST_CAST(void *, HEDLEY_REINTERPRET_CAST( \
const void *, Pointer)), \
(Alignment), __FILE__, __LINE__, #Pointer)
#endif
#endif
/* SIMDE_ALIGN_LIKE(Type)
* SIMDE_ALIGN_LIKE_#(Type)
*
* The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros
* except instead of an integer they take a type; basically, it's just
* a more convenient way to do something like:
*
* SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type))
*
* The versions with a numeric suffix will fall back on using a numeric
* value in the event we can't use SIMDE_ALIGN_OF(Type). This is
* mainly for MSVC, where __declspec(align()) can't handle anything
* other than hard-coded numeric values.
*/
/* When SIMDE_ALIGN_OF can be fed to SIMDE_ALIGN_TO, every LIKE macro
 * aligns to the alignment of Type and the numeric suffix is ignored. */
#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && \
!defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE)
#define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type))
#define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type)
#define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type)
#define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type)
#define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type)
#else
/* Fallback: Type is ignored and the numeric suffix picks the alignment.
 * The unsuffixed SIMDE_ALIGN_LIKE is not defined in this branch. */
#define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64
#define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32
#define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16
#define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8
#endif
/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type)
*
* This is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a
* type instead of a numeric value. */
/* Defined only when both SIMDE_ALIGN_OF and SIMDE_ALIGN_ASSUME_TO are
 * available; otherwise SIMDE_ALIGN_ASSUME_LIKE is left undefined. */
#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO)
#define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) \
SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type))
#endif
/* SIMDE_ALIGN_CAST(Type, Pointer)
*
* SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try
* to silence warnings that some compilers may produce if you try
* to assign to a type with increased alignment requirements.
*
* Note that it does *not* actually attempt to tell the compiler that
* the pointer is aligned like the destination should be; that's the
* job of the next macro. This macro is necessary for stupid APIs
* like _mm_loadu_si128 where the input is a __m128i* but the function
* is specifically for data which isn't necessarily aligned to
* _Alignof(__m128i).
*/
#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || \
HEDLEY_GCC_VERSION_CHECK(3, 4, 0)
/* Statement-expression form: -Wcast-align is ignored only for the
 * duration of the cast (push / pop), and the converted value is the
 * result of the expression. */
#define SIMDE_ALIGN_CAST(Type, Pointer) \
(__extension__({ \
HEDLEY_DIAGNOSTIC_PUSH \
_Pragma("GCC diagnostic ignored \"-Wcast-align\"") \
Type simde_r_ = \
HEDLEY_REINTERPRET_CAST(Type, Pointer); \
HEDLEY_DIAGNOSTIC_POP \
simde_r_; \
}))
#else
/* Other compilers: a plain reinterpret cast with no warning handling. */
#define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer)
#endif
/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer)
*
* This is sort of like a combination of a reinterpret_cast and a
* SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell
* the compiler that the pointer is aligned like the specified type
* and casts the pointer to the specified type while suppressing any
* warnings from the compiler about casting to a type with greater
* alignment requirements.
*/
/* Expands to SIMDE_ALIGN_ASSUME_LIKE, which is only defined when both
 * SIMDE_ALIGN_OF and SIMDE_ALIGN_ASSUME_TO are defined; this macro is not
 * usable otherwise.
 * NOTE(review): Type is the destination type of the cast, so
 * SIMDE_ALIGN_OF(Type) looks like it yields the alignment of the pointer
 * type itself rather than of the pointed-to type -- confirm the intent. */
#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) \
SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type)
#endif /* !defined(SIMDE_ALIGN_H) */

View File

@ -27,14 +27,14 @@
* an undefined macro being used (e.g., GCC with -Wundef).
*
* This was originally created for SIMDe
* <https://github.com/nemequ/simde> (hence the prefix), but this
* <https://github.com/simd-everywhere/simde> (hence the prefix), but this
* header has no dependencies and may be used anywhere. It is
* originally based on information from
* <https://sourceforge.net/p/predef/wiki/Architectures/>, though it
* has been enhanced with additional information.
*
* If you improve this file, or find a bug, please file the issue at
* <https://github.com/nemequ/simde/issues>. If you copy this into
* <https://github.com/simd-everywhere/simde/issues>. If you copy this into
* your project, even if you change the prefix, please keep the links
* to SIMDe intact so others know where to report issues, submit
* enhancements, and find the latest version. */
@ -70,7 +70,7 @@
/* AMD64 / x86_64
<https://en.wikipedia.org/wiki/X86-64> */
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || \
defined(__x86_64) || defined(_M_X66) || defined(_M_AMD64)
defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
#define SIMDE_ARCH_AMD64 1000
#endif
@ -125,6 +125,9 @@
#define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM
#endif
#endif
#if defined(__ARM_FEATURE_SVE)
#define SIMDE_ARCH_ARM_SVE
#endif
/* Blackfin
<https://en.wikipedia.org/wiki/Blackfin> */
@ -276,6 +279,12 @@
#define SIMDE_ARCH_X86_AVX 1
#endif
#endif
#if defined(__AVX512VP2INTERSECT__)
#define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1
#endif
#if defined(__AVX512VBMI__)
#define SIMDE_ARCH_X86_AVX512VBMI 1
#endif
#if defined(__AVX512BW__)
#define SIMDE_ARCH_X86_AVX512BW 1
#endif
@ -294,6 +303,12 @@
#if defined(__GFNI__)
#define SIMDE_ARCH_X86_GFNI 1
#endif
#if defined(__PCLMUL__)
#define SIMDE_ARCH_X86_PCLMUL 1
#endif
#if defined(__VPCLMULQDQ__)
#define SIMDE_ARCH_X86_VPCLMULQDQ 1
#endif
#endif
/* Itanium
@ -363,6 +378,10 @@
#define SIMDE_ARCH_MIPS_CHECK(version) (0)
#endif
#if defined(__mips_loongson_mmi)
#define SIMDE_ARCH_MIPS_LOONGSON_MMI 1
#endif
/* Matsushita MN10300
<https://en.wikipedia.org/wiki/MN103> */
#if defined(__MN10300__) || defined(__mn10300__)

View File

@ -30,62 +30,103 @@
#include "hedley.h"
#define SIMDE_VERSION_MAJOR 0
#define SIMDE_VERSION_MINOR 5
#define SIMDE_VERSION_MICRO 0
#define SIMDE_VERSION_MINOR 7
#define SIMDE_VERSION_MICRO 1
#define SIMDE_VERSION \
HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, \
SIMDE_VERSION_MICRO)
#include "simde-arch.h"
#include "simde-features.h"
#include "simde-diagnostic.h"
#include <stddef.h>
#include <stdint.h>
#if HEDLEY_HAS_ATTRIBUTE(aligned) || HEDLEY_GCC_VERSION_CHECK(2, 95, 0) || \
HEDLEY_CRAY_VERSION_CHECK(8, 4, 0) || \
HEDLEY_IBM_VERSION_CHECK(11, 1, 0) || \
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
HEDLEY_PGI_VERSION_CHECK(19, 4, 0) || \
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
HEDLEY_TINYC_VERSION_CHECK(0, 9, 24) || \
HEDLEY_TI_VERSION_CHECK(8, 1, 0)
#define SIMDE_ALIGN(alignment) __attribute__((aligned(alignment)))
#elif defined(_MSC_VER) && !(defined(_M_ARM) && !defined(_M_ARM64))
#define SIMDE_ALIGN(alignment) __declspec(align(alignment))
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
#define SIMDE_ALIGN(alignment) _Alignas(alignment)
#elif defined(__cplusplus) && (__cplusplus >= 201103L)
#define SIMDE_ALIGN(alignment) alignas(alignment)
#else
#define SIMDE_ALIGN(alignment)
#include "simde-detect-clang.h"
#include "simde-arch.h"
#include "simde-features.h"
#include "simde-diagnostic.h"
#include "simde-math.h"
#include "simde-constify.h"
#include "simde-align.h"
/* In some situations, SIMDe has to make large performance sacrifices
* for small increases in how faithfully it reproduces an API, but
* only a relatively small number of users will actually need the API
* to be completely accurate. The SIMDE_FAST_* options can be used to
* disable these trade-offs.
*
* They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or
* the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to
* enable some optimizations. Using -ffast-math and/or
* -ffinite-math-only will also enable the relevant options. If you
* don't want that you can pass -DSIMDE_NO_FAST_* to disable them. */
/* Most programs avoid NaNs by never passing values which can result in
* a NaN; for example, if you only pass non-negative values to the sqrt
* functions, it won't generate a NaN. On some platforms, similar
* functions handle NaNs differently; for example, the _mm_min_ps SSE
* function will return 0.0 if you pass it (0.0, NaN), but the NEON
* vminq_f32 function will return NaN. Making them behave like one
* another is expensive; it requires generating a mask of all lanes
* with NaNs, then performing the operation (e.g., vminq_f32), then
* blending together the result with another vector using the mask.
*
* If you don't want SIMDe to worry about the differences between how
* NaNs are handled on the two platforms, define this (or pass
* -ffinite-math-only) */
#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && \
defined(__FAST_MATH__)
#define SIMDE_FAST_MATH
#endif
#if HEDLEY_GNUC_VERSION_CHECK(2, 95, 0) || \
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
HEDLEY_IBM_VERSION_CHECK(11, 1, 0)
#define SIMDE_ALIGN_OF(T) (__alignof__(T))
#elif (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \
HEDLEY_HAS_FEATURE(c11_alignof)
#define SIMDE_ALIGN_OF(T) (_Alignof(T))
#elif (defined(__cplusplus) && (__cplusplus >= 201103L)) || \
HEDLEY_HAS_FEATURE(cxx_alignof)
#define SIMDE_ALIGN_OF(T) (alignof(T))
#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS)
#if defined(SIMDE_FAST_MATH)
#define SIMDE_FAST_NANS
#elif defined(__FINITE_MATH_ONLY__)
#if __FINITE_MATH_ONLY__
#define SIMDE_FAST_NANS
#endif
#endif
#endif
#if defined(SIMDE_ALIGN_OF)
#define SIMDE_ALIGN_AS(N, T) SIMDE_ALIGN(SIMDE_ALIGN_OF(T))
#else
#define SIMDE_ALIGN_AS(N, T) SIMDE_ALIGN(N)
/* Many functions are defined as using the current rounding mode
* (i.e., the SIMD version of fegetround()) when converting to
* an integer. For example, _mm_cvtpd_epi32. Unfortunately,
* on some platforms (such as ARMv8+ where round-to-nearest is
* always used, regardless of the FPSCR register) this means we
* have to first query the current rounding mode, then choose
* the proper function (round, ceil, floor, etc.) */
#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && \
defined(SIMDE_FAST_MATH)
#define SIMDE_FAST_ROUND_MODE
#endif
#define simde_assert_aligned(alignment, val) \
simde_assert_int(HEDLEY_REINTERPRET_CAST( \
uintptr_t, HEDLEY_REINTERPRET_CAST( \
const void *, (val))) % \
(alignment), \
==, 0)
/* This controls how ties are rounded. For example, does 10.5 round to
* 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for
* example) doesn't support it and it must be emulated (which is rather
* slow). If you're okay with just using the default for whatever arch
* you're on, you should definitely define this.
*
* Note that we don't use this macro to avoid correct implementations
* in functions which are explicitly about rounding (such as vrnd* on
* NEON, _mm_round_* on x86, etc.); it is only used for code where
* rounding is a component in another function, and even then it isn't
* usually a problem since such functions will use the current rounding
* mode. */
#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && \
defined(SIMDE_FAST_MATH)
#define SIMDE_FAST_ROUND_TIES
#endif
/* For functions which convert from one type to another (mostly from
* floating point to integer types), sometimes we need to do a range
* check and potentially return a different result if the value
* falls outside that range. Skipping this check can provide a
* performance boost, at the expense of faithfulness to the API we're
* emulating. */
#if !defined(SIMDE_FAST_CONVERSION_RANGE) && \
!defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH)
#define SIMDE_FAST_CONVERSION_RANGE
#endif
#if HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \
HEDLEY_GCC_VERSION_CHECK(3, 4, 0) || \
@ -102,15 +143,21 @@
#define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated())
#endif
/* diagnose_if + __builtin_constant_p was broken until clang 9,
* which is when __FILE_NAME__ was added. */
#if defined(SIMDE_CHECK_CONSTANT_) && defined(__FILE_NAME__)
#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT)
#if defined(SIMDE_CHECK_CONSTANT_) && \
SIMDE_DETECT_CLANG_VERSION_CHECK(9, 0, 0) && \
(!defined(__apple_build_version__) || \
((__apple_build_version__ < 11000000) || \
(__apple_build_version__ >= 12000000)))
#define SIMDE_REQUIRE_CONSTANT(arg) \
HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), \
"`" #arg "' must be constant")
#else
#define SIMDE_REQUIRE_CONSTANT(arg)
#endif
#else
#define SIMDE_REQUIRE_CONSTANT(arg)
#endif
#define SIMDE_REQUIRE_RANGE(arg, min, max) \
HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), \
@ -120,39 +167,20 @@
SIMDE_REQUIRE_CONSTANT(arg) \
SIMDE_REQUIRE_RANGE(arg, min, max)
/* SIMDE_ASSUME_ALIGNED allows you to (try to) tell the compiler
* that a pointer is aligned to an `alignment`-byte boundary. */
#if HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \
HEDLEY_GCC_VERSION_CHECK(4, 7, 0)
#define SIMDE_ASSUME_ALIGNED(alignment, v) \
HEDLEY_REINTERPRET_CAST(__typeof__(v), \
__builtin_assume_aligned(v, alignment))
#elif defined(__cplusplus) && (__cplusplus > 201703L)
#define SIMDE_ASSUME_ALIGNED(alignment, v) std::assume_aligned<alignment>(v)
#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
#define SIMDE_ASSUME_ALIGNED(alignment, v) \
(__extension__({ \
__typeof__(v) simde_assume_aligned_t_ = (v); \
__assume_aligned(simde_assume_aligned_t_, alignment); \
simde_assume_aligned_t_; \
}))
#else
#define SIMDE_ASSUME_ALIGNED(alignment, v) (v)
#endif
/* SIMDE_ALIGN_CAST allows you to convert to a type with greater
* alignment requirements without triggering a warning. */
#if HEDLEY_HAS_WARNING("-Wcast-align")
#define SIMDE_ALIGN_CAST(T, v) \
(__extension__({ \
HEDLEY_DIAGNOSTIC_PUSH \
_Pragma("clang diagnostic ignored \"-Wcast-align\"") \
T simde_r_ = HEDLEY_REINTERPRET_CAST(T, v); \
HEDLEY_DIAGNOSTIC_POP \
simde_r_; \
}))
#else
#define SIMDE_ALIGN_CAST(T, v) HEDLEY_REINTERPRET_CAST(T, v)
/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty
* fallback if we can't find an implementation; instead we have to
* check if SIMDE_STATIC_ASSERT is defined before using it. */
#if !defined(__cplusplus) && \
((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \
HEDLEY_HAS_FEATURE(c_static_assert) || \
HEDLEY_GCC_VERSION_CHECK(6, 0, 0) || \
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || defined(_Static_assert))
#define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message)
#elif (defined(__cplusplus) && (__cplusplus >= 201103L)) || \
HEDLEY_MSVC_VERSION_CHECK(16, 0, 0)
#define SIMDE_STATIC_ASSERT(expr, message) \
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \
static_assert(expr, message))
#endif
#if (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \
@ -170,6 +198,7 @@
* SIMDE_VECTOR - Declaring a vector.
* SIMDE_VECTOR_OPS - basic operations (binary and unary).
* SIMDE_VECTOR_NEGATE - negating a vector
* SIMDE_VECTOR_SCALAR - For binary operators, the second argument
can be a scalar, in which case the result is as if that scalar
had been broadcast to all lanes of a vector.
@ -182,11 +211,13 @@
#if HEDLEY_GCC_VERSION_CHECK(4, 8, 0)
#define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))
#define SIMDE_VECTOR_OPS
#define SIMDE_VECTOR_NEGATE
#define SIMDE_VECTOR_SCALAR
#define SIMDE_VECTOR_SUBSCRIPT
#elif HEDLEY_INTEL_VERSION_CHECK(16, 0, 0)
#define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))
#define SIMDE_VECTOR_OPS
#define SIMDE_VECTOR_NEGATE
/* ICC only supports SIMDE_VECTOR_SCALAR for constants */
#define SIMDE_VECTOR_SUBSCRIPT
#elif HEDLEY_GCC_VERSION_CHECK(4, 1, 0) || HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
@ -197,8 +228,9 @@
#elif HEDLEY_HAS_ATTRIBUTE(vector_size)
#define SIMDE_VECTOR(size) __attribute__((__vector_size__(size)))
#define SIMDE_VECTOR_OPS
#define SIMDE_VECTOR_NEGATE
#define SIMDE_VECTOR_SUBSCRIPT
#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) /* clang 4.0 */
#if SIMDE_DETECT_CLANG_VERSION_CHECK(5, 0, 0)
#define SIMDE_VECTOR_SCALAR
#endif
#endif
@ -281,27 +313,34 @@ HEDLEY_DIAGNOSTIC_POP
#endif
#if defined(SIMDE_ENABLE_OPENMP)
#define SIMDE_VECTORIZE _Pragma("omp simd")
#define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd)
#define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l))
#if defined(__clang__)
#define SIMDE_VECTORIZE_REDUCTION(r) \
HEDLEY_DIAGNOSTIC_PUSH \
_Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \
HEDLEY_PRAGMA(omp simd reduction(r)) HEDLEY_DIAGNOSTIC_POP
#else
#define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r))
#endif
#define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a))
#elif defined(SIMDE_ENABLE_CILKPLUS)
#define SIMDE_VECTORIZE _Pragma("simd")
#define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd)
#define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l))
#define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r))
#define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a))
#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION)
#define SIMDE_VECTORIZE _Pragma("clang loop vectorize(enable)")
#define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable))
#define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l))
#define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE
#define SIMDE_VECTORIZE_ALIGNED(a)
#elif HEDLEY_GCC_VERSION_CHECK(4, 9, 0)
#define SIMDE_VECTORIZE _Pragma("GCC ivdep")
#define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep)
#define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE
#define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE
#define SIMDE_VECTORIZE_ALIGNED(a)
#elif HEDLEY_CRAY_VERSION_CHECK(5, 0, 0)
#define SIMDE_VECTORIZE _Pragma("_CRI ivdep")
#define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep)
#define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE
#define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE
#define SIMDE_VECTORIZE_ALIGNED(a)
@ -350,20 +389,10 @@ HEDLEY_DIAGNOSTIC_POP
HEDLEY_DIAGNOSTIC_POP
#endif
#if HEDLEY_HAS_WARNING("-Wpedantic")
#define SIMDE_DIAGNOSTIC_DISABLE_INT128 \
_Pragma("clang diagnostic ignored \"-Wpedantic\"")
#elif defined(HEDLEY_GCC_VERSION)
#define SIMDE_DIAGNOSTIC_DISABLE_INT128 \
_Pragma("GCC diagnostic ignored \"-Wpedantic\"")
#else
#define SIMDE_DIAGNOSTIC_DISABLE_INT128
#endif
#if defined(__SIZEOF_INT128__)
#define SIMDE_HAVE_INT128_
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DIAGNOSTIC_DISABLE_INT128
SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_
typedef __int128 simde_int128;
typedef unsigned __int128 simde_uint128;
HEDLEY_DIAGNOSTIC_POP
@ -488,39 +517,6 @@ typedef SIMDE_FLOAT32_TYPE simde_float32;
#endif
typedef SIMDE_FLOAT64_TYPE simde_float64;
/* Whether to assume that the compiler can auto-vectorize reasonably
well. This will cause SIMDe to attempt to compose vector
operations using more simple vector operations instead of minimize
serial work.
As an example, consider the _mm_add_ss(a, b) function from SSE,
which returns { a0 + b0, a1, a2, a3 }. This pattern is repeated
for other operations (sub, mul, etc.).
The naïve implementation would result in loading a0 and b0, adding
them into a temporary variable, then splicing that value into a new
vector with the remaining elements from a.
On platforms which support vectorization, it's generally faster to
simply perform the operation on the entire vector to avoid having
to move data between SIMD registers and non-SIMD registers.
Basically, instead of the temporary variable being (a0 + b0) it
would be a vector of (a + b), which is then combined with a to form
the result.
By default, SIMDe will prefer the pure-vector versions if we detect
a vector ISA extension, but this can be overridden by defining
SIMDE_NO_ASSUME_VECTORIZATION. You can also define
SIMDE_ASSUME_VECTORIZATION if you want to force SIMDe to use the
vectorized version. */
#if !defined(SIMDE_NO_ASSUME_VECTORIZATION) && \
!defined(SIMDE_ASSUME_VECTORIZATION)
#if defined(__SSE__) || defined(__ARM_NEON) || defined(__mips_msa) || \
defined(__ALTIVEC__) || defined(__wasm_simd128__)
#define SIMDE_ASSUME_VECTORIZATION
#endif
#endif
#if HEDLEY_HAS_WARNING("-Wbad-function-cast")
#define SIMDE_CONVERT_FTOI(T, v) \
HEDLEY_DIAGNOSTIC_PUSH \
@ -530,11 +526,18 @@ typedef SIMDE_FLOAT64_TYPE simde_float64;
#define SIMDE_CONVERT_FTOI(T, v) ((T)(v))
#endif
/* TODO: detect compilers which support this outside of C11 mode */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
#define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) \
(_Generic((value), to : (value), from : ((to)(value))))
_Generic((value), to \
: (value), default \
: (_Generic((value), from \
: ((to)(value)))))
#define SIMDE_CHECKED_STATIC_CAST(to, from, value) \
(_Generic((value), to : (value), from : ((to)(value))))
_Generic((value), to \
: (value), default \
: (_Generic((value), from \
: ((to)(value)))))
#else
#define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) \
HEDLEY_REINTERPRET_CAST(to, value)
@ -564,7 +567,7 @@ typedef SIMDE_FLOAT64_TYPE simde_float64;
#if defined(__STDC_HOSTED__)
#define SIMDE_STDC_HOSTED __STDC_HOSTED__
#else
#if defined(HEDLEY_PGI_VERSION_CHECK) || defined(HEDLEY_MSVC_VERSION_CHECK)
#if defined(HEDLEY_PGI_VERSION) || defined(HEDLEY_MSVC_VERSION)
#define SIMDE_STDC_HOSTED 1
#else
#define SIMDE_STDC_HOSTED 0
@ -572,23 +575,34 @@ typedef SIMDE_FLOAT64_TYPE simde_float64;
#endif
/* Try to deal with environments without a standard library. */
#if !defined(simde_memcpy) || !defined(simde_memset)
#if !defined(SIMDE_NO_STRING_H) && defined(__has_include)
#if __has_include(<string.h>)
#include <string.h>
#if !defined(simde_memcpy)
#define simde_memcpy(dest, src, n) memcpy(dest, src, n)
#if HEDLEY_HAS_BUILTIN(__builtin_memcpy)
#define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n)
#endif
#endif
#if !defined(simde_memset)
#define simde_memset(s, c, n) memset(s, c, n)
#if HEDLEY_HAS_BUILTIN(__builtin_memset)
#define simde_memset(s, c, n) __builtin_memset(s, c, n)
#endif
#else
#endif
#if !defined(simde_memcmp)
#if HEDLEY_HAS_BUILTIN(__builtin_memcmp)
#define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n)
#endif
#endif
#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp)
#if !defined(SIMDE_NO_STRING_H)
#if defined(__has_include)
#if !__has_include(<string.h>)
#define SIMDE_NO_STRING_H
#endif
#elif (SIMDE_STDC_HOSTED == 0)
#define SIMDE_NO_STRING_H
#endif
#endif
#endif
#if !defined(simde_memcpy) || !defined(simde_memset)
#if !defined(SIMDE_NO_STRING_H) && (SIMDE_STDC_HOSTED == 1)
#if !defined(SIMDE_NO_STRING_H)
#include <string.h>
#if !defined(simde_memcpy)
#define simde_memcpy(dest, src, n) memcpy(dest, src, n)
@ -596,14 +610,8 @@ typedef SIMDE_FLOAT64_TYPE simde_float64;
#if !defined(simde_memset)
#define simde_memset(s, c, n) memset(s, c, n)
#endif
#elif (HEDLEY_HAS_BUILTIN(__builtin_memcpy) && \
HEDLEY_HAS_BUILTIN(__builtin_memset)) || \
HEDLEY_GCC_VERSION_CHECK(4, 2, 0)
#if !defined(simde_memcpy)
#define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n)
#endif
#if !defined(simde_memset)
#define simde_memset(s, c, n) __builtin_memset(s, c, n)
#if !defined(simde_memcmp)
#define simde_memcmp(s1, s2, n) memcmp(s1, s2, n)
#endif
#else
/* These are meant to be portable, not fast. If you're hitting them you
@ -637,10 +645,24 @@ void simde_memset_(void *s, int c, size_t len)
}
#define simde_memset(s, c, n) simde_memset_(s, c, n)
#endif
#endif /* !defined(SIMDE_NO_STRING_H) && (SIMDE_STDC_HOSTED == 1) */
#endif /* !defined(simde_memcpy) || !defined(simde_memset) */
#include "simde-math.h"
#if !defined(simde_memcmp)
SIMDE_FUCTION_ATTRIBUTES
int simde_memcmp_(const void *s1, const void *s2, size_t n)
{
unsigned char *s1_ = HEDLEY_STATIC_CAST(unsigned char *, s1);
unsigned char *s2_ = HEDLEY_STATIC_CAST(unsigned char *, s2);
for (size_t i = 0; i < len; i++) {
if (s1_[i] != s2_[i]) {
return (int)(s1_[i] - s2_[i]);
}
}
return 0;
}
#define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n)
#endif
#endif
#endif
#if defined(FE_ALL_EXCEPT)
#define SIMDE_HAVE_FENV_H
@ -682,6 +704,105 @@ void simde_memset_(void *s, int c, size_t len)
#include "check.h"
/* GCC/clang have a bunch of functionality in builtins which we would
* like to access, but the suffixes indicate whether they operate on
* int, long, or long long, not fixed width types (e.g., int32_t).
* We use these macros to attempt to map from fixed-width to the
* names GCC uses. Note that you should still cast the input(s) and
* return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if
* types are the same size they may not be compatible according to the
* compiler. For example, on x86 long and long long are generally
* both 64 bits, but platforms vary on whether an int64_t is mapped
* to a long or long long. */
#include <limits.h>
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_
/* For each fixed width (8, 16, 32, 64) pick the first of int, long and
 * long long whose limits equal those of intN_t.  The chosen type is
 * recorded in SIMDE_BUILTIN_TYPE_N_ and the matching builtin suffix
 * (empty, l or ll) in SIMDE_BUILTIN_SUFFIX_N_.  If none of the three
 * types match, neither macro is defined for that width. */
#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN)
#define SIMDE_BUILTIN_SUFFIX_8_
#define SIMDE_BUILTIN_TYPE_8_ int
#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN)
#define SIMDE_BUILTIN_SUFFIX_8_ l
#define SIMDE_BUILTIN_TYPE_8_ long
#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN)
#define SIMDE_BUILTIN_SUFFIX_8_ ll
#define SIMDE_BUILTIN_TYPE_8_ long long
#endif
#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN)
#define SIMDE_BUILTIN_SUFFIX_16_
#define SIMDE_BUILTIN_TYPE_16_ int
#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN)
#define SIMDE_BUILTIN_SUFFIX_16_ l
#define SIMDE_BUILTIN_TYPE_16_ long
#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN)
#define SIMDE_BUILTIN_SUFFIX_16_ ll
#define SIMDE_BUILTIN_TYPE_16_ long long
#endif
#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN)
#define SIMDE_BUILTIN_SUFFIX_32_
#define SIMDE_BUILTIN_TYPE_32_ int
#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN)
#define SIMDE_BUILTIN_SUFFIX_32_ l
#define SIMDE_BUILTIN_TYPE_32_ long
#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN)
#define SIMDE_BUILTIN_SUFFIX_32_ ll
#define SIMDE_BUILTIN_TYPE_32_ long long
#endif
#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN)
#define SIMDE_BUILTIN_SUFFIX_64_
#define SIMDE_BUILTIN_TYPE_64_ int
#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN)
#define SIMDE_BUILTIN_SUFFIX_64_ l
#define SIMDE_BUILTIN_TYPE_64_ long
#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN)
#define SIMDE_BUILTIN_SUFFIX_64_ ll
#define SIMDE_BUILTIN_TYPE_64_ long long
#endif
/* SIMDE_BUILTIN_N_(name) builds an identifier from "__builtin_", name
 * and the suffix selected above (via HEDLEY_CONCAT3).
 * SIMDE_BUILTIN_HAS_N_(name) passes that same identifier to
 * HEDLEY_HAS_BUILTIN.  When no suffix was selected for a width,
 * SIMDE_BUILTIN_N_ is left undefined and SIMDE_BUILTIN_HAS_N_ is 0. */
#if defined(SIMDE_BUILTIN_SUFFIX_8_)
#define SIMDE_BUILTIN_8_(name) \
HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)
#define SIMDE_BUILTIN_HAS_8_(name) \
HEDLEY_HAS_BUILTIN( \
HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_))
#else
#define SIMDE_BUILTIN_HAS_8_(name) 0
#endif
#if defined(SIMDE_BUILTIN_SUFFIX_16_)
#define SIMDE_BUILTIN_16_(name) \
HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)
#define SIMDE_BUILTIN_HAS_16_(name) \
HEDLEY_HAS_BUILTIN( \
HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_))
#else
#define SIMDE_BUILTIN_HAS_16_(name) 0
#endif
#if defined(SIMDE_BUILTIN_SUFFIX_32_)
#define SIMDE_BUILTIN_32_(name) \
HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)
#define SIMDE_BUILTIN_HAS_32_(name) \
HEDLEY_HAS_BUILTIN( \
HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_))
#else
#define SIMDE_BUILTIN_HAS_32_(name) 0
#endif
#if defined(SIMDE_BUILTIN_SUFFIX_64_)
#define SIMDE_BUILTIN_64_(name) \
HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)
#define SIMDE_BUILTIN_HAS_64_(name) \
HEDLEY_HAS_BUILTIN( \
HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_))
#else
#define SIMDE_BUILTIN_HAS_64_(name) 0
#endif
HEDLEY_DIAGNOSTIC_POP
/* Sometimes we run into problems with specific versions of compilers
which make the native versions unusable for us. Often this is due
to missing functions, sometimes buggy implementations, etc. These
@ -712,29 +833,75 @@ void simde_memset_(void *s, int c, size_t len)
#if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)
#define SIMDE_BUG_GCC_94482
#endif
#if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || \
defined(SIMDE_ARCH_SYSTEMZ)
#define SIMDE_BUG_GCC_53784
#endif
#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)
#if HEDLEY_GCC_VERSION_CHECK(4, 3, 0) /* -Wsign-conversion */
#define SIMDE_BUG_GCC_95144
#endif
#endif
#if !HEDLEY_GCC_VERSION_CHECK(9, 4, 0) && defined(SIMDE_ARCH_AARCH64)
#define SIMDE_BUG_GCC_94488
#endif
#if defined(SIMDE_ARCH_POWER)
#if defined(SIMDE_ARCH_ARM)
#define SIMDE_BUG_GCC_95399
#define SIMDE_BUG_GCC_95471
#elif defined(SIMDE_ARCH_POWER)
#define SIMDE_BUG_GCC_95227
#define SIMDE_BUG_GCC_95782
#elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)
#if !HEDLEY_GCC_VERSION_CHECK(10, 2, 0) && !defined(__OPTIMIZE__)
#define SIMDE_BUG_GCC_96174
#endif
#endif
#define SIMDE_BUG_GCC_95399
#elif defined(__clang__)
#if defined(SIMDE_ARCH_AARCH64)
#define SIMDE_BUG_CLANG_45541
#define SIMDE_BUG_CLANG_46844
#define SIMDE_BUG_CLANG_48257
#if SIMDE_DETECT_CLANG_VERSION_CHECK(10, 0, 0) && \
SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0)
#define SIMDE_BUG_CLANG_BAD_VI64_OPS
#endif
#endif
#if defined(HEDLEY_EMSCRIPTEN_VERSION)
#define SIMDE_BUG_EMSCRIPTEN_MISSING_IMPL /* Placeholder for (as yet) unfiled issues. */
#define SIMDE_BUG_EMSCRIPTEN_5242
#if defined(SIMDE_ARCH_POWER)
#define SIMDE_BUG_CLANG_46770
#endif
#if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0) && \
!defined(__OPTIMIZE__)
#define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT
#endif
#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64)
#if HEDLEY_HAS_WARNING("-Wsign-conversion") && \
SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0)
#define SIMDE_BUG_CLANG_45931
#endif
#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0)
#define SIMDE_BUG_CLANG_44589
#endif
#endif
#define SIMDE_BUG_CLANG_45959
#elif defined(HEDLEY_MSVC_VERSION)
#if defined(SIMDE_ARCH_X86)
#define SIMDE_BUG_MSVC_ROUND_EXTRACT
#endif
#elif defined(HEDLEY_INTEL_VERSION)
#define SIMDE_BUG_INTEL_857088
#endif
#endif
/* GCC and Clang both have the same issue:
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144
* https://bugs.llvm.org/show_bug.cgi?id=45931
* This is just an easy way to work around it.
*/
#if HEDLEY_HAS_WARNING("-Wsign-conversion") || HEDLEY_GCC_VERSION_CHECK(4, 3, 0)
#if (HEDLEY_HAS_WARNING("-Wsign-conversion") && \
SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0)) || \
HEDLEY_GCC_VERSION_CHECK(4, 3, 0)
#define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) \
(__extension__({ \
HEDLEY_DIAGNOSTIC_PUSH \

View File

@ -0,0 +1,925 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
*/
/* Constify macros. For internal use only.
*
* These are used to make it possible to call a function which takes
* an Integer Constant Expression (ICE) using a compile time constant.
* Technically it would also be possible to use a value not trivially
* known by the compiler, but there would be a significant performance
* hit (a switch is used).
*
* The basic idea is pretty simple; we just emit a do while loop which
* contains a switch with a case for every possible value of the
* constant.
*
* As long as the value you pass to the function is constant, pretty
* much any compiler shouldn't have a problem generating exactly the
* same code as if you had used an ICE.
*
* This is intended to be used in the SIMDe implementations of
* functions the compilers require to be an ICE, but the other benefit
* is that if we also disable the warnings from
* SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests
* to use non-ICE parameters
*/
#if !defined(SIMDE_CONSTIFY_H)
#define SIMDE_CONSTIFY_H
#include "simde-diagnostic.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_
SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_
/* Two-way dispatcher.  For imm equal to 0 or 1, calls
 * func_name(__VA_ARGS__, <literal imm>) and assigns the value to
 * result; for any other imm, assigns default_case to result. */
#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \
    do { \
        switch (imm) { \
        case 0: result = func_name(__VA_ARGS__, 0); break; \
        case 1: result = func_name(__VA_ARGS__, 1); break; \
        default: result = default_case; break; \
        } \
    } while (0)
/* Four-way dispatcher.  For imm in 0..3, calls
 * func_name(__VA_ARGS__, <literal imm>) and assigns the value to
 * result; for any other imm, assigns default_case to result. */
#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \
    do { \
        switch (imm) { \
        case 0: result = func_name(__VA_ARGS__, 0); break; \
        case 1: result = func_name(__VA_ARGS__, 1); break; \
        case 2: result = func_name(__VA_ARGS__, 2); break; \
        case 3: result = func_name(__VA_ARGS__, 3); break; \
        default: result = default_case; break; \
        } \
    } while (0)
/* Eight-way dispatcher.  For imm in 0..7, calls
 * func_name(__VA_ARGS__, <literal imm>) and assigns the value to
 * result; for any other imm, assigns default_case to result. */
#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \
    do { \
        switch (imm) { \
        case 0: result = func_name(__VA_ARGS__, 0); break; \
        case 1: result = func_name(__VA_ARGS__, 1); break; \
        case 2: result = func_name(__VA_ARGS__, 2); break; \
        case 3: result = func_name(__VA_ARGS__, 3); break; \
        case 4: result = func_name(__VA_ARGS__, 4); break; \
        case 5: result = func_name(__VA_ARGS__, 5); break; \
        case 6: result = func_name(__VA_ARGS__, 6); break; \
        case 7: result = func_name(__VA_ARGS__, 7); break; \
        default: result = default_case; break; \
        } \
    } while (0)
/* Sixteen-way dispatcher.  For imm in 0..15, calls
 * func_name(__VA_ARGS__, <literal imm>) and assigns the value to
 * result; for any other imm, assigns default_case to result. */
#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) \
    do { \
        switch (imm) { \
        case 0: result = func_name(__VA_ARGS__, 0); break; \
        case 1: result = func_name(__VA_ARGS__, 1); break; \
        case 2: result = func_name(__VA_ARGS__, 2); break; \
        case 3: result = func_name(__VA_ARGS__, 3); break; \
        case 4: result = func_name(__VA_ARGS__, 4); break; \
        case 5: result = func_name(__VA_ARGS__, 5); break; \
        case 6: result = func_name(__VA_ARGS__, 6); break; \
        case 7: result = func_name(__VA_ARGS__, 7); break; \
        case 8: result = func_name(__VA_ARGS__, 8); break; \
        case 9: result = func_name(__VA_ARGS__, 9); break; \
        case 10: result = func_name(__VA_ARGS__, 10); break; \
        case 11: result = func_name(__VA_ARGS__, 11); break; \
        case 12: result = func_name(__VA_ARGS__, 12); break; \
        case 13: result = func_name(__VA_ARGS__, 13); break; \
        case 14: result = func_name(__VA_ARGS__, 14); break; \
        case 15: result = func_name(__VA_ARGS__, 15); break; \
        default: result = default_case; break; \
        } \
    } while (0)
/* Thirty-two-way dispatcher.  For imm in 0..31, calls
 * func_name(__VA_ARGS__, <literal imm>) and assigns the value to
 * result; for any other imm, assigns default_case to result. */
#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \
    do { \
        switch (imm) { \
        case 0: result = func_name(__VA_ARGS__, 0); break; \
        case 1: result = func_name(__VA_ARGS__, 1); break; \
        case 2: result = func_name(__VA_ARGS__, 2); break; \
        case 3: result = func_name(__VA_ARGS__, 3); break; \
        case 4: result = func_name(__VA_ARGS__, 4); break; \
        case 5: result = func_name(__VA_ARGS__, 5); break; \
        case 6: result = func_name(__VA_ARGS__, 6); break; \
        case 7: result = func_name(__VA_ARGS__, 7); break; \
        case 8: result = func_name(__VA_ARGS__, 8); break; \
        case 9: result = func_name(__VA_ARGS__, 9); break; \
        case 10: result = func_name(__VA_ARGS__, 10); break; \
        case 11: result = func_name(__VA_ARGS__, 11); break; \
        case 12: result = func_name(__VA_ARGS__, 12); break; \
        case 13: result = func_name(__VA_ARGS__, 13); break; \
        case 14: result = func_name(__VA_ARGS__, 14); break; \
        case 15: result = func_name(__VA_ARGS__, 15); break; \
        case 16: result = func_name(__VA_ARGS__, 16); break; \
        case 17: result = func_name(__VA_ARGS__, 17); break; \
        case 18: result = func_name(__VA_ARGS__, 18); break; \
        case 19: result = func_name(__VA_ARGS__, 19); break; \
        case 20: result = func_name(__VA_ARGS__, 20); break; \
        case 21: result = func_name(__VA_ARGS__, 21); break; \
        case 22: result = func_name(__VA_ARGS__, 22); break; \
        case 23: result = func_name(__VA_ARGS__, 23); break; \
        case 24: result = func_name(__VA_ARGS__, 24); break; \
        case 25: result = func_name(__VA_ARGS__, 25); break; \
        case 26: result = func_name(__VA_ARGS__, 26); break; \
        case 27: result = func_name(__VA_ARGS__, 27); break; \
        case 28: result = func_name(__VA_ARGS__, 28); break; \
        case 29: result = func_name(__VA_ARGS__, 29); break; \
        case 30: result = func_name(__VA_ARGS__, 30); break; \
        case 31: result = func_name(__VA_ARGS__, 31); break; \
        default: result = default_case; break; \
        } \
    } while (0)
/* Sixty-four-way dispatcher.  For imm in 0..63, calls
 * func_name(__VA_ARGS__, <literal imm>) and assigns the value to
 * result; for any other imm, assigns default_case to result. */
#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) \
    do { \
        switch (imm) { \
        case 0: result = func_name(__VA_ARGS__, 0); break; \
        case 1: result = func_name(__VA_ARGS__, 1); break; \
        case 2: result = func_name(__VA_ARGS__, 2); break; \
        case 3: result = func_name(__VA_ARGS__, 3); break; \
        case 4: result = func_name(__VA_ARGS__, 4); break; \
        case 5: result = func_name(__VA_ARGS__, 5); break; \
        case 6: result = func_name(__VA_ARGS__, 6); break; \
        case 7: result = func_name(__VA_ARGS__, 7); break; \
        case 8: result = func_name(__VA_ARGS__, 8); break; \
        case 9: result = func_name(__VA_ARGS__, 9); break; \
        case 10: result = func_name(__VA_ARGS__, 10); break; \
        case 11: result = func_name(__VA_ARGS__, 11); break; \
        case 12: result = func_name(__VA_ARGS__, 12); break; \
        case 13: result = func_name(__VA_ARGS__, 13); break; \
        case 14: result = func_name(__VA_ARGS__, 14); break; \
        case 15: result = func_name(__VA_ARGS__, 15); break; \
        case 16: result = func_name(__VA_ARGS__, 16); break; \
        case 17: result = func_name(__VA_ARGS__, 17); break; \
        case 18: result = func_name(__VA_ARGS__, 18); break; \
        case 19: result = func_name(__VA_ARGS__, 19); break; \
        case 20: result = func_name(__VA_ARGS__, 20); break; \
        case 21: result = func_name(__VA_ARGS__, 21); break; \
        case 22: result = func_name(__VA_ARGS__, 22); break; \
        case 23: result = func_name(__VA_ARGS__, 23); break; \
        case 24: result = func_name(__VA_ARGS__, 24); break; \
        case 25: result = func_name(__VA_ARGS__, 25); break; \
        case 26: result = func_name(__VA_ARGS__, 26); break; \
        case 27: result = func_name(__VA_ARGS__, 27); break; \
        case 28: result = func_name(__VA_ARGS__, 28); break; \
        case 29: result = func_name(__VA_ARGS__, 29); break; \
        case 30: result = func_name(__VA_ARGS__, 30); break; \
        case 31: result = func_name(__VA_ARGS__, 31); break; \
        case 32: result = func_name(__VA_ARGS__, 32); break; \
        case 33: result = func_name(__VA_ARGS__, 33); break; \
        case 34: result = func_name(__VA_ARGS__, 34); break; \
        case 35: result = func_name(__VA_ARGS__, 35); break; \
        case 36: result = func_name(__VA_ARGS__, 36); break; \
        case 37: result = func_name(__VA_ARGS__, 37); break; \
        case 38: result = func_name(__VA_ARGS__, 38); break; \
        case 39: result = func_name(__VA_ARGS__, 39); break; \
        case 40: result = func_name(__VA_ARGS__, 40); break; \
        case 41: result = func_name(__VA_ARGS__, 41); break; \
        case 42: result = func_name(__VA_ARGS__, 42); break; \
        case 43: result = func_name(__VA_ARGS__, 43); break; \
        case 44: result = func_name(__VA_ARGS__, 44); break; \
        case 45: result = func_name(__VA_ARGS__, 45); break; \
        case 46: result = func_name(__VA_ARGS__, 46); break; \
        case 47: result = func_name(__VA_ARGS__, 47); break; \
        case 48: result = func_name(__VA_ARGS__, 48); break; \
        case 49: result = func_name(__VA_ARGS__, 49); break; \
        case 50: result = func_name(__VA_ARGS__, 50); break; \
        case 51: result = func_name(__VA_ARGS__, 51); break; \
        case 52: result = func_name(__VA_ARGS__, 52); break; \
        case 53: result = func_name(__VA_ARGS__, 53); break; \
        case 54: result = func_name(__VA_ARGS__, 54); break; \
        case 55: result = func_name(__VA_ARGS__, 55); break; \
        case 56: result = func_name(__VA_ARGS__, 56); break; \
        case 57: result = func_name(__VA_ARGS__, 57); break; \
        case 58: result = func_name(__VA_ARGS__, 58); break; \
        case 59: result = func_name(__VA_ARGS__, 59); break; \
        case 60: result = func_name(__VA_ARGS__, 60); break; \
        case 61: result = func_name(__VA_ARGS__, 61); break; \
        case 62: result = func_name(__VA_ARGS__, 62); break; \
        case 63: result = func_name(__VA_ARGS__, 63); break; \
        default: result = default_case; break; \
        } \
    } while (0)
/* Statement-only two-way dispatcher.  For imm equal to 0 or 1, calls
 * func_name(__VA_ARGS__, <literal imm>) and discards any value; for
 * any other imm, evaluates default_case as a statement. */
#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \
    do { \
        switch (imm) { \
        case 0: func_name(__VA_ARGS__, 0); break; \
        case 1: func_name(__VA_ARGS__, 1); break; \
        default: default_case; break; \
        } \
    } while (0)
/* Statement-only four-way dispatcher.  For imm in 0..3, calls
 * func_name(__VA_ARGS__, <literal imm>) and discards any value; for
 * any other imm, evaluates default_case as a statement. */
#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \
    do { \
        switch (imm) { \
        case 0: func_name(__VA_ARGS__, 0); break; \
        case 1: func_name(__VA_ARGS__, 1); break; \
        case 2: func_name(__VA_ARGS__, 2); break; \
        case 3: func_name(__VA_ARGS__, 3); break; \
        default: default_case; break; \
        } \
    } while (0)
/* Statement-only eight-way dispatcher.  For imm in 0..7, calls
 * func_name(__VA_ARGS__, <literal imm>) and discards any value; for
 * any other imm, evaluates default_case as a statement. */
#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \
    do { \
        switch (imm) { \
        case 0: func_name(__VA_ARGS__, 0); break; \
        case 1: func_name(__VA_ARGS__, 1); break; \
        case 2: func_name(__VA_ARGS__, 2); break; \
        case 3: func_name(__VA_ARGS__, 3); break; \
        case 4: func_name(__VA_ARGS__, 4); break; \
        case 5: func_name(__VA_ARGS__, 5); break; \
        case 6: func_name(__VA_ARGS__, 6); break; \
        case 7: func_name(__VA_ARGS__, 7); break; \
        default: default_case; break; \
        } \
    } while (0)
/* Statement-only sixteen-way dispatcher.  For imm in 0..15, calls
 * func_name(__VA_ARGS__, <literal imm>) and discards any value; for
 * any other imm, evaluates default_case as a statement. */
#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \
    do { \
        switch (imm) { \
        case 0: func_name(__VA_ARGS__, 0); break; \
        case 1: func_name(__VA_ARGS__, 1); break; \
        case 2: func_name(__VA_ARGS__, 2); break; \
        case 3: func_name(__VA_ARGS__, 3); break; \
        case 4: func_name(__VA_ARGS__, 4); break; \
        case 5: func_name(__VA_ARGS__, 5); break; \
        case 6: func_name(__VA_ARGS__, 6); break; \
        case 7: func_name(__VA_ARGS__, 7); break; \
        case 8: func_name(__VA_ARGS__, 8); break; \
        case 9: func_name(__VA_ARGS__, 9); break; \
        case 10: func_name(__VA_ARGS__, 10); break; \
        case 11: func_name(__VA_ARGS__, 11); break; \
        case 12: func_name(__VA_ARGS__, 12); break; \
        case 13: func_name(__VA_ARGS__, 13); break; \
        case 14: func_name(__VA_ARGS__, 14); break; \
        case 15: func_name(__VA_ARGS__, 15); break; \
        default: default_case; break; \
        } \
    } while (0)
/* Statement-only thirty-two-way dispatcher.  For imm in 0..31, calls
 * func_name(__VA_ARGS__, <literal imm>) and discards any value; for
 * any other imm, evaluates default_case as a statement. */
#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) \
    do { \
        switch (imm) { \
        case 0: func_name(__VA_ARGS__, 0); break; \
        case 1: func_name(__VA_ARGS__, 1); break; \
        case 2: func_name(__VA_ARGS__, 2); break; \
        case 3: func_name(__VA_ARGS__, 3); break; \
        case 4: func_name(__VA_ARGS__, 4); break; \
        case 5: func_name(__VA_ARGS__, 5); break; \
        case 6: func_name(__VA_ARGS__, 6); break; \
        case 7: func_name(__VA_ARGS__, 7); break; \
        case 8: func_name(__VA_ARGS__, 8); break; \
        case 9: func_name(__VA_ARGS__, 9); break; \
        case 10: func_name(__VA_ARGS__, 10); break; \
        case 11: func_name(__VA_ARGS__, 11); break; \
        case 12: func_name(__VA_ARGS__, 12); break; \
        case 13: func_name(__VA_ARGS__, 13); break; \
        case 14: func_name(__VA_ARGS__, 14); break; \
        case 15: func_name(__VA_ARGS__, 15); break; \
        case 16: func_name(__VA_ARGS__, 16); break; \
        case 17: func_name(__VA_ARGS__, 17); break; \
        case 18: func_name(__VA_ARGS__, 18); break; \
        case 19: func_name(__VA_ARGS__, 19); break; \
        case 20: func_name(__VA_ARGS__, 20); break; \
        case 21: func_name(__VA_ARGS__, 21); break; \
        case 22: func_name(__VA_ARGS__, 22); break; \
        case 23: func_name(__VA_ARGS__, 23); break; \
        case 24: func_name(__VA_ARGS__, 24); break; \
        case 25: func_name(__VA_ARGS__, 25); break; \
        case 26: func_name(__VA_ARGS__, 26); break; \
        case 27: func_name(__VA_ARGS__, 27); break; \
        case 28: func_name(__VA_ARGS__, 28); break; \
        case 29: func_name(__VA_ARGS__, 29); break; \
        case 30: func_name(__VA_ARGS__, 30); break; \
        case 31: func_name(__VA_ARGS__, 31); break; \
        default: default_case; break; \
        } \
    } while (0)
/* Statement-only sixty-four-way dispatcher.  For imm in 0..63, calls
 * func_name(__VA_ARGS__, <literal imm>) and discards any value; for
 * any other imm, evaluates default_case as a statement. */
#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \
    do { \
        switch (imm) { \
        case 0: func_name(__VA_ARGS__, 0); break; \
        case 1: func_name(__VA_ARGS__, 1); break; \
        case 2: func_name(__VA_ARGS__, 2); break; \
        case 3: func_name(__VA_ARGS__, 3); break; \
        case 4: func_name(__VA_ARGS__, 4); break; \
        case 5: func_name(__VA_ARGS__, 5); break; \
        case 6: func_name(__VA_ARGS__, 6); break; \
        case 7: func_name(__VA_ARGS__, 7); break; \
        case 8: func_name(__VA_ARGS__, 8); break; \
        case 9: func_name(__VA_ARGS__, 9); break; \
        case 10: func_name(__VA_ARGS__, 10); break; \
        case 11: func_name(__VA_ARGS__, 11); break; \
        case 12: func_name(__VA_ARGS__, 12); break; \
        case 13: func_name(__VA_ARGS__, 13); break; \
        case 14: func_name(__VA_ARGS__, 14); break; \
        case 15: func_name(__VA_ARGS__, 15); break; \
        case 16: func_name(__VA_ARGS__, 16); break; \
        case 17: func_name(__VA_ARGS__, 17); break; \
        case 18: func_name(__VA_ARGS__, 18); break; \
        case 19: func_name(__VA_ARGS__, 19); break; \
        case 20: func_name(__VA_ARGS__, 20); break; \
        case 21: func_name(__VA_ARGS__, 21); break; \
        case 22: func_name(__VA_ARGS__, 22); break; \
        case 23: func_name(__VA_ARGS__, 23); break; \
        case 24: func_name(__VA_ARGS__, 24); break; \
        case 25: func_name(__VA_ARGS__, 25); break; \
        case 26: func_name(__VA_ARGS__, 26); break; \
        case 27: func_name(__VA_ARGS__, 27); break; \
        case 28: func_name(__VA_ARGS__, 28); break; \
        case 29: func_name(__VA_ARGS__, 29); break; \
        case 30: func_name(__VA_ARGS__, 30); break; \
        case 31: func_name(__VA_ARGS__, 31); break; \
        case 32: func_name(__VA_ARGS__, 32); break; \
        case 33: func_name(__VA_ARGS__, 33); break; \
        case 34: func_name(__VA_ARGS__, 34); break; \
        case 35: func_name(__VA_ARGS__, 35); break; \
        case 36: func_name(__VA_ARGS__, 36); break; \
        case 37: func_name(__VA_ARGS__, 37); break; \
        case 38: func_name(__VA_ARGS__, 38); break; \
        case 39: func_name(__VA_ARGS__, 39); break; \
        case 40: func_name(__VA_ARGS__, 40); break; \
        case 41: func_name(__VA_ARGS__, 41); break; \
        case 42: func_name(__VA_ARGS__, 42); break; \
        case 43: func_name(__VA_ARGS__, 43); break; \
        case 44: func_name(__VA_ARGS__, 44); break; \
        case 45: func_name(__VA_ARGS__, 45); break; \
        case 46: func_name(__VA_ARGS__, 46); break; \
        case 47: func_name(__VA_ARGS__, 47); break; \
        case 48: func_name(__VA_ARGS__, 48); break; \
        case 49: func_name(__VA_ARGS__, 49); break; \
        case 50: func_name(__VA_ARGS__, 50); break; \
        case 51: func_name(__VA_ARGS__, 51); break; \
        case 52: func_name(__VA_ARGS__, 52); break; \
        case 53: func_name(__VA_ARGS__, 53); break; \
        case 54: func_name(__VA_ARGS__, 54); break; \
        case 55: func_name(__VA_ARGS__, 55); break; \
        case 56: func_name(__VA_ARGS__, 56); break; \
        case 57: func_name(__VA_ARGS__, 57); break; \
        case 58: func_name(__VA_ARGS__, 58); break; \
        case 59: func_name(__VA_ARGS__, 59); break; \
        case 60: func_name(__VA_ARGS__, 60); break; \
        case 61: func_name(__VA_ARGS__, 61); break; \
        case 62: func_name(__VA_ARGS__, 62); break; \
        case 63: func_name(__VA_ARGS__, 63); break; \
        default: default_case; break; \
        } \
    } while (0)
HEDLEY_DIAGNOSTIC_POP
#endif

View File

@ -0,0 +1,114 @@
/* Detect Clang Version
* Created by Evan Nemerson <evan@nemerson.com>
*
* To the extent possible under law, the author(s) have dedicated all
* copyright and related and neighboring rights to this software to
* the public domain worldwide. This software is distributed without
* any warranty.
*
* For details, see <http://creativecommons.org/publicdomain/zero/1.0/>.
* SPDX-License-Identifier: CC0-1.0
*/
/* This file was originally part of SIMDe
* (<https://github.com/simd-everywhere/simde>). You're free to do with it as
* you please, but I do have a few small requests:
*
* * If you make improvements, please submit them back to SIMDe
* (at <https://github.com/simd-everywhere/simde/issues>) so others can
* benefit from them.
* * Please keep a link to SIMDe intact so people know where to submit
* improvements.
* * If you expose it publicly, please change the SIMDE_ prefix to
* something specific to your project.
*
* The version numbers clang exposes (in the __clang_major__,
* __clang_minor__, and __clang_patchlevel__ macros) are unreliable.
* Vendors such as Apple will define these values to their version
* numbers; for example, "Apple Clang 4.0" is really clang 3.1, but
* __clang_major__ and __clang_minor__ are defined to 4 and 0
* respectively, instead of 3 and 1.
*
* The solution is *usually* to use clang's feature detection macros
* (<https://clang.llvm.org/docs/LanguageExtensions.html#feature-checking-macros>)
* to determine if the feature you're interested in is available. This
* generally works well, and it should probably be the first thing you
* try. Unfortunately, it's not possible to check for everything. In
* particular, compiler bugs.
*
* This file just uses the feature checking macros to detect features
* added in specific versions of clang to identify which version of
* clang the compiler is based on.
*
* Right now it only goes back to 3.6, but I'm happy to accept patches
* to go back further. And, of course, newer versions are welcome if
* they're not already present, and if you find a way to detect a point
* release that would be great, too!
*/
#if !defined(SIMDE_DETECT_CLANG_H)
#define SIMDE_DETECT_CLANG_H 1
/* Attempt to detect the upstream clang version number. I usually only
* worry about major version numbers (at least for 4.0+), but if you
* need more resolution I'm happy to accept patches that are able to
* detect minor versions as well. That said, you'll probably have a
* hard time with detection since AFAIK most minor releases don't add
* anything we can detect. */
/* Runs only for clang-based compilers, and only when the user has not
 * already defined SIMDE_DETECT_CLANG_VERSION.  The probes are ordered
 * from the newest recognized release (12) down to the oldest (3.6) and
 * the first one that succeeds wins, so the order of the branches
 * matters.  The recorded value is major * 10000 + minor * 100, e.g.
 * 30900 for 3.9. */
#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION)
#if __has_warning("-Wformat-insufficient-args")
#define SIMDE_DETECT_CLANG_VERSION 120000
#elif __has_warning("-Wimplicit-const-int-float-conversion")
#define SIMDE_DETECT_CLANG_VERSION 110000
#elif __has_warning("-Wmisleading-indentation")
#define SIMDE_DETECT_CLANG_VERSION 100000
#elif defined(__FILE_NAME__)
#define SIMDE_DETECT_CLANG_VERSION 90000
#elif __has_warning("-Wextra-semi-stmt") || \
__has_builtin(__builtin_rotateleft32)
#define SIMDE_DETECT_CLANG_VERSION 80000
#elif __has_warning("-Wc++98-compat-extra-semi")
#define SIMDE_DETECT_CLANG_VERSION 70000
#elif __has_warning("-Wpragma-pack")
#define SIMDE_DETECT_CLANG_VERSION 60000
#elif __has_warning("-Wbitfield-enum-conversion")
#define SIMDE_DETECT_CLANG_VERSION 50000
#elif __has_attribute(diagnose_if)
#define SIMDE_DETECT_CLANG_VERSION 40000
#elif __has_warning("-Wcast-calling-convention")
#define SIMDE_DETECT_CLANG_VERSION 30900
#elif __has_warning("-WCL4")
#define SIMDE_DETECT_CLANG_VERSION 30800
#elif __has_warning("-WIndependentClass-attribute")
#define SIMDE_DETECT_CLANG_VERSION 30700
#elif __has_warning("-Wambiguous-ellipsis")
#define SIMDE_DETECT_CLANG_VERSION 30600
#else
/* No probe matched: fall back to a minimal non-zero value so the macro
 * is still defined for clang. */
#define SIMDE_DETECT_CLANG_VERSION 1
#endif
#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */
/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty
* straightforward; it returns true if the compiler is a derivative
* of clang >= the specified version.
*
* Since this file is often (primarily?) useful for working around bugs
* it is also helpful to have a macro which returns true if and only if the
* compiler is a version of clang *older* than the specified version to
* make it a bit easier to ifdef regions to add code for older versions,
* such as pragmas to disable a specific warning. */
/* SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) is non-zero
 * when the detected clang version is >= the requested one;
 * SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) is non-zero
 * when it is strictly older.
 *
 * The requested version is encoded the same way as the values assigned
 * to SIMDE_DETECT_CLANG_VERSION, i.e.
 * (major * 10000) + (minor * 100) + revision, so 3.9.0 is 30900.
 * Every argument is parenthesized so expressions can be passed safely.
 *
 * When SIMDE_DETECT_CLANG_VERSION is not defined, _CHECK always
 * evaluates to 0 and _NOT always evaluates to 1. */
#if defined(SIMDE_DETECT_CLANG_VERSION)
#define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) \
    (SIMDE_DETECT_CLANG_VERSION >= \
     (((major) * 10000) + ((minor) * 100) + (revision)))
#define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) \
    (SIMDE_DETECT_CLANG_VERSION < \
     (((major) * 10000) + ((minor) * 100) + (revision)))
#else
#define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0)
#define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (1)
#endif
#endif /* !defined(SIMDE_DETECT_CLANG_H) */

View File

@ -45,8 +45,10 @@
*/
#if !defined(SIMDE_DIAGNOSTIC_H)
#define SIMDE_DIAGNOSTIC_H
#include "hedley.h"
#include "simde-detect-clang.h"
/* This is only to help us implement functions like _mm_undefined_ps. */
#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_)
@ -119,6 +121,9 @@
#define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_
#endif
/* MSVC emits a diagnostic when we call a function (like
* simde_mm_set_epi32) while initializing a struct. We currently do
* this a *lot* in the tests. */
#if defined(HEDLEY_MSVC_VERSION)
#define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \
__pragma(warning(disable : 4204))
@ -183,6 +188,32 @@
#define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_
#endif
/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro
 * before we can access certain SIMD intrinsics, but this diagnostic
 * warns about it being a reserved name. It is a reserved name, but
 * it's reserved for the compiler and we are using it to convey
 * information to the compiler.
 *
 * This is also used when enabling native aliases since we don't get to
 * choose the macro names.
 *
 * The pragma is only emitted when the compiler reports support for
 * -Wreserved-id-macro itself; otherwise the macro expands to nothing. */
#if HEDLEY_HAS_WARNING("-Wreserved-id-macro")
#define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ \
    _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"")
#else
#define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_
#endif
/* clang 3.8 warns about the packed attribute being unnecessary when
* used in the _mm_loadu_* functions. That *may* be true for version
* 3.8, but for later versions it is crucial in order to make unaligned
* access safe. */
#if HEDLEY_HAS_WARNING("-Wpacked")
#define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ \
_Pragma("clang diagnostic ignored \"-Wpacked\"")
#else
#define SIMDE_DIAGNOSTIC_DISABLE_PACKED_
#endif
/* Triggered when assigning a float to a double implicitly. We use
* explicit casts in SIMDe, this is only used in the test suite. */
#if HEDLEY_HAS_WARNING("-Wdouble-promotion")
@ -194,7 +225,7 @@
/* Several compilers treat conformant array parameters as VLAs. We
* test to make sure we're in C mode (C++ doesn't support CAPs), and
* that the version of the standard supports CAPs. We also blacklist
* that the version of the standard supports CAPs. We also reject
* some buggy compilers like MSVC (the logic is in Hedley if you want
* to take a look), but with certain warnings enabled some compilers
* still like to emit a diagnostic. */
@ -221,6 +252,9 @@
#elif HEDLEY_GCC_VERSION_CHECK(3, 4, 0)
#define SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ \
_Pragma("GCC diagnostic ignored \"-Wunused-function\"")
#elif HEDLEY_MSVC_VERSION_CHECK(19, 0, 0) /* Likely goes back further */
#define SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ \
__pragma(warning(disable : 4505))
#else
#define SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_
#endif
@ -232,13 +266,63 @@
#define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_
#endif
/* https://github.com/nemequ/simde/issues/277 */
/* Silences padding diagnostics: -Wpadded where the compiler reports
 * support for it, warning 4324 on MSVC 19.0 and later.  Expands to
 * nothing on other compilers. */
#if HEDLEY_HAS_WARNING("-Wpadded")
#define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ \
_Pragma("clang diagnostic ignored \"-Wpadded\"")
#elif HEDLEY_MSVC_VERSION_CHECK(19, 0, 0) /* Likely goes back further */
#define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable : 4324))
#else
#define SIMDE_DIAGNOSTIC_DISABLE_PADDED_
#endif
/* Silences -Wzero-as-null-pointer-constant where the compiler reports
 * support for it; expands to nothing otherwise. */
#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant")
#define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ \
_Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"")
#else
#define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_
#endif
/* Silences -Wold-style-cast where the compiler reports support for it;
 * expands to nothing otherwise. */
#if HEDLEY_HAS_WARNING("-Wold-style-cast")
#define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ \
_Pragma("clang diagnostic ignored \"-Wold-style-cast\"")
#else
#define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_
#endif
/* Silences -Wcast-function-type.  Enabled when the compiler reports
 * support for the warning or when building with GCC 8.0 or later; the
 * "GCC diagnostic" pragma spelling is used in both cases.  Expands to
 * nothing otherwise. */
#if HEDLEY_HAS_WARNING("-Wcast-function-type") || \
HEDLEY_GCC_VERSION_CHECK(8, 0, 0)
#define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ \
_Pragma("GCC diagnostic ignored \"-Wcast-function-type\"")
#else
#define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_
#endif
/* clang will emit this warning when we use C99 extensions when not in
* C99 mode, even though it does support this. In such cases we check
* the compiler and version first, so we know it's not a problem. */
#if HEDLEY_HAS_WARNING("-Wc99-extensions")
#define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ \
_Pragma("clang diagnostic ignored \"-Wc99-extensions\"")
#else
#define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_
#endif
/* https://github.com/simd-everywhere/simde/issues/277 */
#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4, 6, 0) && \
!HEDLEY_GCC_VERSION_CHECK(6, 0, 0) && defined(__cplusplus)
#define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE \
!HEDLEY_GCC_VERSION_CHECK(6, 4, 0) && defined(__cplusplus)
#define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \
_Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"")
#else
#define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE
#define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_
#endif
/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS
* to silence, but you have to do that before including anything and
* that would require reordering includes. */
#if defined(_MSC_VER)
#define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable : 4996))
#else
#define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_
#endif
/* Some compilers, such as clang, may use `long long` for 64-bit
@ -246,13 +330,104 @@
* -Wc++98-compat-pedantic which says 'long long' is incompatible with
* C++98. */
#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic")
#define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC \
#define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \
_Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"")
#else
#define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC
#define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_
#endif
/* Same problem as above */
#if HEDLEY_HAS_WARNING("-Wc++11-long-long")
#define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \
_Pragma("clang diagnostic ignored \"-Wc++11-long-long\"")
#else
#define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_
#endif
/* emscripten emits this whenever stdin/stdout/stderr is used in a
* macro. */
#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion")
#define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ \
_Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"")
#else
#define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_
#endif
/* Clang uses C11 generic selections to implement some AltiVec
* functions, which triggers this diagnostic when not compiling
* in C11 mode */
#if HEDLEY_HAS_WARNING("-Wc11-extensions")
#define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ \
_Pragma("clang diagnostic ignored \"-Wc11-extensions\"")
#else
#define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_
#endif
/* Clang sometimes triggers this warning in macros in the AltiVec and
* NEON headers, or due to missing functions. */
#if HEDLEY_HAS_WARNING("-Wvector-conversion")
#define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ \
_Pragma("clang diagnostic ignored \"-Wvector-conversion\"")
/* For NEON, the situation with -Wvector-conversion in clang < 10 is
* bad enough that we just disable the warning altogether. */
#if defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
#define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \
SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_
#endif
#else
#define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_
#endif
#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_)
#define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_
#endif
/* SLEEF triggers this a *lot* in their headers */
#if HEDLEY_HAS_WARNING("-Wignored-qualifiers")
#define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ \
_Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"")
#elif HEDLEY_GCC_VERSION_CHECK(4, 3, 0)
#define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ \
_Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"")
#else
#define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_
#endif
/* GCC emits this under some circumstances when using __int128 */
#if HEDLEY_GCC_VERSION_CHECK(4, 8, 0)
#define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ \
_Pragma("GCC diagnostic ignored \"-Wpedantic\"")
#else
#define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_
#endif
/* MSVC doesn't like (__assume(0), code) and will warn about code being
* unreachable, but we want it there because not all compilers
* understand the unreachable macro and will complain if it is missing.
* I'm planning on adding a new macro to Hedley to handle this a bit
* more elegantly, but until then... */
#if defined(HEDLEY_MSVC_VERSION)
#define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable : 4702))
#else
#define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_
#endif
/* This is a false positive from GCC in a few places. */
#if HEDLEY_GCC_VERSION_CHECK(4, 7, 0)
#define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ \
_Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"")
#else
#define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_
#endif
#if defined(SIMDE_ENABLE_NATIVE_ALIASES)
#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \
SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_
#else
#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_
#endif
#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \
SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \
SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \
SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \
@ -264,7 +439,9 @@
SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \
SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ \
SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \
SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC \
SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE
SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \
SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \
SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \
SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_
#endif
#endif /* !defined(SIMDE_DIAGNOSTIC_H) */

View File

@ -32,6 +32,7 @@
#define SIMDE_FEATURES_H
#include "simde-arch.h"
#include "simde-diagnostic.h"
#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && \
!defined(SIMDE_NO_NATIVE)
@ -43,6 +44,28 @@
#define SIMDE_X86_AVX512F_NATIVE
#endif
#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && \
!defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && \
!defined(SIMDE_NO_NATIVE)
#if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT)
#define SIMDE_X86_AVX512VP2INTERSECT_NATIVE
#endif
#endif
#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && \
!defined(SIMDE_X86_AVX512F_NATIVE)
#define SIMDE_X86_AVX512F_NATIVE
#endif
#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && \
!defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
#if defined(SIMDE_ARCH_X86_AVX512VBMI)
#define SIMDE_X86_AVX512VBMI_NATIVE
#endif
#endif
#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE)
#define SIMDE_X86_AVX512F_NATIVE
#endif
#if !defined(SIMDE_X86_AVX512CD_NATIVE) && \
!defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
#if defined(SIMDE_ARCH_X86_AVX512CD)
@ -194,6 +217,20 @@
#endif
#endif
#if !defined(SIMDE_X86_PCLMUL_NATIVE) && \
!defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
#if defined(SIMDE_ARCH_X86_PCLMUL)
#define SIMDE_X86_PCLMUL_NATIVE
#endif
#endif
#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && \
!defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
#if defined(SIMDE_ARCH_X86_VPCLMULQDQ)
#define SIMDE_X86_VPCLMULQDQ_NATIVE
#endif
#endif
#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && \
!defined(SIMDE_NO_NATIVE)
#if defined(__INTEL_COMPILER)
@ -206,8 +243,7 @@
#pragma warning(disable : 4799)
#endif
#if defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) || \
defined(SIMDE_X86_SVML_NATIVE)
#if defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE)
#include <immintrin.h>
#elif defined(SIMDE_X86_SSE4_2_NATIVE)
#include <nmmintrin.h>
@ -243,7 +279,8 @@
#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \
!defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
#if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(80)
#if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(80) && \
(__ARM_NEON_FP & 0x02)
#define SIMDE_ARM_NEON_A32V8_NATIVE
#endif
#endif
@ -262,6 +299,14 @@
#include <arm_neon.h>
#endif
#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && \
!defined(SIMDE_NO_NATIVE)
#if defined(SIMDE_ARCH_ARM_SVE)
#define SIMDE_ARM_SVE_NATIVE
#include <arm_sve.h>
#endif
#endif
#if !defined(SIMDE_WASM_SIMD128_NATIVE) && \
!defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
#if defined(SIMDE_ARCH_WASM_SIMD128)
@ -270,7 +315,10 @@
#endif
#if defined(SIMDE_WASM_SIMD128_NATIVE)
#if !defined(__wasm_unimplemented_simd128__)
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_
#define __wasm_unimplemented_simd128__
HEDLEY_DIAGNOSTIC_POP
#endif
#include <wasm_simd128.h>
#endif
@ -326,15 +374,28 @@
#define SIMDE_POWER_ALTIVEC_P5_NATIVE
#endif
#endif
#if defined(SIMDE_POWER_ALTIVEC_P5_NATIVE)
/* stdbool.h conflicts with the bool in altivec.h */
#if defined(bool) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF_BOOL_)
#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
/* AltiVec conflicts with lots of stuff. The bool keyword conflicts
* with the bool keyword in C++ and the bool macro in C99+ (defined
* in stdbool.h). The vector keyword conflicts with std::vector in
* C++ if you are `using std;`.
*
* Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel`
* instead, but altivec.h will unconditionally define
* `vector`/`bool`/`pixel` so we need to work around that.
*
* Unfortunately this means that if your code uses AltiVec directly
* it may break. If this is the case you'll want to define
* `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even
* better, port your code to use the double-underscore versions. */
#if defined(bool)
#undef bool
#endif
#include <altivec.h>
/* GCC allows you to undefine these macros to prevent conflicts with
* standard types as they become context-sensitive keywords. */
#if defined(__cplusplus)
#if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF)
#if defined(vector)
#undef vector
#endif
@ -344,14 +405,146 @@
#if defined(bool)
#undef bool
#endif
#define SIMDE_POWER_ALTIVEC_VECTOR(T) vector T
#define SIMDE_POWER_ALTIVEC_PIXEL pixel
#define SIMDE_POWER_ALTIVEC_BOOL bool
#else
#endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */
/* Use these instead of vector/pixel/bool in SIMDe. */
#define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T
#define SIMDE_POWER_ALTIVEC_PIXEL __pixel
#define SIMDE_POWER_ALTIVEC_BOOL __bool
#endif /* defined(__cplusplus) */
/* Re-define bool if we're using stdbool.h */
#if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && \
!defined(SIMDE_POWER_ALTIVEC_NO_UNDEF)
#define bool _Bool
#endif
#endif
#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && \
!defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && \
!defined(SIMDE_NO_NATIVE)
#if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI)
#define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1
#endif
#endif
#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
#include <loongson-mmiintrin.h>
#endif
/* This is used to determine whether or not to fall back on a vector
* function from an earlier ISA extension, as well as whether we
* expect any attempts at vectorization to be fruitful or if we
* expect to always be running serial code. */
#if !defined(SIMDE_NATURAL_VECTOR_SIZE)
#if defined(SIMDE_X86_AVX512F_NATIVE)
#define SIMDE_NATURAL_VECTOR_SIZE (512)
#elif defined(SIMDE_X86_AVX_NATIVE)
#define SIMDE_NATURAL_VECTOR_SIZE (256)
#elif defined(SIMDE_X86_SSE_NATIVE) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \
defined(SIMDE_WASM_SIMD128_NATIVE) || \
defined(SIMDE_POWER_ALTIVEC_P5_NATIVE)
#define SIMDE_NATURAL_VECTOR_SIZE (128)
#endif
#if !defined(SIMDE_NATURAL_VECTOR_SIZE)
#define SIMDE_NATURAL_VECTOR_SIZE (0)
#endif
#endif
#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) \
((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x)))
#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) \
((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x)))
/* Native aliases */
#if defined(SIMDE_ENABLE_NATIVE_ALIASES)
#if !defined(SIMDE_X86_MMX_NATIVE)
#define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_X86_SSE_NATIVE)
#define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_X86_SSE2_NATIVE)
#define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_X86_SSE3_NATIVE)
#define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_X86_SSSE3_NATIVE)
#define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_X86_SSE4_1_NATIVE)
#define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_X86_SSE4_2_NATIVE)
#define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_X86_AVX_NATIVE)
#define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_X86_AVX2_NATIVE)
#define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_X86_FMA_NATIVE)
#define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_X86_AVX512F_NATIVE)
#define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_X86_AVX512VL_NATIVE)
#define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_X86_AVX512BW_NATIVE)
#define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_X86_AVX512DQ_NATIVE)
#define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_X86_AVX512CD_NATIVE)
#define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_X86_GFNI_NATIVE)
#define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_X86_PCLMUL_NATIVE)
#define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE)
#define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE)
#define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES
#endif
#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES
#endif
#endif
/* Are floating point values stored using IEEE 754? Knowing
* this at during preprocessing is a bit tricky, mostly because what
* we're curious about is how values are stored and not whether the
* implementation is fully conformant in terms of rounding, NaN
* handling, etc.
*
* For example, if you use -ffast-math or -Ofast on
* GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754
* support is not advertised (by defining __STDC_IEC_559__).
*
* However, what we care about is whether it is safe to assume that
* floating point values are stored in IEEE 754 format, in which case
* we can provide faster implementations of some functions.
*
* Luckily every vaugely modern architecture I'm aware of uses IEEE 754-
* so we just assume IEEE 754 for now. There is a test which verifies
* this, if that test fails sowewhere please let us know and we'll add
* an exception for that platform. Meanwhile, you can define
* SIMDE_NO_IEEE754_STORAGE. */
#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE)
#define SIMDE_IEEE754_STORAGE
#endif
#endif /* !defined(SIMDE_FEATURES_H) */

View File

@ -34,6 +34,58 @@
#include "hedley.h"
#include "simde-features.h"
#include <stdint.h>
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#include <arm_neon.h>
#endif
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
/* SLEEF support
* https://sleef.org/
*
* If you include <sleef.h> prior to including SIMDe, SIMDe will use
* SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to
* including SIMDe to force the issue.
*
* Note that SLEEF does require linking to libsleef.
*
* By default, SIMDe will use the 1 ULP functions, but if you use
* SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. This is
* only the case for the simde_math_* functions; for code in other
* SIMDe headers which calls SLEEF directly we may use functions with
* greater error if the API we're implementing is less precise (for
* example, SVML guarantees 4 ULP, so we will generally use the 3.5
* ULP functions from SLEEF). */
#if !defined(SIMDE_MATH_SLEEF_DISABLE)
#if defined(__SLEEF_H__)
#define SIMDE_MATH_SLEEF_ENABLE
#endif
#endif
#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__)
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_
#include <sleef.h>
HEDLEY_DIAGNOSTIC_POP
#endif
#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__)
#if defined(SLEEF_VERSION_MAJOR)
#define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) \
(HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, \
SLEEF_VERSION_PATCHLEVEL) >= \
HEDLEY_VERSION_ENCODE(major, minor, patch))
#else
#define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) \
(HEDLEY_VERSION_ENCODE(3, 0, 0) >= \
HEDLEY_VERSION_ENCODE(major, minor, patch))
#endif
#else
#define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0)
#endif
#if defined(__has_builtin)
#define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func)
#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
@ -82,11 +134,35 @@ HEDLEY_DIAGNOSTIC_POP
#endif
#endif
#if !defined(__cplusplus)
/* If this is a problem we *might* be able to avoid including
* <complex.h> on some compilers (gcc, clang, and others which
* implement builtins like __builtin_cexpf). If you don't have
* a <complex.h> please file an issue and we'll take a look. */
/* Try to avoid including <complex> since it pulls in a *lot* of code. */
#if HEDLEY_HAS_BUILTIN(__builtin_creal) || \
HEDLEY_GCC_VERSION_CHECK(4, 7, 0) || \
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_
typedef __complex__ float simde_cfloat32;
typedef __complex__ double simde_cfloat64;
HEDLEY_DIAGNOSTIC_POP
#define SIMDE_MATH_CMPLX(x, y) \
(HEDLEY_STATIC_CAST(double, x) + \
HEDLEY_STATIC_CAST(double, y) * (__extension__ 1.0j))
#define SIMDE_MATH_CMPLXF(x, y) \
(HEDLEY_STATIC_CAST(float, x) + \
HEDLEY_STATIC_CAST(float, y) * (__extension__ 1.0fj))
/* Real/imaginary part accessors built on the compiler builtins.
 * Each macro is guarded by its *own* name, so pre-defining one variant
 * (e.g. simde_math_creal) neither suppresses nor redefines another. */
#if !defined(simde_math_creal)
#define simde_math_creal(z) __builtin_creal(z)
#endif
#if !defined(simde_math_crealf)
#define simde_math_crealf(z) __builtin_crealf(z)
#endif
#if !defined(simde_math_cimag)
#define simde_math_cimag(z) __builtin_cimag(z)
#endif
#if !defined(simde_math_cimagf)
#define simde_math_cimagf(z) __builtin_cimagf(z)
#endif
#elif !defined(__cplusplus)
#include <complex.h>
#if !defined(HEDLEY_MSVC_VERSION)
@ -96,20 +172,14 @@ typedef double _Complex simde_cfloat64;
typedef _Fcomplex simde_cfloat32;
typedef _Dcomplex simde_cfloat64;
#endif
#if HEDLEY_HAS_BUILTIN(__builtin_complex) || \
HEDLEY_GCC_VERSION_CHECK(4, 7, 0) || \
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
#define SIMDE_MATH_CMPLX(x, y) __builtin_complex((double)(x), (double)(y))
#define SIMDE_MATH_CMPLXF(x, y) __builtin_complex((float)(x), (float)(y))
#elif defined(HEDLEY_MSVC_VERSION)
#if defined(HEDLEY_MSVC_VERSION)
#define SIMDE_MATH_CMPLX(x, y) ((simde_cfloat64){(x), (y)})
#define SIMDE_MATH_CMPLXF(x, y) ((simde_cfloat32){(x), (y)})
#elif defined(CMPLX) && defined(CMPLXF)
#define SIMDE_MATH_CMPLX(x, y) CMPLX(x, y)
#define SIMDE_MATH_CMPLXF(x, y) CMPLXF(x, y)
#else
/* CMPLX / CMPLXF are in C99, but these seem to be necessary in
* some compilers that aren't even MSVC. */
#define SIMDE_MATH_CMPLX(x, y) \
(HEDLEY_STATIC_CAST(double, x) + HEDLEY_STATIC_CAST(double, y) * I)
#define SIMDE_MATH_CMPLXF(x, y) \
@ -117,38 +187,18 @@ typedef _Dcomplex simde_cfloat64;
#endif
#if !defined(simde_math_creal)
#if SIMDE_MATH_BUILTIN_LIBM(creal)
#define simde_math_creal(z) __builtin_creal(z)
#else
#define simde_math_creal(z) creal(z)
#endif
#endif
#if !defined(simde_math_crealf)
#if SIMDE_MATH_BUILTIN_LIBM(crealf)
#define simde_math_crealf(z) __builtin_crealf(z)
#else
#define simde_math_crealf(z) crealf(z)
#endif
#endif
#if !defined(simde_math_cimag)
#if SIMDE_MATH_BUILTIN_LIBM(cimag)
#define simde_math_cimag(z) __builtin_cimag(z)
#else
#define simde_math_cimag(z) cimag(z)
#endif
#endif
#if !defined(simde_math_cimagf)
#if SIMDE_MATH_BUILTIN_LIBM(cimagf)
#define simde_math_cimagf(z) __builtin_cimagf(z)
#else
#define simde_math_cimagf(z) cimagf(z)
#endif
#endif
#else
HEDLEY_DIAGNOSTIC_PUSH
#if defined(HEDLEY_MSVC_VERSION)
#pragma warning(disable : 4530)
@ -240,6 +290,26 @@ typedef std::complex<double> simde_cfloat64;
#endif
#endif
#if !defined(SIMDE_MATH_PI_OVER_180)
#define SIMDE_MATH_PI_OVER_180 \
0.0174532925199432957692369076848861271344287188854172545609719144
#endif
#if !defined(SIMDE_MATH_PI_OVER_180F)
#define SIMDE_MATH_PI_OVER_180F \
0.0174532925199432957692369076848861271344287188854172545609719144f
#endif
#if !defined(SIMDE_MATH_180_OVER_PI)
#define SIMDE_MATH_180_OVER_PI \
57.295779513082320876798154814105170332405472466564321549160243861
#endif
#if !defined(SIMDE_MATH_180_OVER_PIF)
#define SIMDE_MATH_180_OVER_PIF \
57.295779513082320876798154814105170332405472466564321549160243861f
#endif
#if !defined(SIMDE_MATH_FLT_MIN)
#if defined(FLT_MIN)
#define SIMDE_MATH_FLT_MIN FLT_MIN
@ -341,6 +411,36 @@ typedef std::complex<double> simde_cfloat64;
#endif
#endif
/*** Manipulation functions ***/
#if !defined(simde_math_nextafter)
#if (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && \
!defined(HEDLEY_IBM_VERSION)) || \
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
HEDLEY_GCC_VERSION_CHECK(3, 4, 0) || \
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
#define simde_math_nextafter(x, y) __builtin_nextafter(x, y)
#elif defined(SIMDE_MATH_HAVE_CMATH)
#define simde_math_nextafter(x, y) std::nextafter(x, y)
#elif defined(SIMDE_MATH_HAVE_MATH_H)
#define simde_math_nextafter(x, y) nextafter(x, y)
#endif
#endif
#if !defined(simde_math_nextafterf)
#if (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && \
!defined(HEDLEY_IBM_VERSION)) || \
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
HEDLEY_GCC_VERSION_CHECK(3, 4, 0) || \
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0)
#define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y)
#elif defined(SIMDE_MATH_HAVE_CMATH)
#define simde_math_nextafterf(x, y) std::nextafter(x, y)
#elif defined(SIMDE_MATH_HAVE_MATH_H)
#define simde_math_nextafterf(x, y) nextafterf(x, y)
#endif
#endif
/*** Functions from C99 ***/
#if !defined(simde_math_abs)
@ -353,13 +453,13 @@ typedef std::complex<double> simde_cfloat64;
#endif
#endif
#if !defined(simde_math_absf)
#if SIMDE_MATH_BUILTIN_LIBM(absf)
#define simde_math_absf(v) __builtin_absf(v)
#if !defined(simde_math_fabsf)
#if SIMDE_MATH_BUILTIN_LIBM(fabsf)
#define simde_math_fabsf(v) __builtin_fabsf(v)
#elif defined(SIMDE_MATH_HAVE_CMATH)
#define simde_math_absf(v) std::abs(v)
#define simde_math_fabsf(v) std::abs(v)
#elif defined(SIMDE_MATH_HAVE_MATH_H)
#define simde_math_absf(v) absf(v)
#define simde_math_fabsf(v) fabsf(v)
#endif
#endif
@ -574,7 +674,13 @@ typedef std::complex<double> simde_cfloat64;
#endif
#if !defined(simde_math_cosf)
#if SIMDE_MATH_BUILTIN_LIBM(cosf)
#if defined(SIMDE_MATH_SLEEF_ENABLE)
#if SIMDE_ACCURACY_PREFERENCE < 1
#define simde_math_cosf(v) Sleef_cosf_u35(v)
#else
#define simde_math_cosf(v) Sleef_cosf_u10(v)
#endif
#elif SIMDE_MATH_BUILTIN_LIBM(cosf)
#define simde_math_cosf(v) __builtin_cosf(v)
#elif defined(SIMDE_MATH_HAVE_CMATH)
#define simde_math_cosf(v) std::cos(v)
@ -755,6 +861,46 @@ typedef std::complex<double> simde_cfloat64;
#endif
#endif
#if !defined(simde_math_fma)
#if SIMDE_MATH_BUILTIN_LIBM(fma)
#define simde_math_fma(x, y, z) __builtin_fma(x, y, z)
#elif defined(SIMDE_MATH_HAVE_CMATH)
#define simde_math_fma(x, y, z) std::fma(x, y, z)
#elif defined(SIMDE_MATH_HAVE_MATH_H)
#define simde_math_fma(x, y, z) fma(x, y, z)
#endif
#endif
#if !defined(simde_math_fmaf)
#if SIMDE_MATH_BUILTIN_LIBM(fmaf)
#define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z)
#elif defined(SIMDE_MATH_HAVE_CMATH)
#define simde_math_fmaf(x, y, z) std::fma(x, y, z)
#elif defined(SIMDE_MATH_HAVE_MATH_H)
#define simde_math_fmaf(x, y, z) fmaf(x, y, z)
#endif
#endif
/* Larger of two doubles. fmax() takes exactly two operands, so the
 * wrapper does too; a three-argument form cannot be forwarded to any
 * of the implementations below. */
#if !defined(simde_math_fmax)
#if SIMDE_MATH_BUILTIN_LIBM(fmax)
#define simde_math_fmax(x, y) __builtin_fmax(x, y)
#elif defined(SIMDE_MATH_HAVE_CMATH)
#define simde_math_fmax(x, y) std::fmax(x, y)
#elif defined(SIMDE_MATH_HAVE_MATH_H)
#define simde_math_fmax(x, y) fmax(x, y)
#endif
#endif
/* Larger of two floats. fmaxf() takes exactly two operands, so the
 * wrapper does too; a three-argument form cannot be forwarded to any
 * of the implementations below. */
#if !defined(simde_math_fmaxf)
#if SIMDE_MATH_BUILTIN_LIBM(fmaxf)
#define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y)
#elif defined(SIMDE_MATH_HAVE_CMATH)
#define simde_math_fmaxf(x, y) std::fmax(x, y)
#elif defined(SIMDE_MATH_HAVE_MATH_H)
#define simde_math_fmaxf(x, y) fmaxf(x, y)
#endif
#endif
#if !defined(simde_math_hypot)
#if SIMDE_MATH_BUILTIN_LIBM(hypot)
#define simde_math_hypot(y, x) __builtin_hypot(y, x)
@ -875,6 +1021,26 @@ typedef std::complex<double> simde_cfloat64;
#endif
#endif
#if !defined(simde_math_modf)
#if SIMDE_MATH_BUILTIN_LIBM(modf)
#define simde_math_modf(x, iptr) __builtin_modf(x, iptr)
#elif defined(SIMDE_MATH_HAVE_CMATH)
#define simde_math_modf(x, iptr) std::modf(x, iptr)
#elif defined(SIMDE_MATH_HAVE_MATH_H)
#define simde_math_modf(x, iptr) modf(x, iptr)
#endif
#endif
#if !defined(simde_math_modff)
#if SIMDE_MATH_BUILTIN_LIBM(modff)
#define simde_math_modff(x, iptr) __builtin_modff(x, iptr)
#elif defined(SIMDE_MATH_HAVE_CMATH)
#define simde_math_modff(x, iptr) std::modf(x, iptr)
#elif defined(SIMDE_MATH_HAVE_MATH_H)
#define simde_math_modff(x, iptr) modff(x, iptr)
#endif
#endif
#if !defined(simde_math_nearbyint)
#if SIMDE_MATH_BUILTIN_LIBM(nearbyint)
#define simde_math_nearbyint(v) __builtin_nearbyint(v)
@ -955,6 +1121,44 @@ typedef std::complex<double> simde_cfloat64;
#endif
#endif
/* Round a double to the nearest integer, ties to even. The compiler
 * builtin is used when available; otherwise a portable fallback is
 * built on simde_math_round and simde_math_fabs (only when both are
 * defined). */
#if !defined(simde_math_roundeven)
#if HEDLEY_HAS_BUILTIN(__builtin_roundeven) || \
	HEDLEY_GCC_VERSION_CHECK(10, 0, 0)
#define simde_math_roundeven(v) __builtin_roundeven(v)
#elif defined(simde_math_round) && defined(simde_math_fabs)
static HEDLEY_INLINE double simde_math_roundeven(double v)
{
	const double nearest = simde_math_round(v);
	const double delta = nearest - v;

	/* An exact half-way input that was rounded to an odd integer is
	 * stepped back across v to the even neighbour.
	 * NOTE(review): presumes simde_math_round resolves ties away from
	 * zero like C99 round() - confirm against its definition. */
	if (HEDLEY_UNLIKELY(simde_math_fabs(delta) == 0.5) &&
	    (HEDLEY_STATIC_CAST(int64_t, nearest) & 1)) {
		return v - delta;
	}

	return nearest;
}
#define simde_math_roundeven simde_math_roundeven
#endif
#endif
/* Round a float to the nearest integer, ties to even. The compiler
 * builtin is used when available; otherwise a portable fallback is
 * built on simde_math_roundf and simde_math_fabsf (only when both are
 * defined). */
#if !defined(simde_math_roundevenf)
#if HEDLEY_HAS_BUILTIN(__builtin_roundevenf) || \
	HEDLEY_GCC_VERSION_CHECK(10, 0, 0)
#define simde_math_roundevenf(v) __builtin_roundevenf(v)
#elif defined(simde_math_roundf) && defined(simde_math_fabsf)
static HEDLEY_INLINE float simde_math_roundevenf(float v)
{
	const float nearest = simde_math_roundf(v);
	const float delta = nearest - v;

	/* An exact half-way input that was rounded to an odd integer is
	 * stepped back across v to the even neighbour.
	 * NOTE(review): presumes simde_math_roundf resolves ties away from
	 * zero like C99 roundf() - confirm against its definition. */
	if (HEDLEY_UNLIKELY(simde_math_fabsf(delta) == 0.5f) &&
	    (HEDLEY_STATIC_CAST(int32_t, nearest) & 1)) {
		return v - delta;
	}

	return nearest;
}
#define simde_math_roundevenf simde_math_roundevenf
#endif
#endif
#if !defined(simde_math_sin)
#if SIMDE_MATH_BUILTIN_LIBM(sin)
#define simde_math_sin(v) __builtin_sin(v)
@ -1078,20 +1282,20 @@ typedef std::complex<double> simde_cfloat64;
/*** Complex functions ***/
#if !defined(simde_math_cexp)
#if defined(__cplusplus)
#define simde_math_cexp(v) std::cexp(v)
#elif SIMDE_MATH_BUILTIN_LIBM(cexp)
#if SIMDE_MATH_BUILTIN_LIBM(cexp)
#define simde_math_cexp(v) __builtin_cexp(v)
#elif defined(__cplusplus)
#define simde_math_cexp(v) std::cexp(v)
#elif defined(SIMDE_MATH_HAVE_MATH_H)
#define simde_math_cexp(v) cexp(v)
#endif
#endif
#if !defined(simde_math_cexpf)
#if defined(__cplusplus)
#define simde_math_cexpf(v) std::exp(v)
#elif SIMDE_MATH_BUILTIN_LIBM(cexpf)
#if SIMDE_MATH_BUILTIN_LIBM(cexpf)
#define simde_math_cexpf(v) __builtin_cexpf(v)
#elif defined(__cplusplus)
#define simde_math_cexpf(v) std::exp(v)
#elif defined(SIMDE_MATH_HAVE_MATH_H)
#define simde_math_cexpf(v) cexpf(v)
#endif
@ -1393,22 +1597,262 @@ HEDLEY_DIAGNOSTIC_POP
/* Convert an angle from radians to degrees (double precision).
 * Multiplies by the precomputed SIMDE_MATH_180_OVER_PI constant; the
 * former second, unreachable return statement has been removed. */
static HEDLEY_INLINE double simde_math_rad2deg(double radians)
{
	return radians * SIMDE_MATH_180_OVER_PI;
}
/* Convert an angle from radians to degrees (single precision).
 * Multiplies by the precomputed SIMDE_MATH_180_OVER_PIF constant; the
 * former second, unreachable return statement has been removed. */
static HEDLEY_INLINE float simde_math_rad2degf(float radians)
{
	return radians * SIMDE_MATH_180_OVER_PIF;
}
/* Convert an angle from degrees to radians (double precision).
 * Multiplies by the precomputed SIMDE_MATH_PI_OVER_180 constant; the
 * former second, unreachable return statement has been removed. */
static HEDLEY_INLINE double simde_math_deg2rad(double degrees)
{
	return degrees * SIMDE_MATH_PI_OVER_180;
}
/* Convert an angle from degrees to radians (single precision).
 * Multiplies by the precomputed SIMDE_MATH_PI_OVER_180F constant; the
 * former second, unreachable return statement has been removed. */
static HEDLEY_INLINE float simde_math_deg2radf(float degrees)
{
	return degrees * SIMDE_MATH_PI_OVER_180F;
}
/*** Saturated arithmetic ***/
/* Saturating signed 8-bit addition: a + b, clamped instead of wrapped
 * when the sum does not fit in int8_t. */
static HEDLEY_INLINE int8_t simde_math_adds_i8(int8_t a, int8_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqaddb_s8(a, b);
#else
	const uint8_t ua = HEDLEY_STATIC_CAST(uint8_t, a);
	const uint8_t ub = HEDLEY_STATIC_CAST(uint8_t, b);
	/* Wrapping sum, done in unsigned arithmetic. */
	const uint8_t wrapped = ua + ub;
	/* 0x7F when a's sign bit is clear, 0x80 when it is set. */
	const uint8_t clamp = (ua >> 7) + INT8_MAX;
	/* Sign bit clear <=> a and b share a sign that the sum lost. */
	const int8_t probe = HEDLEY_STATIC_CAST(
		int8_t, ((clamp ^ ub) | ~(ub ^ wrapped)));
	const uint8_t result = (probe >= 0) ? clamp : wrapped;

	return HEDLEY_STATIC_CAST(int8_t, result);
#endif
}
/* Saturating signed 16-bit addition: a + b, clamped instead of wrapped
 * when the sum does not fit in int16_t. */
static HEDLEY_INLINE int16_t simde_math_adds_i16(int16_t a, int16_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqaddh_s16(a, b);
#else
	const uint16_t ua = HEDLEY_STATIC_CAST(uint16_t, a);
	const uint16_t ub = HEDLEY_STATIC_CAST(uint16_t, b);
	/* Wrapping sum, done in unsigned arithmetic. */
	const uint16_t wrapped = ua + ub;
	/* 0x7FFF when a's sign bit is clear, 0x8000 when it is set. */
	const uint16_t clamp = (ua >> 15) + INT16_MAX;
	/* Sign bit clear <=> a and b share a sign that the sum lost. */
	const int16_t probe = HEDLEY_STATIC_CAST(
		int16_t, ((clamp ^ ub) | ~(ub ^ wrapped)));
	const uint16_t result = (probe >= 0) ? clamp : wrapped;

	return HEDLEY_STATIC_CAST(int16_t, result);
#endif
}
/* Saturating signed 32-bit addition: a + b, clamped instead of wrapped
 * when the sum does not fit in int32_t. */
static HEDLEY_INLINE int32_t simde_math_adds_i32(int32_t a, int32_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqadds_s32(a, b);
#else
	const uint32_t ua = HEDLEY_STATIC_CAST(uint32_t, a);
	const uint32_t ub = HEDLEY_STATIC_CAST(uint32_t, b);
	/* Wrapping sum, done in unsigned arithmetic. */
	const uint32_t wrapped = ua + ub;
	/* INT32_MAX's bits when a's sign bit is clear, INT32_MIN's when set. */
	const uint32_t clamp = (ua >> 31) + INT32_MAX;
	/* Sign bit clear <=> a and b share a sign that the sum lost. */
	const int32_t probe = HEDLEY_STATIC_CAST(
		int32_t, ((clamp ^ ub) | ~(ub ^ wrapped)));
	const uint32_t result = (probe >= 0) ? clamp : wrapped;

	return HEDLEY_STATIC_CAST(int32_t, result);
#endif
}
/* Saturating signed 64-bit addition: a + b, clamped instead of wrapped
 * when the sum does not fit in int64_t. */
static HEDLEY_INLINE int64_t simde_math_adds_i64(int64_t a, int64_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqaddd_s64(a, b);
#else
	const uint64_t ua = HEDLEY_STATIC_CAST(uint64_t, a);
	const uint64_t ub = HEDLEY_STATIC_CAST(uint64_t, b);
	/* Wrapping sum, done in unsigned arithmetic. */
	const uint64_t wrapped = ua + ub;
	/* INT64_MAX's bits when a's sign bit is clear, INT64_MIN's when set. */
	const uint64_t clamp = (ua >> 63) + INT64_MAX;
	/* Sign bit clear <=> a and b share a sign that the sum lost. */
	const int64_t probe = HEDLEY_STATIC_CAST(
		int64_t, ((clamp ^ ub) | ~(ub ^ wrapped)));
	const uint64_t result = (probe >= 0) ? clamp : wrapped;

	return HEDLEY_STATIC_CAST(int64_t, result);
#endif
}
/* Saturating unsigned 8-bit addition: a + b, pinned to all-ones when
 * the sum wraps. */
static HEDLEY_INLINE uint8_t simde_math_adds_u8(uint8_t a, uint8_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqaddb_u8(a, b);
#else
	const uint8_t sum = a + b;
	/* (sum < a) is 1 exactly when the addition wrapped; negating it
	 * gives an all-ones mask, otherwise zero. */
	const uint8_t wrap_mask = -(sum < a);

	return sum | wrap_mask;
#endif
}
/* Saturating unsigned 16-bit addition: a + b, pinned to all-ones when
 * the sum wraps. */
static HEDLEY_INLINE uint16_t simde_math_adds_u16(uint16_t a, uint16_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqaddh_u16(a, b);
#else
	const uint16_t sum = a + b;
	/* (sum < a) is 1 exactly when the addition wrapped; negating it
	 * gives an all-ones mask, otherwise zero. */
	const uint16_t wrap_mask = -(sum < a);

	return sum | wrap_mask;
#endif
}
/* Saturating unsigned 32-bit addition: a + b, pinned to all-ones when
 * the sum wraps. */
static HEDLEY_INLINE uint32_t simde_math_adds_u32(uint32_t a, uint32_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqadds_u32(a, b);
#else
	const uint32_t sum = a + b;
	/* (sum < a) is 1 exactly when the addition wrapped; negating it
	 * gives an all-ones mask, otherwise zero. */
	const uint32_t wrap_mask = -(sum < a);

	return sum | wrap_mask;
#endif
}
/* Saturating unsigned 64-bit addition: a + b, pinned to all-ones when
 * the sum wraps. */
static HEDLEY_INLINE uint64_t simde_math_adds_u64(uint64_t a, uint64_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqaddd_u64(a, b);
#else
	const uint64_t sum = a + b;
	/* (sum < a) is 1 exactly when the addition wrapped; negating it
	 * gives an all-ones mask, otherwise zero. */
	const uint64_t wrap_mask = -(sum < a);

	return sum | wrap_mask;
#endif
}
/* Saturating signed 8-bit subtraction: a - b, clamped instead of
 * wrapped when the difference does not fit in int8_t. */
static HEDLEY_INLINE int8_t simde_math_subs_i8(int8_t a, int8_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqsubb_s8(a, b);
#else
	const uint8_t ua = HEDLEY_STATIC_CAST(uint8_t, a);
	const uint8_t ub = HEDLEY_STATIC_CAST(uint8_t, b);
	/* Wrapping difference, done in unsigned arithmetic. */
	const uint8_t wrapped = ua - ub;
	/* 0x7F when a's sign bit is clear, 0x80 when it is set. */
	const uint8_t clamp = (ua >> 7) + INT8_MAX;
	/* Sign bit set <=> a and b differ in sign and the difference's
	 * sign differs from a's. */
	const int8_t probe = HEDLEY_STATIC_CAST(
		int8_t, (clamp ^ ub) & (clamp ^ wrapped));
	const uint8_t result = (probe < 0) ? clamp : wrapped;

	return HEDLEY_STATIC_CAST(int8_t, result);
#endif
}
/* Saturating signed 16-bit subtraction: a - b, clamped instead of
 * wrapped when the difference does not fit in int16_t. */
static HEDLEY_INLINE int16_t simde_math_subs_i16(int16_t a, int16_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqsubh_s16(a, b);
#else
	const uint16_t ua = HEDLEY_STATIC_CAST(uint16_t, a);
	const uint16_t ub = HEDLEY_STATIC_CAST(uint16_t, b);
	/* Wrapping difference, done in unsigned arithmetic. */
	const uint16_t wrapped = ua - ub;
	/* 0x7FFF when a's sign bit is clear, 0x8000 when it is set. */
	const uint16_t clamp = (ua >> 15) + INT16_MAX;
	/* Sign bit set <=> a and b differ in sign and the difference's
	 * sign differs from a's. */
	const int16_t probe = HEDLEY_STATIC_CAST(
		int16_t, (clamp ^ ub) & (clamp ^ wrapped));
	const uint16_t result = (probe < 0) ? clamp : wrapped;

	return HEDLEY_STATIC_CAST(int16_t, result);
#endif
}
/* Saturating signed 32-bit subtraction: a - b, clamped instead of
 * wrapped when the difference does not fit in int32_t. */
static HEDLEY_INLINE int32_t simde_math_subs_i32(int32_t a, int32_t b)
{
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	return vqsubs_s32(a, b);
#else
	const uint32_t ua = HEDLEY_STATIC_CAST(uint32_t, a);
	const uint32_t ub = HEDLEY_STATIC_CAST(uint32_t, b);
	/* Wrapping difference, done in unsigned arithmetic. */
	const uint32_t wrapped = ua - ub;
	/* INT32_MAX's bits when a's sign bit is clear, INT32_MIN's when set. */
	const uint32_t clamp = (ua >> 31) + INT32_MAX;
	/* Sign bit set <=> a and b differ in sign and the difference's
	 * sign differs from a's. */
	const int32_t probe = HEDLEY_STATIC_CAST(
		int32_t, (clamp ^ ub) & (clamp ^ wrapped));
	const uint32_t result = (probe < 0) ? clamp : wrapped;

	return HEDLEY_STATIC_CAST(int32_t, result);
#endif
}
/* Saturating signed 64-bit subtraction: yields a - b, clamped to the
 * int64_t range instead of overflowing. */
static HEDLEY_INLINE int64_t simde_math_subs_i64(int64_t a, int64_t b)
{
#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	/* Portable path: operate on the unsigned bit patterns so the
	 * subtraction wraps rather than overflowing a signed type. */
	const uint64_t ua = HEDLEY_STATIC_CAST(uint64_t, a);
	const uint64_t ub = HEDLEY_STATIC_CAST(uint64_t, b);
	const uint64_t wrapped = ua - ub;

	/* Clamp value selected by the sign of a: INT64_MAX when a is
	 * non-negative, one past it (the INT64_MIN bit pattern) when a is
	 * negative. Its sign bit therefore equals the sign bit of a. */
	const uint64_t limit = (ua >> 63) + INT64_MAX;

	/* Sign bit set <=> a and b differ in sign AND the wrapped result
	 * differs in sign from a, i.e. the subtraction overflowed. */
	const uint64_t overflow = (limit ^ ub) & (limit ^ wrapped);

	if (HEDLEY_STATIC_CAST(int64_t, overflow) < 0) {
		return HEDLEY_STATIC_CAST(int64_t, limit);
	}
	return HEDLEY_STATIC_CAST(int64_t, wrapped);
#else
	/* AArch64 has a scalar saturating subtract for this width. */
	return vqsubd_s64(a, b);
#endif
}
/* Saturating unsigned 8-bit subtraction: yields a - b, or 0 when b is
 * larger than a (instead of wrapping around). */
static HEDLEY_INLINE uint8_t simde_math_subs_u8(uint8_t a, uint8_t b)
{
#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	/* Portable path. The truncated difference stays <= a exactly when no
	 * borrow occurred. */
	uint8_t diff = a - b;
	const int in_range = (diff <= a);

	/* -in_range is an all-ones mask when the result is valid and zero on
	 * underflow, so the AND either keeps the difference or clears it. */
	diff &= -in_range;
	return diff;
#else
	/* AArch64 has a scalar saturating subtract for this width. */
	return vqsubb_u8(a, b);
#endif
}
/* Saturating unsigned 16-bit subtraction: yields a - b, or 0 when b is
 * larger than a (instead of wrapping around). */
static HEDLEY_INLINE uint16_t simde_math_subs_u16(uint16_t a, uint16_t b)
{
#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	/* Portable path. The truncated difference stays <= a exactly when no
	 * borrow occurred. */
	uint16_t diff = a - b;
	const int in_range = (diff <= a);

	/* All-ones mask when valid, zero on underflow. */
	diff &= -in_range;
	return diff;
#else
	/* AArch64 has a scalar saturating subtract for this width. */
	return vqsubh_u16(a, b);
#endif
}
/* Saturating unsigned 32-bit subtraction: yields a - b, or 0 when b is
 * larger than a (instead of wrapping around). */
static HEDLEY_INLINE uint32_t simde_math_subs_u32(uint32_t a, uint32_t b)
{
#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	/* Portable path. Unsigned subtraction wraps, and the wrapped
	 * difference stays <= a exactly when no borrow occurred. */
	uint32_t diff = a - b;
	const int in_range = (diff <= a);

	/* -in_range converts to an all-ones mask when valid, zero on
	 * underflow. */
	diff &= -in_range;
	return diff;
#else
	/* AArch64 has a scalar saturating subtract for this width. */
	return vqsubs_u32(a, b);
#endif
}
/* Saturating unsigned 64-bit subtraction: yields a - b, or 0 when b is
 * larger than a (instead of wrapping around). */
static HEDLEY_INLINE uint64_t simde_math_subs_u64(uint64_t a, uint64_t b)
{
#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)
	/* Portable path. Unsigned subtraction wraps, and the wrapped
	 * difference stays <= a exactly when no borrow occurred. */
	uint64_t diff = a - b;
	const int in_range = (diff <= a);

	/* -in_range converts to an all-ones mask when valid, zero on
	 * underflow. */
	diff &= -in_range;
	return diff;
#else
	/* AArch64 has a scalar saturating subtract for this width. */
	return vqsubd_u64(a, b);
#endif
}
HEDLEY_DIAGNOSTIC_POP
#endif /* !defined(SIMDE_MATH_H) */

View File

@ -27,11 +27,7 @@
#if !defined(SIMDE_X86_MMX_H)
#define SIMDE_X86_MMX_H
#include "simde-common.h"
#if !defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
#define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES
#endif
#include "../simde-common.h"
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
@ -46,6 +42,8 @@ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
#include <mmintrin.h>
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#include <arm_neon.h>
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
#include <loongson-mmiintrin.h>
#endif
#include <stdint.h>
@ -55,29 +53,29 @@ SIMDE_BEGIN_DECLS_
typedef union {
#if defined(SIMDE_VECTOR_SUBSCRIPT)
SIMDE_ALIGN(8) int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN(8) int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN(8) int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN(8) int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN(8) uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN(8) uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN(8) uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN(8) uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN(8) simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN(8) int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN(8) uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
#else
SIMDE_ALIGN(8) int8_t i8[8];
SIMDE_ALIGN(8) int16_t i16[4];
SIMDE_ALIGN(8) int32_t i32[2];
SIMDE_ALIGN(8) int64_t i64[1];
SIMDE_ALIGN(8) uint8_t u8[8];
SIMDE_ALIGN(8) uint16_t u16[4];
SIMDE_ALIGN(8) uint32_t u32[2];
SIMDE_ALIGN(8) uint64_t u64[1];
SIMDE_ALIGN(8) simde_float32 f32[2];
SIMDE_ALIGN(8) int_fast32_t i32f[8 / sizeof(int_fast32_t)];
SIMDE_ALIGN(8) uint_fast32_t u32f[8 / sizeof(uint_fast32_t)];
SIMDE_ALIGN_TO_8 int8_t i8[8];
SIMDE_ALIGN_TO_8 int16_t i16[4];
SIMDE_ALIGN_TO_8 int32_t i32[2];
SIMDE_ALIGN_TO_8 int64_t i64[1];
SIMDE_ALIGN_TO_8 uint8_t u8[8];
SIMDE_ALIGN_TO_8 uint16_t u16[4];
SIMDE_ALIGN_TO_8 uint32_t u32[2];
SIMDE_ALIGN_TO_8 uint64_t u64[1];
SIMDE_ALIGN_TO_8 simde_float32 f32[2];
SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)];
SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)];
#endif
#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
@ -94,14 +92,26 @@ typedef union {
uint64x1_t neon_u64;
float32x2_t neon_f32;
#endif
#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
int8x8_t mmi_i8;
int16x4_t mmi_i16;
int32x2_t mmi_i32;
int64_t mmi_i64;
uint8x8_t mmi_u8;
uint16x4_t mmi_u16;
uint32x2_t mmi_u32;
uint64_t mmi_u64;
#endif
} simde__m64_private;
#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
typedef __m64 simde__m64;
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
typedef int32x2_t simde__m64;
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
typedef int32x2_t simde__m64;
#elif defined(SIMDE_VECTOR_SUBSCRIPT)
typedef int32_t simde__m64 SIMDE_ALIGN(8) SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
#else
typedef simde__m64_private simde__m64;
#endif
@ -169,6 +179,17 @@ SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32)
#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */
/* Loongson MMI builds only: one SIMDE_X86_GENERATE_CONVERSION_FUNCTION
 * instantiation per mmi_* member of simde__m64_private (signed/unsigned
 * 8-, 16-, 32- and 64-bit).
 * NOTE(review): the macro itself is defined outside this excerpt;
 * presumably each line emits to/from helpers between simde__m64 and the
 * named native type -- confirm against the macro definition. */
#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32)
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64)
#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */
SIMDE_FUNCTION_ATTRIBUTES
simde__m64 simde_mm_add_pi8(simde__m64 a, simde__m64 b)
{
@ -181,6 +202,8 @@ simde__m64 simde_mm_add_pi8(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.i8 = a_.i8 + b_.i8;
#else
@ -211,6 +234,8 @@ simde__m64 simde_mm_add_pi16(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.i16 = a_.i16 + b_.i16;
#else
@ -226,7 +251,7 @@ simde__m64 simde_mm_add_pi16(simde__m64 a, simde__m64 b)
#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b)
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
#define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b)
#define _m_add_paddw(a, b) simde_mm_add_pi16(a, b)
#define _m_paddw(a, b) simde_mm_add_pi16(a, b)
#endif
SIMDE_FUNCTION_ATTRIBUTES
@ -241,6 +266,8 @@ simde__m64 simde_mm_add_pi32(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.i32 = a_.i32 + b_.i32;
#else
@ -256,7 +283,7 @@ simde__m64 simde_mm_add_pi32(simde__m64 a, simde__m64 b)
#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b)
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
#define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b)
#define _m_add_paddd(a, b) simde_mm_add_pi32(a, b)
#define _m_paddd(a, b) simde_mm_add_pi32(a, b)
#endif
SIMDE_FUNCTION_ATTRIBUTES
@ -270,6 +297,8 @@ simde__m64 simde_mm_adds_pi8(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
@ -291,7 +320,7 @@ simde__m64 simde_mm_adds_pi8(simde__m64 a, simde__m64 b)
#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b)
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
#define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b)
#define _m_add_paddsb(a, b) simde_mm_adds_pi8(a, b)
#define _m_paddsb(a, b) simde_mm_adds_pi8(a, b)
#endif
SIMDE_FUNCTION_ATTRIBUTES
@ -306,6 +335,8 @@ simde__m64 simde_mm_adds_pu8(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
@ -340,6 +371,8 @@ simde__m64 simde_mm_adds_pi16(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
@ -376,6 +409,8 @@ simde__m64 simde_mm_adds_pu16(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
@ -435,6 +470,8 @@ simde__m64 simde_mm_andnot_si64(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.i32f = ~a_.i32f & b_.i32f;
#else
@ -461,7 +498,9 @@ simde__m64 simde_mm_cmpeq_pi8(simde__m64 a, simde__m64 b)
simde__m64_private b_ = simde__m64_to_private(b);
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i8 = vreinterpret_s8_u8(vceq_s8(a_.neon_i8, b_.neon_i8));
r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
@ -489,7 +528,9 @@ simde__m64 simde_mm_cmpeq_pi16(simde__m64 a, simde__m64 b)
simde__m64_private b_ = simde__m64_to_private(b);
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i16 = vreinterpret_s16_u16(vceq_s16(a_.neon_i16, b_.neon_i16));
r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
@ -517,7 +558,9 @@ simde__m64 simde_mm_cmpeq_pi32(simde__m64 a, simde__m64 b)
simde__m64_private b_ = simde__m64_to_private(b);
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i32 = vreinterpret_s32_u32(vceq_s32(a_.neon_i32, b_.neon_i32));
r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
@ -545,7 +588,9 @@ simde__m64 simde_mm_cmpgt_pi8(simde__m64 a, simde__m64 b)
simde__m64_private b_ = simde__m64_to_private(b);
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i8 = vreinterpret_s8_u8(vcgt_s8(a_.neon_i8, b_.neon_i8));
r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
@ -573,7 +618,9 @@ simde__m64 simde_mm_cmpgt_pi16(simde__m64 a, simde__m64 b)
simde__m64_private b_ = simde__m64_to_private(b);
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i16 = vreinterpret_s16_u16(vcgt_s16(a_.neon_i16, b_.neon_i16));
r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
@ -601,7 +648,9 @@ simde__m64 simde_mm_cmpgt_pi32(simde__m64 a, simde__m64 b)
simde__m64_private b_ = simde__m64_to_private(b);
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i32 = vreinterpret_s32_u32(vcgt_s32(a_.neon_i32, b_.neon_i32));
r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
@ -628,7 +677,13 @@ int64_t simde_mm_cvtm64_si64(simde__m64 a)
simde__m64_private a_ = simde__m64_to_private(a);
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
HEDLEY_DIAGNOSTIC_PUSH
#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
#pragma clang diagnostic ignored "-Wvector-conversion"
#endif
return vget_lane_s64(a_.neon_i64, 0);
HEDLEY_DIAGNOSTIC_POP
#else
return a_.i64[0];
#endif
@ -698,7 +753,13 @@ int32_t simde_mm_cvtsi64_si32(simde__m64 a)
simde__m64_private a_ = simde__m64_to_private(a);
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
HEDLEY_DIAGNOSTIC_PUSH
#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
#pragma clang diagnostic ignored "-Wvector-conversion"
#endif
return vget_lane_s32(a_.neon_i32, 0);
HEDLEY_DIAGNOSTIC_POP
#else
return a_.i32[0];
#endif
@ -714,6 +775,7 @@ void simde_mm_empty(void)
#if defined(SIMDE_X86_MMX_NATIVE)
_mm_empty();
#else
/* noop */
#endif
}
#define simde_m_empty() simde_mm_empty()
@ -735,6 +797,8 @@ simde__m64 simde_mm_madd_pi16(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16);
r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1));
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i += 2) {
@ -766,7 +830,9 @@ simde__m64 simde_mm_mulhi_pi16(simde__m64 a, simde__m64 b)
const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16);
const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16);
const uint16x4_t t3 = vmovn_u32(t2);
r_.neon_i16 = vreinterpret_s16_u16(t3);
r_.neon_u16 = t3;
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
@ -797,7 +863,9 @@ simde__m64 simde_mm_mullo_pi16(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16);
const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1));
r_.neon_i16 = vreinterpret_s16_u16(t2);
r_.neon_u16 = t2;
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
@ -854,6 +922,8 @@ simde__m64 simde_mm_packs_pi16(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16));
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
@ -884,7 +954,7 @@ simde__m64 simde_mm_packs_pi16(simde__m64 a, simde__m64 b)
#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b)
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
#define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b)
#define _m_packsswb(a, b) mm_packs_pi16(a, b)
#define _m_packsswb(a, b) simde_mm_packs_pi16(a, b)
#endif
SIMDE_FUNCTION_ATTRIBUTES
@ -899,6 +969,8 @@ simde__m64 simde_mm_packs_pi32(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32));
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (8 / sizeof(a_.i32[0])); i++) {
@ -950,7 +1022,8 @@ simde__m64 simde_mm_packs_pu16(simde__m64 a, simde__m64 b)
vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1)));
/* Vector with all s16 elements set to UINT8_MAX */
const int16x8_t vmax = vmovq_n_s16((int16_t)UINT8_MAX);
const int16x8_t vmax =
vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX));
/* Elements which are within the acceptable range */
const int16x8_t le_max =
@ -962,6 +1035,8 @@ simde__m64 simde_mm_packs_pu16(simde__m64 a, simde__m64 b)
const int16x8_t values = vorrq_s16(le_max, gt_max);
r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values));
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
@ -1074,6 +1149,7 @@ simde__m64 simde_mm_set_pi16(int16_t e3, int16_t e2, int16_t e1, int16_t e0)
r_.i16[2] = e2;
r_.i16[3] = e3;
#endif
return simde__m64_from_private(r_);
#endif
}
@ -1285,6 +1361,36 @@ simde__m64 simde_mm_setzero_si64(void)
#define _mm_setzero_si64() simde_mm_setzero_si64()
#endif
/* Reads one simde__m64 worth of bytes from mem_addr and returns it.
 * The source pointer is passed through SIMDE_ALIGN_ASSUME_LIKE first.
 * NOTE(review): that macro is defined elsewhere; presumably it promises
 * the compiler that mem_addr is aligned like simde__m64 -- confirm. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m64 simde_x_mm_load_si64(const void *mem_addr)
{
	simde__m64 loaded;

	/* Byte-wise copy, so the pointee's declared type does not matter. */
	simde_memcpy(&loaded, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64),
		     sizeof(simde__m64));
	return loaded;
}
/* Reads one simde__m64 worth of bytes from mem_addr and returns it.
 * Unlike simde_x_mm_load_si64, the pointer is handed to the copy as-is,
 * with no alignment annotation applied. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m64 simde_x_mm_loadu_si64(const void *mem_addr)
{
	simde__m64 loaded;

	/* Byte-wise copy, so the pointee's declared type does not matter. */
	simde_memcpy(&loaded, mem_addr, sizeof(simde__m64));
	return loaded;
}
/* Writes the bytes of `value` to mem_addr.
 * The destination pointer is passed through SIMDE_ALIGN_ASSUME_LIKE first.
 * NOTE(review): that macro is defined elsewhere; presumably it promises
 * the compiler that mem_addr is aligned like simde__m64 -- confirm. */
SIMDE_FUNCTION_ATTRIBUTES
void simde_x_mm_store_si64(void *mem_addr, simde__m64 value)
{
	/* Byte-wise copy of the whole vector into the destination. */
	simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value,
		     sizeof(simde__m64));
}
/* Writes the bytes of `value` to mem_addr.
 * Unlike simde_x_mm_store_si64, the pointer is handed to the copy as-is,
 * with no alignment annotation applied. */
SIMDE_FUNCTION_ATTRIBUTES
void simde_x_mm_storeu_si64(void *mem_addr, simde__m64 value)
{
	/* Byte-wise copy of the whole vector into the destination. */
	simde_memcpy(mem_addr, &value, sizeof(simde__m64));
}
SIMDE_FUNCTION_ATTRIBUTES
simde__m64 simde_x_mm_setone_si64(void)
{
@ -1302,8 +1408,22 @@ simde__m64 simde_mm_sll_pi16(simde__m64 a, simde__m64 count)
simde__m64_private count_ = simde__m64_to_private(count);
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t)vget_lane_u64(
count_.neon_u64, 0)));
HEDLEY_DIAGNOSTIC_PUSH
#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
#pragma clang diagnostic ignored "-Wvector-conversion"
#endif
r_.neon_i16 =
vshl_s16(a_.neon_i16,
vmov_n_s16(HEDLEY_STATIC_CAST(
int16_t, vget_lane_u64(count_.neon_u64, 0))));
HEDLEY_DIAGNOSTIC_POP
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT)
if (HEDLEY_UNLIKELY(count_.u64[0] > 15))
return simde_mm_setzero_si64();
r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.i16 = a_.i16 << count_.u64[0];
#else
@ -1339,8 +1459,16 @@ simde__m64 simde_mm_sll_pi32(simde__m64 a, simde__m64 count)
simde__m64_private count_ = simde__m64_to_private(count);
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t)vget_lane_u64(
count_.neon_u64, 0)));
HEDLEY_DIAGNOSTIC_PUSH
#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
#pragma clang diagnostic ignored "-Wvector-conversion"
#endif
r_.neon_i32 =
vshl_s32(a_.neon_i32,
vmov_n_s32(HEDLEY_STATIC_CAST(
int32_t, vget_lane_u64(count_.neon_u64, 0))));
HEDLEY_DIAGNOSTIC_POP
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.i32 = a_.i32 << count_.u64[0];
#else
@ -1373,10 +1501,19 @@ simde__m64 simde_mm_slli_pi16(simde__m64 a, int count)
simde__m64_private r_;
simde__m64_private a_ = simde__m64_to_private(a);
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT)
if (HEDLEY_UNLIKELY(count > 15))
return simde_mm_setzero_si64();
r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.i16 = a_.i16 << count;
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t)count));
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i16 = psllh_s(a_.mmi_i16, b_.mmi_i16);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
@ -1406,6 +1543,8 @@ simde__m64 simde_mm_slli_pi32(simde__m64 a, int count)
r_.i32 = a_.i32 << count;
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t)count));
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
@ -1490,7 +1629,13 @@ simde__m64 simde_mm_srl_pi16(simde__m64 a, simde__m64 count)
simde__m64_private a_ = simde__m64_to_private(a);
simde__m64_private count_ = simde__m64_to_private(count);
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \
defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT)
if (HEDLEY_UNLIKELY(count_.u64[0] > 15))
return simde_mm_setzero_si64();
r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, count_.u64[0]);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.u16 = a_.u16 >> count_.u64[0];
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_u16 = vshl_u16(
@ -1567,6 +1712,8 @@ simde__m64 simde_mm_srli_pi16(simde__m64 a, int count)
r_.u16 = a_.u16 >> count;
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t)count)));
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
@ -1596,6 +1743,8 @@ simde__m64 simde_mm_srli_pi32(simde__m64 a, int count)
r_.u32 = a_.u32 >> count;
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t)count)));
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) {
@ -1682,7 +1831,10 @@ simde__m64 simde_mm_srai_pi16(simde__m64 a, int count)
#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
r_.i16 = a_.i16 >> (count & 0xff);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count));
r_.neon_i16 = vshl_s16(a_.neon_i16,
vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count)));
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i16 = psrah_s(a_.mmi_i16, count);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
@ -1713,6 +1865,8 @@ simde__m64 simde_mm_srai_pi32(simde__m64 a, int count)
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i32 = vshl_s32(a_.neon_i32,
vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count)));
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i32 = psraw_s(a_.mmi_i32, count);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
@ -1726,7 +1880,7 @@ simde__m64 simde_mm_srai_pi32(simde__m64 a, int count)
#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count)
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
#define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count)
#define _m_srai_pi32(a, count) simde_mm_srai_pi32(a, count)
#define _m_psradi(a, count) simde_mm_srai_pi32(a, count)
#endif
SIMDE_FUNCTION_ATTRIBUTES
@ -1813,6 +1967,8 @@ simde__m64 simde_mm_sub_pi8(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.i8 = a_.i8 - b_.i8;
#else
@ -1843,6 +1999,8 @@ simde__m64 simde_mm_sub_pi16(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.i16 = a_.i16 - b_.i16;
#else
@ -1873,6 +2031,8 @@ simde__m64 simde_mm_sub_pi32(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.i32 = a_.i32 - b_.i32;
#else
@ -1903,6 +2063,8 @@ simde__m64 simde_mm_subs_pi8(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) {
@ -1938,6 +2100,8 @@ simde__m64 simde_mm_subs_pu8(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) {
@ -1973,6 +2137,8 @@ simde__m64 simde_mm_subs_pi16(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) {
@ -2008,6 +2174,8 @@ simde__m64 simde_mm_subs_pu16(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16);
#else
SIMDE_VECTORIZE
for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) {
@ -2046,6 +2214,8 @@ simde__m64 simde_mm_unpackhi_pi8(simde__m64 a, simde__m64 b)
#elif defined(SIMDE_SHUFFLE_VECTOR_)
r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14,
7, 15);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8);
#else
r_.i8[0] = a_.i8[4];
r_.i8[1] = b_.i8[4];
@ -2078,6 +2248,8 @@ simde__m64 simde_mm_unpackhi_pi16(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16);
#elif defined(SIMDE_SHUFFLE_VECTOR_)
r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7);
#else
@ -2108,6 +2280,8 @@ simde__m64 simde_mm_unpackhi_pi32(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32);
#elif defined(SIMDE_SHUFFLE_VECTOR_)
r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3);
#else
@ -2136,6 +2310,8 @@ simde__m64 simde_mm_unpacklo_pi8(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8);
#elif defined(SIMDE_SHUFFLE_VECTOR_)
r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3,
11);
@ -2171,6 +2347,8 @@ simde__m64 simde_mm_unpacklo_pi16(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16);
#elif defined(SIMDE_SHUFFLE_VECTOR_)
r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5);
#else
@ -2201,6 +2379,8 @@ simde__m64 simde_mm_unpacklo_pi32(simde__m64 a, simde__m64 b)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32);
#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32);
#elif defined(SIMDE_SHUFFLE_VECTOR_)
r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2);
#else
@ -2253,7 +2433,13 @@ int32_t simde_m_to_int(simde__m64 a)
simde__m64_private a_ = simde__m64_to_private(a);
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
HEDLEY_DIAGNOSTIC_PUSH
#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \
SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0)
#pragma clang diagnostic ignored "-Wvector-conversion"
#endif
return vget_lane_s32(a_.neon_i32, 0);
HEDLEY_DIAGNOSTIC_POP
#else
return a_.i32[0];
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -17,55 +17,9 @@
#pragma once
#if NEEDS_SIMDE
#include "simde/sse2.h"
#define __m128 simde__m128
#define _mm_setzero_ps simde_mm_setzero_ps
#define _mm_set_ps simde_mm_set_ps
#define _mm_add_ps simde_mm_add_ps
#define _mm_sub_ps simde_mm_sub_ps
#define _mm_mul_ps simde_mm_mul_ps
#define _mm_div_ps simde_mm_div_ps
#define _mm_set1_ps simde_mm_set1_ps
#define _mm_movehl_ps simde_mm_movehl_ps
#define _mm_shuffle_ps simde_mm_shuffle_ps
#define _mm_min_ps simde_mm_min_ps
#define _mm_max_ps simde_mm_max_ps
#define _mm_movelh_ps simde_mm_movelh_ps
#define _mm_unpacklo_ps simde_mm_unpacklo_ps
#define _mm_unpackhi_ps simde_mm_unpackhi_ps
#define _mm_load_ps simde_mm_load_ps
#define _mm_andnot_ps simde_mm_andnot_ps
#define _mm_storeu_ps simde_mm_storeu_ps
#define _mm_loadu_ps simde_mm_loadu_ps
#define __m128i simde__m128i
#define _mm_set1_epi32 simde_mm_set1_epi32
#define _mm_set1_epi16 simde_mm_set1_epi16
#define _mm_load_si128 simde_mm_load_si128
#define _mm_packs_epi32 simde_mm_packs_epi32
#define _mm_srli_si128 simde_mm_srli_si128
#define _mm_and_si128 simde_mm_and_si128
#define _mm_packus_epi16 simde_mm_packus_epi16
#define _mm_add_epi64 simde_mm_add_epi64
#define _mm_shuffle_epi32 simde_mm_shuffle_epi32
#define _mm_srai_epi16 simde_mm_srai_epi16
#define _mm_shufflelo_epi16 simde_mm_shufflelo_epi16
#define _mm_storeu_si128 simde_mm_storeu_si128
#define _MM_SHUFFLE SIMDE_MM_SHUFFLE
#define _MM_TRANSPOSE4_PS SIMDE_MM_TRANSPOSE4_PS
#else
#if defined(__aarch64__) || defined(__arm__)
#include <arm_neon.h>
#include "sse2neon.h"
#else
#include <xmmintrin.h>
#if defined(_MSC_VER)
#include <emmintrin.h>
#endif
#else
#define SIMDE_ENABLE_NATIVE_ALIASES
#include "simde/x86/sse2.h"
#endif

File diff suppressed because it is too large Load Diff