Merge pull request #2069 from pgwipeout/aarch64

Enable Aarch64 support via SIMDe
master
Jim 2019-11-25 23:56:43 -08:00 committed by GitHub
commit 2a6baf9886
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 10750 additions and 11 deletions

View File

@ -28,6 +28,9 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64le")
PUBLIC
-mvsx)
add_compile_definitions(NO_WARN_X86_INTRINSICS)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
target_compile_options(media-playback
PUBLIC)
elseif(NOT MSVC)
target_compile_options(media-playback
PUBLIC

View File

@ -174,9 +174,20 @@ elseif(UNIX)
util/pipe-posix.c
util/platform-nix.c)
set(libobs_PLATFORM_HEADERS
util/threading-posix.h)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
set(libobs_PLATFORM_HEADERS
util/aarch/check.h
util/aarch/hedley.h
util/aarch/mmx.h
util/aarch/simde-arch.h
util/aarch/simde-common.h
util/aarch/sse.h
util/aarch/sse2.h
util/threading-posix.h)
else()
set(libobs_PLATFORM_HEADERS
util/threading-posix.h)
endif()
if(HAVE_PULSEAUDIO)
set(libobs_audio_monitoring_HEADERS
audio-monitoring/pulse/pulseaudio-wrapper.h)
@ -337,6 +348,7 @@ set(libobs_util_SOURCES
util/cf-parser.c
util/profiler.c)
set(libobs_util_HEADERS
util/sse-intrin.h
util/array-serializer.h
util/file-serializer.h
util/utf8.h
@ -470,6 +482,9 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64le")
PUBLIC
-mvsx)
add_compile_definitions(NO_WARN_X86_INTRINSICS)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
target_compile_options(libobs
PUBLIC)
elseif(NOT MSVC)
target_compile_options(libobs
PUBLIC

View File

@ -20,7 +20,8 @@
#include "../util/c99defs.h"
#include "math-defs.h"
#include "vec3.h"
#include <xmmintrin.h>
#include <util/sse-intrin.h>
/*
* Quaternion math

View File

@ -19,7 +19,8 @@
#include "math-defs.h"
#include "vec4.h"
#include <xmmintrin.h>
#include <util/sse-intrin.h>
#ifdef __cplusplus
extern "C" {

View File

@ -18,7 +18,8 @@
#pragma once
#include "math-defs.h"
#include <xmmintrin.h>
#include <util/sse-intrin.h>
#ifdef __cplusplus
extern "C" {

View File

@ -16,8 +16,8 @@
******************************************************************************/
#include "format-conversion.h"
#include <xmmintrin.h>
#include <emmintrin.h>
#include <util/sse-intrin.h>
/* ...surprisingly, if I don't use a macro to force inlining, it causes the
* CPU usage to boost by a tremendous amount in debug builds. */

View File

@ -16,7 +16,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <math.h>
#include <xmmintrin.h>
#include <util/sse-intrin.h>
#include "util/threading.h"
#include "util/bmem.h"

258
libobs/util/aarch/check.h Normal file
View File

@ -0,0 +1,258 @@
/* Check (assertions)
* Portable Snippets - https://github.com/nemequ/portable-snippets
* Created by Evan Nemerson <evan@nemerson.com>
*
* To the extent possible under law, the authors have waived all
* copyright and related or neighboring rights to this code. For
* details, see the Creative Commons Zero 1.0 Universal license at
* https://creativecommons.org/publicdomain/zero/1.0/
*/
#if !defined(SIMDE_CHECK_H)
#define SIMDE_CHECK_H
#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG)
#define SIMDE_NDEBUG 1
#endif
#include <stdint.h>
#if !defined(_WIN32)
#define SIMDE_SIZE_MODIFIER "z"
#define SIMDE_CHAR_MODIFIER "hh"
#define SIMDE_SHORT_MODIFIER "h"
#else
#if defined(_M_X64) || defined(__amd64__)
#define SIMDE_SIZE_MODIFIER "I64"
#else
#define SIMDE_SIZE_MODIFIER ""
#endif
#define SIMDE_CHAR_MODIFIER ""
#define SIMDE_SHORT_MODIFIER ""
#endif
#if defined(_MSC_VER) && (_MSC_VER >= 1500)
#define SIMDE__PUSH_DISABLE_MSVC_C4127 \
__pragma(warning(push)) __pragma(warning(disable : 4127))
#define SIMDE__POP_DISABLE_MSVC_C4127 __pragma(warning(pop))
#else
#define SIMDE__PUSH_DISABLE_MSVC_C4127
#define SIMDE__POP_DISABLE_MSVC_C4127
#endif
#if !defined(simde_errorf)
#include <stdio.h>
#include <stdlib.h>
#define simde_errorf(format, ...) \
(fprintf(stderr, format, __VA_ARGS__), abort())
#endif
#define simde_error(msg) simde_errorf("%s", msg)
#if defined(SIMDE_NDEBUG)
#if defined(SIMDE_CHECK_FAIL_DEFINED)
#define simde_assert(expr)
#else
#if defined(HEDLEY_ASSUME)
#define simde_assert(expr) HEDLEY_ASSUME(expr)
#elif HEDLEY_GCC_VERSION_CHECK(4, 5, 0)
#define simde_assert(expr) ((void)(!!(expr) ? 1 : (__builtin_unreachable(), 1)))
#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0)
#define simde_assert(expr) __assume(expr)
#else
#define simde_assert(expr)
#endif
#endif
#define simde_assert_true(expr) simde_assert(expr)
#define simde_assert_false(expr) simde_assert(!(expr))
#define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \
simde_assert(((a)op(b)))
#define simde_assert_double_equal(a, b, precision)
#define simde_assert_string_equal(a, b)
#define simde_assert_string_not_equal(a, b)
#define simde_assert_memory_equal(size, a, b)
#define simde_assert_memory_not_equal(size, a, b)
#else
#define simde_assert(expr) \
do { \
if (!HEDLEY_LIKELY(expr)) { \
simde_error("assertion failed: " #expr "\n"); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) SIMDE__POP_DISABLE_MSVC_C4127
#define simde_assert_true(expr) \
do { \
if (!HEDLEY_LIKELY(expr)) { \
simde_error("assertion failed: " #expr \
" is not true\n"); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) SIMDE__POP_DISABLE_MSVC_C4127
#define simde_assert_false(expr) \
do { \
if (!HEDLEY_LIKELY(!(expr))) { \
simde_error("assertion failed: " #expr \
" is not false\n"); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) SIMDE__POP_DISABLE_MSVC_C4127
#define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \
do { \
T simde_tmp_a_ = (a); \
T simde_tmp_b_ = (b); \
if (!(simde_tmp_a_ op simde_tmp_b_)) { \
simde_errorf("assertion failed: %s %s %s (" prefix \
"%" fmt suffix " %s " prefix \
"%" fmt suffix ")\n", \
#a, #op, #b, simde_tmp_a_, #op, \
simde_tmp_b_); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) SIMDE__POP_DISABLE_MSVC_C4127
#define simde_assert_double_equal(a, b, precision) \
do { \
const double simde_tmp_a_ = (a); \
const double simde_tmp_b_ = (b); \
const double simde_tmp_diff_ = \
((simde_tmp_a_ - simde_tmp_b_) < 0) \
? -(simde_tmp_a_ - simde_tmp_b_) \
: (simde_tmp_a_ - simde_tmp_b_); \
if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \
simde_errorf( \
"assertion failed: %s == %s (%0." #precision \
"g == %0." #precision "g)\n", \
#a, #b, simde_tmp_a_, simde_tmp_b_); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) SIMDE__POP_DISABLE_MSVC_C4127
#include <string.h>
#define simde_assert_string_equal(a, b) \
do { \
const char *simde_tmp_a_ = a; \
const char *simde_tmp_b_ = b; \
if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != \
0)) { \
simde_errorf( \
"assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \
#a, #b, simde_tmp_a_, simde_tmp_b_); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) SIMDE__POP_DISABLE_MSVC_C4127
#define simde_assert_string_not_equal(a, b) \
do { \
const char *simde_tmp_a_ = a; \
const char *simde_tmp_b_ = b; \
if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == \
0)) { \
simde_errorf( \
"assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \
#a, #b, simde_tmp_a_, simde_tmp_b_); \
} \
SIMDE__PUSH_DISABLE_MSVC_C4127 \
} while (0) SIMDE__POP_DISABLE_MSVC_C4127
/* Debug-build assertion that the first `size` bytes at `a` and `b` are
 * identical.  On a mismatch the offset of the first differing byte is
 * reported through simde_errorf().  The whole comparison is placed inside
 * HEDLEY_UNLIKELY so the branch hint applies to the failure condition. */
#define simde_assert_memory_equal(size, a, b) \
	do { \
		const unsigned char *simde_tmp_a_ = \
			(const unsigned char *)(a); \
		const unsigned char *simde_tmp_b_ = \
			(const unsigned char *)(b); \
		const size_t simde_tmp_size_ = (size); \
		if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, \
					   simde_tmp_size_) != 0)) { \
			size_t simde_tmp_pos_; \
			for (simde_tmp_pos_ = 0; \
			     simde_tmp_pos_ < simde_tmp_size_; \
			     simde_tmp_pos_++) { \
				if (simde_tmp_a_[simde_tmp_pos_] != \
				    simde_tmp_b_[simde_tmp_pos_]) { \
					simde_errorf( \
						"assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER \
						"u\n", \
						#a, #b, simde_tmp_pos_); \
					break; \
				} \
			} \
		} \
		SIMDE__PUSH_DISABLE_MSVC_C4127 \
	} while (0) SIMDE__POP_DISABLE_MSVC_C4127
/* Debug-build assertion that the first `size` bytes at `a` and `b` differ.
 * When they compare equal the failure is reported through simde_errorf().
 * The whole comparison is placed inside HEDLEY_UNLIKELY; with the `== 0`
 * outside, the hint marked the failure path as the expected one. */
#define simde_assert_memory_not_equal(size, a, b) \
	do { \
		const unsigned char *simde_tmp_a_ = \
			(const unsigned char *)(a); \
		const unsigned char *simde_tmp_b_ = \
			(const unsigned char *)(b); \
		const size_t simde_tmp_size_ = (size); \
		if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, \
					   simde_tmp_size_) == 0)) { \
			simde_errorf( \
				"assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER \
				"u bytes)\n", \
				#a, #b, simde_tmp_size_); \
		} \
		SIMDE__PUSH_DISABLE_MSVC_C4127 \
	} while (0) SIMDE__POP_DISABLE_MSVC_C4127
#endif
#define simde_assert_type(T, fmt, a, op, b) \
simde_assert_type_full("", "", T, fmt, a, op, b)
#define simde_assert_char(a, op, b) \
simde_assert_type_full("'\\x", "'", char, \
"02" SIMDE_CHAR_MODIFIER "x", a, op, b)
#define simde_assert_uchar(a, op, b) \
simde_assert_type_full("'\\x", "'", unsigned char, \
"02" SIMDE_CHAR_MODIFIER "x", a, op, b)
#define simde_assert_short(a, op, b) \
simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b)
#define simde_assert_ushort(a, op, b) \
simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b)
#define simde_assert_int(a, op, b) simde_assert_type(int, "d", a, op, b)
#define simde_assert_uint(a, op, b) \
simde_assert_type(unsigned int, "u", a, op, b)
#define simde_assert_long(a, op, b) simde_assert_type(long int, "ld", a, op, b)
#define simde_assert_ulong(a, op, b) \
simde_assert_type(unsigned long int, "lu", a, op, b)
#define simde_assert_llong(a, op, b) \
simde_assert_type(long long int, "lld", a, op, b)
#define simde_assert_ullong(a, op, b) \
simde_assert_type(unsigned long long int, "llu", a, op, b)
#define simde_assert_size(a, op, b) \
simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b)
#define simde_assert_float(a, op, b) simde_assert_type(float, "f", a, op, b)
#define simde_assert_double(a, op, b) simde_assert_type(double, "g", a, op, b)
#define simde_assert_ptr(a, op, b) \
simde_assert_type(const void *, "p", a, op, b)
#define simde_assert_int8(a, op, b) simde_assert_type(int8_t, PRIi8, a, op, b)
#define simde_assert_uint8(a, op, b) simde_assert_type(uint8_t, PRIu8, a, op, b)
#define simde_assert_int16(a, op, b) \
simde_assert_type(int16_t, PRIi16, a, op, b)
#define simde_assert_uint16(a, op, b) \
simde_assert_type(uint16_t, PRIu16, a, op, b)
#define simde_assert_int32(a, op, b) \
simde_assert_type(int32_t, PRIi32, a, op, b)
#define simde_assert_uint32(a, op, b) \
simde_assert_type(uint32_t, PRIu32, a, op, b)
#define simde_assert_int64(a, op, b) \
simde_assert_type(int64_t, PRIi64, a, op, b)
#define simde_assert_uint64(a, op, b) \
simde_assert_type(uint64_t, PRIu64, a, op, b)
#define simde_assert_ptr_equal(a, b) simde_assert_ptr(a, ==, b)
#define simde_assert_ptr_not_equal(a, b) simde_assert_ptr(a, !=, b)
#define simde_assert_null(ptr) simde_assert_ptr(ptr, ==, NULL)
#define simde_assert_not_null(ptr) simde_assert_ptr(ptr, !=, NULL)
#define simde_assert_ptr_null(ptr) simde_assert_ptr(ptr, ==, NULL)
#define simde_assert_ptr_not_null(ptr) simde_assert_ptr(ptr, !=, NULL)
#endif /* !defined(SIMDE_CHECK_H) */

1616
libobs/util/aarch/hedley.h Normal file

File diff suppressed because it is too large Load Diff

1356
libobs/util/aarch/mmx.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,355 @@
/* Architecture detection
* Created by Evan Nemerson <evan@nemerson.com>
*
* To the extent possible under law, the authors have waived all
* copyright and related or neighboring rights to this code. For
* details, see the Creative Commons Zero 1.0 Universal license at
* <https://creativecommons.org/publicdomain/zero/1.0/>
*
* Different compilers define different preprocessor macros for the
* same architecture. This is an attempt to provide a single
* interface which is usable on any compiler.
*
* In general, a macro named SIMDE_ARCH_* is defined for each
* architecture the CPU supports. When there are multiple possible
* versions, we try to define the macro to the target version. For
* example, if you want to check for i586+, you could do something
* like:
*
* #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5)
* ...
* #endif
*
* You could also just check that SIMDE_ARCH_X86 >= 5 without checking
* if it's defined first, but some compilers may emit a warning about
* an undefined macro being used (e.g., GCC with -Wundef).
*
* This was originally created for SIMDe
* <https://github.com/nemequ/simde> (hence the prefix), but this
* header has no dependencies and may be used anywhere. It is
* originally based on information from
* <https://sourceforge.net/p/predef/wiki/Architectures/>, though it
* has been enhanced with additional information.
*
* If you improve this file, or find a bug, please file the issue at
* <https://github.com/nemequ/simde/issues>. If you copy this into
* your project, even if you change the prefix, please keep the links
* to SIMDe intact so others know where to report issues, submit
* enhancements, and find the latest version. */
#if !defined(SIMDE_ARCH_H)
#define SIMDE_ARCH_H
/* Alpha
<https://en.wikipedia.org/wiki/DEC_Alpha> */
#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA)
#if defined(__alpha_ev6__)
#define SIMDE_ARCH_ALPHA 6
#elif defined(__alpha_ev5__)
#define SIMDE_ARCH_ALPHA 5
#elif defined(__alpha_ev4__)
#define SIMDE_ARCH_ALPHA 4
#else
#define SIMDE_ARCH_ALPHA 1
#endif
#endif
/* Atmel AVR
<https://en.wikipedia.org/wiki/Atmel_AVR> */
#if defined(__AVR_ARCH__)
#define SIMDE_ARCH_AVR __AVR_ARCH__
#endif
/* AMD64 / x86_64
<https://en.wikipedia.org/wiki/X86-64> */
/* SIMDE_ARCH_AMD64 is set to 1 when any of the known x86-64 predefined
 * macros is present (the __amd64__/__x86_64__ spellings, or _M_X64 /
 * _M_AMD64). */
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || \
	defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
#define SIMDE_ARCH_AMD64 1
#endif
/* ARM
<https://en.wikipedia.org/wiki/ARM_architecture> */
/* The specific __ARM_ARCH_*__ profile macros are tested first; the numeric
 * __ARM_ARCH and _M_ARM values are multiplied by 10 so they land in the
 * same two-digit range.  The last branch is a generic fallback for
 * compilers that only advertise "some ARM"; _M_ARM is not repeated there
 * because the preceding branch already handles it. */
#if defined(__ARM_ARCH_8A__)
#define SIMDE_ARCH_ARM 82
#elif defined(__ARM_ARCH_8R__)
#define SIMDE_ARCH_ARM 81
#elif defined(__ARM_ARCH_8__)
#define SIMDE_ARCH_ARM 80
#elif defined(__ARM_ARCH_7S__)
#define SIMDE_ARCH_ARM 74
#elif defined(__ARM_ARCH_7M__)
#define SIMDE_ARCH_ARM 73
#elif defined(__ARM_ARCH_7R__)
#define SIMDE_ARCH_ARM 72
#elif defined(__ARM_ARCH_7A__)
#define SIMDE_ARCH_ARM 71
#elif defined(__ARM_ARCH_7__)
#define SIMDE_ARCH_ARM 70
#elif defined(__ARM_ARCH)
#define SIMDE_ARCH_ARM (__ARM_ARCH * 10)
#elif defined(_M_ARM)
#define SIMDE_ARCH_ARM (_M_ARM * 10)
#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || \
	defined(_ARM)
#define SIMDE_ARCH_ARM 1
#endif
/* AArch64
<https://en.wikipedia.org/wiki/ARM_architecture> */
#if defined(__aarch64__) || defined(_M_ARM64)
#define SIMDE_ARCH_AARCH64 10
#endif
/* Blackfin
<https://en.wikipedia.org/wiki/Blackfin> */
#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__)
#define SIMDE_ARCH_BLACKFIN 1
#endif
/* CRIS
<https://en.wikipedia.org/wiki/ETRAX_CRIS> */
#if defined(__CRIS_arch_version)
#define SIMDE_ARCH_CRIS __CRIS_arch_version
#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || \
defined(__CRIS__)
#define SIMDE_ARCH_CRIS 1
#endif
/* Convex
<https://en.wikipedia.org/wiki/Convex_Computer> */
#if defined(__convex_c38__)
#define SIMDE_ARCH_CONVEX 38
#elif defined(__convex_c34__)
#define SIMDE_ARCH_CONVEX 34
#elif defined(__convex_c32__)
#define SIMDE_ARCH_CONVEX 32
#elif defined(__convex_c2__)
#define SIMDE_ARCH_CONVEX 2
#elif defined(__convex__)
#define SIMDE_ARCH_CONVEX 1
#endif
/* Adapteva Epiphany
<https://en.wikipedia.org/wiki/Adapteva_Epiphany> */
#if defined(__epiphany__)
#define SIMDE_ARCH_EPIPHANY 1
#endif
/* Fujitsu FR-V
<https://en.wikipedia.org/wiki/FR-V_(microprocessor)> */
#if defined(__frv__)
#define SIMDE_ARCH_FRV 1
#endif
/* H8/300
<https://en.wikipedia.org/wiki/H8_Family> */
/* Defined to 1 (rather than empty) so it can be used in #if arithmetic
 * like the other SIMDE_ARCH_* macros. */
#if defined(__H8300__)
#define SIMDE_ARCH_H8300 1
#endif
/* HP/PA / PA-RISC
<https://en.wikipedia.org/wiki/PA-RISC> */
#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || \
defined(_PA_RISC2_0)
#define SIMDE_ARCH_HPPA 20
#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1)
#define SIMDE_ARCH_HPPA 11
#elif defined(_PA_RISC1_0)
#define SIMDE_ARCH_HPPA 10
#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa)
#define SIMDE_ARCH_HPPA 1
#endif
/* x86
<https://en.wikipedia.org/wiki/X86> */
#if defined(_M_IX86)
#define SIMDE_ARCH_X86 (_M_IX86 / 100)
#elif defined(__I86__)
#define SIMDE_ARCH_X86 __I86__
#elif defined(i686) || defined(__i686) || defined(__i686__)
#define SIMDE_ARCH_X86 6
#elif defined(i586) || defined(__i586) || defined(__i586__)
#define SIMDE_ARCH_X86 5
#elif defined(i486) || defined(__i486) || defined(__i486__)
#define SIMDE_ARCH_X86 4
#elif defined(i386) || defined(__i386) || defined(__i386__)
#define SIMDE_ARCH_X86 3
#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__)
#define SIMDE_ARCH_X86 3
#endif
/* Itanium
<https://en.wikipedia.org/wiki/Itanium> */
#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || \
defined(__ia64) || defined(_M_IA64) || defined(__itanium__)
#define SIMDE_ARCH_IA64 1
#endif
/* Renesas M32R
<https://en.wikipedia.org/wiki/M32R> */
/* Defined to 1 (rather than empty) so it can be used in #if arithmetic
 * like the other SIMDE_ARCH_* macros. */
#if defined(__m32r__) || defined(__M32R__)
#define SIMDE_ARCH_M32R 1
#endif
/* Motorola 68000
<https://en.wikipedia.org/wiki/Motorola_68000> */
#if defined(__mc68060__) || defined(__MC68060__)
#define SIMDE_ARCH_M68K 68060
#elif defined(__mc68040__) || defined(__MC68040__)
#define SIMDE_ARCH_M68K 68040
#elif defined(__mc68030__) || defined(__MC68030__)
#define SIMDE_ARCH_M68K 68030
#elif defined(__mc68020__) || defined(__MC68020__)
#define SIMDE_ARCH_M68K 68020
#elif defined(__mc68010__) || defined(__MC68010__)
#define SIMDE_ARCH_M68K 68010
#elif defined(__mc68000__) || defined(__MC68000__)
#define SIMDE_ARCH_M68K 68000
#endif
/* Xilinx MicroBlaze
<https://en.wikipedia.org/wiki/MicroBlaze> */
/* Defined to 1 (rather than empty) so it can be used in #if arithmetic
 * like the other SIMDE_ARCH_* macros. */
#if defined(__MICROBLAZE__) || defined(__microblaze__)
#define SIMDE_ARCH_MICROBLAZE 1
#endif
/* MIPS
<https://en.wikipedia.org/wiki/MIPS_architecture> */
#if defined(_MIPS_ISA_MIPS64R2)
#define SIMDE_ARCH_MIPS 642
#elif defined(_MIPS_ISA_MIPS64)
#define SIMDE_ARCH_MIPS 640
#elif defined(_MIPS_ISA_MIPS32R2)
#define SIMDE_ARCH_MIPS 322
#elif defined(_MIPS_ISA_MIPS32)
#define SIMDE_ARCH_MIPS 320
#elif defined(_MIPS_ISA_MIPS4)
#define SIMDE_ARCH_MIPS 4
#elif defined(_MIPS_ISA_MIPS3)
#define SIMDE_ARCH_MIPS 3
#elif defined(_MIPS_ISA_MIPS2)
#define SIMDE_ARCH_MIPS 2
#elif defined(_MIPS_ISA_MIPS1)
#define SIMDE_ARCH_MIPS 1
#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__)
#define SIMDE_ARCH_MIPS 1
#endif
/* Matsushita MN10300
<https://en.wikipedia.org/wiki/MN103> */
#if defined(__MN10300__) || defined(__mn10300__)
#define SIMDE_ARCH_MN10300 1
#endif
/* POWER
<https://en.wikipedia.org/wiki/IBM_POWER_Instruction_Set_Architecture> */
#if defined(_M_PPC)
#define SIMDE_ARCH_POWER _M_PPC
#elif defined(_ARCH_PWR8)
#define SIMDE_ARCH_POWER 800
#elif defined(_ARCH_PWR7)
#define SIMDE_ARCH_POWER 700
#elif defined(_ARCH_PWR6)
#define SIMDE_ARCH_POWER 600
#elif defined(_ARCH_PWR5)
#define SIMDE_ARCH_POWER 500
#elif defined(_ARCH_PWR4)
#define SIMDE_ARCH_POWER 400
#elif defined(_ARCH_440) || defined(__ppc440__)
#define SIMDE_ARCH_POWER 440
#elif defined(_ARCH_450) || defined(__ppc450__)
#define SIMDE_ARCH_POWER 450
#elif defined(_ARCH_601) || defined(__ppc601__)
#define SIMDE_ARCH_POWER 601
#elif defined(_ARCH_603) || defined(__ppc603__)
#define SIMDE_ARCH_POWER 603
#elif defined(_ARCH_604) || defined(__ppc604__)
#define SIMDE_ARCH_POWER 604
#elif defined(_ARCH_605) || defined(__ppc605__)
#define SIMDE_ARCH_POWER 605
#elif defined(_ARCH_620) || defined(__ppc620__)
#define SIMDE_ARCH_POWER 620
#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || \
defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || \
defined(__ppc)
#define SIMDE_ARCH_POWER 1
#endif
/* SPARC
<https://en.wikipedia.org/wiki/SPARC> */
#if defined(__sparc_v9__) || defined(__sparcv9)
#define SIMDE_ARCH_SPARC 9
#elif defined(__sparc_v8__) || defined(__sparcv8)
#define SIMDE_ARCH_SPARC 8
#elif defined(__sparc_v7__) || defined(__sparcv7)
#define SIMDE_ARCH_SPARC 7
#elif defined(__sparc_v6__) || defined(__sparcv6)
#define SIMDE_ARCH_SPARC 6
#elif defined(__sparc_v5__) || defined(__sparcv5)
#define SIMDE_ARCH_SPARC 5
#elif defined(__sparc_v4__) || defined(__sparcv4)
#define SIMDE_ARCH_SPARC 4
#elif defined(__sparc_v3__) || defined(__sparcv3)
#define SIMDE_ARCH_SPARC 3
#elif defined(__sparc_v2__) || defined(__sparcv2)
#define SIMDE_ARCH_SPARC 2
#elif defined(__sparc_v1__) || defined(__sparcv1)
#define SIMDE_ARCH_SPARC 1
#elif defined(__sparc__) || defined(__sparc)
#define SIMDE_ARCH_SPARC 1
#endif
/* SuperH
<https://en.wikipedia.org/wiki/SuperH> */
#if defined(__sh5__) || defined(__SH5__)
#define SIMDE_ARCH_SUPERH 5
#elif defined(__sh4__) || defined(__SH4__)
#define SIMDE_ARCH_SUPERH 4
#elif defined(__sh3__) || defined(__SH3__)
#define SIMDE_ARCH_SUPERH 3
#elif defined(__sh2__) || defined(__SH2__)
#define SIMDE_ARCH_SUPERH 2
#elif defined(__sh1__) || defined(__SH1__)
#define SIMDE_ARCH_SUPERH 1
#elif defined(__sh__) || defined(__SH__)
#define SIMDE_ARCH_SUPERH 1
#endif
/* IBM System z
<https://en.wikipedia.org/wiki/IBM_System_z> */
/* Defined to 1 (rather than empty) so it can be used in #if arithmetic
 * like the other SIMDE_ARCH_* macros. */
#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || \
	defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__)
#define SIMDE_ARCH_SYSTEMZ 1
#endif
/* TMS320 DSP
<https://en.wikipedia.org/wiki/Texas_Instruments_TMS320> */
#if defined(_TMS320C6740) || defined(__TMS320C6740__)
#define SIMDE_ARCH_TMS320 6740
#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__)
#define SIMDE_ARCH_TMS320 6701
#elif defined(_TMS320C6700) || defined(__TMS320C6700__)
#define SIMDE_ARCH_TMS320 6700
#elif defined(_TMS320C6600) || defined(__TMS320C6600__)
#define SIMDE_ARCH_TMS320 6600
#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__)
#define SIMDE_ARCH_TMS320 6401
#elif defined(_TMS320C6400) || defined(__TMS320C6400__)
#define SIMDE_ARCH_TMS320 6400
#elif defined(_TMS320C6200) || defined(__TMS320C6200__)
#define SIMDE_ARCH_TMS320 6200
#elif defined(_TMS320C55X) || defined(__TMS320C55X__)
#define SIMDE_ARCH_TMS320 550
#elif defined(_TMS320C54X) || defined(__TMS320C54X__)
#define SIMDE_ARCH_TMS320 540
#elif defined(_TMS320C28X) || defined(__TMS320C28X__)
#define SIMDE_ARCH_TMS320 280
#endif
/* Xtensa
<https://en.wikipedia.org/wiki/Xtensa> */
#if defined(__xtensa__) || defined(__XTENSA__)
#define SIMDE_ARCH_XTENSA 1
#endif
#endif /* !defined(SIMDE_ARCH_H) */

View File

@ -0,0 +1,278 @@
/* Copyright (c) 2017-2019 Evan Nemerson <evan@nemerson.com>
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if !defined(SIMDE_COMMON_H)
#define SIMDE_COMMON_H
#include "hedley.h"
#include "check.h"
#include "simde-arch.h"
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
#define SIMDE_ALIGN(alignment) _Alignas(alignment)
#elif (defined(__cplusplus) && (__cplusplus >= 201103L))
#define SIMDE_ALIGN(alignment) alignas(alignment)
#elif HEDLEY_GCC_VERSION_CHECK(2, 95, 0) || \
HEDLEY_CRAY_VERSION_CHECK(8, 4, 0) || \
HEDLEY_IBM_VERSION_CHECK(11, 1, 0) || \
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
HEDLEY_PGI_VERSION_CHECK(19, 4, 0) || \
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
HEDLEY_TINYC_VERSION_CHECK(0, 9, 24) || \
HEDLEY_TI_VERSION_CHECK(8, 1, 0)
#define SIMDE_ALIGN(alignment) __attribute__((aligned(alignment)))
#elif defined(_MSC_VER) && (!defined(_M_IX86) || defined(_M_AMD64))
#define SIMDE_ALIGN(alignment) __declspec(align(alignment))
#else
#define SIMDE_ALIGN(alignment)
#endif
#define simde_assert_aligned(alignment, val) \
simde_assert_int(((uintptr_t)(val)) % (alignment), ==, 0)
#if HEDLEY_GCC_HAS_ATTRIBUTE(vector_size, 4, 6, 0)
#define SIMDE__ENABLE_GCC_VEC_EXT
#endif
#if !defined(SIMDE_ENABLE_OPENMP) && \
((defined(_OPENMP) && (_OPENMP >= 201307L)) || \
(defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L)))
#define SIMDE_ENABLE_OPENMP
#endif
#if !defined(SIMDE_ENABLE_CILKPLUS) && defined(__cilk)
#define SIMDE_ENABLE_CILKPLUS
#endif
#if defined(SIMDE_ENABLE_OPENMP)
#define SIMDE__VECTORIZE _Pragma("omp simd")
#define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l))
#define SIMDE__VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r))
#define SIMDE__VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a))
#elif defined(SIMDE_ENABLE_CILKPLUS)
#define SIMDE__VECTORIZE _Pragma("simd")
#define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l))
#define SIMDE__VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r))
#define SIMDE__VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a))
#elif defined(__INTEL_COMPILER)
#define SIMDE__VECTORIZE _Pragma("simd")
#define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l))
#define SIMDE__VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r))
#define SIMDE__VECTORIZE_ALIGNED(a)
#elif defined(__clang__)
#define SIMDE__VECTORIZE _Pragma("clang loop vectorize(enable)")
#define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l))
#define SIMDE__VECTORIZE_REDUCTION(r) SIMDE__VECTORIZE
#define SIMDE__VECTORIZE_ALIGNED(a)
#elif HEDLEY_GCC_VERSION_CHECK(4, 9, 0)
#define SIMDE__VECTORIZE _Pragma("GCC ivdep")
#define SIMDE__VECTORIZE_SAFELEN(l) SIMDE__VECTORIZE
#define SIMDE__VECTORIZE_REDUCTION(r) SIMDE__VECTORIZE
#define SIMDE__VECTORIZE_ALIGNED(a)
#elif HEDLEY_CRAY_VERSION_CHECK(5, 0, 0)
#define SIMDE__VECTORIZE _Pragma("_CRI ivdep")
#define SIMDE__VECTORIZE_SAFELEN(l) SIMDE__VECTORIZE
#define SIMDE__VECTORIZE_REDUCTION(r) SIMDE__VECTORIZE
#define SIMDE__VECTORIZE_ALIGNED(a)
#else
#define SIMDE__VECTORIZE
#define SIMDE__VECTORIZE_SAFELEN(l)
#define SIMDE__VECTORIZE_REDUCTION(r)
#define SIMDE__VECTORIZE_ALIGNED(a)
#endif
#if HEDLEY_GCC_HAS_ATTRIBUTE(unused, 3, 1, 0)
#define SIMDE__UNUSED __attribute__((__unused__))
#else
#define SIMDE__UNUSED
#endif
#if HEDLEY_GCC_HAS_ATTRIBUTE(artificial, 4, 3, 0)
#define SIMDE__ARTIFICIAL __attribute__((__artificial__))
#else
#define SIMDE__ARTIFICIAL
#endif
/* Intended for checking coverage, you should never use this in
production. */
#if defined(SIMDE_NO_INLINE)
#define SIMDE__FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE SIMDE__UNUSED static
#else
#define SIMDE__FUNCTION_ATTRIBUTES HEDLEY_INLINE SIMDE__ARTIFICIAL static
#endif
#if defined(_MSC_VER)
#define SIMDE__BEGIN_DECLS \
HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable : 4996 4204)) \
HEDLEY_BEGIN_C_DECLS
#define SIMDE__END_DECLS HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS
#else
#define SIMDE__BEGIN_DECLS HEDLEY_BEGIN_C_DECLS
#define SIMDE__END_DECLS HEDLEY_END_C_DECLS
#endif
#if defined(__SIZEOF_INT128__)
#define SIMDE__HAVE_INT128
typedef __int128 simde_int128;
typedef unsigned __int128 simde_uint128;
#endif
/* TODO: we should at least make an attempt to detect the correct
types for simde_float32/float64 instead of just assuming float and
double. */
#if !defined(SIMDE_FLOAT32_TYPE)
#define SIMDE_FLOAT32_TYPE float
#define SIMDE_FLOAT32_C(value) value##f
#else
#define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE)value)
#endif
typedef SIMDE_FLOAT32_TYPE simde_float32;
HEDLEY_STATIC_ASSERT(sizeof(simde_float32) == 4,
"Unable to find 32-bit floating-point type.");
/* 64-bit float type selection.  By default simde_float64 is `double` and
 * SIMDE_FLOAT64_C() leaves the literal untouched.  When the user supplies
 * SIMDE_FLOAT64_TYPE, SIMDE_FLOAT64_C() casts the literal to that type
 * (this branch previously redefined SIMDE_FLOAT32_C by mistake, leaving
 * SIMDE_FLOAT64_C undefined). */
#if !defined(SIMDE_FLOAT64_TYPE)
#define SIMDE_FLOAT64_TYPE double
#define SIMDE_FLOAT64_C(value) value
#else
#define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE)value)
#endif
typedef SIMDE_FLOAT64_TYPE simde_float64;
HEDLEY_STATIC_ASSERT(sizeof(simde_float64) == 8,
"Unable to find 64-bit floating-point type.");
/* Whether to assume that the compiler can auto-vectorize reasonably
well. This will cause SIMDe to attempt to compose vector
operations using simpler vector operations instead of minimizing
serial work.
As an example, consider the _mm_add_ss(a, b) function from SSE,
which returns { a0 + b0, a1, a2, a3 }. This pattern is repeated
for other operations (sub, mul, etc.).
The naïve implementation would result in loading a0 and b0, adding
them into a temporary variable, then splicing that value into a new
vector with the remaining elements from a.
On platforms which support vectorization, it's generally faster to
simply perform the operation on the entire vector to avoid having
to move data between SIMD registers and non-SIMD registers.
Basically, instead of the temporary variable being (a0 + b0) it
would be a vector of (a + b), which is then combined with a to form
the result.
By default, SIMDe will prefer the pure-vector versions if we detect
a vector ISA extension, but this can be overridden by defining
SIMDE_NO_ASSUME_VECTORIZATION. You can also define
SIMDE_ASSUME_VECTORIZATION if you want to force SIMDe to use the
vectorized version. */
#if !defined(SIMDE_NO_ASSUME_VECTORIZATION) && \
!defined(SIMDE_ASSUME_VECTORIZATION)
#if defined(__SSE__) || defined(__ARM_NEON) || defined(__mips_msa) || \
defined(__ALTIVEC__)
#define SIMDE_ASSUME_VECTORIZATION
#endif
#endif
/* GCC and clang have built-in functions to handle shuffling of
vectors, but the implementations are slightly different. This
macro is just an abstraction over them. Note that elem_size is in
bits but vec_size is in bytes. */
#if HEDLEY_CLANG_HAS_BUILTIN(__builtin_shufflevector)
#define SIMDE__SHUFFLE_VECTOR(elem_size, vec_size, a, b, ...) \
__builtin_shufflevector(a, b, __VA_ARGS__)
#elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle, 4, 7, 0) && \
!defined(__INTEL_COMPILER)
#define SIMDE__SHUFFLE_VECTOR(elem_size, vec_size, a, b, ...) \
__builtin_shuffle(a, b, \
(int##elem_size##_t __attribute__( \
(__vector_size__(vec_size)))){__VA_ARGS__})
#endif
/* Some algorithms are iterative, and fewer iterations means less
accuracy. Lower values here will result in faster, but less
accurate, calculations for some functions. */
#if !defined(SIMDE_ACCURACY_ITERS)
#define SIMDE_ACCURACY_ITERS 2
#endif
/* This will probably move into Hedley at some point, but I'd like to
more thoroughly check for other compilers which define __GNUC__
first. */
#if defined(SIMDE__REALLY_GCC)
#undef SIMDE__REALLY_GCC
#endif
#if !defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
#define SIMDE__REALLY_GCC 0
#else
#define SIMDE__REALLY_GCC 1
#endif
/* SIMDE__ASSUME_ALIGNED(ptr, align): tell the optimizer that `ptr` is
 * aligned to `align` bytes, using whichever compiler facility the Hedley
 * checks below detect.  Expands to nothing when none is detected.  Any
 * earlier definition is discarded first. */
#if defined(SIMDE__ASSUME_ALIGNED)
#undef SIMDE__ASSUME_ALIGNED
#endif
#if HEDLEY_INTEL_VERSION_CHECK(9, 0, 0)
#define SIMDE__ASSUME_ALIGNED(ptr, align) __assume_aligned(ptr, align)
#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0)
#define SIMDE__ASSUME_ALIGNED(ptr, align) \
	__assume((((char *)(ptr)) - ((char *)0)) % (align) == 0)
#elif HEDLEY_GCC_HAS_BUILTIN(__builtin_assume_aligned, 4, 7, 0)
/* This form assigns back to ptr, so ptr must be a modifiable lvalue. */
#define SIMDE__ASSUME_ALIGNED(ptr, align) \
	(ptr = (__typeof__(ptr))__builtin_assume_aligned((ptr), align))
#elif HEDLEY_CLANG_HAS_BUILTIN(__builtin_assume)
#define SIMDE__ASSUME_ALIGNED(ptr, align) \
	__builtin_assume((((char *)(ptr)) - ((char *)0)) % (align) == 0)
#elif HEDLEY_GCC_HAS_BUILTIN(__builtin_unreachable, 4, 5, 0)
/* Fully parenthesized so the conditional expression cannot bind to
 * neighbouring operators at the expansion site. */
#define SIMDE__ASSUME_ALIGNED(ptr, align) \
	(((((char *)(ptr)) - ((char *)0)) % (align) == 0) \
		 ? (1) \
		 : (__builtin_unreachable(), 0))
#else
#define SIMDE__ASSUME_ALIGNED(ptr, align)
#endif
/* Sometimes we run into problems with specific versions of compilers
which make the native versions unusable for us. Often this is due
to missing functions, sometimes buggy implementations, etc. These
macros are how we check for specific bugs. As they are fixed we'll
start only defining them for problematic compiler versions. */
#if !defined(SIMDE_IGNORE_COMPILER_BUGS)
#if SIMDE__REALLY_GCC
#if !HEDLEY_GCC_VERSION_CHECK(4, 9, 0)
#define SIMDE_BUG_GCC_REV_208793
#endif
#if !HEDLEY_GCC_VERSION_CHECK(5, 0, 0)
#define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */
#endif
#if !HEDLEY_GCC_VERSION_CHECK(4, 6, 0)
#define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */
#endif
#endif
#if defined(__EMSCRIPTEN__)
#define SIMDE_BUG_EMSCRIPTEN_MISSING_IMPL /* Placeholder for (as yet) unfiled issues. */
#define SIMDE_BUG_EMSCRIPTEN_5242
#endif
#endif
#endif /* !defined(SIMDE_COMMON_H) */

2591
libobs/util/aarch/sse.h Normal file

File diff suppressed because it is too large Load Diff

4197
libobs/util/aarch/sse2.h Normal file

File diff suppressed because it is too large Load Diff

66
libobs/util/sse-intrin.h Normal file
View File

@ -0,0 +1,66 @@
/******************************************************************************
Copyright (C) 2019 by Peter Geis <pgwipeout@gmail.com>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
#pragma once
/* AArch64 builds pull in the bundled SIMDe SSE2 header and alias the x86
 * intrinsic names used by callers to their simde_* counterparts, so code
 * written against the x86 names compiles unchanged.  Only the names listed
 * here are aliased; a caller needing another intrinsic must add it below.
 * NOTE(review): the *_ps aliases presumably resolve through aarch/sse.h
 * being included by aarch/sse2.h -- confirm. */
#ifdef __aarch64__
#include "aarch/sse2.h"
/* Float vector type and *_ps operations. */
#define __m128 simde__m128
#define _mm_setzero_ps simde_mm_setzero_ps
#define _mm_set_ps simde_mm_set_ps
#define _mm_add_ps simde_mm_add_ps
#define _mm_sub_ps simde_mm_sub_ps
#define _mm_mul_ps simde_mm_mul_ps
#define _mm_div_ps simde_mm_div_ps
#define _mm_set1_ps simde_mm_set1_ps
#define _mm_movehl_ps simde_mm_movehl_ps
#define _mm_shuffle_ps simde_mm_shuffle_ps
#define _mm_min_ps simde_mm_min_ps
#define _mm_max_ps simde_mm_max_ps
#define _mm_movelh_ps simde_mm_movelh_ps
#define _mm_unpacklo_ps simde_mm_unpacklo_ps
#define _mm_unpackhi_ps simde_mm_unpackhi_ps
#define _mm_load_ps simde_mm_load_ps
#define _mm_andnot_ps simde_mm_andnot_ps
#define _mm_storeu_ps simde_mm_storeu_ps
#define _mm_loadu_ps simde_mm_loadu_ps
/* Integer vector type and integer operations. */
#define __m128i simde__m128i
#define _mm_set1_epi32 simde_mm_set1_epi32
#define _mm_set1_epi16 simde_mm_set1_epi16
#define _mm_load_si128 simde_mm_load_si128
#define _mm_packs_epi32 simde_mm_packs_epi32
#define _mm_srli_si128 simde_mm_srli_si128
#define _mm_and_si128 simde_mm_and_si128
#define _mm_packus_epi16 simde_mm_packus_epi16
#define _mm_add_epi64 simde_mm_add_epi64
#define _mm_shuffle_epi32 simde_mm_shuffle_epi32
#define _mm_srai_epi16 simde_mm_srai_epi16
#define _mm_shufflelo_epi16 simde_mm_shufflelo_epi16
#define _mm_storeu_si128 simde_mm_storeu_si128
/* Helper macros. */
#define _MM_SHUFFLE SIMDE_MM_SHUFFLE
#define _MM_TRANSPOSE4_PS SIMDE_MM_TRANSPOSE4_PS
#else
/* Every other target includes the compiler's own SSE/SSE2 headers. */
#include <xmmintrin.h>
#include <emmintrin.h>
#endif /* __aarch64__ */

View File

@ -1,6 +1,6 @@
#include "audio-repack.h"
#include <emmintrin.h>
#include <util/sse-intrin.h>
int check_buffer(struct audio_repack *repack, uint32_t frame_count)
{

View File

@ -5,7 +5,7 @@
#include <util/threading.h>
#include <windows.h>
#include <dxgi.h>
#include <emmintrin.h>
#include <util/sse-intrin.h>
#include <ipc-util/pipe.h>
#include "obfuscate.h"
#include "inject-library.h"