Avoid compiling different sources for different CPU targets

Simplifies configuration and fixes a potential problem with inline functions.
Inline functions that fail to inline will have a callable body generated. If
such a body is generated while compiling the SSE4 source, for example, it can
contain SSE4 instructions. Because only one copy of an inline function's body
is kept at link time, calls to that function from other sources can then end
up calling the SSE4-generated body outside of any CPU capability check.
This commit is contained in:
Chris Robinson 2020-12-13 16:49:46 -08:00
parent 225d42538d
commit 783904e414
5 changed files with 23 additions and 32 deletions

View File

@ -344,8 +344,6 @@ endif()
set(SSE2_SWITCH "") set(SSE2_SWITCH "")
set(SSE3_SWITCH "")
set(SSE4_1_SWITCH "")
set(FPU_NEON_SWITCH "") set(FPU_NEON_SWITCH "")
set(OLD_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) set(OLD_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
@ -356,14 +354,6 @@ endif()
check_c_compiler_flag(-msse2 HAVE_MSSE2_SWITCH) check_c_compiler_flag(-msse2 HAVE_MSSE2_SWITCH)
if(HAVE_MSSE2_SWITCH) if(HAVE_MSSE2_SWITCH)
set(SSE2_SWITCH "-msse2") set(SSE2_SWITCH "-msse2")
check_c_compiler_flag(-msse3 HAVE_MSSE3_SWITCH)
if(HAVE_MSSE3_SWITCH)
set(SSE3_SWITCH "-msse3")
check_c_compiler_flag(-msse4.1 HAVE_MSSE4_1_SWITCH)
if(HAVE_MSSE4_1_SWITCH)
set(SSE4_1_SWITCH "-msse4.1")
endif()
endif()
endif() endif()
check_c_compiler_flag(-mfpu=neon HAVE_MFPU_NEON_SWITCH) check_c_compiler_flag(-mfpu=neon HAVE_MFPU_NEON_SWITCH)
if(HAVE_MFPU_NEON_SWITCH) if(HAVE_MFPU_NEON_SWITCH)
@ -372,11 +362,11 @@ endif()
set(CMAKE_REQUIRED_FLAGS ${OLD_REQUIRED_FLAGS}) set(CMAKE_REQUIRED_FLAGS ${OLD_REQUIRED_FLAGS})
unset(OLD_REQUIRED_FLAGS) unset(OLD_REQUIRED_FLAGS)
check_include_file(xmmintrin.h HAVE_XMMINTRIN_H ${SSE2_SWITCH}) check_include_file(xmmintrin.h HAVE_XMMINTRIN_H)
check_include_file(emmintrin.h HAVE_EMMINTRIN_H ${SSE2_SWITCH}) check_include_file(emmintrin.h HAVE_EMMINTRIN_H)
check_include_file(pmmintrin.h HAVE_PMMINTRIN_H ${SSE3_SWITCH}) check_include_file(pmmintrin.h HAVE_PMMINTRIN_H)
check_include_file(smmintrin.h HAVE_SMMINTRIN_H ${SSE4_1_SWITCH}) check_include_file(smmintrin.h HAVE_SMMINTRIN_H)
check_include_file(arm_neon.h HAVE_ARM_NEON_H ${FPU_NEON_SWITCH}) check_include_file(arm_neon.h HAVE_ARM_NEON_H)
set(SSE_FLAGS ) set(SSE_FLAGS )
set(FPMATH_SET "0") set(FPMATH_SET "0")
@ -726,10 +716,6 @@ if(HAVE_XMMINTRIN_H AND HAVE_EMMINTRIN_H)
set(HAVE_SSE 1) set(HAVE_SSE 1)
set(HAVE_SSE2 1) set(HAVE_SSE2 1)
set(CORE_OBJS ${CORE_OBJS} core/mixer/mixer_sse.cpp core/mixer/mixer_sse2.cpp) set(CORE_OBJS ${CORE_OBJS} core/mixer/mixer_sse.cpp core/mixer/mixer_sse2.cpp)
if(SSE2_SWITCH)
set_source_files_properties(core/mixer/mixer_sse.cpp core/mixer/mixer_sse2.cpp
PROPERTIES COMPILE_FLAGS "${SSE2_SWITCH}")
endif()
set(CPU_EXTS "${CPU_EXTS}, SSE, SSE2") set(CPU_EXTS "${CPU_EXTS}, SSE, SSE2")
endif() endif()
endif() endif()
@ -741,15 +727,11 @@ if(ALSOFT_REQUIRE_SSE2 AND NOT HAVE_SSE2)
endif() endif()
option(ALSOFT_REQUIRE_SSE3 "Require SSE3 support" OFF) option(ALSOFT_REQUIRE_SSE3 "Require SSE3 support" OFF)
if(HAVE_EMMINTRIN_H) if(HAVE_PMMINTRIN_H)
option(ALSOFT_CPUEXT_SSE3 "Enable SSE3 support" ON) option(ALSOFT_CPUEXT_SSE3 "Enable SSE3 support" ON)
if(HAVE_SSE2 AND ALSOFT_CPUEXT_SSE3) if(HAVE_SSE2 AND ALSOFT_CPUEXT_SSE3)
set(HAVE_SSE3 1) set(HAVE_SSE3 1)
set(CORE_OBJS ${CORE_OBJS} core/mixer/mixer_sse3.cpp) set(CORE_OBJS ${CORE_OBJS} core/mixer/mixer_sse3.cpp)
if(SSE2_SWITCH)
set_source_files_properties(core/mixer/mixer_sse3.cpp PROPERTIES
COMPILE_FLAGS "${SSE3_SWITCH}")
endif()
set(CPU_EXTS "${CPU_EXTS}, SSE3") set(CPU_EXTS "${CPU_EXTS}, SSE3")
endif() endif()
endif() endif()
@ -763,10 +745,6 @@ if(HAVE_SMMINTRIN_H)
if(HAVE_SSE3 AND ALSOFT_CPUEXT_SSE4_1) if(HAVE_SSE3 AND ALSOFT_CPUEXT_SSE4_1)
set(HAVE_SSE4_1 1) set(HAVE_SSE4_1 1)
set(CORE_OBJS ${CORE_OBJS} core/mixer/mixer_sse41.cpp) set(CORE_OBJS ${CORE_OBJS} core/mixer/mixer_sse41.cpp)
if(SSE4_1_SWITCH)
set_source_files_properties(core/mixer/mixer_sse41.cpp PROPERTIES
COMPILE_FLAGS "${SSE4_1_SWITCH}")
endif()
set(CPU_EXTS "${CPU_EXTS}, SSE4.1") set(CPU_EXTS "${CPU_EXTS}, SSE4.1")
endif() endif()
endif() endif()
@ -781,10 +759,6 @@ if(HAVE_ARM_NEON_H)
if(ALSOFT_CPUEXT_NEON) if(ALSOFT_CPUEXT_NEON)
set(HAVE_NEON 1) set(HAVE_NEON 1)
set(CORE_OBJS ${CORE_OBJS} core/mixer/mixer_neon.cpp) set(CORE_OBJS ${CORE_OBJS} core/mixer/mixer_neon.cpp)
if(FPU_NEON_SWITCH)
set_source_files_properties(core/mixer/mixer_neon.cpp PROPERTIES
COMPILE_FLAGS "${FPU_NEON_SWITCH}")
endif()
set(CPU_EXTS "${CPU_EXTS}, Neon") set(CPU_EXTS "${CPU_EXTS}, Neon")
endif() endif()
endif() endif()

View File

@ -16,6 +16,10 @@ struct BSincTag;
struct FastBSincTag; struct FastBSincTag;
#if defined(__GNUC__) && !defined(__clang__) && !defined(__ARM_NEON)
#pragma GCC target("fpu=neon")
#endif
namespace { namespace {
inline float32x4_t set_f4(float l0, float l1, float l2, float l3) inline float32x4_t set_f4(float l0, float l1, float l2, float l3)

View File

@ -15,6 +15,11 @@ struct BSincTag;
struct FastBSincTag; struct FastBSincTag;
/* SSE2 is required for any SSE support. */
#if defined(__GNUC__) && !defined(__clang__) && !defined(__SSE2__)
#pragma GCC target("sse2")
#endif
namespace { namespace {
constexpr uint FracPhaseBitDiff{MixerFracBits - BSincPhaseBits}; constexpr uint FracPhaseBitDiff{MixerFracBits - BSincPhaseBits};

View File

@ -30,6 +30,10 @@ struct SSE2Tag;
struct LerpTag; struct LerpTag;
#if defined(__GNUC__) && !defined(__clang__) && !defined(__SSE2__)
#pragma GCC target("sse2")
#endif
template<> template<>
const float *Resample_<LerpTag,SSE2Tag>(const InterpState*, const float *RESTRICT src, uint frac, const float *Resample_<LerpTag,SSE2Tag>(const InterpState*, const float *RESTRICT src, uint frac,
uint increment, const al::span<float> dst) uint increment, const al::span<float> dst)

View File

@ -31,6 +31,10 @@ struct SSE4Tag;
struct LerpTag; struct LerpTag;
#if defined(__GNUC__) && !defined(__clang__) && !defined(__SSE4_1__)
#pragma GCC target("sse4.1")
#endif
template<> template<>
const float *Resample_<LerpTag,SSE4Tag>(const InterpState*, const float *RESTRICT src, uint frac, const float *Resample_<LerpTag,SSE4Tag>(const InterpState*, const float *RESTRICT src, uint frac,
uint increment, const al::span<float> dst) uint increment, const al::span<float> dst)