diff --git a/al/auxeffectslot.cpp b/al/auxeffectslot.cpp index 4af2acf2..8b31ab80 100644 --- a/al/auxeffectslot.cpp +++ b/al/auxeffectslot.cpp @@ -35,6 +35,7 @@ #include "AL/alc.h" #include "AL/efx.h" +#include "albit.h" #include "alcmain.h" #include "alcontext.h" #include "almalloc.h" @@ -240,7 +241,7 @@ bool EnsureEffectSlots(ALCcontext *context, size_t needed) size_t count{std::accumulate(context->mEffectSlotList.cbegin(), context->mEffectSlotList.cend(), size_t{0}, [](size_t cur, const EffectSlotSubList &sublist) noexcept -> size_t - { return cur + static_cast(PopCount(sublist.FreeMask)); })}; + { return cur + static_cast(al::popcount(sublist.FreeMask)); })}; while(needed > count) { @@ -268,7 +269,7 @@ ALeffectslot *AllocEffectSlot(ALCcontext *context) [](const EffectSlotSubList &entry) noexcept -> bool { return entry.FreeMask != 0; }); auto lidx = static_cast(std::distance(context->mEffectSlotList.begin(), sublist)); - auto slidx = static_cast(CountTrailingZeros(sublist->FreeMask)); + auto slidx = static_cast(al::countr_zero(sublist->FreeMask)); ALeffectslot *slot{::new(sublist->EffectSlots + slidx) ALeffectslot{}}; aluInitEffectPanning(&slot->mSlot, context); @@ -991,7 +992,7 @@ void UpdateAllEffectSlotProps(ALCcontext *context) uint64_t usemask{~sublist.FreeMask}; while(usemask) { - const int idx{CountTrailingZeros(usemask)}; + const int idx{al::countr_zero(usemask)}; ALeffectslot *slot{sublist.EffectSlots + idx}; usemask &= ~(1_u64 << idx); @@ -1007,7 +1008,7 @@ EffectSlotSubList::~EffectSlotSubList() uint64_t usemask{~FreeMask}; while(usemask) { - const ALsizei idx{CountTrailingZeros(usemask)}; + const int idx{al::countr_zero(usemask)}; al::destroy_at(EffectSlots+idx); usemask &= ~(1_u64 << idx); } diff --git a/al/buffer.cpp b/al/buffer.cpp index cdbedf70..14fae1ee 100644 --- a/al/buffer.cpp +++ b/al/buffer.cpp @@ -41,6 +41,7 @@ #include "AL/alc.h" #include "AL/alext.h" +#include "albit.h" #include "albyte.h" #include "alcmain.h" #include "alcontext.h" @@ -326,7 +327,7 @@ bool EnsureBuffers(ALCdevice *device, size_t needed) { size_t count{std::accumulate(device->BufferList.cbegin(), device->BufferList.cend(), size_t{0}, [](size_t cur, const BufferSubList &sublist) noexcept -> size_t - { return cur + static_cast(PopCount(sublist.FreeMask)); })}; + { return cur + static_cast(al::popcount(sublist.FreeMask)); })}; while(needed > count) { @@ -355,7 +356,7 @@ ALbuffer *AllocBuffer(ALCdevice *device) ); auto lidx = static_cast(std::distance(device->BufferList.begin(), sublist)); - auto slidx = static_cast(CountTrailingZeros(sublist->FreeMask)); + auto slidx = static_cast(al::countr_zero(sublist->FreeMask)); ALbuffer *buffer{::new (sublist->Buffers + slidx) ALbuffer{}}; @@ -1621,7 +1622,7 @@ BufferSubList::~BufferSubList() uint64_t usemask{~FreeMask}; while(usemask) { - const ALsizei idx{CountTrailingZeros(usemask)}; + const int idx{al::countr_zero(usemask)}; al::destroy_at(Buffers+idx); usemask &= ~(1_u64 << idx); } diff --git a/al/effect.cpp b/al/effect.cpp index 02b31fac..93aa5547 100644 --- a/al/effect.cpp +++ b/al/effect.cpp @@ -38,6 +38,7 @@ #include "AL/efx-presets.h" #include "AL/efx.h" +#include "albit.h" #include "alcmain.h" #include "alcontext.h" #include "almalloc.h" @@ -155,7 +156,7 @@ bool EnsureEffects(ALCdevice *device, size_t needed) { size_t count{std::accumulate(device->EffectList.cbegin(), device->EffectList.cend(), size_t{0}, [](size_t cur, const EffectSubList &sublist) noexcept -> size_t - { return cur + static_cast(PopCount(sublist.FreeMask)); })}; + { return cur + static_cast(al::popcount(sublist.FreeMask)); })}; while(needed > count) { @@ -183,7 +184,7 @@ ALeffect *AllocEffect(ALCdevice *device) { return entry.FreeMask != 0; } ); auto lidx = static_cast(std::distance(device->EffectList.begin(), sublist)); - auto slidx = static_cast(CountTrailingZeros(sublist->FreeMask)); + auto slidx = static_cast(al::countr_zero(sublist->FreeMask)); ALeffect *effect{::new (sublist->Effects + slidx) ALeffect{}}; InitEffectParams(effect, AL_EFFECT_NULL); @@ -543,7 +544,7 @@ EffectSubList::~EffectSubList() uint64_t usemask{~FreeMask}; while(usemask) { - const ALsizei idx{CountTrailingZeros(usemask)}; + const int idx{al::countr_zero(usemask)}; al::destroy_at(Effects+idx); usemask &= ~(1_u64 << idx); } diff --git a/al/filter.cpp b/al/filter.cpp index e549b1c9..0bcfe408 100644 --- a/al/filter.cpp +++ b/al/filter.cpp @@ -36,6 +36,7 @@ #include "AL/alc.h" #include "AL/efx.h" +#include "albit.h" #include "alcmain.h" #include "alcontext.h" #include "almalloc.h" @@ -324,7 +325,7 @@ bool EnsureFilters(ALCdevice *device, size_t needed) { size_t count{std::accumulate(device->FilterList.cbegin(), device->FilterList.cend(), size_t{0}, [](size_t cur, const FilterSubList &sublist) noexcept -> size_t - { return cur + static_cast(PopCount(sublist.FreeMask)); })}; + { return cur + static_cast(al::popcount(sublist.FreeMask)); })}; while(needed > count) { @@ -353,7 +354,7 @@ ALfilter *AllocFilter(ALCdevice *device) { return entry.FreeMask != 0; } ); auto lidx = static_cast(std::distance(device->FilterList.begin(), sublist)); - auto slidx = static_cast(CountTrailingZeros(sublist->FreeMask)); + auto slidx = static_cast(al::countr_zero(sublist->FreeMask)); ALfilter *filter{::new(sublist->Filters + slidx) ALfilter{}}; InitFilterParams(filter, AL_FILTER_NULL); @@ -704,7 +705,7 @@ FilterSubList::~FilterSubList() uint64_t usemask{~FreeMask}; while(usemask) { - const ALsizei idx{CountTrailingZeros(usemask)}; + const int idx{al::countr_zero(usemask)}; al::destroy_at(Filters+idx); usemask &= ~(1_u64 << idx); } diff --git a/al/source.cpp b/al/source.cpp index 770e3778..a4e59e59 100644 --- a/al/source.cpp +++ b/al/source.cpp @@ -45,6 +45,7 @@ #include "AL/alext.h" #include "AL/efx.h" +#include "albit.h" #include "alcmain.h" #include "alcontext.h" #include "almalloc.h" @@ -684,7 +685,7 @@ bool EnsureSources(ALCcontext *context, size_t needed) size_t count{std::accumulate(context->mSourceList.cbegin(), context->mSourceList.cend(), size_t{0}, [](size_t cur, const SourceSubList &sublist) noexcept -> size_t - { return cur + static_cast(PopCount(sublist.FreeMask)); })}; + { return cur + static_cast(al::popcount(sublist.FreeMask)); })}; while(needed > count) { @@ -712,7 +713,7 @@ ALsource *AllocSource(ALCcontext *context) { return entry.FreeMask != 0; } ); auto lidx = static_cast(std::distance(context->mSourceList.begin(), sublist)); - auto slidx = static_cast(CountTrailingZeros(sublist->FreeMask)); + auto slidx = static_cast(al::countr_zero(sublist->FreeMask)); ALsource *source{::new(sublist->Sources + slidx) ALsource{}}; @@ -3484,7 +3485,7 @@ SourceSubList::~SourceSubList() uint64_t usemask{~FreeMask}; while(usemask) { - const ALsizei idx{CountTrailingZeros(usemask)}; + const int idx{al::countr_zero(usemask)}; al::destroy_at(Sources+idx); usemask &= ~(1_u64 << idx); } diff --git a/alc/alc.cpp b/alc/alc.cpp index e7c85525..02aaaa18 100644 --- a/alc/alc.cpp +++ b/alc/alc.cpp @@ -64,6 +64,7 @@ #include "al/filter.h" #include "al/listener.h" #include "al/source.h" +#include "albit.h" #include "alcmain.h" #include "albyte.h" #include "alconfig.h" @@ -2128,7 +2129,7 @@ static ALCenum UpdateDeviceParams(ALCdevice *device, const int *attrList) uint64_t usemask{~sublist.FreeMask}; while(usemask) { - const int idx{CountTrailingZeros(usemask)}; + const int idx{al::countr_zero(usemask)}; ALeffectslot *slot{sublist.EffectSlots + idx}; usemask &= ~(1_u64 << idx); @@ -2149,7 +2150,7 @@ static ALCenum UpdateDeviceParams(ALCdevice *device, const int *attrList) uint64_t usemask{~sublist.FreeMask}; while(usemask) { - const int idx{CountTrailingZeros(usemask)}; + const int idx{al::countr_zero(usemask)}; ALsource *source{sublist.Sources + idx}; usemask &= ~(1_u64 << idx); @@ -2292,19 +2293,19 @@ ALCdevice::~ALCdevice() size_t count{std::accumulate(BufferList.cbegin(), BufferList.cend(), size_t{0u}, [](size_t cur, const BufferSubList &sublist) noexcept -> size_t - { return cur + static_cast(PopCount(~sublist.FreeMask)); })}; + { return cur + static_cast(al::popcount(~sublist.FreeMask)); })}; if(count > 0) WARN("%zu Buffer%s not deleted\n", count, (count==1)?"":"s"); count = std::accumulate(EffectList.cbegin(), EffectList.cend(), size_t{0u}, [](size_t cur, const EffectSubList &sublist) noexcept -> size_t - { return cur + static_cast(PopCount(~sublist.FreeMask)); }); + { return cur + static_cast(al::popcount(~sublist.FreeMask)); }); if(count > 0) WARN("%zu Effect%s not deleted\n", count, (count==1)?"":"s"); count = std::accumulate(FilterList.cbegin(), FilterList.cend(), size_t{0u}, [](size_t cur, const FilterSubList &sublist) noexcept -> size_t - { return cur + static_cast(PopCount(~sublist.FreeMask)); }); + { return cur + static_cast(al::popcount(~sublist.FreeMask)); }); if(count > 0) WARN("%zu Filter%s not deleted\n", count, (count==1)?"":"s"); @@ -2356,7 +2357,7 @@ ALCcontext::~ALCcontext() count = std::accumulate(mSourceList.cbegin(), mSourceList.cend(), size_t{0u}, [](size_t cur, const SourceSubList &sublist) noexcept -> size_t - { return cur + static_cast(PopCount(~sublist.FreeMask)); }); + { return cur + static_cast(al::popcount(~sublist.FreeMask)); }); if(count > 0) WARN("%zu Source%s not deleted\n", count, (count==1)?"":"s"); mSourceList.clear(); @@ -2381,7 +2382,7 @@ ALCcontext::~ALCcontext() count = std::accumulate(mEffectSlotList.cbegin(), mEffectSlotList.cend(), size_t{0u}, [](size_t cur, const EffectSlotSubList &sublist) noexcept -> size_t - { return cur + static_cast(PopCount(~sublist.FreeMask)); }); + { return cur + static_cast(al::popcount(~sublist.FreeMask)); }); if(count > 0) WARN("%zu AuxiliaryEffectSlot%s not deleted\n", count, (count==1)?"":"s"); mEffectSlotList.clear(); diff --git a/alc/backends/base.cpp b/alc/backends/base.cpp index 8642f40c..c4a4abeb 100644 --- a/alc/backends/base.cpp +++ b/alc/backends/base.cpp @@ -12,6 +12,7 @@ #include #endif +#include "albit.h" #include "alcmain.h" #include "alnumeric.h" #include "aloptional.h" @@ -180,7 +181,7 @@ void BackendBase::setChannelOrderFromWFXMask(uint chanmask) uint idx{0}; while(chanmask) { - const int bit{CountTrailingZeros(chanmask)}; + const int bit{al::countr_zero(chanmask)}; const uint mask{1u << bit}; chanmask &= ~mask; diff --git a/alc/backends/wasapi.cpp b/alc/backends/wasapi.cpp index 1d1b93c4..0786a7d7 100644 --- a/alc/backends/wasapi.cpp +++ b/alc/backends/wasapi.cpp @@ -55,6 +55,7 @@ #include #include +#include "albit.h" #include "alcmain.h" #include "alu.h" #include "compat.h" @@ -1643,7 +1644,7 @@ HRESULT WasapiCapture::resetProxy() if((InputType.dwChannelMask&SPEAKER_LOW_FREQUENCY)) { constexpr auto lfemask = MaskFromTopBits(SPEAKER_LOW_FREQUENCY); - const int lfeidx{PopCount(uint32_t{InputType.dwChannelMask&lfemask}) - 1}; + const int lfeidx{al::popcount(InputType.dwChannelMask&lfemask) - 1}; chanmask &= ~(1u << lfeidx); } diff --git a/alc/converter.cpp b/alc/converter.cpp index 5016b373..f7d4fc46 100644 --- a/alc/converter.cpp +++ b/alc/converter.cpp @@ -9,6 +9,7 @@ #include #include +#include "albit.h" #include "albyte.h" #include "alnumeric.h" #include "core/fpu_ctrl.h" @@ -338,7 +339,7 @@ void ChannelConverter::convert(const void *src, float *dst, uint frames) const { if(mDstChans == DevFmtMono) { - const float scale{std::sqrt(1.0f / static_cast(PopCount(mChanMask)))}; + const float scale{std::sqrt(1.0f / static_cast(al::popcount(mChanMask)))}; switch(mSrcType) { #define HANDLE_FMT(T) case T: Multi2Mono(mChanMask, mSrcStep, scale, dst, src, frames); break diff --git a/common/albit.h b/common/albit.h index 225c0b89..c54bb31a 100644 --- a/common/albit.h +++ b/common/albit.h @@ -1,6 +1,12 @@ #ifndef AL_BIT_H #define AL_BIT_H +#include +#include +#if !defined(__GNUC__) && (defined(_WIN32) || defined(_WIN64)) +#include +#endif + namespace al { #ifdef __BYTE_ORDER__ @@ -30,6 +36,108 @@ enum class endian { }; #endif + +/* Define popcount (population count/count 1 bits) and countr_zero (count + * trailing zero bits, starting from the lsb) methods, for various integer + * types. + */ +#ifdef __GNUC__ + +namespace detail_ { + inline int popcount(unsigned long long val) noexcept { return __builtin_popcountll(val); } + inline int popcount(unsigned long val) noexcept { return __builtin_popcountl(val); } + inline int popcount(unsigned int val) noexcept { return __builtin_popcount(val); } + + inline int countr_zero(unsigned long long val) noexcept { return __builtin_ctzll(val); } + inline int countr_zero(unsigned long val) noexcept { return __builtin_ctzl(val); } + inline int countr_zero(unsigned int val) noexcept { return __builtin_ctz(val); } +} // namespace detail_ + +template +inline std::enable_if_t::value && std::is_unsigned::value, +int> popcount(T v) noexcept { return detail_::popcount(v); } + +template +inline std::enable_if_t::value && std::is_unsigned::value, +int> countr_zero(T val) noexcept +{ return val ? detail_::countr_zero(val) : std::numeric_limits::digits; } + +#else + +/* There be black magics here. The popcount method is derived from + * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + * while the ctz-utilizing-popcount algorithm is shown here + * http://www.hackersdelight.org/hdcodetxt/ntz.c.txt + * as the ntz2 variant. These likely aren't the most efficient methods, but + * they're good enough if the GCC built-ins aren't available. + */ +namespace detail_ { + template + constexpr T repbits(unsigned char bits) noexcept + { + T ret{bits}; + for(size_t i{1};i < sizeof(T);++i) + ret = (ret<<8) | bits; + return ret; + } +} // namespace detail_ + +template +constexpr std::enable_if_t::value && std::is_unsigned::value, +int> popcount(T v) noexcept +{ + constexpr T m55{detail_::repbits(0x55)}; + constexpr T m33{detail_::repbits(0x33)}; + constexpr T m0f{detail_::repbits(0x0f)}; + constexpr T m01{detail_::repbits(0x01)}; + + v = v - ((v >> 1) & m55); + v = (v & m33) + ((v >> 2) & m33); + v = (v + (v >> 4)) & m0f; + return static_cast((v * m01) >> ((sizeof(T)-1)*8)); +} + +#if defined(_WIN64) + +template +inline std::enable_if_t::value && std::is_unsigned::value, +int> countr_zero(T v) +{ + unsigned long idx{std::numeric_limits::digits}; + if /*constexpr*/(std::numeric_limits::digits <= 32) + _BitScanForward(&idx, static_cast(v)); + else // std::numeric_limits::digits > 32 + _BitScanForward64(&idx, v); + return static_cast(idx); +} + +#elif defined(_WIN32) + +template +inline std::enable_if_t::value && std::is_unsigned::value, +int> countr_zero(T v) +{ + unsigned long idx{std::numeric_limits::digits}; + if /*constexpr*/(std::numeric_limits::digits <= 32) + _BitScanForward(&idx, static_cast(v)); + else if(!_BitScanForward(&idx, static_cast(v))) + { + if(_BitScanForward(&idx, static_cast(v>>32))) + idx += 32; + } + return static_cast(idx); +} + +#else + +template +constexpr std::enable_if_t::value && std::is_unsigned::value, +int> countr_zero(T value) +{ return popcount(static_cast(~value & (value - 1))); } + +#endif +#endif + } // namespace al #endif /* AL_BIT_H */ diff --git a/common/alcomplex.cpp b/common/alcomplex.cpp index 8a823b01..de10ede2 100644 --- a/common/alcomplex.cpp +++ b/common/alcomplex.cpp @@ -8,6 +8,7 @@ #include #include +#include "albit.h" #include "alnumeric.h" #include "math_defs.h" @@ -18,7 +19,7 @@ void complex_fft(const al::span> buffer, const double sign) /* Get the number of bits used for indexing. Simplifies bit-reversal and * the main loop count. */ - const size_t log2_size{static_cast(CountTrailingZeros(fftsize))}; + const size_t log2_size{static_cast(al::countr_zero(fftsize))}; /* Bit-reversal permutation applied to a sequence of fftsize items. */ for(size_t idx{1u};idx < fftsize-1;++idx) diff --git a/common/alnumeric.h b/common/alnumeric.h index b9384a7f..c16f3e62 100644 --- a/common/alnumeric.h +++ b/common/alnumeric.h @@ -103,92 +103,6 @@ inline size_t RoundUp(size_t value, size_t r) noexcept } -/* Define CountTrailingZeros (count trailing zero bits, starting from the lsb) - * and PopCount (population count/count 1 bits) methods, for 32- and 64-bit - * integers. The CountTrailingZeros results are *UNDEFINED* if the value is 0. - */ -#ifdef __GNUC__ - -/* Define variations for unsigned (long (long)) int, since we don't know what - * uint32/64_t are typedef'd to. - */ -inline int PopCount(unsigned long long val) { return __builtin_popcountll(val); } -inline int PopCount(unsigned long val) { return __builtin_popcountl(val); } -inline int PopCount(unsigned int val) { return __builtin_popcount(val); } - -inline int CountTrailingZeros(unsigned long long val) { return __builtin_ctzll(val); } -inline int CountTrailingZeros(unsigned long val) { return __builtin_ctzl(val); } -inline int CountTrailingZeros(unsigned int val) { return __builtin_ctz(val); } - -#else - -/* There be black magics here. The popcnt method is derived from - * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel - * while the ctz-utilizing-popcnt algorithm is shown here - * http://www.hackersdelight.org/hdcodetxt/ntz.c.txt - * as the ntz2 variant. These likely aren't the most efficient methods, but - * they're good enough if the GCC built-ins aren't available. - */ -inline int PopCount(uint32_t v) -{ - v = v - ((v >> 1) & 0x55555555u); - v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u); - v = (v + (v >> 4)) & 0x0f0f0f0fu; - return static_cast((v * 0x01010101u) >> 24); -} -inline int PopCount(uint64_t v) -{ - v = v - ((v >> 1) & 0x5555555555555555_u64); - v = (v & 0x3333333333333333_u64) + ((v >> 2) & 0x3333333333333333_u64); - v = (v + (v >> 4)) & 0x0f0f0f0f0f0f0f0f_u64; - return static_cast((v * 0x0101010101010101_u64) >> 56); -} - -#if defined(_WIN64) - -inline int CountTrailingZeros(uint32_t v) -{ - unsigned long idx = 32; - _BitScanForward(&idx, v); - return static_cast(idx); -} -inline int CountTrailingZeros(uint64_t v) -{ - unsigned long idx = 64; - _BitScanForward64(&idx, v); - return static_cast(idx); -} - -#elif defined(_WIN32) - -inline int CountTrailingZeros(uint32_t v) -{ - unsigned long idx = 32; - _BitScanForward(&idx, v); - return static_cast(idx); -} -inline int CountTrailingZeros(uint64_t v) -{ - unsigned long idx = 64; - if(!_BitScanForward(&idx, static_cast(v&0xffffffff))) - { - if(_BitScanForward(&idx, static_cast(v>>32))) - idx += 32; - } - return static_cast(idx); -} - -#else - -inline int CountTrailingZeros(uint32_t value) -{ return PopCount(~value & (value - 1)); } -inline int CountTrailingZeros(uint64_t value) -{ return PopCount(~value & (value - 1)); } - -#endif -#endif - - /** * Fast float-to-int conversion. No particular rounding mode is assumed; the * IEEE-754 default is round-to-nearest with ties-to-even, though an app could