Make the resampler increment unsigned

2019-09-13 03:25:13 -07:00 · 2019-09-13 03:25:13 -07:00 · c1690178ec
parent 5f862a5b49
commit c1690178ec
9 changed files with 77 additions and 82 deletions
--- a/alc/alu.h
+++ b/alc/alu.h
@@ -81,7 +81,7 @@ union InterpState {
 };

 using ResamplerFunc = const ALfloat*(*)(const InterpState *state, const ALfloat *RESTRICT src,
-    ALuint frac, ALint increment, const al::span<float> dst);
+    ALuint frac, ALuint increment, const al::span<float> dst);

 void BsincPrepare(const ALuint increment, BsincState *state, const BSincTable *table);

--- a/alc/converter.cpp
+++ b/alc/converter.cpp
@@ -299,7 +299,7 @@ ALuint SampleConverter::convert(const ALvoid **src, ALuint *srcframes, ALvoid *d

            /* Now resample, and store the result in the output buffer. */
            const ALfloat *ResampledData{mResample(&mState, SrcData+MAX_RESAMPLE_PADDING,
-                DataPosFrac, static_cast<ALint>(increment), {DstData, DstSize})};
+                DataPosFrac, increment, {DstData, DstSize})};

            StoreSamples(DstSamples, ResampledData, mChan.size(), mDstType, DstSize);
        }
--- a/alc/mixer/defs.h
+++ b/alc/mixer/defs.h
@@ -28,7 +28,7 @@ enum ResampleType {

 template<ResampleType TypeTag, InstSetType InstTag>
 const ALfloat *Resample_(const InterpState *state, const ALfloat *RESTRICT src, ALuint frac,
-    ALint increment, const al::span<float> dst);
+    ALuint increment, const al::span<float> dst);

 template<InstSetType InstTag>
 void Mix_(const al::span<const float> InSamples, const al::span<FloatBufferLine> OutBuffer,
@@ -45,13 +45,14 @@ template<InstSetType InstTag>
 void MixDirectHrtf_(FloatBufferLine &LeftOut, FloatBufferLine &RightOut, const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples, DirectHrtfState *State, const size_t BufferSize);

 /* Vectorized resampler helpers */
-inline void InitiatePositionArrays(ALsizei frac, ALint increment, ALsizei *RESTRICT frac_arr, ALsizei *RESTRICT pos_arr, ALsizei size)
+inline void InitPosArrays(ALuint frac, ALuint increment, ALuint *frac_arr, ALuint *pos_arr,
+    size_t size)
 {
    pos_arr[0] = 0;
    frac_arr[0] = frac;
-    for(ALsizei i{1};i < size;i++)
+    for(size_t i{1};i < size;i++)
    {
-        ALint frac_tmp = frac_arr[i-1] + increment;
+        const ALuint frac_tmp{frac_arr[i-1] + increment};
        pos_arr[i] = pos_arr[i-1] + (frac_tmp>>FRACTIONBITS);
        frac_arr[i] = frac_tmp&FRACTIONMASK;
    }
--- a/alc/mixer/mixer_c.cpp
+++ b/alc/mixer/mixer_c.cpp
@@ -44,10 +44,8 @@ inline ALfloat do_bsinc(const InterpState &istate, const ALfloat *RESTRICT vals,
 using SamplerT = ALfloat(const InterpState&, const ALfloat*RESTRICT, const ALuint);
 template<SamplerT &Sampler>
 const ALfloat *DoResample(const InterpState *state, const ALfloat *RESTRICT src,
-    ALuint frac, ALint increment, const al::span<float> dst)
+    ALuint frac, ALuint increment, const al::span<float> dst)
 {
-    ASSUME(increment > 0);
-
    const InterpState istate{*state};
    auto proc_sample = [&src,&frac,istate,increment]() -> ALfloat
    {
@@ -68,7 +66,7 @@ const ALfloat *DoResample(const InterpState *state, const ALfloat *RESTRICT src,

 template<>
 const ALfloat *Resample_<CopyTag,CTag>(const InterpState*, const ALfloat *RESTRICT src, ALuint,
-    ALint, const al::span<float> dst)
+    ALuint, const al::span<float> dst)
 {
 #if defined(HAVE_SSE) || defined(HAVE_NEON)
    /* Avoid copying the source data if it's aligned like the destination. */
@@ -81,22 +79,22 @@ const ALfloat *Resample_<CopyTag,CTag>(const InterpState*, const ALfloat *RESTRI

 template<>
 const ALfloat *Resample_<PointTag,CTag>(const InterpState *state, const ALfloat *RESTRICT src,
-    ALuint frac, ALint increment, const al::span<float> dst)
+    ALuint frac, ALuint increment, const al::span<float> dst)
 { return DoResample<do_point>(state, src, frac, increment, dst); }

 template<>
 const ALfloat *Resample_<LerpTag,CTag>(const InterpState *state, const ALfloat *RESTRICT src,
-    ALuint frac, ALint increment, const al::span<float> dst)
+    ALuint frac, ALuint increment, const al::span<float> dst)
 { return DoResample<do_lerp>(state, src, frac, increment, dst); }

 template<>
 const ALfloat *Resample_<CubicTag,CTag>(const InterpState *state, const ALfloat *RESTRICT src,
-    ALuint frac, ALint increment, const al::span<float> dst)
+    ALuint frac, ALuint increment, const al::span<float> dst)
 { return DoResample<do_cubic>(state, src-1, frac, increment, dst); }

 template<>
 const ALfloat *Resample_<BSincTag,CTag>(const InterpState *state, const ALfloat *RESTRICT src,
-    ALuint frac, ALint increment, const al::span<float> dst)
+    ALuint frac, ALuint increment, const al::span<float> dst)
 { return DoResample<do_bsinc>(state, src-state->bsinc.l, frac, increment, dst); }


--- a/alc/mixer/mixer_neon.cpp
+++ b/alc/mixer/mixer_neon.cpp
@@ -16,22 +16,20 @@

 template<>
 const ALfloat *Resample_<LerpTag,NEONTag>(const InterpState*, const ALfloat *RESTRICT src,
-    ALuint frac, ALint increment, const al::span<float> dst)
+    ALuint frac, ALuint increment, const al::span<float> dst)
 {
-    const int32x4_t increment4 = vdupq_n_s32(increment*4);
+    const int32x4_t increment4 = vdupq_n_s32(static_cast<int>(increment*4));
    const float32x4_t fracOne4 = vdupq_n_f32(1.0f/FRACTIONONE);
    const int32x4_t fracMask4 = vdupq_n_s32(FRACTIONMASK);
-    alignas(16) ALsizei pos_[4], frac_[4];
+    alignas(16) ALuint pos_[4], frac_[4];
    int32x4_t pos4, frac4;

-    ASSUME(increment > 0);
-
-    InitiatePositionArrays(frac, increment, frac_, pos_, 4);
-    frac4 = vld1q_s32(frac_);
-    pos4 = vld1q_s32(pos_);
+    InitPosArrays(frac, increment, frac_, pos_, 4);
+    frac4 = vld1q_s32(reinterpret_cast<int*>(frac_));
+    pos4 = vld1q_s32(reinterpret_cast<int*>(pos_));

    auto dst_iter = dst.begin();
-    const auto aligned_end = (dst.size()&~3) + dst_iter;
+    const auto aligned_end = (dst.size()&~3u) + dst_iter;
    while(dst_iter != aligned_end)
    {
        const int pos0{vgetq_lane_s32(pos4, 0)};
@@ -54,33 +52,31 @@ const ALfloat *Resample_<LerpTag,NEONTag>(const InterpState*, const ALfloat *RES
        frac4 = vandq_s32(frac4, fracMask4);
    }

-    /* NOTE: These four elements represent the position *after* the last four
-     * samples, so the lowest element is the next position to resample.
-     */
-    src += static_cast<ALuint>(vgetq_lane_s32(pos4, 0));
-    frac = vgetq_lane_s32(frac4, 0);
-
-    while(dst_iter != dst.end())
+    if(dst_iter != dst.end())
    {
-        *(dst_iter++) = lerp(src[0], src[1], frac * (1.0f/FRACTIONONE));
+        src += static_cast<ALuint>(vgetq_lane_s32(pos4, 0));
+        frac = vgetq_lane_s32(frac4, 0);

-        frac += increment;
-        src  += frac>>FRACTIONBITS;
-        frac &= FRACTIONMASK;
+        do {
+            *(dst_iter++) = lerp(src[0], src[1], frac * (1.0f/FRACTIONONE));
+
+            frac += increment;
+            src  += frac>>FRACTIONBITS;
+            frac &= FRACTIONMASK;
+        } while(dst_iter != dst.end());
    }
    return dst.begin();
 }

 template<>
 const ALfloat *Resample_<BSincTag,NEONTag>(const InterpState *state, const ALfloat *RESTRICT src,
-    ALuint frac, ALint increment, const al::span<float> dst)
+    ALuint frac, ALuint increment, const al::span<float> dst)
 {
    const ALfloat *const filter{state->bsinc.filter};
    const float32x4_t sf4{vdupq_n_f32(state->bsinc.sf)};
    const ptrdiff_t m{state->bsinc.m};

    ASSUME(m > 0);
-    ASSUME(increment > 0);

    src -= state->bsinc.l;
    for(float &out_sample : dst)
@@ -183,7 +179,7 @@ void Mix_<NEONTag>(const al::span<const float> InSamples, const al::span<FloatBu
    const ALfloat delta{(Counter > 0) ? 1.0f / static_cast<ALfloat>(Counter) : 0.0f};
    const bool reached_target{InSamples.size() >= Counter};
    const auto min_end = reached_target ? InSamples.begin() + Counter : InSamples.end();
-    const auto aligned_end = minz(InSamples.size(), (min_end-InSamples.begin()+3) & ~3) +
+    const auto aligned_end = minz(InSamples.size(), (min_end-InSamples.begin()+3) & ~3u) +
        InSamples.begin();
    for(FloatBufferLine &output : OutBuffer)
    {
--- a/alc/mixer/mixer_sse.cpp
+++ b/alc/mixer/mixer_sse.cpp
@@ -15,14 +15,13 @@

 template<>
 const ALfloat *Resample_<BSincTag,SSETag>(const InterpState *state, const ALfloat *RESTRICT src,
-    ALuint frac, ALint increment, const al::span<float> dst)
+    ALuint frac, ALuint increment, const al::span<float> dst)
 {
    const ALfloat *const filter{state->bsinc.filter};
    const __m128 sf4{_mm_set1_ps(state->bsinc.sf)};
    const ptrdiff_t m{state->bsinc.m};

    ASSUME(m > 0);
-    ASSUME(increment > 0);

    src -= state->bsinc.l;
    for(float &out_sample : dst)
@@ -146,7 +145,7 @@ void Mix_<SSETag>(const al::span<const float> InSamples, const al::span<FloatBuf
    const ALfloat delta{(Counter > 0) ? 1.0f / static_cast<ALfloat>(Counter) : 0.0f};
    const bool reached_target{InSamples.size() >= Counter};
    const auto min_end = reached_target ? InSamples.begin() + Counter : InSamples.end();
-    const auto aligned_end = minz(InSamples.size(), (min_end-InSamples.begin()+3) & ~3) +
+    const auto aligned_end = minz(InSamples.size(), (min_end-InSamples.begin()+3) & ~3u) +
        InSamples.begin();
    for(FloatBufferLine &output : OutBuffer)
    {
--- a/alc/mixer/mixer_sse2.cpp
+++ b/alc/mixer/mixer_sse2.cpp
@@ -29,21 +29,21 @@

 template<>
 const ALfloat *Resample_<LerpTag,SSE2Tag>(const InterpState*, const ALfloat *RESTRICT src,
-    ALuint frac, ALint increment, const al::span<float> dst)
+    ALuint frac, ALuint increment, const al::span<float> dst)
 {
-    const __m128i increment4{_mm_set1_epi32(increment*4)};
+    const __m128i increment4{_mm_set1_epi32(static_cast<int>(increment*4))};
    const __m128 fracOne4{_mm_set1_ps(1.0f/FRACTIONONE)};
    const __m128i fracMask4{_mm_set1_epi32(FRACTIONMASK)};

-    ASSUME(increment > 0);
-
-    alignas(16) ALsizei pos_[4], frac_[4];
-    InitiatePositionArrays(frac, increment, frac_, pos_, 4);
-    __m128i frac4{_mm_setr_epi32(frac_[0], frac_[1], frac_[2], frac_[3])};
-    __m128i pos4{_mm_setr_epi32(pos_[0], pos_[1], pos_[2], pos_[3])};
+    alignas(16) ALuint pos_[4], frac_[4];
+    InitPosArrays(frac, increment, frac_, pos_, 4);
+    __m128i frac4{_mm_setr_epi32(static_cast<int>(frac_[0]), static_cast<int>(frac_[1]),
+        static_cast<int>(frac_[2]), static_cast<int>(frac_[3]))};
+    __m128i pos4{_mm_setr_epi32(static_cast<int>(pos_[0]), static_cast<int>(pos_[1]),
+        static_cast<int>(pos_[2]), static_cast<int>(pos_[3]))};

    auto dst_iter = dst.begin();
-    const auto aligned_end = (dst.size()&~3) + dst_iter;
+    const auto aligned_end = (dst.size()&~3u) + dst_iter;
    while(dst_iter != aligned_end)
    {
        const int pos0{_mm_cvtsi128_si32(_mm_shuffle_epi32(pos4, _MM_SHUFFLE(0, 0, 0, 0)))};
@@ -66,19 +66,18 @@ const ALfloat *Resample_<LerpTag,SSE2Tag>(const InterpState*, const ALfloat *RES
        frac4 = _mm_and_si128(frac4, fracMask4);
    }

-    /* NOTE: These four elements represent the position *after* the last four
-     * samples, so the lowest element is the next position to resample.
-     */
-    src += static_cast<ALuint>(_mm_cvtsi128_si32(pos4));
-    frac = _mm_cvtsi128_si32(frac4);
-
-    while(dst_iter != dst.end())
+    if(dst_iter != dst.end())
    {
-        *(dst_iter++) = lerp(src[0], src[1], frac * (1.0f/FRACTIONONE));
+        src += static_cast<ALuint>(_mm_cvtsi128_si32(pos4));
+        frac = static_cast<ALuint>(_mm_cvtsi128_si32(frac4));

-        frac += increment;
-        src  += frac>>FRACTIONBITS;
-        frac &= FRACTIONMASK;
+        do {
+            *(dst_iter++) = lerp(src[0], src[1], frac * (1.0f/FRACTIONONE));
+
+            frac += increment;
+            src  += frac>>FRACTIONBITS;
+            frac &= FRACTIONMASK;
+        } while(dst_iter != dst.end());
    }
    return dst.begin();
 }
--- a/alc/mixer/mixer_sse41.cpp
+++ b/alc/mixer/mixer_sse41.cpp
@@ -30,21 +30,21 @@

 template<>
 const ALfloat *Resample_<LerpTag,SSE4Tag>(const InterpState*, const ALfloat *RESTRICT src,
-    ALuint frac, ALint increment, const al::span<float> dst)
+    ALuint frac, ALuint increment, const al::span<float> dst)
 {
-    const __m128i increment4{_mm_set1_epi32(increment*4)};
+    const __m128i increment4{_mm_set1_epi32(static_cast<int>(increment*4))};
    const __m128 fracOne4{_mm_set1_ps(1.0f/FRACTIONONE)};
    const __m128i fracMask4{_mm_set1_epi32(FRACTIONMASK)};

-    ASSUME(increment > 0);
-
-    alignas(16) ALsizei pos_[4], frac_[4];
-    InitiatePositionArrays(frac, increment, frac_, pos_, 4);
-    __m128i frac4{_mm_setr_epi32(frac_[0], frac_[1], frac_[2], frac_[3])};
-    __m128i pos4{_mm_setr_epi32(pos_[0], pos_[1], pos_[2], pos_[3])};
+    alignas(16) ALuint pos_[4], frac_[4];
+    InitPosArrays(frac, increment, frac_, pos_, 4);
+    __m128i frac4{_mm_setr_epi32(static_cast<int>(frac_[0]), static_cast<int>(frac_[1]),
+        static_cast<int>(frac_[2]), static_cast<int>(frac_[3]))};
+    __m128i pos4{_mm_setr_epi32(static_cast<int>(pos_[0]), static_cast<int>(pos_[1]),
+        static_cast<int>(pos_[2]), static_cast<int>(pos_[3]))};

    auto dst_iter = dst.begin();
-    const auto aligned_end = (dst.size()&~3) + dst_iter;
+    const auto aligned_end = (dst.size()&~3u) + dst_iter;
    while(dst_iter != aligned_end)
    {
        const int pos0{_mm_extract_epi32(pos4, 0)};
@@ -67,19 +67,22 @@ const ALfloat *Resample_<LerpTag,SSE4Tag>(const InterpState*, const ALfloat *RES
        frac4 = _mm_and_si128(frac4, fracMask4);
    }

-    /* NOTE: These four elements represent the position *after* the last four
-     * samples, so the lowest element is the next position to resample.
-     */
-    src += static_cast<ALuint>(_mm_cvtsi128_si32(pos4));
-    frac = _mm_cvtsi128_si32(frac4);
-
-    while(dst_iter != dst.end())
+    if(dst_iter != dst.end())
    {
-        *(dst_iter++) = lerp(src[0], src[1], frac * (1.0f/FRACTIONONE));
+        /* NOTE: These four elements represent the position *after* the last
+         * four samples, so the lowest element is the next position to
+         * resample.
+         */
+        src += static_cast<ALuint>(_mm_cvtsi128_si32(pos4));
+        frac = static_cast<ALuint>(_mm_cvtsi128_si32(frac4));

-        frac += increment;
-        src  += frac>>FRACTIONBITS;
-        frac &= FRACTIONMASK;
+        do {
+            *(dst_iter++) = lerp(src[0], src[1], frac * (1.0f/FRACTIONONE));
+
+            frac += increment;
+            src  += frac>>FRACTIONBITS;
+            frac &= FRACTIONMASK;
+        } while(dst_iter != dst.end());
    }
    return dst.begin();
 }
--- a/alc/mixvoice.cpp
+++ b/alc/mixvoice.cpp
@@ -617,8 +617,7 @@ void ALvoice::mix(State vstate, ALCcontext *Context, const ALuint SamplesToDo)

            /* Resample, then apply ambisonic upsampling as needed. */
            const ALfloat *ResampledData{Resample(&mResampleState, &SrcData[MAX_RESAMPLE_PADDING],
-                DataPosFrac, static_cast<ALint>(increment),
-                {Device->ResampledData, DstBufferSize})};
+                DataPosFrac, increment, {Device->ResampledData, DstBufferSize})};
            if((mFlags&VOICE_IS_AMBISONIC))
            {
                const ALfloat hfscale{chandata.mAmbiScale};