From a271484e7c5056a868767a344e63f8f3feb9437e Mon Sep 17 00:00:00 2001 From: Chris Robinson Date: Sat, 1 Jan 2022 00:10:21 -0800 Subject: [PATCH] Pass a span of pointers to the UHJ/SuperStereo decoder --- core/device.h | 3 ++- core/uhjfilter.cpp | 32 ++++++++++++++++---------------- core/uhjfilter.h | 10 +++++----- core/voice.cpp | 7 ++++++- 4 files changed, 29 insertions(+), 23 deletions(-) diff --git a/core/device.h b/core/device.h index 111e26ce..e9596bb4 100644 --- a/core/device.h +++ b/core/device.h @@ -183,8 +183,9 @@ struct DeviceBase { /* Temp storage used for mixer processing. */ static constexpr size_t MixerLineSize{BufferLineSize + MaxResamplerPadding + UhjDecoder::sFilterDelay}; + static constexpr size_t MixerChannelsMax{16}; using MixerBufferLine = std::array; - alignas(16) std::array mSampleData; + alignas(16) std::array mSampleData; alignas(16) float ResampledData[BufferLineSize]; alignas(16) float FilteredData[BufferLineSize]; diff --git a/core/uhjfilter.cpp b/core/uhjfilter.cpp index 584718e3..a56e25bf 100644 --- a/core/uhjfilter.cpp +++ b/core/uhjfilter.cpp @@ -101,15 +101,15 @@ void UhjEncoder::encode(float *LeftOut, float *RightOut, const FloatBufferLine * * where j is a +90 degree phase shift. 3-channel UHJ excludes Q, while 2- * channel excludes Q and T. */ -void UhjDecoder::decode(const al::span samples, const size_t offset, - const size_t samplesToDo, const size_t forwardSamples) +void UhjDecoder::decode(const al::span samples, const size_t samplesToDo, + const size_t forwardSamples) { ASSUME(samplesToDo > 0); { - const float *RESTRICT left{samples[0].data() + offset}; - const float *RESTRICT right{samples[1].data() + offset}; - const float *RESTRICT t{samples[2].data() + offset}; + const float *RESTRICT left{al::assume_aligned<16>(samples[0])}; + const float *RESTRICT right{al::assume_aligned<16>(samples[1])}; + const float *RESTRICT t{al::assume_aligned<16>(samples[2])}; /* S = Left + Right */ for(size_t i{0};i < samplesToDo+sFilterDelay;++i) @@ -124,9 +124,9 @@ void UhjDecoder::decode(const al::span samples, const size_t offset, mT[i] = t[i]; } - float *RESTRICT woutput{samples[0].data() + offset}; - float *RESTRICT xoutput{samples[1].data() + offset}; - float *RESTRICT youtput{samples[2].data() + offset}; + float *RESTRICT woutput{al::assume_aligned<16>(samples[0])}; + float *RESTRICT xoutput{al::assume_aligned<16>(samples[1])}; + float *RESTRICT youtput{al::assume_aligned<16>(samples[2])}; /* Precompute j(0.828331*D + 0.767820*T) and store in xoutput. */ auto tmpiter = std::copy(mDTHistory.cbegin(), mDTHistory.cend(), mTemp.begin()); @@ -154,7 +154,7 @@ void UhjDecoder::decode(const al::span samples, const size_t offset, if(samples.size() > 3) { - float *RESTRICT zoutput{samples[3].data() + offset}; + float *RESTRICT zoutput{al::assume_aligned<16>(samples[3])}; /* Z = 1.023332*Q */ for(size_t i{0};i < samplesToDo;++i) zoutput[i] = 1.023332f*zoutput[i]; @@ -174,14 +174,14 @@ void UhjDecoder::decode(const al::span samples, const size_t offset, * where j is a +90 degree phase shift. w is a variable control for the * resulting stereo width, with the range 0 <= w <= 0.7. */ -void UhjDecoder::decodeStereo(const al::span samples, const size_t offset, - const size_t samplesToDo, const size_t forwardSamples) +void UhjDecoder::decodeStereo(const al::span samples, const size_t samplesToDo, + const size_t forwardSamples) { ASSUME(samplesToDo > 0); { - const float *RESTRICT left{samples[0].data() + offset}; - const float *RESTRICT right{samples[1].data() + offset}; + const float *RESTRICT left{al::assume_aligned<16>(samples[0])}; + const float *RESTRICT right{al::assume_aligned<16>(samples[1])}; for(size_t i{0};i < samplesToDo+sFilterDelay;++i) mS[i] = left[i] + right[i]; @@ -212,9 +212,9 @@ void UhjDecoder::decodeStereo(const al::span samples, const size_t o mCurrentWidth = wtarget; } - float *RESTRICT woutput{samples[0].data() + offset}; - float *RESTRICT xoutput{samples[1].data() + offset}; - float *RESTRICT youtput{samples[2].data() + offset}; + float *RESTRICT woutput{al::assume_aligned<16>(samples[0])}; + float *RESTRICT xoutput{al::assume_aligned<16>(samples[1])}; + float *RESTRICT youtput{al::assume_aligned<16>(samples[2])}; /* Precompute j*D and store in xoutput. */ auto tmpiter = std::copy(mDTHistory.cbegin(), mDTHistory.cend(), mTemp.begin()); diff --git a/core/uhjfilter.h b/core/uhjfilter.h index 9e692599..11058700 100644 --- a/core/uhjfilter.h +++ b/core/uhjfilter.h @@ -66,7 +66,7 @@ struct UhjDecoder : public UhjFilterBase { * reconstructed from 2-channel UHJ should not be run through a normal * B-Format decoder, as it needs different shelf filters. */ - void decode(const al::span samples, const size_t offset, const size_t samplesToDo, + void decode(const al::span samples, const size_t samplesToDo, const size_t forwardSamples); /** @@ -75,11 +75,11 @@ struct UhjDecoder : public UhjFilterBase { * should contain 3 channels, the first two being the left and right stereo * channels, and the third left empty. */ - void decodeStereo(const al::span samples, const size_t offset, - const size_t samplesToDo, const size_t forwardSamples); + void decodeStereo(const al::span samples, const size_t samplesToDo, + const size_t forwardSamples); - using DecoderFunc = void (UhjDecoder::*)(const al::span samples, - const size_t offset, const size_t samplesToDo, const size_t forwardSamples); + using DecoderFunc = void (UhjDecoder::*)(const al::span samples, + const size_t samplesToDo, const size_t forwardSamples); DEF_NEWDEL(UhjDecoder) }; diff --git a/core/voice.cpp b/core/voice.cpp index 924a8446..424184af 100644 --- a/core/voice.cpp +++ b/core/voice.cpp @@ -53,6 +53,7 @@ struct CopyTag; static_assert(!(sizeof(DeviceBase::MixerBufferLine)&15), "DeviceBase::MixerBufferLine must be a multiple of 16 bytes"); +static_assert(!(MaxResamplerEdge&3), "MaxResamplerEdge is not a multiple of 4"); Resampler ResamplerDefault{Resampler::Linear}; @@ -627,9 +628,13 @@ void Voice::mix(const State vstate, ContextBase *Context, const uint SamplesToDo if(mDecoder) { + std::array chanptrs; + std::transform(MixingSamples.begin(), MixingSamples.end(), chanptrs.begin(), + [](DeviceBase::MixerBufferLine &bufline) noexcept -> float* + { return bufline.data() + MaxResamplerEdge; }); const size_t srcOffset{(increment*DstBufferSize + DataPosFrac)>>MixerFracBits}; SrcBufferSize = SrcBufferSize - PostPadding + MaxResamplerEdge; - ((*mDecoder).*mDecoderFunc)(MixingSamples, MaxResamplerEdge, SrcBufferSize, + ((*mDecoder).*mDecoderFunc)({chanptrs.data(), MixingSamples.size()}, SrcBufferSize, srcOffset * likely(vstate == Playing)); } }