/******************************************************************************** Copyright (C) 2012 Hugh Bailey This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ********************************************************************************/ #include "OBSApi.h" #include #include "../libsamplerate/samplerate.h" #define KSAUDIO_SPEAKER_4POINT1 (KSAUDIO_SPEAKER_QUAD|SPEAKER_LOW_FREQUENCY) #define KSAUDIO_SPEAKER_3POINT1 (KSAUDIO_SPEAKER_STEREO|SPEAKER_FRONT_CENTER|SPEAKER_LOW_FREQUENCY) #define KSAUDIO_SPEAKER_2POINT1 (KSAUDIO_SPEAKER_STEREO|SPEAKER_LOW_FREQUENCY) void MultiplyAudioBuffer(float *buffer, int totalFloats, float mulVal) { float sum = 0.0f; int totalFloatsStore = totalFloats; if((UPARAM(buffer) & 0xF) == 0) { UINT alignedFloats = totalFloats & 0xFFFFFFFC; __m128 sseMulVal = _mm_set_ps1(mulVal); for(UINT i=0; ibFloat = bFloat; inputChannels = channels; inputSamplesPerSec = samplesPerSec; inputBitsPerSample = bitsPerSample; inputBlockSize = blockSize; inputChannelMask = channelMask; //----------------------------- if(inputSamplesPerSec != 44100) { int errVal; int converterType = SRC_SINC_FASTEST; resampler = src_new(converterType, 2, &errVal); if(!resampler) CrashError(TEXT("AudioSource::InitAudioData: Could not initiate resampler")); resampleRatio = 44100.0 / double(inputSamplesPerSec); bResample = true; //---------------------------------------------------- // hack to get rid of that weird first quirky resampled packet size // (always returns a non-441 sized packet on the first resample) SRC_DATA data; data.src_ratio = resampleRatio; List blankBuffer; blankBuffer.SetSize(inputSamplesPerSec/100*2); data.data_in = blankBuffer.Array(); data.input_frames = inputSamplesPerSec/100; UINT frameAdjust = UINT((double(data.input_frames) * resampleRatio) + 1.0); UINT newFrameSize = frameAdjust*2; tempResampleBuffer.SetSize(newFrameSize); data.data_out = tempResampleBuffer.Array(); data.output_frames = frameAdjust; data.end_of_input = 0; int err = src_process((SRC_STATE*)resampler, &data); nop(); } //------------------------------------------------------------------------- if(inputChannels > 2) { switch(inputChannelMask) { case KSAUDIO_SPEAKER_QUAD: Log(TEXT("Using quad speaker setup")); break; //ocd anyone? case KSAUDIO_SPEAKER_2POINT1: Log(TEXT("Using 2.1 speaker setup")); break; case KSAUDIO_SPEAKER_3POINT1: Log(TEXT("Using 3.1 speaker setup")); break; case KSAUDIO_SPEAKER_4POINT1: Log(TEXT("Using 4.1 speaker setup")); break; case KSAUDIO_SPEAKER_SURROUND: Log(TEXT("Using basic surround speaker setup")); break; case KSAUDIO_SPEAKER_5POINT1: Log(TEXT("Using 5.1 speaker setup")); break; case KSAUDIO_SPEAKER_5POINT1_SURROUND: Log(TEXT("Using 5.1 surround speaker setup")); break; case KSAUDIO_SPEAKER_7POINT1: Log(TEXT("Using 7.1 speaker setup (experimental)")); break; case KSAUDIO_SPEAKER_7POINT1_SURROUND: Log(TEXT("Using 7.1 surround speaker setup (experimental)")); break; default: Log(TEXT("Using unknown speaker setup: 0x%lX, %d channels"), inputChannels, inputChannelMask); inputChannelMask = 0; break; } if(inputChannelMask == 0) { switch(inputChannels) { case 3: inputChannelMask = KSAUDIO_SPEAKER_2POINT1; break; case 4: inputChannelMask = KSAUDIO_SPEAKER_QUAD; break; case 5: inputChannelMask = KSAUDIO_SPEAKER_4POINT1; break; case 6: inputChannelMask = KSAUDIO_SPEAKER_5POINT1; break; case 8: inputChannelMask = KSAUDIO_SPEAKER_7POINT1; break; default: CrashError(TEXT("Unknown speaker setup, no downmixer available.")); } } } } const float dbMinus3 = 0.7071067811865476f; const float dbMinus6 = 0.5f; const float dbMinus9 = 0.3535533905932738f; //not entirely sure if these are the correct coefficients for downmixing, //I'm fairly new to the whole multi speaker thing const float surroundMix = dbMinus3; const float centerMix = dbMinus6; const float lowFreqMix = dbMinus3; const float surroundMix4 = dbMinus6; const float attn5dot1 = 1.0f / (1.0f + centerMix + surroundMix); const float attn4dotX = 1.0f / (1.0f + surroundMix4); void AudioSource::AddAudioSegment(AudioSegment *newSegment, float curVolume) { if (newSegment) MultiplyAudioBuffer(newSegment->audioData.Array(), newSegment->audioData.Num(), curVolume*sourceVolume); for (UINT i=0; iProcess(newSegment); } if (newSegment) audioSegments << newSegment; } UINT AudioSource::QueryAudio(float curVolume) { LPVOID buffer; UINT numAudioFrames; QWORD newTimestamp; if(GetNextBuffer((void**)&buffer, &numAudioFrames, &newTimestamp)) { //------------------------------------------------------------ // convert to float float *captureBuffer; if(!bFloat) { UINT totalSamples = numAudioFrames*inputChannels; if(convertBuffer.Num() < totalSamples) convertBuffer.SetSize(totalSamples); if(inputBitsPerSample == 8) { float *tempConvert = convertBuffer.Array(); char *tempSByte = (char*)buffer; while(totalSamples--) { *(tempConvert++) = float(*(tempSByte++))/127.0f; } } else if(inputBitsPerSample == 16) { float *tempConvert = convertBuffer.Array(); short *tempShort = (short*)buffer; while(totalSamples--) { *(tempConvert++) = float(*(tempShort++))/32767.0f; } } else if(inputBitsPerSample == 24) { float *tempConvert = convertBuffer.Array(); BYTE *tempTriple = (BYTE*)buffer; TripleToLong valOut; while(totalSamples--) { TripleToLong &valIn = (TripleToLong&)tempTriple; valOut.wVal = valIn.wVal; valOut.tripleVal = valIn.tripleVal; if(valOut.tripleVal > 0x7F) valOut.lastByte = 0xFF; *(tempConvert++) = float(double(valOut.val)/8388607.0); tempTriple += 3; } } else if(inputBitsPerSample == 32) { float *tempConvert = convertBuffer.Array(); long *tempShort = (long*)buffer; while(totalSamples--) { *(tempConvert++) = float(double(*(tempShort++))/2147483647.0); } } captureBuffer = convertBuffer.Array(); } else captureBuffer = (float*)buffer; //------------------------------------------------------------ // channel upmix/downmix if(tempBuffer.Num() < numAudioFrames*2) tempBuffer.SetSize(numAudioFrames*2); float *dataOutputBuffer = tempBuffer.Array(); float *tempOut = dataOutputBuffer; if(inputChannels == 1) { UINT numFloats = numAudioFrames; float *inputTemp = (float*)captureBuffer; float *outputTemp = dataOutputBuffer; if((UPARAM(inputTemp) & 0xF) == 0 && (UPARAM(outputTemp) & 0xF) == 0) { UINT alignedFloats = numFloats & 0xFFFFFFFC; for(UINT i=0; i stereo ] the approach seems almost the same [but different coefficients]) // http://acousticsfreq.com/blog/wp-content/uploads/2012/01/ITU-R-BS775-1.pdf // http://ir.lib.nctu.edu.tw/bitstream/987654321/22934/1/030104001.pdf *(outputTemp++) = (left + center + rearLeft) * attn5dot1; *(outputTemp++) = (right + center + rearRight) * attn5dot1; inputTemp += 6; } } // According to http://msdn.microsoft.com/en-us/library/windows/hardware/ff537083(v=vs.85).aspx // KSAUDIO_SPEAKER_7POINT1 is obsolete and no longer supported in Windows Vista and later versions of Windows // Not sure what to do about it, meh , drop front left of center/front right of center -> 5.1 -> stereo; else if(inputChannelMask == KSAUDIO_SPEAKER_7POINT1) { UINT numFloats = numAudioFrames*8; float *endTemp = inputTemp+numFloats; while(inputTemp < endTemp) { float left = inputTemp[0]; float right = inputTemp[1]; float center = inputTemp[2] * centerMix; // Drop LFE since we don't need it //float lowFreq = inputTemp[3]*lowFreqMix; float rearLeft = inputTemp[4] * surroundMix; float rearRight = inputTemp[5] * surroundMix; // Drop SPEAKER_FRONT_LEFT_OF_CENTER , SPEAKER_FRONT_RIGHT_OF_CENTER //float centerLeft = inputTemp[6]; //float centerRight = inputTemp[7]; // Downmix from 5.1 to stereo *(outputTemp++) = (left + center + rearLeft) * attn5dot1; *(outputTemp++) = (right + center + rearRight) * attn5dot1; inputTemp += 8; } } // Downmix to 5.1 (easy stuff) then downmix to stereo as done in KSAUDIO_SPEAKER_5POINT1 else if(inputChannelMask == KSAUDIO_SPEAKER_7POINT1_SURROUND) { UINT numFloats = numAudioFrames*8; float *endTemp = inputTemp+numFloats; while(inputTemp < endTemp) { float left = inputTemp[0]; float right = inputTemp[1]; float center = inputTemp[2] * centerMix; // Skip LFE we don't need it //float lowFreq = inputTemp[3]*lowFreqMix; float rearLeft = inputTemp[4]; float rearRight = inputTemp[5]; float sideLeft = inputTemp[6]; float sideRight = inputTemp[7]; // combine the rear/side channels first , baaam! 5.1 rearLeft = (rearLeft + sideLeft) * 0.5f; rearRight = (rearRight + sideRight) * 0.5f; // downmix to stereo as in 5.1 case *(outputTemp++) = (left + center + rearLeft * surroundMix) * attn5dot1; *(outputTemp++) = (right + center + rearRight * surroundMix) * attn5dot1; inputTemp += 8; } } } ReleaseBuffer(); //------------------------------------------------------------ // resample if(bResample) { UINT frameAdjust = UINT((double(numAudioFrames) * resampleRatio) + 1.0); UINT newFrameSize = frameAdjust*2; if(tempResampleBuffer.Num() < newFrameSize) tempResampleBuffer.SetSize(newFrameSize); SRC_DATA data; data.src_ratio = resampleRatio; data.data_in = tempBuffer.Array(); data.input_frames = numAudioFrames; data.data_out = tempResampleBuffer.Array(); data.output_frames = frameAdjust; data.end_of_input = 0; int err = src_process((SRC_STATE*)resampler, &data); if(err) { RUNONCE AppWarning(TEXT("AudioSource::QueryAudio: Was unable to resample audio for device '%s'"), GetDeviceName()); return NoAudioAvailable; } if(data.input_frames_used != numAudioFrames) { RUNONCE AppWarning(TEXT("AudioSource::QueryAudio: Failed to downsample buffer completely, which shouldn't actually happen because it should be using 10ms of samples")); return NoAudioAvailable; } numAudioFrames = data.output_frames_gen; } //------------------------------------------------------ // timestamp smoothing (keep audio within 70ms of target time) if (!lastUsedTimestamp) lastUsedTimestamp = newTimestamp; else lastUsedTimestamp += 10; QWORD difVal = GetQWDif(newTimestamp, lastUsedTimestamp); if (difVal > 70) { /*QWORD curTimeMS = App->GetVideoTime()-App->GetSceneTimestamp(); UINT curTimeTotalSec = (UINT)(curTimeMS/1000); UINT curTimeTotalMin = curTimeTotalSec/60; UINT curTimeHr = curTimeTotalMin/60; UINT curTimeMin = curTimeTotalMin-(curTimeHr*60); UINT curTimeSec = curTimeTotalSec-(curTimeTotalMin*60); Log(TEXT("A timestamp adjustment was encountered for device %s, approximate stream time is: %u:%u:%u, prev value: %llu, new value: %llu"), GetDeviceName(), curTimeHr, curTimeMin, curTimeSec, lastUsedTimestamp, newTimestamp);*/ lastUsedTimestamp = newTimestamp; } //----------------------------------------------------------------------------- float *newBuffer = (bResample) ? tempResampleBuffer.Array() : tempBuffer.Array(); if(lastUsedTimestamp >= lastSentTimestamp+10) { AudioSegment *newSegment = new AudioSegment(newBuffer, numAudioFrames*2, lastUsedTimestamp); AddAudioSegment(newSegment, curVolume*sourceVolume); lastSentTimestamp = lastUsedTimestamp; } //----------------------------------------------------------------------------- return AudioAvailable; } return NoAudioAvailable; } bool AudioSource::GetEarliestTimestamp(QWORD ×tamp) { if(audioSegments.Num()) { timestamp = audioSegments[0]->timestamp; return true; } return false; } bool AudioSource::GetLatestTimestamp(QWORD ×tamp) { if(audioSegments.Num()) { timestamp = audioSegments.Last()->timestamp; return true; } return false; } bool AudioSource::GetBuffer(float **buffer, QWORD targetTimestamp) { bool bSuccess = false; outputBuffer.Clear(); while(audioSegments.Num()) { if(audioSegments[0]->timestamp < targetTimestamp) { Log(TEXT("Audio timestamp for device '%s' was behind target timestamp by %llu! Had to delete audio segment.\r\n"), GetDeviceName(), targetTimestamp-audioSegments[0]->timestamp); delete audioSegments[0]; audioSegments.Remove(0); } else break; } if(audioSegments.Num()) { bool bUseSegment = false; AudioSegment *segment = audioSegments[0]; QWORD difference = (segment->timestamp-targetTimestamp); if(difference <= 10) { //Log(TEXT("segment.timestamp: %llu, targetTimestamp: %llu"), segment.timestamp, targetTimestamp); outputBuffer.TransferFrom(segment->audioData); delete segment; audioSegments.Remove(0); bSuccess = true; } } outputBuffer.SetSize(441*2); *buffer = outputBuffer.Array(); return bSuccess; } bool AudioSource::GetNewestFrame(float **buffer) { if(buffer) { if(audioSegments.Num()) { List &data = audioSegments.Last()->audioData; *buffer = data.Array(); return true; } } return false; } QWORD AudioSource::GetBufferedTime() { if(audioSegments.Num()) return audioSegments.Last()->timestamp - audioSegments[0]->timestamp; return 0; } void AudioSource::StartCapture() {} void AudioSource::StopCapture() {} UINT AudioSource::GetChannelCount() const {return inputChannels;} UINT AudioSource::GetSamplesPerSec() const {return inputSamplesPerSec;} int AudioSource::GetTimeOffset() const {return timeOffset;} void AudioSource::SetTimeOffset(int newOffset) {timeOffset = newOffset;} void AudioSource::SetVolume(float fVal) {sourceVolume = fabsf(fVal);} float AudioSource::GetVolume() const {return sourceVolume;} UINT AudioSource::NumAudioFilters() const {return audioFilters.Num();} AudioFilter* AudioSource::GetAudioFilter(UINT id) {if(audioFilters.Num() > id) return audioFilters[id]; return NULL;} void AudioSource::AddAudioFilter(AudioFilter *filter) {audioFilters << filter;} void AudioSource::InsertAudioFilter(UINT pos, AudioFilter *filter) {audioFilters.Insert(pos, filter);} void AudioSource::RemoveAudioFilter(AudioFilter *filter) {audioFilters.RemoveItem(filter);} void AudioSource::RemoveAudioFilter(UINT id) {if(audioFilters.Num() > id) audioFilters.Remove(id);}