/********************************************************************************
 Copyright (C) 2012 Hugh Bailey <obs.jim@gmail.com>

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
********************************************************************************/

#include "Main.h"

//NOTE(review): the original include targets were lost to markup stripping; these are the
//headers the code below actually requires (WASAPI core audio + property store) — confirm
//against upstream OBS.
#include <MMDeviceApi.h>
#include <Audioclient.h>
#include <AudioPolicy.h>
#include <propsys.h>

#include "../libsamplerate/samplerate.h"


//WASAPI-based audio source: captures either a microphone (by device ID) or the desktop
//mix (loopback on the default render endpoint), downmixes/upmixes to stereo float,
//resamples to 44100Hz when needed, and hands out audio in 10ms (441-frame) segments.
class MMDeviceAudioSource : public AudioSource
{
    bool bResample;             //true when the device mix rate != 44100 and `resampler` is live
    SRC_STATE *resampler;       //libsamplerate converter (stereo)
    double resampleRatio;       //44100 / device sample rate

    //-----------------------------------------

    IMMDeviceEnumerator *mmEnumerator;
    IMMDevice           *mmDevice;
    IAudioClient        *mmClient;
    IAudioCaptureClient *mmCapture;

    UINT  inputChannels;
    UINT  inputSamplesPerSec;
    UINT  inputBitsPerSample;
    UINT  inputBlockSize;
    DWORD inputChannelMask;     //speaker positions (KSAUDIO_SPEAKER_*), 0 if unknown

    List<AudioSegment> audioSegments;   //queued 10ms segments awaiting GetBuffer()

    bool  bFirstFrameReceived;
    QWORD lastKnownTimestamp;   //ms; advanced manually when timestamps are broken
    bool  bBrokenTimestamp;     //device reports unusable timestamps; synthesize our own

    //-----------------------------------------

    List<float> storageBuffer;  //leftover frames not yet forming a full 10ms segment

    //-----------------------------------------

    List<float> outputBuffer;
    List<float> tempBuffer;
    List<float> tempResampleBuffer;

    String GetDeviceName();

public:
    //Zero out all POD members: CreateAudioSource() deletes the object when Initialize()
    //fails partway through, so the destructor must never see indeterminate pointers or
    //an indeterminate bResample flag.
    inline MMDeviceAudioSource()
        : bResample(false), resampler(NULL), resampleRatio(1.0),
          mmEnumerator(NULL), mmDevice(NULL), mmClient(NULL), mmCapture(NULL),
          inputChannels(0), inputSamplesPerSec(0), inputBitsPerSample(0),
          inputBlockSize(0), inputChannelMask(0),
          bFirstFrameReceived(false), lastKnownTimestamp(0), bBrokenTimestamp(false)
    {
    }

    bool Initialize(bool bMic, CTSTR lpID);

    ~MMDeviceAudioSource()
    {
        StopCapture();

        SafeRelease(mmCapture);
        SafeRelease(mmClient);
        SafeRelease(mmDevice);
        SafeRelease(mmEnumerator);

        if(bResample)
            src_delete(resampler);

        //NOTE(review): the loop body was lost to markup stripping; freeing each queued
        //segment's sample data is the evident intent — confirm against upstream.
        for(UINT i=0; i<audioSegments.Num(); i++)
            audioSegments[i].audioData.Clear();
    }

    //NOTE(review): these declarations were also lost to markup stripping; they are
    //reconstructed from the definitions later in this file.
    virtual void StartCapture();
    virtual void StopCapture();

    virtual UINT GetNextBuffer();

    virtual bool GetMostRecentTimestamp(QWORD &timestamp);
    virtual bool GetBuffer(float **buffer, UINT *numFrames, QWORD targetTimestamp);
};

//Factory: returns a ready-to-capture source, or NULL if the device could not be opened.
AudioSource* CreateAudioSource(bool bMic, CTSTR lpID)
{
    MMDeviceAudioSource *source = new MMDeviceAudioSource;
    if(source->Initialize(bMic, lpID))
        return source;
    else
    {
        delete source;
        return NULL;
    }
}
//============================================================================================================================== #define KSAUDIO_SPEAKER_4POINT1 (KSAUDIO_SPEAKER_QUAD|SPEAKER_LOW_FREQUENCY) #define KSAUDIO_SPEAKER_2POINT1 (KSAUDIO_SPEAKER_STEREO|SPEAKER_LOW_FREQUENCY) bool MMDeviceAudioSource::Initialize(bool bMic, CTSTR lpID) { const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator); const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator); const IID IID_IAudioClient = __uuidof(IAudioClient); const IID IID_IAudioCaptureClient = __uuidof(IAudioCaptureClient); HRESULT err; err = CoCreateInstance(CLSID_MMDeviceEnumerator, NULL, CLSCTX_ALL, IID_IMMDeviceEnumerator, (void**)&mmEnumerator); if(FAILED(err)) { AppWarning(TEXT("MMDeviceAudioSource::Initialize(%d): Could not create IMMDeviceEnumerator = %08lX"), (BOOL)bMic, err); return false; } if(bMic) err = mmEnumerator->GetDevice(lpID, &mmDevice); else err = mmEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &mmDevice); if(FAILED(err)) { AppWarning(TEXT("MMDeviceAudioSource::Initialize(%d): Could not create IMMDevice = %08lX"), (BOOL)bMic, err); return false; } err = mmDevice->Activate(IID_IAudioClient, CLSCTX_ALL, NULL, (void**)&mmClient); if(FAILED(err)) { AppWarning(TEXT("MMDeviceAudioSource::Initialize(%d): Could not create IAudioClient = %08lX"), (BOOL)bMic, err); return false; } WAVEFORMATEX *pwfx; err = mmClient->GetMixFormat(&pwfx); if(FAILED(err)) { AppWarning(TEXT("MMDeviceAudioSource::Initialize(%d): Could not get mix format from audio client = %08lX"), (BOOL)bMic, err); return false; } if(bMic) { String strName = GetDeviceName(); Log(TEXT("------------------------------------------")); Log(TEXT("Using auxilary audio input: %s"), strName.Array()); } //the internal audio engine should always use floats (or so I read), but I suppose just to be safe better check if(pwfx->wFormatTag == WAVE_FORMAT_EXTENSIBLE) { WAVEFORMATEXTENSIBLE *wfext = (WAVEFORMATEXTENSIBLE*)pwfx; 
inputChannelMask = wfext->dwChannelMask; if(wfext->SubFormat != KSDATAFORMAT_SUBTYPE_IEEE_FLOAT) { AppWarning(TEXT("MMDeviceAudioSource::Initialize(%d): Unsupported wave format"), (BOOL)bMic); return false; } } else if(pwfx->wFormatTag != WAVE_FORMAT_IEEE_FLOAT) { AppWarning(TEXT("MMDeviceAudioSource::Initialize(%d): Unsupported wave format"), (BOOL)bMic); return false; } inputChannels = pwfx->nChannels; inputBitsPerSample = 32; inputBlockSize = pwfx->nBlockAlign; inputSamplesPerSec = pwfx->nSamplesPerSec; DWORD flags = bMic ? 0 : AUDCLNT_STREAMFLAGS_LOOPBACK; err = mmClient->Initialize(AUDCLNT_SHAREMODE_SHARED, flags, ConvertMSTo100NanoSec(5000), 0, pwfx, NULL); if(FAILED(err)) { AppWarning(TEXT("MMDeviceAudioSource::Initialize(%d): Could not initialize audio client, result = %08lX"), (BOOL)bMic, err); return false; } err = mmClient->GetService(IID_IAudioCaptureClient, (void**)&mmCapture); if(FAILED(err)) { AppWarning(TEXT("MMDeviceAudioSource::Initialize(%d): Could not get audio capture client, result = %08lX"), (BOOL)bMic, err); return false; } CoTaskMemFree(pwfx); //------------------------------------------------------------------------- if(inputSamplesPerSec != 44100) { int errVal; int converterType = AppConfig->GetInt(TEXT("Audio"), TEXT("UseHighQualityResampling"), FALSE) ? 
SRC_SINC_FASTEST : SRC_LINEAR; resampler = src_new(converterType, 2, &errVal);//SRC_SINC_FASTEST//SRC_ZERO_ORDER_HOLD if(!resampler) { CrashError(TEXT("MMDeviceAudioSource::Initialize(%d): Could not initiate resampler"), (BOOL)bMic); return false; } resampleRatio = 44100.0 / double(inputSamplesPerSec); bResample = true; //---------------------------------------------------- // hack to get rid of that weird first quirky resampled packet size // (always returns a non-441 sized packet on the first resample) SRC_DATA data; data.src_ratio = resampleRatio; List blankBuffer; blankBuffer.SetSize(inputSamplesPerSec/100*2); data.data_in = blankBuffer.Array(); data.input_frames = inputSamplesPerSec/100; UINT frameAdjust = UINT((double(data.input_frames) * resampleRatio) + 1.0); UINT newFrameSize = frameAdjust*2; tempResampleBuffer.SetSize(newFrameSize); data.data_out = tempResampleBuffer.Array(); data.output_frames = frameAdjust; data.end_of_input = 0; int err = src_process(resampler, &data); nop(); } //------------------------------------------------------------------------- if(inputChannels > 2) { if(inputChannelMask == 0) { switch(inputChannels) { case 3: inputChannelMask = KSAUDIO_SPEAKER_2POINT1; break; case 4: inputChannelMask = KSAUDIO_SPEAKER_QUAD; break; case 5: inputChannelMask = KSAUDIO_SPEAKER_4POINT1; break; case 6: inputChannelMask = KSAUDIO_SPEAKER_5POINT1; break; case 8: inputChannelMask = KSAUDIO_SPEAKER_7POINT1; break; } } switch(inputChannelMask) { case KSAUDIO_SPEAKER_QUAD: Log(TEXT("Using quad speaker setup")); break; //ocd anyone? 
case KSAUDIO_SPEAKER_2POINT1: Log(TEXT("Using 2.1 speaker setup")); break; case KSAUDIO_SPEAKER_4POINT1: Log(TEXT("Using 4.1 speaker setup")); break; case KSAUDIO_SPEAKER_SURROUND: Log(TEXT("Using basic surround speaker setup")); break; case KSAUDIO_SPEAKER_5POINT1: Log(TEXT("Using 5.1 speaker setup")); break; case KSAUDIO_SPEAKER_5POINT1_SURROUND: Log(TEXT("Using 5.1 surround speaker setup")); break; case KSAUDIO_SPEAKER_7POINT1: Log(TEXT("Using 7.1 speaker setup (experimental)")); break; case KSAUDIO_SPEAKER_7POINT1_SURROUND: Log(TEXT("Using 7.1 surround speaker setup (experimental)")); break; default: Log(TEXT("Using unknown speaker setup: 0x%lX"), inputChannelMask); CrashError(TEXT("Speaker setup not yet implemented -- dear god of all the audio APIs, the one I -have- to use doesn't support resampling or downmixing. fabulous.")); break; } } return true; } void MMDeviceAudioSource::StartCapture() { if(mmClient) mmClient->Start(); } void MMDeviceAudioSource::StopCapture() { if(mmClient) mmClient->Stop(); } String MMDeviceAudioSource::GetDeviceName() { IPropertyStore *store; if(SUCCEEDED(mmDevice->OpenPropertyStore(STGM_READ, &store))) { PROPVARIANT varName; PropVariantInit(&varName); if(SUCCEEDED(store->GetValue(PKEY_Device_FriendlyName, &varName))) { CWSTR wstrName = varName.pwszVal; String strName = wstrName; return strName; } store->Release(); } return String(TEXT("(could not query name of device)")); } const float dbMinus3 = 0.7071067811865476f; const float dbMinus6 = 0.5f; const float dbMinus9 = 0.3535533905932738f; //not entirely sure if these are the correct coefficients for downmixing, //I'm fairly new to the whole multi speaker thing const float surroundMix = dbMinus3; const float centerMix = dbMinus3; const float lowFreqMix = 3.16227766f*dbMinus3; UINT MMDeviceAudioSource::GetNextBuffer() { UINT captureSize = 0; HRESULT err = mmCapture->GetNextPacketSize(&captureSize); if(FAILED(err)) { RUNONCE AppWarning(TEXT("MMDeviceAudioSource::GetBuffer: 
GetNextPacketSize failed")); return NoAudioAvailable; } float *outputBuffer = NULL; if(captureSize) { LPBYTE captureBuffer; DWORD dwFlags = 0; UINT numAudioFrames = 0; UINT64 qpcTimestamp; err = mmCapture->GetBuffer(&captureBuffer, &numAudioFrames, &dwFlags, NULL, &qpcTimestamp); if(FAILED(err)) { RUNONCE AppWarning(TEXT("MMDeviceAudioSource::GetBuffer: GetBuffer failed")); return NoAudioAvailable; } QWORD newTimestamp; if(dwFlags & AUDCLNT_BUFFERFLAGS_TIMESTAMP_ERROR) { RUNONCE AppWarning(TEXT("MMDeviceAudioSource::GetBuffer: woa woa woa, getting timestamp errors from the audio subsystem. device = %s"), GetDeviceName().Array()); if(!bBrokenTimestamp) lastKnownTimestamp = newTimestamp = lastKnownTimestamp + numAudioFrames*1000/inputSamplesPerSec; } else { if(!bBrokenTimestamp) { newTimestamp = qpcTimestamp/10000; /*if(bFirstFrameReceived) { LONGLONG offset = LONGLONG(newTimestamp)-LONGLONG(lastKnownTimestamp); if(offset < 1 || offset > 20) bBrokenTimestamp = true; else lastKnownTimestamp = newTimestamp; } else*/ lastKnownTimestamp = newTimestamp; } } //have to do this crap to account for broken devices or device drivers. absolutely unbelievable. 
if(!bFirstFrameReceived) { LARGE_INTEGER clockFreq; QueryPerformanceFrequency(&clockFreq); QWORD curTime = GetQPCTimeMS(clockFreq.QuadPart); if(newTimestamp < (curTime-10000) || newTimestamp > (curTime+10000)) { bBrokenTimestamp = true; lastKnownTimestamp = newTimestamp = GetQPCTimeMS(clockFreq.QuadPart); Log(TEXT("MMDeviceAudioSource::GetNextBuffer: Got bad audio timestamp offset %d from device: '%s', timestamps for this device will be calculated"), (int)(newTimestamp - curTime), GetDeviceName().Array()); } bFirstFrameReceived = true; } if(tempBuffer.Num() < numAudioFrames*2) tempBuffer.SetSize(numAudioFrames*2); outputBuffer = tempBuffer.Array(); float *tempOut = outputBuffer; //------------------------------------------------------------ // channel upmix/downmix if(inputChannels == 1) { UINT numFloats = numAudioFrames; float *inputTemp = (float*)captureBuffer; float *outputTemp = outputBuffer; if(App->SSE2Available() && (UPARAM(inputTemp) & 0xF) == 0 && (UPARAM(outputTemp) & 0xF) == 0) { UINT alignedFloats = numFloats & 0xFFFFFFFC; for(UINT i=0; iSSE2Available()) SSECopy(outputBuffer, captureBuffer, numAudioFrames*2*sizeof(float)); else mcpy(outputBuffer, captureBuffer, numAudioFrames*2*sizeof(float)); } else { //todo: downmix optimization, also support for other speaker configurations than ones I can merely "think" of. ugh. 
float *inputTemp = (float*)captureBuffer; float *outputTemp = outputBuffer; if(inputChannelMask == KSAUDIO_SPEAKER_QUAD) { UINT numFloats = numAudioFrames*4; float *endTemp = inputTemp+numFloats; while(inputTemp < endTemp) { float left = inputTemp[0]; float right = inputTemp[1]; float rear = (inputTemp[2]+inputTemp[3])*surroundMix; *(outputTemp++) = left - rear; *(outputTemp++) = right + rear; inputTemp += 4; } } else if(inputChannelMask == KSAUDIO_SPEAKER_2POINT1) { UINT numFloats = numAudioFrames*3; float *endTemp = inputTemp+numFloats; while(inputTemp < endTemp) { float left = inputTemp[0]; float right = inputTemp[1]; float lfe = inputTemp[2]*lowFreqMix; *(outputTemp++) = left + lfe; *(outputTemp++) = right + lfe; inputTemp += 3; } } else if(inputChannelMask == KSAUDIO_SPEAKER_4POINT1) { UINT numFloats = numAudioFrames*5; float *endTemp = inputTemp+numFloats; while(inputTemp < endTemp) { float left = inputTemp[0]; float right = inputTemp[1]; float lfe = inputTemp[2]*lowFreqMix; float rear = (inputTemp[3]+inputTemp[4])*surroundMix; *(outputTemp++) = left + lfe - rear; *(outputTemp++) = right + lfe + rear; inputTemp += 5; } } else if(inputChannelMask == KSAUDIO_SPEAKER_SURROUND) { UINT numFloats = numAudioFrames*4; float *endTemp = inputTemp+numFloats; while(inputTemp < endTemp) { float left = inputTemp[0]; float right = inputTemp[1]; float center = inputTemp[2]*centerMix; float rear = inputTemp[3]*(surroundMix*dbMinus3); *(outputTemp++) = left + center - rear; *(outputTemp++) = right + center + rear; inputTemp += 4; } } //don't think this will work for both else if(inputChannelMask == KSAUDIO_SPEAKER_5POINT1) { UINT numFloats = numAudioFrames*6; float *endTemp = inputTemp+numFloats; while(inputTemp < endTemp) { float left = inputTemp[0]; float right = inputTemp[1]; float center = inputTemp[2]*centerMix; float lowFreq = inputTemp[3]*lowFreqMix; float rear = (inputTemp[4]+inputTemp[5])*surroundMix; *(outputTemp++) = left + center + lowFreq - rear; *(outputTemp++) = 
right + center + lowFreq + rear; inputTemp += 6; } } //todo ------------------ //not sure if my 5.1/7.1 downmixes are correct else if(inputChannelMask == KSAUDIO_SPEAKER_5POINT1_SURROUND) { UINT numFloats = numAudioFrames*6; float *endTemp = inputTemp+numFloats; while(inputTemp < endTemp) { float left = inputTemp[0]; float right = inputTemp[1]; float center = inputTemp[2]*centerMix; float lowFreq = inputTemp[3]*lowFreqMix; float sideLeft = inputTemp[4]*dbMinus3; float sideRight = inputTemp[5]*dbMinus3; *(outputTemp++) = left + center + sideLeft + lowFreq; *(outputTemp++) = right + center + sideRight + lowFreq; inputTemp += 6; } } else if(inputChannelMask == KSAUDIO_SPEAKER_7POINT1) { UINT numFloats = numAudioFrames*8; float *endTemp = inputTemp+numFloats; while(inputTemp < endTemp) { float left = inputTemp[0]; float right = inputTemp[1]; float center = inputTemp[2]*(centerMix*dbMinus3); float lowFreq = inputTemp[3]*lowFreqMix; float rear = (inputTemp[4]+inputTemp[5])*surroundMix; float centerLeft = inputTemp[6]*dbMinus6; float centerRight = inputTemp[7]*dbMinus6; *(outputTemp++) = left + centerLeft + center + lowFreq - rear; *(outputTemp++) = right + centerRight + center + lowFreq + rear; inputTemp += 8; } } else if(inputChannelMask == KSAUDIO_SPEAKER_7POINT1_SURROUND) { UINT numFloats = numAudioFrames*8; float *endTemp = inputTemp+numFloats; while(inputTemp < endTemp) { float left = inputTemp[0]; float right = inputTemp[1]; float center = inputTemp[2]*centerMix; float lowFreq = inputTemp[3]*lowFreqMix; float rear = (inputTemp[4]+inputTemp[5])*(surroundMix*dbMinus3); float sideLeft = inputTemp[6]*dbMinus6; float sideRight = inputTemp[7]*dbMinus6; *(outputTemp++) = left + sideLeft + center + lowFreq - rear; *(outputTemp++) = right + sideLeft + center + lowFreq + rear; inputTemp += 8; } } } mmCapture->ReleaseBuffer(numAudioFrames); //------------------------------------------------------------ // resample if(bResample) { UINT frameAdjust = 
UINT((double(numAudioFrames) * resampleRatio) + 1.0); UINT newFrameSize = frameAdjust*2; if(tempResampleBuffer.Num() < newFrameSize) tempResampleBuffer.SetSize(newFrameSize); SRC_DATA data; data.src_ratio = resampleRatio; data.data_in = tempBuffer.Array(); data.input_frames = numAudioFrames; data.data_out = tempResampleBuffer.Array(); data.output_frames = frameAdjust; data.end_of_input = 0; int err = src_process(resampler, &data); if(err) { RUNONCE AppWarning(TEXT("Was unable to resample audio")); return NoAudioAvailable; } if(data.input_frames_used != numAudioFrames) { RUNONCE AppWarning(TEXT("Failed to downsample buffer completely, which shouldn't actually happen because it should be using 10ms of samples")); return NoAudioAvailable; } numAudioFrames = data.output_frames_gen; } //----------------------------------------------------------------------------- // sort all audio frames into 10 millisecond increments (done because not all devices output in 10ms increments) float *newBuffer = (bResample) ? 
tempResampleBuffer.Array() : tempBuffer.Array(); if(storageBuffer.Num() == 0 && numAudioFrames == 441) { AudioSegment &newSegment = *audioSegments.CreateNew(); newSegment.audioData.CopyArray(newBuffer, numAudioFrames*2); if(bBrokenTimestamp) newSegment.timestamp = (lastKnownTimestamp += 10); else newSegment.timestamp = newTimestamp; } else { UINT storedFrames = storageBuffer.Num(); storageBuffer.AppendArray(newBuffer, numAudioFrames*2); if(storageBuffer.Num() >= (441*2)) { QWORD baseTimestmap; AudioSegment &newSegment = *audioSegments.CreateNew(); newSegment.audioData.CopyArray(storageBuffer.Array(), (441*2)); storageBuffer.RemoveRange(0, (441*2)); if(bBrokenTimestamp) newSegment.timestamp = (lastKnownTimestamp += 10); else baseTimestmap = newSegment.timestamp = newTimestamp - QWORD(storedFrames)/2*1000/44100; //if still data pending (can happen) while(storageBuffer.Num() >= (441*2)) { AudioSegment &newSegment = *audioSegments.CreateNew(); newSegment.audioData.CopyArray(storageBuffer.Array(), (441*2)); storageBuffer.RemoveRange(0, (441*2)); if(bBrokenTimestamp) newSegment.timestamp = (lastKnownTimestamp += 10); else newSegment.timestamp = (baseTimestmap += 10); } } } //----------------------------------------------------------------------------- return ContinueAudioRequest; } return NoAudioAvailable; } bool MMDeviceAudioSource::GetMostRecentTimestamp(QWORD ×tamp) { if(audioSegments.Num()) { timestamp = audioSegments[0].timestamp; return true; } return false; } bool MMDeviceAudioSource::GetBuffer(float **buffer, UINT *numFrames, QWORD targetTimestamp) { bool bSuccess = false; outputBuffer.Clear(); if(!bBrokenTimestamp) { while(audioSegments.Num()) { if(audioSegments[0].timestamp < targetTimestamp) audioSegments.Remove(0); else break; } } if(audioSegments.Num()) { bool bUseSegment = false; AudioSegment &segment = audioSegments[0]; QWORD difference = (segment.timestamp-targetTimestamp); if(bBrokenTimestamp || difference <= 10) { //Log(TEXT("segment.timestamp: %llu, 
targetTimestamp: %llu"), segment.timestamp, targetTimestamp); outputBuffer.TransferFrom(segment.audioData); audioSegments.Remove(0); bSuccess = true; } } outputBuffer.SetSize(441*2); *buffer = outputBuffer.Array(); *numFrames = outputBuffer.Num()/2; return bSuccess; }