From 2d05e4edd32b391b7aa2738d314ce9e1d762fbfc Mon Sep 17 00:00:00 2001 From: jp9000 Date: Fri, 3 May 2013 17:17:45 -0700 Subject: [PATCH] reverted to memcpy because Jim apparently didn't do proper testing like he should have --- DShowPlugin/DeviceSource.cpp | 2 +- .../GraphicsCaptureHook/GraphicsCaptureHook.h | 30 ------------------- .../GraphicsCaptureHook/OpenGLCapture.cpp | 2 +- GraphicsCapture/MemoryCapture.cpp | 4 +-- OBSApi/AudioSource.cpp | 2 +- OBSApi/OBSApi.h | 30 ------------------- OBSApi/Utility/Inline.h | 9 ++---- Source/BitmapImageSource.cpp | 2 +- Source/D3D10Shader.cpp | 6 +--- Source/D3D10Texture.cpp | 9 ++---- Source/D3D10VertexBuffer.cpp | 29 ++++-------------- Source/MMDeviceAudioSource.cpp | 4 +-- 12 files changed, 19 insertions(+), 110 deletions(-) diff --git a/DShowPlugin/DeviceSource.cpp b/DShowPlugin/DeviceSource.cpp index c093448c..1683a096 100644 --- a/DShowPlugin/DeviceSource.cpp +++ b/DShowPlugin/DeviceSource.cpp @@ -998,7 +998,7 @@ void DeviceSource::ReceiveMediaSample(IMediaSample *sample, bool bAudio) /*data->sample = sample; sample->AddRef();*/ - SSECopy(data->lpData, pointer, data->dataLength); + memcpy(data->lpData, pointer, data->dataLength); LONGLONG stopTime; sample->GetTime(&data->timestamp, &stopTime); diff --git a/GraphicsCapture/GraphicsCaptureHook/GraphicsCaptureHook.h b/GraphicsCapture/GraphicsCaptureHook/GraphicsCaptureHook.h index 000554a8..08ed8343 100644 --- a/GraphicsCapture/GraphicsCaptureHook/GraphicsCaptureHook.h +++ b/GraphicsCapture/GraphicsCaptureHook/GraphicsCaptureHook.h @@ -167,36 +167,6 @@ inline void SetVTable(LPVOID ptr, UINT funcOffset, FARPROC funcAddress) //VirtualProtect((LPVOID)(vtable+funcOffset), sizeof(UPARAM), oldProtect, &oldProtect); } -inline void SSECopy(void *lpDest, void *lpSource, UINT size) -{ - UINT alignedSize = size&0xFFFFFFF0; - - if(UPARAM(lpDest)&0xF || UPARAM(lpSource)&0xF) //if unaligned revert to normal copy - { - memcpy(lpDest, lpSource, size); - return; - } - - register __m128i 
*mDest = (__m128i*)lpDest; - register __m128i *mSrc = (__m128i*)lpSource; - - { - register UINT numCopies = alignedSize>>4; - while(numCopies--) - { - _mm_store_si128(mDest, *mSrc); - mDest++; - mSrc++; - } - } - - { - UINT sizeTemp = size-alignedSize; - if(sizeTemp) - memcpy(mDest, mSrc, sizeTemp); - } -} - inline string IntString(DWORD val) { stringstream ss; diff --git a/GraphicsCapture/GraphicsCaptureHook/OpenGLCapture.cpp b/GraphicsCapture/GraphicsCaptureHook/OpenGLCapture.cpp index 9053be9b..1cbe38a6 100644 --- a/GraphicsCapture/GraphicsCaptureHook/OpenGLCapture.cpp +++ b/GraphicsCapture/GraphicsCaptureHook/OpenGLCapture.cpp @@ -331,7 +331,7 @@ DWORD CopyGLCPUTextureThread(LPVOID lpUseless) if(lastRendered != -1) { - SSECopy(textureBuffers[lastRendered], data, glcaptureInfo.pitch*glcaptureInfo.cy); + memcpy(textureBuffers[lastRendered], data, glcaptureInfo.pitch*glcaptureInfo.cy); ReleaseMutex(textureMutexes[lastRendered]); copyData->lastRendered = (UINT)lastRendered; } diff --git a/GraphicsCapture/MemoryCapture.cpp b/GraphicsCapture/MemoryCapture.cpp index f96e95cc..c3bd6b9e 100644 --- a/GraphicsCapture/MemoryCapture.cpp +++ b/GraphicsCapture/MemoryCapture.cpp @@ -125,7 +125,7 @@ Texture* MemoryCapture::LockTexture() if(texture->Map(lpData, texPitch)) { if(pitch == texPitch) - SSECopy(lpData, textureBuffers[curTexture], pitch*height); + memcpy(lpData, textureBuffers[curTexture], pitch*height); else { UINT bestPitch = MIN(pitch, texPitch); @@ -135,7 +135,7 @@ Texture* MemoryCapture::LockTexture() LPBYTE curInput = ((LPBYTE)input) + (pitch*y); LPBYTE curOutput = ((LPBYTE)lpData) + (texPitch*y); - SSECopy(curOutput, curInput, bestPitch); + memcpy(curOutput, curInput, bestPitch); } } diff --git a/OBSApi/AudioSource.cpp b/OBSApi/AudioSource.cpp index acb99378..821bd527 100644 --- a/OBSApi/AudioSource.cpp +++ b/OBSApi/AudioSource.cpp @@ -314,7 +314,7 @@ UINT AudioSource::QueryAudio(float curVolume) } else if(inputChannels == 2) //straight up copy { - 
SSECopy(dataOutputBuffer, captureBuffer, numAudioFrames*2*sizeof(float)); + memcpy(dataOutputBuffer, captureBuffer, numAudioFrames*2*sizeof(float)); } else { diff --git a/OBSApi/OBSApi.h b/OBSApi/OBSApi.h index c4f7e996..ecbb49e2 100644 --- a/OBSApi/OBSApi.h +++ b/OBSApi/OBSApi.h @@ -52,36 +52,6 @@ BASE_EXPORT LPBYTE GetCursorData(HICON hIcon, ICONINFO &ii, UINT &width, UINT &h #define SafeReleaseLogRef(var) if(var) {ULONG chi = var->Release(); OSDebugOut(TEXT("releasing %s, %d refs were left\r\n"), L#var, chi); var = NULL;} #define SafeRelease(var) if(var) {var->Release(); var = NULL;} -inline void SSECopy(void *lpDest, void *lpSource, UINT size) -{ - UINT alignedSize = size&0xFFFFFFF0; - - if(UPARAM(lpDest)&0xF || UPARAM(lpSource)&0xF) //if unaligned revert to normal copy - { - mcpy(lpDest, lpSource, size); - return; - } - - register __m128i *mDest = (__m128i*)lpDest; - register __m128i *mSrc = (__m128i*)lpSource; - - { - register UINT numCopies = alignedSize>>4; - while(numCopies--) - { - _mm_store_si128(mDest, *mSrc); - mDest++; - mSrc++; - } - } - - { - UINT sizeTemp = size-alignedSize; - if(sizeTemp) - mcpy(mDest, mSrc, sizeTemp); - } -} - //big endian conversion functions #define QWORD_BE(val) (((val>>56)&0xFF) | (((val>>48)&0xFF)<<8) | (((val>>40)&0xFF)<<16) | (((val>>32)&0xFF)<<24) | \ (((val>>24)&0xFF)<<32) | (((val>>16)&0xFF)<<40) | (((val>>8)&0xFF)<<48) | ((val&0xFF)<<56)) diff --git a/OBSApi/Utility/Inline.h b/OBSApi/Utility/Inline.h index 9fa11294..46c53587 100644 --- a/OBSApi/Utility/Inline.h +++ b/OBSApi/Utility/Inline.h @@ -23,17 +23,14 @@ #pragma warning(disable : 4035) -#ifdef WIN32 - #pragma intrinsic(memcpy, memset, memcmp) -#endif - #ifndef USE_CUSTOM_MEMORY_FUNCTIONS -inline void STDCALL mcpy(void *pDest, const void *pSrc, size_t iLen) +#define mcpy memcpy +/*inline void STDCALL mcpy(void *pDest, const void *pSrc, size_t iLen) { memcpy(pDest, pSrc, iLen); -} +}*/ #endif diff --git a/Source/BitmapImageSource.cpp b/Source/BitmapImageSource.cpp 
index 425d7bd9..57d46500 100644 --- a/Source/BitmapImageSource.cpp +++ b/Source/BitmapImageSource.cpp @@ -255,7 +255,7 @@ public: if (gif_decode_frame(&gif, newFrame) == GIF_OK) { animationFrameCache[newFrame] = animationFrameData + (newFrame * (gif.width * gif.height * 4)); - SSECopy (animationFrameCache[newFrame], gif.frame_image, gif.width * gif.height * 4); + memcpy(animationFrameCache[newFrame], gif.frame_image, gif.width * gif.height * 4); } lastDecodedFrame = newFrame; diff --git a/Source/D3D10Shader.cpp b/Source/D3D10Shader.cpp index f688f25d..e1f44f10 100644 --- a/Source/D3D10Shader.cpp +++ b/Source/D3D10Shader.cpp @@ -440,11 +440,7 @@ void D3D10Shader::UpdateParams() return; } - if(App->SSE2Available()) - SSECopy(outData, shaderConstantData.Array(), shaderConstantData.Num()); - else - mcpy(outData, shaderConstantData.Array(), shaderConstantData.Num()); - + mcpy(outData, shaderConstantData.Array(), shaderConstantData.Num()); constantBuffer->Unmap(); } } diff --git a/Source/D3D10Texture.cpp b/Source/D3D10Texture.cpp index 971fb62a..6cc85a18 100644 --- a/Source/D3D10Texture.cpp +++ b/Source/D3D10Texture.cpp @@ -614,12 +614,7 @@ void D3D10Texture::SetImage(void *lpData, GSImageFormat imageFormat, UINT pitch) else { if(pitch == map.RowPitch) - { - if(App->SSE2Available()) - SSECopy(map.pData, lpData, pitch*height); - else - mcpy(map.pData, lpData, pitch*height); - } + mcpy(map.pData, lpData, pitch*height); else { UINT bestPitch = MIN(pitch, map.RowPitch); @@ -631,7 +626,7 @@ void D3D10Texture::SetImage(void *lpData, GSImageFormat imageFormat, UINT pitch) LPBYTE curInput = ((LPBYTE)lpData) + (pitch*y); LPBYTE curOutput = ((LPBYTE)map.pData) + (map.RowPitch*y); - SSECopy(curOutput, curInput, bestPitch); + mcpy(curOutput, curInput, bestPitch); } } else diff --git a/Source/D3D10VertexBuffer.cpp b/Source/D3D10VertexBuffer.cpp index 6bcabafe..6039cb7b 100644 --- a/Source/D3D10VertexBuffer.cpp +++ b/Source/D3D10VertexBuffer.cpp @@ -180,10 +180,7 @@ void 
D3D10VertexBuffer::FlushBuffers() return; } - if(App->SSE2Available()) - SSECopy(outData, data->VertList.Array(), sizeof(Vect)*numVerts); - else - mcpy(outData, data->VertList.Array(), sizeof(Vect)*numVerts); + mcpy(outData, data->VertList.Array(), sizeof(Vect)*numVerts); vertexBuffer->Unmap(); @@ -197,11 +194,7 @@ void D3D10VertexBuffer::FlushBuffers() return; } - if(App->SSE2Available()) - SSECopy(outData, data->NormalList.Array(), sizeof(Vect)*numVerts); - else - mcpy(outData, data->NormalList.Array(), sizeof(Vect)*numVerts); - + mcpy(outData, data->NormalList.Array(), sizeof(Vect)*numVerts); normalBuffer->Unmap(); } @@ -215,11 +208,7 @@ void D3D10VertexBuffer::FlushBuffers() return; } - if(App->SSE2Available()) - SSECopy(outData, data->ColorList.Array(), sizeof(Vect)*numVerts); - else - mcpy(outData, data->ColorList.Array(), sizeof(Vect)*numVerts); - + mcpy(outData, data->ColorList.Array(), sizeof(Vect)*numVerts); colorBuffer->Unmap(); } @@ -233,11 +222,7 @@ void D3D10VertexBuffer::FlushBuffers() return; } - if(App->SSE2Available()) - SSECopy(outData, data->TangentList.Array(), sizeof(Vect)*numVerts); - else - mcpy(outData, data->TangentList.Array(), sizeof(Vect)*numVerts); - + mcpy(outData, data->TangentList.Array(), sizeof(Vect)*numVerts); tangentBuffer->Unmap(); } @@ -257,11 +242,7 @@ void D3D10VertexBuffer::FlushBuffers() return; } - if(App->SSE2Available()) - SSECopy(outData, textureVerts.Array(), sizeof(UVCoord)*numVerts); - else - mcpy(outData, textureVerts.Array(), sizeof(UVCoord)*numVerts); - + mcpy(outData, textureVerts.Array(), sizeof(UVCoord)*numVerts); buffer->Unmap(); } } diff --git a/Source/MMDeviceAudioSource.cpp b/Source/MMDeviceAudioSource.cpp index 7796d100..9c0875d6 100644 --- a/Source/MMDeviceAudioSource.cpp +++ b/Source/MMDeviceAudioSource.cpp @@ -395,7 +395,7 @@ bool MMDeviceAudioSource::GetNextBuffer(void **buffer, UINT *numFrames, QWORD *t if (newInputBufferSize > inputBuffer.Num()) inputBuffer.SetSize(newInputBufferSize); - 
SSECopy(inputBuffer.Array()+inputBufferSize, captureBuffer, totalFloatsRead*sizeof(float)); + mcpy(inputBuffer.Array()+inputBufferSize, captureBuffer, totalFloatsRead*sizeof(float)); inputBufferSize = newInputBufferSize; mmCapture->ReleaseBuffer(numFramesRead); @@ -414,7 +414,7 @@ void MMDeviceAudioSource::ReleaseBuffer() { UINT sampleSizeFloats = sampleWindowSize*GetChannelCount(); if (inputBufferSize > sampleSizeFloats) - SSECopy(inputBuffer.Array(), inputBuffer.Array()+sampleSizeFloats, (inputBufferSize-sampleSizeFloats)*sizeof(float)); + memmove(inputBuffer.Array(), inputBuffer.Array()+sampleSizeFloats, (inputBufferSize-sampleSizeFloats)*sizeof(float)); /* memmove, not memcpy: source and destination both lie inside inputBuffer and may overlap */ inputBufferSize -= sampleSizeFloats; }