reverted to memcpy because Jim apparently didn't do proper testing like he should have

This commit is contained in:
jp9000 2013-05-03 17:17:45 -07:00
parent 83fea9803e
commit 2d05e4edd3
12 changed files with 19 additions and 110 deletions

View File

@ -998,7 +998,7 @@ void DeviceSource::ReceiveMediaSample(IMediaSample *sample, bool bAudio)
/*data->sample = sample; /*data->sample = sample;
sample->AddRef();*/ sample->AddRef();*/
SSECopy(data->lpData, pointer, data->dataLength); memcpy(data->lpData, pointer, data->dataLength);
LONGLONG stopTime; LONGLONG stopTime;
sample->GetTime(&data->timestamp, &stopTime); sample->GetTime(&data->timestamp, &stopTime);

View File

@ -167,36 +167,6 @@ inline void SetVTable(LPVOID ptr, UINT funcOffset, FARPROC funcAddress)
//VirtualProtect((LPVOID)(vtable+funcOffset), sizeof(UPARAM), oldProtect, &oldProtect); //VirtualProtect((LPVOID)(vtable+funcOffset), sizeof(UPARAM), oldProtect, &oldProtect);
} }
inline void SSECopy(void *lpDest, void *lpSource, UINT size)
{
UINT alignedSize = size&0xFFFFFFF0;
if(UPARAM(lpDest)&0xF || UPARAM(lpSource)&0xF) //if unaligned revert to normal copy
{
memcpy(lpDest, lpSource, size);
return;
}
register __m128i *mDest = (__m128i*)lpDest;
register __m128i *mSrc = (__m128i*)lpSource;
{
register UINT numCopies = alignedSize>>4;
while(numCopies--)
{
_mm_store_si128(mDest, *mSrc);
mDest++;
mSrc++;
}
}
{
UINT sizeTemp = size-alignedSize;
if(sizeTemp)
memcpy(mDest, mSrc, sizeTemp);
}
}
inline string IntString(DWORD val) inline string IntString(DWORD val)
{ {
stringstream ss; stringstream ss;

View File

@ -331,7 +331,7 @@ DWORD CopyGLCPUTextureThread(LPVOID lpUseless)
if(lastRendered != -1) if(lastRendered != -1)
{ {
SSECopy(textureBuffers[lastRendered], data, glcaptureInfo.pitch*glcaptureInfo.cy); memcpy(textureBuffers[lastRendered], data, glcaptureInfo.pitch*glcaptureInfo.cy);
ReleaseMutex(textureMutexes[lastRendered]); ReleaseMutex(textureMutexes[lastRendered]);
copyData->lastRendered = (UINT)lastRendered; copyData->lastRendered = (UINT)lastRendered;
} }

View File

@ -125,7 +125,7 @@ Texture* MemoryCapture::LockTexture()
if(texture->Map(lpData, texPitch)) if(texture->Map(lpData, texPitch))
{ {
if(pitch == texPitch) if(pitch == texPitch)
SSECopy(lpData, textureBuffers[curTexture], pitch*height); memcpy(lpData, textureBuffers[curTexture], pitch*height);
else else
{ {
UINT bestPitch = MIN(pitch, texPitch); UINT bestPitch = MIN(pitch, texPitch);
@ -135,7 +135,7 @@ Texture* MemoryCapture::LockTexture()
LPBYTE curInput = ((LPBYTE)input) + (pitch*y); LPBYTE curInput = ((LPBYTE)input) + (pitch*y);
LPBYTE curOutput = ((LPBYTE)lpData) + (texPitch*y); LPBYTE curOutput = ((LPBYTE)lpData) + (texPitch*y);
SSECopy(curOutput, curInput, bestPitch); memcpy(curOutput, curInput, bestPitch);
} }
} }

View File

@ -314,7 +314,7 @@ UINT AudioSource::QueryAudio(float curVolume)
} }
else if(inputChannels == 2) //straight up copy else if(inputChannels == 2) //straight up copy
{ {
SSECopy(dataOutputBuffer, captureBuffer, numAudioFrames*2*sizeof(float)); memcpy(dataOutputBuffer, captureBuffer, numAudioFrames*2*sizeof(float));
} }
else else
{ {

View File

@ -52,36 +52,6 @@ BASE_EXPORT LPBYTE GetCursorData(HICON hIcon, ICONINFO &ii, UINT &width, UINT &h
#define SafeReleaseLogRef(var) if(var) {ULONG chi = var->Release(); OSDebugOut(TEXT("releasing %s, %d refs were left\r\n"), L#var, chi); var = NULL;} #define SafeReleaseLogRef(var) if(var) {ULONG chi = var->Release(); OSDebugOut(TEXT("releasing %s, %d refs were left\r\n"), L#var, chi); var = NULL;}
#define SafeRelease(var) if(var) {var->Release(); var = NULL;} #define SafeRelease(var) if(var) {var->Release(); var = NULL;}
inline void SSECopy(void *lpDest, void *lpSource, UINT size)
{
UINT alignedSize = size&0xFFFFFFF0;
if(UPARAM(lpDest)&0xF || UPARAM(lpSource)&0xF) //if unaligned revert to normal copy
{
mcpy(lpDest, lpSource, size);
return;
}
register __m128i *mDest = (__m128i*)lpDest;
register __m128i *mSrc = (__m128i*)lpSource;
{
register UINT numCopies = alignedSize>>4;
while(numCopies--)
{
_mm_store_si128(mDest, *mSrc);
mDest++;
mSrc++;
}
}
{
UINT sizeTemp = size-alignedSize;
if(sizeTemp)
mcpy(mDest, mSrc, sizeTemp);
}
}
//big endian conversion functions //big endian conversion functions
#define QWORD_BE(val) (((val>>56)&0xFF) | (((val>>48)&0xFF)<<8) | (((val>>40)&0xFF)<<16) | (((val>>32)&0xFF)<<24) | \ #define QWORD_BE(val) (((val>>56)&0xFF) | (((val>>48)&0xFF)<<8) | (((val>>40)&0xFF)<<16) | (((val>>32)&0xFF)<<24) | \
(((val>>24)&0xFF)<<32) | (((val>>16)&0xFF)<<40) | (((val>>8)&0xFF)<<48) | ((val&0xFF)<<56)) (((val>>24)&0xFF)<<32) | (((val>>16)&0xFF)<<40) | (((val>>8)&0xFF)<<48) | ((val&0xFF)<<56))

View File

@ -23,17 +23,14 @@
#pragma warning(disable : 4035) #pragma warning(disable : 4035)
#ifdef WIN32
#pragma intrinsic(memcpy, memset, memcmp)
#endif
#ifndef USE_CUSTOM_MEMORY_FUNCTIONS #ifndef USE_CUSTOM_MEMORY_FUNCTIONS
inline void STDCALL mcpy(void *pDest, const void *pSrc, size_t iLen) #define mcpy memcpy
/*inline void STDCALL mcpy(void *pDest, const void *pSrc, size_t iLen)
{ {
memcpy(pDest, pSrc, iLen); memcpy(pDest, pSrc, iLen);
} }*/
#endif #endif

View File

@ -255,7 +255,7 @@ public:
if (gif_decode_frame(&gif, newFrame) == GIF_OK) if (gif_decode_frame(&gif, newFrame) == GIF_OK)
{ {
animationFrameCache[newFrame] = animationFrameData + (newFrame * (gif.width * gif.height * 4)); animationFrameCache[newFrame] = animationFrameData + (newFrame * (gif.width * gif.height * 4));
SSECopy (animationFrameCache[newFrame], gif.frame_image, gif.width * gif.height * 4); memcpy(animationFrameCache[newFrame], gif.frame_image, gif.width * gif.height * 4);
} }
lastDecodedFrame = newFrame; lastDecodedFrame = newFrame;

View File

@ -440,11 +440,7 @@ void D3D10Shader::UpdateParams()
return; return;
} }
if(App->SSE2Available()) mcpy(outData, shaderConstantData.Array(), shaderConstantData.Num());
SSECopy(outData, shaderConstantData.Array(), shaderConstantData.Num());
else
mcpy(outData, shaderConstantData.Array(), shaderConstantData.Num());
constantBuffer->Unmap(); constantBuffer->Unmap();
} }
} }

View File

@ -614,12 +614,7 @@ void D3D10Texture::SetImage(void *lpData, GSImageFormat imageFormat, UINT pitch)
else else
{ {
if(pitch == map.RowPitch) if(pitch == map.RowPitch)
{ mcpy(map.pData, lpData, pitch*height);
if(App->SSE2Available())
SSECopy(map.pData, lpData, pitch*height);
else
mcpy(map.pData, lpData, pitch*height);
}
else else
{ {
UINT bestPitch = MIN(pitch, map.RowPitch); UINT bestPitch = MIN(pitch, map.RowPitch);
@ -631,7 +626,7 @@ void D3D10Texture::SetImage(void *lpData, GSImageFormat imageFormat, UINT pitch)
LPBYTE curInput = ((LPBYTE)lpData) + (pitch*y); LPBYTE curInput = ((LPBYTE)lpData) + (pitch*y);
LPBYTE curOutput = ((LPBYTE)map.pData) + (map.RowPitch*y); LPBYTE curOutput = ((LPBYTE)map.pData) + (map.RowPitch*y);
SSECopy(curOutput, curInput, bestPitch); mcpy(curOutput, curInput, bestPitch);
} }
} }
else else

View File

@ -180,10 +180,7 @@ void D3D10VertexBuffer::FlushBuffers()
return; return;
} }
if(App->SSE2Available()) mcpy(outData, data->VertList.Array(), sizeof(Vect)*numVerts);
SSECopy(outData, data->VertList.Array(), sizeof(Vect)*numVerts);
else
mcpy(outData, data->VertList.Array(), sizeof(Vect)*numVerts);
vertexBuffer->Unmap(); vertexBuffer->Unmap();
@ -197,11 +194,7 @@ void D3D10VertexBuffer::FlushBuffers()
return; return;
} }
if(App->SSE2Available()) mcpy(outData, data->NormalList.Array(), sizeof(Vect)*numVerts);
SSECopy(outData, data->NormalList.Array(), sizeof(Vect)*numVerts);
else
mcpy(outData, data->NormalList.Array(), sizeof(Vect)*numVerts);
normalBuffer->Unmap(); normalBuffer->Unmap();
} }
@ -215,11 +208,7 @@ void D3D10VertexBuffer::FlushBuffers()
return; return;
} }
if(App->SSE2Available()) mcpy(outData, data->ColorList.Array(), sizeof(Vect)*numVerts);
SSECopy(outData, data->ColorList.Array(), sizeof(Vect)*numVerts);
else
mcpy(outData, data->ColorList.Array(), sizeof(Vect)*numVerts);
colorBuffer->Unmap(); colorBuffer->Unmap();
} }
@ -233,11 +222,7 @@ void D3D10VertexBuffer::FlushBuffers()
return; return;
} }
if(App->SSE2Available()) mcpy(outData, data->TangentList.Array(), sizeof(Vect)*numVerts);
SSECopy(outData, data->TangentList.Array(), sizeof(Vect)*numVerts);
else
mcpy(outData, data->TangentList.Array(), sizeof(Vect)*numVerts);
tangentBuffer->Unmap(); tangentBuffer->Unmap();
} }
@ -257,11 +242,7 @@ void D3D10VertexBuffer::FlushBuffers()
return; return;
} }
if(App->SSE2Available()) mcpy(outData, textureVerts.Array(), sizeof(UVCoord)*numVerts);
SSECopy(outData, textureVerts.Array(), sizeof(UVCoord)*numVerts);
else
mcpy(outData, textureVerts.Array(), sizeof(UVCoord)*numVerts);
buffer->Unmap(); buffer->Unmap();
} }
} }

View File

@ -395,7 +395,7 @@ bool MMDeviceAudioSource::GetNextBuffer(void **buffer, UINT *numFrames, QWORD *t
if (newInputBufferSize > inputBuffer.Num()) if (newInputBufferSize > inputBuffer.Num())
inputBuffer.SetSize(newInputBufferSize); inputBuffer.SetSize(newInputBufferSize);
SSECopy(inputBuffer.Array()+inputBufferSize, captureBuffer, totalFloatsRead*sizeof(float)); mcpy(inputBuffer.Array()+inputBufferSize, captureBuffer, totalFloatsRead*sizeof(float));
inputBufferSize = newInputBufferSize; inputBufferSize = newInputBufferSize;
mmCapture->ReleaseBuffer(numFramesRead); mmCapture->ReleaseBuffer(numFramesRead);
@ -414,7 +414,7 @@ void MMDeviceAudioSource::ReleaseBuffer()
{ {
UINT sampleSizeFloats = sampleWindowSize*GetChannelCount(); UINT sampleSizeFloats = sampleWindowSize*GetChannelCount();
if (inputBufferSize > sampleSizeFloats) if (inputBufferSize > sampleSizeFloats)
SSECopy(inputBuffer.Array(), inputBuffer.Array()+sampleSizeFloats, (inputBufferSize-sampleSizeFloats)*sizeof(float)); mcpy(inputBuffer.Array(), inputBuffer.Array()+sampleSizeFloats, (inputBufferSize-sampleSizeFloats)*sizeof(float));
inputBufferSize -= sampleSizeFloats; inputBufferSize -= sampleSizeFloats;
} }