reverted to memcpy because Jim apparently didn't do proper testing like he should have

This commit is contained in:
jp9000 2013-05-03 17:17:45 -07:00
parent 83fea9803e
commit 2d05e4edd3
12 changed files with 19 additions and 110 deletions

View File

@ -998,7 +998,7 @@ void DeviceSource::ReceiveMediaSample(IMediaSample *sample, bool bAudio)
/*data->sample = sample;
sample->AddRef();*/
SSECopy(data->lpData, pointer, data->dataLength);
memcpy(data->lpData, pointer, data->dataLength);
LONGLONG stopTime;
sample->GetTime(&data->timestamp, &stopTime);

View File

@ -167,36 +167,6 @@ inline void SetVTable(LPVOID ptr, UINT funcOffset, FARPROC funcAddress)
//VirtualProtect((LPVOID)(vtable+funcOffset), sizeof(UPARAM), oldProtect, &oldProtect);
}
inline void SSECopy(void *lpDest, void *lpSource, UINT size)
{
UINT alignedSize = size&0xFFFFFFF0;
if(UPARAM(lpDest)&0xF || UPARAM(lpSource)&0xF) //if unaligned revert to normal copy
{
memcpy(lpDest, lpSource, size);
return;
}
register __m128i *mDest = (__m128i*)lpDest;
register __m128i *mSrc = (__m128i*)lpSource;
{
register UINT numCopies = alignedSize>>4;
while(numCopies--)
{
_mm_store_si128(mDest, *mSrc);
mDest++;
mSrc++;
}
}
{
UINT sizeTemp = size-alignedSize;
if(sizeTemp)
memcpy(mDest, mSrc, sizeTemp);
}
}
inline string IntString(DWORD val)
{
stringstream ss;

View File

@ -331,7 +331,7 @@ DWORD CopyGLCPUTextureThread(LPVOID lpUseless)
if(lastRendered != -1)
{
SSECopy(textureBuffers[lastRendered], data, glcaptureInfo.pitch*glcaptureInfo.cy);
memcpy(textureBuffers[lastRendered], data, glcaptureInfo.pitch*glcaptureInfo.cy);
ReleaseMutex(textureMutexes[lastRendered]);
copyData->lastRendered = (UINT)lastRendered;
}

View File

@ -125,7 +125,7 @@ Texture* MemoryCapture::LockTexture()
if(texture->Map(lpData, texPitch))
{
if(pitch == texPitch)
SSECopy(lpData, textureBuffers[curTexture], pitch*height);
memcpy(lpData, textureBuffers[curTexture], pitch*height);
else
{
UINT bestPitch = MIN(pitch, texPitch);
@ -135,7 +135,7 @@ Texture* MemoryCapture::LockTexture()
LPBYTE curInput = ((LPBYTE)input) + (pitch*y);
LPBYTE curOutput = ((LPBYTE)lpData) + (texPitch*y);
SSECopy(curOutput, curInput, bestPitch);
memcpy(curOutput, curInput, bestPitch);
}
}

View File

@ -314,7 +314,7 @@ UINT AudioSource::QueryAudio(float curVolume)
}
else if(inputChannels == 2) //straight up copy
{
SSECopy(dataOutputBuffer, captureBuffer, numAudioFrames*2*sizeof(float));
memcpy(dataOutputBuffer, captureBuffer, numAudioFrames*2*sizeof(float));
}
else
{

View File

@ -52,36 +52,6 @@ BASE_EXPORT LPBYTE GetCursorData(HICON hIcon, ICONINFO &ii, UINT &width, UINT &h
#define SafeReleaseLogRef(var) if(var) {ULONG chi = var->Release(); OSDebugOut(TEXT("releasing %s, %d refs were left\r\n"), L#var, chi); var = NULL;}
#define SafeRelease(var) if(var) {var->Release(); var = NULL;}
inline void SSECopy(void *lpDest, void *lpSource, UINT size)
{
UINT alignedSize = size&0xFFFFFFF0;
if(UPARAM(lpDest)&0xF || UPARAM(lpSource)&0xF) //if unaligned revert to normal copy
{
mcpy(lpDest, lpSource, size);
return;
}
register __m128i *mDest = (__m128i*)lpDest;
register __m128i *mSrc = (__m128i*)lpSource;
{
register UINT numCopies = alignedSize>>4;
while(numCopies--)
{
_mm_store_si128(mDest, *mSrc);
mDest++;
mSrc++;
}
}
{
UINT sizeTemp = size-alignedSize;
if(sizeTemp)
mcpy(mDest, mSrc, sizeTemp);
}
}
//big endian conversion functions
#define QWORD_BE(val) (((val>>56)&0xFF) | (((val>>48)&0xFF)<<8) | (((val>>40)&0xFF)<<16) | (((val>>32)&0xFF)<<24) | \
(((val>>24)&0xFF)<<32) | (((val>>16)&0xFF)<<40) | (((val>>8)&0xFF)<<48) | ((val&0xFF)<<56))

View File

@ -23,17 +23,14 @@
#pragma warning(disable : 4035)
#ifdef WIN32
#pragma intrinsic(memcpy, memset, memcmp)
#endif
#ifndef USE_CUSTOM_MEMORY_FUNCTIONS
inline void STDCALL mcpy(void *pDest, const void *pSrc, size_t iLen)
#define mcpy memcpy
/*inline void STDCALL mcpy(void *pDest, const void *pSrc, size_t iLen)
{
memcpy(pDest, pSrc, iLen);
}
}*/
#endif

View File

@ -255,7 +255,7 @@ public:
if (gif_decode_frame(&gif, newFrame) == GIF_OK)
{
animationFrameCache[newFrame] = animationFrameData + (newFrame * (gif.width * gif.height * 4));
SSECopy (animationFrameCache[newFrame], gif.frame_image, gif.width * gif.height * 4);
memcpy(animationFrameCache[newFrame], gif.frame_image, gif.width * gif.height * 4);
}
lastDecodedFrame = newFrame;

View File

@ -440,11 +440,7 @@ void D3D10Shader::UpdateParams()
return;
}
if(App->SSE2Available())
SSECopy(outData, shaderConstantData.Array(), shaderConstantData.Num());
else
mcpy(outData, shaderConstantData.Array(), shaderConstantData.Num());
mcpy(outData, shaderConstantData.Array(), shaderConstantData.Num());
constantBuffer->Unmap();
}
}

View File

@ -614,12 +614,7 @@ void D3D10Texture::SetImage(void *lpData, GSImageFormat imageFormat, UINT pitch)
else
{
if(pitch == map.RowPitch)
{
if(App->SSE2Available())
SSECopy(map.pData, lpData, pitch*height);
else
mcpy(map.pData, lpData, pitch*height);
}
mcpy(map.pData, lpData, pitch*height);
else
{
UINT bestPitch = MIN(pitch, map.RowPitch);
@ -631,7 +626,7 @@ void D3D10Texture::SetImage(void *lpData, GSImageFormat imageFormat, UINT pitch)
LPBYTE curInput = ((LPBYTE)lpData) + (pitch*y);
LPBYTE curOutput = ((LPBYTE)map.pData) + (map.RowPitch*y);
SSECopy(curOutput, curInput, bestPitch);
mcpy(curOutput, curInput, bestPitch);
}
}
else

View File

@ -180,10 +180,7 @@ void D3D10VertexBuffer::FlushBuffers()
return;
}
if(App->SSE2Available())
SSECopy(outData, data->VertList.Array(), sizeof(Vect)*numVerts);
else
mcpy(outData, data->VertList.Array(), sizeof(Vect)*numVerts);
mcpy(outData, data->VertList.Array(), sizeof(Vect)*numVerts);
vertexBuffer->Unmap();
@ -197,11 +194,7 @@ void D3D10VertexBuffer::FlushBuffers()
return;
}
if(App->SSE2Available())
SSECopy(outData, data->NormalList.Array(), sizeof(Vect)*numVerts);
else
mcpy(outData, data->NormalList.Array(), sizeof(Vect)*numVerts);
mcpy(outData, data->NormalList.Array(), sizeof(Vect)*numVerts);
normalBuffer->Unmap();
}
@ -215,11 +208,7 @@ void D3D10VertexBuffer::FlushBuffers()
return;
}
if(App->SSE2Available())
SSECopy(outData, data->ColorList.Array(), sizeof(Vect)*numVerts);
else
mcpy(outData, data->ColorList.Array(), sizeof(Vect)*numVerts);
mcpy(outData, data->ColorList.Array(), sizeof(Vect)*numVerts);
colorBuffer->Unmap();
}
@ -233,11 +222,7 @@ void D3D10VertexBuffer::FlushBuffers()
return;
}
if(App->SSE2Available())
SSECopy(outData, data->TangentList.Array(), sizeof(Vect)*numVerts);
else
mcpy(outData, data->TangentList.Array(), sizeof(Vect)*numVerts);
mcpy(outData, data->TangentList.Array(), sizeof(Vect)*numVerts);
tangentBuffer->Unmap();
}
@ -257,11 +242,7 @@ void D3D10VertexBuffer::FlushBuffers()
return;
}
if(App->SSE2Available())
SSECopy(outData, textureVerts.Array(), sizeof(UVCoord)*numVerts);
else
mcpy(outData, textureVerts.Array(), sizeof(UVCoord)*numVerts);
mcpy(outData, textureVerts.Array(), sizeof(UVCoord)*numVerts);
buffer->Unmap();
}
}

View File

@ -395,7 +395,7 @@ bool MMDeviceAudioSource::GetNextBuffer(void **buffer, UINT *numFrames, QWORD *t
if (newInputBufferSize > inputBuffer.Num())
inputBuffer.SetSize(newInputBufferSize);
SSECopy(inputBuffer.Array()+inputBufferSize, captureBuffer, totalFloatsRead*sizeof(float));
mcpy(inputBuffer.Array()+inputBufferSize, captureBuffer, totalFloatsRead*sizeof(float));
inputBufferSize = newInputBufferSize;
mmCapture->ReleaseBuffer(numFramesRead);
@ -414,7 +414,7 @@ void MMDeviceAudioSource::ReleaseBuffer()
{
UINT sampleSizeFloats = sampleWindowSize*GetChannelCount();
if (inputBufferSize > sampleSizeFloats)
SSECopy(inputBuffer.Array(), inputBuffer.Array()+sampleSizeFloats, (inputBufferSize-sampleSizeFloats)*sizeof(float));
mcpy(inputBuffer.Array(), inputBuffer.Array()+sampleSizeFloats, (inputBufferSize-sampleSizeFloats)*sizeof(float));
inputBufferSize -= sampleSizeFloats;
}