141 lines
5.4 KiB
C++
141 lines
5.4 KiB
C++
/********************************************************************************
 Copyright (C) 2012 Hugh Bailey <obs.jim@gmail.com>

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
********************************************************************************/
|
|
|
|
#include "Main.h"
|
|
|
|
|
|
// Converts a packed image with 4 bytes per pixel into three separate planes,
// keeping one luma sample per pixel and one averaged chroma sample pair per
// 2x2 block of pixels (4:4:4 -> 4:2:0).
//
// Per input pixel (little-endian byte order within the 32-bit pixel):
//   byte 0 -> averaged over the 2x2 block and written to output[1] (U plane)
//   byte 1 -> copied to output[0] (luma plane)
//   byte 2 -> averaged over the 2x2 block and written to output[2] (V plane)
//   byte 3 -> ignored
//
// Parameters:
//   input           - packed source image; row y starts at input + y*pitch
//   width           - image width in pixels; also the luma plane row stride,
//                     and width>>1 is the chroma plane row stride
//   pitch           - source row stride in bytes
//   height          - not used by this function
//   startY, endY    - rows [startY, endY) are processed two at a time, so a
//                     caller can hand different row ranges to different calls
//   output          - array of three plane pointers: luma, U, V
//   bSSE2Available  - selects the SSE2 path (4 pixels x 2 rows per step)
//                     instead of the scalar path (2 pixels x 2 rows per step)
//
// Chroma averages are truncated ((a+b+c+d)>>2), not rounded.
//
// NOTE(review): the loops always touch row y+1 and step x by 4 (SSE2) or 2
// (scalar), so this presumably expects startY to be even, an even number of
// rows, and width to be a multiple of 4 (SSE2) or 2 (scalar) - confirm with
// callers.
// NOTE(review): _mm_load_si128 requires 16-byte-aligned addresses, so the SSE2
// path presumably expects input and pitch to be 16-byte aligned - confirm.
void Convert444to420(LPBYTE input, int width, int pitch, int height, int startY, int endY, LPBYTE *output, bool bSSE2Available)
{
    LPBYTE lumPlane = output[0];
    LPBYTE uPlane   = output[1];
    LPBYTE vPlane   = output[2];
    int chrPitch    = width>>1;

    if(bSSE2Available)
    {
        //selects byte 1 of every 32-bit pixel
        __m128i lumMask = _mm_set1_epi32(0x0000FF00);
        //selects bytes 0 and 2 of every 32-bit pixel, each widened to a 16-bit lane
        __m128i uvMask  = _mm_set1_epi16(0x00FF);

        for(int y=startY; y<endY; y+=2)
        {
            int yPos    = y*pitch;
            int chrYPos = ((y>>1)*chrPitch);
            int lumYPos = y*width;

            for(int x=0; x<width; x+=4)
            {
                LPBYTE lpImagePos = input+yPos+(x*4);
                int chrPos  = chrYPos + (x>>1);
                int lumPos0 = lumYPos + x;
                int lumPos1 = lumPos0+width;

                //four pixels from row y and the four pixels directly below them
                __m128i line1 = _mm_load_si128((__m128i*)lpImagePos);
                __m128i line2 = _mm_load_si128((__m128i*)(lpImagePos+pitch));

                //pack lum vals
                {
                    //after the two packs, bytes 0-3 hold row y's luma and bytes 4-7 hold row y+1's
                    __m128i packVal = _mm_packs_epi32(_mm_srli_si128(_mm_and_si128(line1, lumMask), 1), _mm_srli_si128(_mm_and_si128(line2, lumMask), 1));
                    packVal = _mm_packus_epi16(packVal, packVal);

                    //extract the two dwords with intrinsics rather than through the
                    //MSVC-specific m128i_u32 union member
                    *(LPUINT)(lumPlane+lumPos0) = DWORD(_mm_cvtsi128_si32(packVal));
                    *(LPUINT)(lumPlane+lumPos1) = DWORD(_mm_cvtsi128_si32(_mm_srli_si128(packVal, 4)));
                }

                //do average, pack UV vals
                {
                    //vertical sum; every 16-bit lane holds at most 255+255, so the
                    //64-bit add never carries between lanes
                    __m128i addVal = _mm_add_epi64(_mm_and_si128(line1, uvMask), _mm_and_si128(line2, uvMask));
                    //swap neighbouring pixels and add to get the 2x2 sum, then divide by 4
                    __m128i avgVal = _mm_srai_epi16(_mm_add_epi64(addVal, _mm_shuffle_epi32(addVal, _MM_SHUFFLE(2, 3, 0, 1))), 2);
                    //bring the second pixel pair's result next to the first: words become U0,V0,U1,V1
                    avgVal = _mm_shuffle_epi32(avgVal, _MM_SHUFFLE(3, 1, 2, 0));
                    //regroup the low words as U0,U1,V0,V1
                    avgVal = _mm_shufflelo_epi16(avgVal, _MM_SHUFFLE(3, 1, 2, 0));
                    //narrow to bytes: the low dword is now U0,U1,V0,V1
                    avgVal = _mm_packus_epi16(avgVal, avgVal);

                    DWORD packedVals = DWORD(_mm_cvtsi128_si32(avgVal));

                    *(LPWORD)(uPlane+chrPos) = WORD(packedVals);
                    *(LPWORD)(vPlane+chrPos) = WORD(packedVals>>16);
                }
            }
        }
    }
    else
    {
#ifdef _WIN64
        //64-bit build: read two horizontally adjacent pixels with a single 64-bit load
        for(int y=startY; y<endY; y+=2)
        {
            int yPos    = y*pitch;
            int chrYPos = ((y>>1)*chrPitch);
            int lumYPos = y*width;

            for(int x=0; x<width; x+=2)
            {
                LPBYTE lpImagePos = input+yPos+(x*4);
                int chrPos  = chrYPos + (x>>1);
                int lumPos0 = lumYPos + x;
                int lumPos1 = lumPos0+width;

                QWORD pixel_0x0 = *(QWORD*)(lpImagePos);
                QWORD pixel_1x0 = *(QWORD*)(lpImagePos+pitch);

                //byte 1 of the first pixel goes to the low byte, byte 1 of the second pixel to the high byte
                *(LPWORD)(lumPlane+lumPos0) = WORD(((pixel_0x0>>8)&0xFF) | ((pixel_0x0>>32)&0xFF00));
                *(LPWORD)(lumPlane+lumPos1) = WORD(((pixel_1x0>>8)&0xFF) | ((pixel_1x0>>32)&0xFF00));

                //sum bytes 0 and 2 of all four pixels at once; each sum is at most
                //4*255 and stays within its own 16-bit half
                DWORD pixAvg = (( pixel_0x0      & 0xFF00FF) +
                                ((pixel_0x0>>32) & 0xFF00FF) +
                                ( pixel_1x0      & 0xFF00FF) +
                                ((pixel_1x0>>32) & 0xFF00FF) ) >> 2;

                uPlane[chrPos] = BYTE(pixAvg);
                vPlane[chrPos] = BYTE(pixAvg>>16);
            }
        }
#else
        //32-bit build: read each of the four pixels of the 2x2 block separately
        for(int y=startY; y<endY; y+=2)
        {
            int yPos    = y*pitch;
            int chrYPos = ((y>>1)*chrPitch);
            int lumYPos = y*width;

            for(int x=0; x<width; x+=2)
            {
                LPBYTE lpImagePos = input+yPos+(x*4);
                int chrPos  = chrYPos + (x>>1);
                int lumPos0 = lumYPos + x;
                int lumPos1 = lumPos0+width;

                DWORD pixel_0x0 = *(DWORD*)(lpImagePos);
                DWORD pixel_0x1 = *(DWORD*)(lpImagePos+4);
                DWORD pixel_1x0 = *(DWORD*)(lpImagePos+pitch);
                DWORD pixel_1x1 = *(DWORD*)(lpImagePos+pitch+4);

                //byte 1 of the left pixel goes to the low byte; byte 1 of the right pixel is already in high-byte position
                *(LPWORD)(lumPlane+lumPos0) = WORD(((pixel_0x0>>8)&0xFF) | ((pixel_0x1)&0xFF00));
                *(LPWORD)(lumPlane+lumPos1) = WORD(((pixel_1x0>>8)&0xFF) | ((pixel_1x1)&0xFF00));

                //sum bytes 0 and 2 of all four pixels at once; each sum is at most
                //4*255 and stays within its own 16-bit half
                DWORD pixAvg = ((pixel_0x0 & 0xFF00FF) +
                                (pixel_0x1 & 0xFF00FF) +
                                (pixel_1x0 & 0xFF00FF) +
                                (pixel_1x1 & 0xFF00FF) ) >> 2;

                uPlane[chrPos] = BYTE(pixAvg);
                vPlane[chrPos] = BYTE(pixAvg>>16);
            }
        }
#endif
    }
}
|
|
|