win-capture: Modify 16bit to 32bit color conversion to use SSE
parent
e7eaa268e5
commit
ba4ac47ee3
|
@ -3,6 +3,7 @@
|
|||
#include <util/platform.h>
|
||||
#include <windows.h>
|
||||
#include <dxgi.h>
|
||||
#include <emmintrin.h>
|
||||
#include <ipc-util/pipe.h>
|
||||
#include "obfuscate.h"
|
||||
#include "graphics-hook-info.h"
|
||||
|
@ -865,24 +866,67 @@ static void copy_b5g6r5_tex(struct game_capture *gc, int cur_texture,
|
|||
uint32_t gc_pitch = gc->pitch;
|
||||
|
||||
for (uint32_t y = 0; y < gc_cy; y++) {
|
||||
register uint8_t *in = input + (gc_pitch * y);
|
||||
register uint8_t *end = in + (gc_cx * PIXEL_16BIT_SIZE);
|
||||
register uint8_t *out = data + (pitch * y);
|
||||
uint8_t *row = input + (gc_pitch * y);
|
||||
uint8_t *out = data + (pitch * y);
|
||||
|
||||
while (in < end) {
|
||||
register uint16_t in_pix = *(uint16_t*)in;
|
||||
register uint32_t out_pix = 0xFF000000;
|
||||
for (uint32_t x = 0; x < gc_cx; x += 8) {
|
||||
__m128i pixels_blue, pixels_green, pixels_red;
|
||||
__m128i pixels_result;
|
||||
__m128i *pixels_dest;
|
||||
|
||||
out_pix |= convert_5_to_8bit(in_pix);
|
||||
in_pix >>= 5;
|
||||
out_pix |= convert_6_to_8bit(in_pix) << 8;
|
||||
in_pix >>= 6;
|
||||
out_pix |= convert_5_to_8bit(in_pix) << 16;
|
||||
__m128i *pixels_src = (__m128i*)(row + x * sizeof(uint16_t));
|
||||
__m128i pixels = _mm_load_si128(pixels_src);
|
||||
|
||||
*(uint32_t*)out = out_pix;
|
||||
__m128i zero = _mm_setzero_si128();
|
||||
__m128i pixels_low = _mm_unpacklo_epi16(pixels, zero);
|
||||
__m128i pixels_high = _mm_unpackhi_epi16(pixels, zero);
|
||||
|
||||
in += PIXEL_16BIT_SIZE;
|
||||
out += PIXEL_32BIT_SIZE;
|
||||
__m128i blue_channel_mask = _mm_set1_epi32(0x0000001F);
|
||||
__m128i blue_offset = _mm_set1_epi32(0x00000003);
|
||||
__m128i green_channel_mask = _mm_set1_epi32(0x000007E0);
|
||||
__m128i green_offset = _mm_set1_epi32(0x00000008);
|
||||
__m128i red_channel_mask = _mm_set1_epi32(0x0000F800);
|
||||
__m128i red_offset = _mm_set1_epi32(0x00000300);
|
||||
|
||||
pixels_blue = _mm_and_si128(pixels_low, blue_channel_mask);
|
||||
pixels_blue = _mm_slli_epi32(pixels_blue, 3);
|
||||
pixels_blue = _mm_add_epi32(pixels_blue, blue_offset);
|
||||
|
||||
pixels_green = _mm_and_si128(pixels_low, green_channel_mask);
|
||||
pixels_green = _mm_add_epi32(pixels_green, green_offset);
|
||||
pixels_green = _mm_slli_epi32(pixels_green, 5);
|
||||
|
||||
pixels_red = _mm_and_si128(pixels_low, red_channel_mask);
|
||||
pixels_red = _mm_add_epi32(pixels_red, red_offset);
|
||||
pixels_red = _mm_slli_epi32(pixels_red, 8);
|
||||
|
||||
pixels_result = _mm_set1_epi32(0xFF000000);
|
||||
pixels_result = _mm_or_si128(pixels_result, pixels_blue);
|
||||
pixels_result = _mm_or_si128(pixels_result, pixels_green);
|
||||
pixels_result = _mm_or_si128(pixels_result, pixels_red);
|
||||
|
||||
pixels_dest = (__m128i*)(out + x * sizeof(uint32_t));
|
||||
_mm_store_si128(pixels_dest, pixels_result);
|
||||
|
||||
pixels_blue = _mm_and_si128(pixels_high, blue_channel_mask);
|
||||
pixels_blue = _mm_slli_epi32(pixels_blue, 3);
|
||||
pixels_blue = _mm_add_epi32(pixels_blue, blue_offset);
|
||||
|
||||
pixels_green = _mm_and_si128(pixels_high, green_channel_mask);
|
||||
pixels_green = _mm_add_epi32(pixels_green, green_offset);
|
||||
pixels_green = _mm_slli_epi32(pixels_green, 5);
|
||||
|
||||
pixels_red = _mm_and_si128(pixels_high, red_channel_mask);
|
||||
pixels_red = _mm_add_epi32(pixels_red, red_offset);
|
||||
pixels_red = _mm_slli_epi32(pixels_red, 8);
|
||||
|
||||
pixels_result = _mm_set1_epi32(0xFF000000);
|
||||
pixels_result = _mm_or_si128(pixels_result, pixels_blue);
|
||||
pixels_result = _mm_or_si128(pixels_result, pixels_green);
|
||||
pixels_result = _mm_or_si128(pixels_result, pixels_red);
|
||||
|
||||
pixels_dest = (__m128i*)(out + (x + 4) * sizeof(uint32_t));
|
||||
_mm_store_si128(pixels_dest, pixels_result);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -896,26 +940,80 @@ static void copy_b5g5r5a1_tex(struct game_capture *gc, int cur_texture,
|
|||
uint32_t gc_pitch = gc->pitch;
|
||||
|
||||
for (uint32_t y = 0; y < gc_cy; y++) {
|
||||
register uint8_t *in = input + (gc_pitch * y);
|
||||
register uint8_t *end = in + (gc_cx * PIXEL_16BIT_SIZE);
|
||||
register uint8_t *out = data + (pitch * y);
|
||||
uint8_t *row = input + (gc_pitch * y);
|
||||
uint8_t *out = data + (pitch * y);
|
||||
|
||||
while (in < end) {
|
||||
register uint16_t in_pix = *(uint16_t*)in;
|
||||
register uint32_t out_pix = 0;
|
||||
for (uint32_t x = 0; x < gc_cx; x += 8) {
|
||||
__m128i pixels_blue, pixels_green, pixels_red, pixels_alpha;
|
||||
__m128i pixels_result;
|
||||
__m128i *pixels_dest;
|
||||
|
||||
out_pix |= convert_5_to_8bit(in_pix);
|
||||
in_pix >>= 5;
|
||||
out_pix |= convert_5_to_8bit(in_pix) << 8;
|
||||
in_pix >>= 5;
|
||||
out_pix |= convert_5_to_8bit(in_pix) << 16;
|
||||
in_pix >>= 5;
|
||||
out_pix |= (in_pix * 255) << 24;
|
||||
__m128i *pixels_src = (__m128i*)(row + x * sizeof(uint16_t));
|
||||
__m128i pixels = _mm_load_si128(pixels_src);
|
||||
|
||||
*(uint32_t*)out = out_pix;
|
||||
__m128i zero = _mm_setzero_si128();
|
||||
__m128i pixels_low = _mm_unpacklo_epi16(pixels, zero);
|
||||
__m128i pixels_high = _mm_unpackhi_epi16(pixels, zero);
|
||||
|
||||
in += PIXEL_16BIT_SIZE;
|
||||
out += PIXEL_32BIT_SIZE;
|
||||
__m128i blue_channel_mask = _mm_set1_epi32(0x0000001F);
|
||||
__m128i blue_offset = _mm_set1_epi32(0x00000003);
|
||||
__m128i green_channel_mask = _mm_set1_epi32(0x000003E0);
|
||||
__m128i green_offset = _mm_set1_epi32(0x000000C);
|
||||
__m128i red_channel_mask = _mm_set1_epi32(0x00007C00);
|
||||
__m128i red_offset = _mm_set1_epi32(0x00000180);
|
||||
__m128i alpha_channel_mask = _mm_set1_epi32(0x00008000);
|
||||
__m128i alpha_offset = _mm_set1_epi32(0x00000001);
|
||||
__m128i alpha_mask32 = _mm_set1_epi32(0xFF000000);
|
||||
|
||||
pixels_blue = _mm_and_si128(pixels_low, blue_channel_mask);
|
||||
pixels_blue = _mm_slli_epi32(pixels_blue, 3);
|
||||
pixels_blue = _mm_add_epi32(pixels_blue, blue_offset);
|
||||
|
||||
pixels_green = _mm_and_si128(pixels_low, green_channel_mask);
|
||||
pixels_green = _mm_add_epi32(pixels_green, green_offset);
|
||||
pixels_green = _mm_slli_epi32(pixels_green, 6);
|
||||
|
||||
pixels_red = _mm_and_si128(pixels_low, red_channel_mask);
|
||||
pixels_red = _mm_add_epi32(pixels_red, red_offset);
|
||||
pixels_red = _mm_slli_epi32(pixels_red, 9);
|
||||
|
||||
pixels_alpha = _mm_and_si128(pixels_low, alpha_channel_mask);
|
||||
pixels_alpha = _mm_srli_epi32(pixels_alpha, 15);
|
||||
pixels_alpha = _mm_sub_epi32(pixels_alpha, alpha_offset);
|
||||
pixels_alpha = _mm_andnot_si128(pixels_alpha, alpha_mask32);
|
||||
|
||||
pixels_result = pixels_red;
|
||||
pixels_result = _mm_or_si128(pixels_result, pixels_alpha);
|
||||
pixels_result = _mm_or_si128(pixels_result, pixels_blue);
|
||||
pixels_result = _mm_or_si128(pixels_result, pixels_green);
|
||||
|
||||
pixels_dest = (__m128i*)(out + x * sizeof(uint32_t));
|
||||
_mm_store_si128(pixels_dest, pixels_result);
|
||||
|
||||
pixels_blue = _mm_and_si128(pixels_high, blue_channel_mask);
|
||||
pixels_blue = _mm_slli_epi32(pixels_blue, 3);
|
||||
pixels_blue = _mm_add_epi32(pixels_blue, blue_offset);
|
||||
|
||||
pixels_green = _mm_and_si128(pixels_high, green_channel_mask);
|
||||
pixels_green = _mm_add_epi32(pixels_green, green_offset);
|
||||
pixels_green = _mm_slli_epi32(pixels_green, 6);
|
||||
|
||||
pixels_red = _mm_and_si128(pixels_high, red_channel_mask);
|
||||
pixels_red = _mm_add_epi32(pixels_red, red_offset);
|
||||
pixels_red = _mm_slli_epi32(pixels_red, 9);
|
||||
|
||||
pixels_alpha = _mm_and_si128(pixels_high, alpha_channel_mask);
|
||||
pixels_alpha = _mm_srli_epi32(pixels_alpha, 15);
|
||||
pixels_alpha = _mm_sub_epi32(pixels_alpha, alpha_offset);
|
||||
pixels_alpha = _mm_andnot_si128(pixels_alpha, alpha_mask32);
|
||||
|
||||
pixels_result = pixels_red;
|
||||
pixels_result = _mm_or_si128(pixels_result, pixels_alpha);
|
||||
pixels_result = _mm_or_si128(pixels_result, pixels_blue);
|
||||
pixels_result = _mm_or_si128(pixels_result, pixels_green);
|
||||
|
||||
pixels_dest = (__m128i*)(out + (x + 4) * sizeof(uint32_t));
|
||||
_mm_store_si128(pixels_dest, pixels_result);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue