win-capture: Modify 16bit to 32bit color conversion to use SSE

master
Bl00drav3n 2015-06-04 19:20:38 +02:00
parent e7eaa268e5
commit ba4ac47ee3
1 changed file with 128 additions and 30 deletions

View File

@ -3,6 +3,7 @@
#include <util/platform.h>
#include <windows.h>
#include <dxgi.h>
#include <emmintrin.h>
#include <ipc-util/pipe.h>
#include "obfuscate.h"
#include "graphics-hook-info.h"
@ -865,24 +866,67 @@ static void copy_b5g6r5_tex(struct game_capture *gc, int cur_texture,
uint32_t gc_pitch = gc->pitch;
for (uint32_t y = 0; y < gc_cy; y++) {
register uint8_t *in = input + (gc_pitch * y);
register uint8_t *end = in + (gc_cx * PIXEL_16BIT_SIZE);
register uint8_t *out = data + (pitch * y);
uint8_t *row = input + (gc_pitch * y);
uint8_t *out = data + (pitch * y);
while (in < end) {
register uint16_t in_pix = *(uint16_t*)in;
register uint32_t out_pix = 0xFF000000;
for (uint32_t x = 0; x < gc_cx; x += 8) {
__m128i pixels_blue, pixels_green, pixels_red;
__m128i pixels_result;
__m128i *pixels_dest;
out_pix |= convert_5_to_8bit(in_pix);
in_pix >>= 5;
out_pix |= convert_6_to_8bit(in_pix) << 8;
in_pix >>= 6;
out_pix |= convert_5_to_8bit(in_pix) << 16;
__m128i *pixels_src = (__m128i*)(row + x * sizeof(uint16_t));
__m128i pixels = _mm_load_si128(pixels_src);
*(uint32_t*)out = out_pix;
__m128i zero = _mm_setzero_si128();
__m128i pixels_low = _mm_unpacklo_epi16(pixels, zero);
__m128i pixels_high = _mm_unpackhi_epi16(pixels, zero);
in += PIXEL_16BIT_SIZE;
out += PIXEL_32BIT_SIZE;
__m128i blue_channel_mask = _mm_set1_epi32(0x0000001F);
__m128i blue_offset = _mm_set1_epi32(0x00000003);
__m128i green_channel_mask = _mm_set1_epi32(0x000007E0);
__m128i green_offset = _mm_set1_epi32(0x00000008);
__m128i red_channel_mask = _mm_set1_epi32(0x0000F800);
__m128i red_offset = _mm_set1_epi32(0x00000300);
pixels_blue = _mm_and_si128(pixels_low, blue_channel_mask);
pixels_blue = _mm_slli_epi32(pixels_blue, 3);
pixels_blue = _mm_add_epi32(pixels_blue, blue_offset);
pixels_green = _mm_and_si128(pixels_low, green_channel_mask);
pixels_green = _mm_add_epi32(pixels_green, green_offset);
pixels_green = _mm_slli_epi32(pixels_green, 5);
pixels_red = _mm_and_si128(pixels_low, red_channel_mask);
pixels_red = _mm_add_epi32(pixels_red, red_offset);
pixels_red = _mm_slli_epi32(pixels_red, 8);
pixels_result = _mm_set1_epi32(0xFF000000);
pixels_result = _mm_or_si128(pixels_result, pixels_blue);
pixels_result = _mm_or_si128(pixels_result, pixels_green);
pixels_result = _mm_or_si128(pixels_result, pixels_red);
pixels_dest = (__m128i*)(out + x * sizeof(uint32_t));
_mm_store_si128(pixels_dest, pixels_result);
pixels_blue = _mm_and_si128(pixels_high, blue_channel_mask);
pixels_blue = _mm_slli_epi32(pixels_blue, 3);
pixels_blue = _mm_add_epi32(pixels_blue, blue_offset);
pixels_green = _mm_and_si128(pixels_high, green_channel_mask);
pixels_green = _mm_add_epi32(pixels_green, green_offset);
pixels_green = _mm_slli_epi32(pixels_green, 5);
pixels_red = _mm_and_si128(pixels_high, red_channel_mask);
pixels_red = _mm_add_epi32(pixels_red, red_offset);
pixels_red = _mm_slli_epi32(pixels_red, 8);
pixels_result = _mm_set1_epi32(0xFF000000);
pixels_result = _mm_or_si128(pixels_result, pixels_blue);
pixels_result = _mm_or_si128(pixels_result, pixels_green);
pixels_result = _mm_or_si128(pixels_result, pixels_red);
pixels_dest = (__m128i*)(out + (x + 4) * sizeof(uint32_t));
_mm_store_si128(pixels_dest, pixels_result);
}
}
}
@ -896,26 +940,80 @@ static void copy_b5g5r5a1_tex(struct game_capture *gc, int cur_texture,
uint32_t gc_pitch = gc->pitch;
for (uint32_t y = 0; y < gc_cy; y++) {
register uint8_t *in = input + (gc_pitch * y);
register uint8_t *end = in + (gc_cx * PIXEL_16BIT_SIZE);
register uint8_t *out = data + (pitch * y);
uint8_t *row = input + (gc_pitch * y);
uint8_t *out = data + (pitch * y);
while (in < end) {
register uint16_t in_pix = *(uint16_t*)in;
register uint32_t out_pix = 0;
for (uint32_t x = 0; x < gc_cx; x += 8) {
__m128i pixels_blue, pixels_green, pixels_red, pixels_alpha;
__m128i pixels_result;
__m128i *pixels_dest;
out_pix |= convert_5_to_8bit(in_pix);
in_pix >>= 5;
out_pix |= convert_5_to_8bit(in_pix) << 8;
in_pix >>= 5;
out_pix |= convert_5_to_8bit(in_pix) << 16;
in_pix >>= 5;
out_pix |= (in_pix * 255) << 24;
__m128i *pixels_src = (__m128i*)(row + x * sizeof(uint16_t));
__m128i pixels = _mm_load_si128(pixels_src);
*(uint32_t*)out = out_pix;
__m128i zero = _mm_setzero_si128();
__m128i pixels_low = _mm_unpacklo_epi16(pixels, zero);
__m128i pixels_high = _mm_unpackhi_epi16(pixels, zero);
in += PIXEL_16BIT_SIZE;
out += PIXEL_32BIT_SIZE;
__m128i blue_channel_mask = _mm_set1_epi32(0x0000001F);
__m128i blue_offset = _mm_set1_epi32(0x00000003);
__m128i green_channel_mask = _mm_set1_epi32(0x000003E0);
__m128i green_offset = _mm_set1_epi32(0x000000C);
__m128i red_channel_mask = _mm_set1_epi32(0x00007C00);
__m128i red_offset = _mm_set1_epi32(0x00000180);
__m128i alpha_channel_mask = _mm_set1_epi32(0x00008000);
__m128i alpha_offset = _mm_set1_epi32(0x00000001);
__m128i alpha_mask32 = _mm_set1_epi32(0xFF000000);
pixels_blue = _mm_and_si128(pixels_low, blue_channel_mask);
pixels_blue = _mm_slli_epi32(pixels_blue, 3);
pixels_blue = _mm_add_epi32(pixels_blue, blue_offset);
pixels_green = _mm_and_si128(pixels_low, green_channel_mask);
pixels_green = _mm_add_epi32(pixels_green, green_offset);
pixels_green = _mm_slli_epi32(pixels_green, 6);
pixels_red = _mm_and_si128(pixels_low, red_channel_mask);
pixels_red = _mm_add_epi32(pixels_red, red_offset);
pixels_red = _mm_slli_epi32(pixels_red, 9);
pixels_alpha = _mm_and_si128(pixels_low, alpha_channel_mask);
pixels_alpha = _mm_srli_epi32(pixels_alpha, 15);
pixels_alpha = _mm_sub_epi32(pixels_alpha, alpha_offset);
pixels_alpha = _mm_andnot_si128(pixels_alpha, alpha_mask32);
pixels_result = pixels_red;
pixels_result = _mm_or_si128(pixels_result, pixels_alpha);
pixels_result = _mm_or_si128(pixels_result, pixels_blue);
pixels_result = _mm_or_si128(pixels_result, pixels_green);
pixels_dest = (__m128i*)(out + x * sizeof(uint32_t));
_mm_store_si128(pixels_dest, pixels_result);
pixels_blue = _mm_and_si128(pixels_high, blue_channel_mask);
pixels_blue = _mm_slli_epi32(pixels_blue, 3);
pixels_blue = _mm_add_epi32(pixels_blue, blue_offset);
pixels_green = _mm_and_si128(pixels_high, green_channel_mask);
pixels_green = _mm_add_epi32(pixels_green, green_offset);
pixels_green = _mm_slli_epi32(pixels_green, 6);
pixels_red = _mm_and_si128(pixels_high, red_channel_mask);
pixels_red = _mm_add_epi32(pixels_red, red_offset);
pixels_red = _mm_slli_epi32(pixels_red, 9);
pixels_alpha = _mm_and_si128(pixels_high, alpha_channel_mask);
pixels_alpha = _mm_srli_epi32(pixels_alpha, 15);
pixels_alpha = _mm_sub_epi32(pixels_alpha, alpha_offset);
pixels_alpha = _mm_andnot_si128(pixels_alpha, alpha_mask32);
pixels_result = pixels_red;
pixels_result = _mm_or_si128(pixels_result, pixels_alpha);
pixels_result = _mm_or_si128(pixels_result, pixels_blue);
pixels_result = _mm_or_si128(pixels_result, pixels_green);
pixels_dest = (__m128i*)(out + (x + 4) * sizeof(uint32_t));
_mm_store_si128(pixels_dest, pixels_result);
}
}
}