libobs: Rework RGB to YUV conversion
RGB to YUV conversion was previously baked into every scale shader, but this work has been moved to the YUV packing shaders. The scale shaders now write RGBA instead. In the case where base and output resolutions are identical, the render texture is forwarded directly to the YUV pack step, skipping an entire fullscreen pass. Intel GPA, SetStablePowerState, Intel HD Graphics 530, NV12 1920x1080, Before: RGBA -> UYVX: ~321 us UYVX -> Y: ~480 us UYVX -> UV: ~127 us 1920x1080, After: [forward render texture] RGBA -> Y: ~487 us RGBA -> UV: ~131 us 1920x1080 -> 1280x720, Before: RGBA -> UYVX: ~268 us UYVX -> Y: ~209 us UYVX -> UV: ~57 us 1920x1080 -> 1280x720, After: RGBA -> RGBA (rescale): ~268 us RGBA -> Y: ~210 us RGBA -> UV: ~58 us
This commit is contained in:
parent
e5b004fd48
commit
2656bf0a90
@ -6,7 +6,6 @@
|
||||
|
||||
uniform float4x4 ViewProj;
|
||||
uniform texture2d image;
|
||||
uniform float4x4 color_matrix;
|
||||
uniform float2 base_dimension_i;
|
||||
uniform float undistort_factor = 1.0;
|
||||
|
||||
@ -138,13 +137,6 @@ float4 PSDrawBicubicRGBADivide(VertData v_in) : TARGET
|
||||
return float4(rgba.rgb * multiplier, alpha);
|
||||
}
|
||||
|
||||
float4 PSDrawBicubicMatrix(VertData v_in) : TARGET
|
||||
{
|
||||
float3 rgb = DrawBicubic(v_in, false).rgb;
|
||||
float3 yuv = mul(float4(saturate(rgb), 1.0), color_matrix).xyz;
|
||||
return float4(yuv, 1.0);
|
||||
}
|
||||
|
||||
technique Draw
|
||||
{
|
||||
pass
|
||||
@ -171,12 +163,3 @@ technique DrawUndistort
|
||||
pixel_shader = PSDrawBicubicRGBA(v_in, true);
|
||||
}
|
||||
}
|
||||
|
||||
technique DrawMatrix
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(v_in);
|
||||
pixel_shader = PSDrawBicubicMatrix(v_in);
|
||||
}
|
||||
}
|
||||
|
@ -5,7 +5,6 @@
|
||||
|
||||
uniform float4x4 ViewProj;
|
||||
uniform texture2d image;
|
||||
uniform float4x4 color_matrix;
|
||||
|
||||
sampler_state textureSampler {
|
||||
Filter = Linear;
|
||||
@ -66,13 +65,6 @@ float4 PSDrawLowresBilinearRGBADivide(VertData v_in) : TARGET
|
||||
return float4(rgba.rgb * multiplier, alpha);
|
||||
}
|
||||
|
||||
float4 PSDrawLowresBilinearMatrix(VertData v_in) : TARGET
|
||||
{
|
||||
float3 rgb = DrawLowresBilinear(v_in).rgb;
|
||||
float3 yuv = mul(float4(saturate(rgb), 1.0), color_matrix).xyz;
|
||||
return float4(yuv, 1.0);
|
||||
}
|
||||
|
||||
technique Draw
|
||||
{
|
||||
pass
|
||||
@ -91,12 +83,3 @@ technique DrawAlphaDivide
|
||||
}
|
||||
}
|
||||
|
||||
technique DrawMatrix
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(v_in);
|
||||
pixel_shader = PSDrawLowresBilinearMatrix(v_in);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
uniform float4x4 ViewProj;
|
||||
uniform float4x4 color_matrix;
|
||||
uniform texture2d image;
|
||||
|
||||
sampler_state def_sampler {
|
||||
@ -34,13 +33,6 @@ float4 PSDrawAlphaDivide(VertInOut vert_in) : TARGET
|
||||
return float4(rgba.rgb * multiplier, alpha);
|
||||
}
|
||||
|
||||
float4 PSDrawMatrix(VertInOut vert_in) : TARGET
|
||||
{
|
||||
float3 rgb = image.Sample(def_sampler, vert_in.uv).rgb;
|
||||
float3 yuv = mul(float4(rgb, 1.0), color_matrix).xyz;
|
||||
return float4(yuv, 1.0);
|
||||
}
|
||||
|
||||
technique Draw
|
||||
{
|
||||
pass
|
||||
@ -58,12 +50,3 @@ technique DrawAlphaDivide
|
||||
pixel_shader = PSDrawAlphaDivide(vert_in);
|
||||
}
|
||||
}
|
||||
|
||||
technique DrawMatrix
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(vert_in);
|
||||
pixel_shader = PSDrawMatrix(vert_in);
|
||||
}
|
||||
}
|
||||
|
@ -44,6 +44,10 @@ uniform float4x4 color_matrix;
|
||||
uniform float3 color_range_min = {0.0, 0.0, 0.0};
|
||||
uniform float3 color_range_max = {1.0, 1.0, 1.0};
|
||||
|
||||
uniform float4 color_vec_y;
|
||||
uniform float4 color_vec_u;
|
||||
uniform float4 color_vec_v;
|
||||
|
||||
uniform texture2d image;
|
||||
|
||||
sampler_state def_sampler {
|
||||
@ -52,12 +56,33 @@ sampler_state def_sampler {
|
||||
AddressV = Clamp;
|
||||
};
|
||||
|
||||
struct VertInOut {
|
||||
struct FragPos {
|
||||
float4 pos : POSITION;
|
||||
};
|
||||
|
||||
struct VertTexPos {
|
||||
float2 uv : TEXCOORD0;
|
||||
float4 pos : POSITION;
|
||||
};
|
||||
|
||||
struct FragTex {
|
||||
float2 uv : TEXCOORD0;
|
||||
};
|
||||
|
||||
VertInOut VSDefault(uint id : VERTEXID)
|
||||
FragPos VSPos(uint id : VERTEXID)
|
||||
{
|
||||
float idHigh = float(id >> 1);
|
||||
float idLow = float(id & uint(1));
|
||||
|
||||
float x = idHigh * 4.0 - 1.0;
|
||||
float y = idLow * 4.0 - 1.0;
|
||||
|
||||
FragPos vert_out;
|
||||
vert_out.pos = float4(x, y, 0.0, 1.0);
|
||||
return vert_out;
|
||||
}
|
||||
|
||||
VertTexPos VSPosTex(uint id : VERTEXID)
|
||||
{
|
||||
float idHigh = float(id >> 1);
|
||||
float idLow = float(id & uint(1));
|
||||
@ -68,24 +93,22 @@ VertInOut VSDefault(uint id : VERTEXID)
|
||||
float u = idHigh * 2.0;
|
||||
float v = obs_glsl_compile ? (idLow * 2.0) : (1.0 - idLow * 2.0);
|
||||
|
||||
VertInOut vert_out;
|
||||
vert_out.pos = float4(x, y, 0.0, 1.0);
|
||||
VertTexPos vert_out;
|
||||
vert_out.uv = float2(u, v);
|
||||
vert_out.pos = float4(x, y, 0.0, 1.0);
|
||||
return vert_out;
|
||||
}
|
||||
|
||||
/* used to prevent internal GPU precision issues with fmod in particular */
|
||||
#define PRECISION_OFFSET 0.2
|
||||
|
||||
float4 PSNV12(VertInOut vert_in) : TARGET
|
||||
float4 PSNV12(FragTex frag_in) : TARGET
|
||||
{
|
||||
float v_mul = floor(vert_in.uv.y * input_height);
|
||||
float v_mul = floor(frag_in.uv.y * input_height);
|
||||
|
||||
float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
|
||||
float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0;
|
||||
byte_offset += PRECISION_OFFSET;
|
||||
|
||||
float2 sample_pos[4];
|
||||
|
||||
if (byte_offset < u_plane_offset) {
|
||||
#ifdef DEBUGGING
|
||||
return float4(1.0, 1.0, 1.0, 1.0);
|
||||
@ -98,19 +121,24 @@ float4 PSNV12(VertInOut vert_in) : TARGET
|
||||
lum_u += width_i * 0.5;
|
||||
lum_v += height_i * 0.5;
|
||||
|
||||
sample_pos[0] = float2(lum_u, lum_v);
|
||||
sample_pos[1] = float2(lum_u += width_i, lum_v);
|
||||
sample_pos[2] = float2(lum_u += width_i, lum_v);
|
||||
sample_pos[3] = float2(lum_u + width_i, lum_v);
|
||||
float2 sample_pos0 = float2(lum_u, lum_v);
|
||||
float2 sample_pos1 = float2(lum_u += width_i, lum_v);
|
||||
float2 sample_pos2 = float2(lum_u += width_i, lum_v);
|
||||
float2 sample_pos3 = float2(lum_u + width_i, lum_v);
|
||||
|
||||
float4x4 out_val = float4x4(
|
||||
image.Sample(def_sampler, sample_pos[0]),
|
||||
image.Sample(def_sampler, sample_pos[1]),
|
||||
image.Sample(def_sampler, sample_pos[2]),
|
||||
image.Sample(def_sampler, sample_pos[3])
|
||||
float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
|
||||
float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
|
||||
float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb;
|
||||
float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb;
|
||||
|
||||
float4 out_val = float4(
|
||||
dot(color_vec_y.xyz, rgb0) + color_vec_y.w,
|
||||
dot(color_vec_y.xyz, rgb1) + color_vec_y.w,
|
||||
dot(color_vec_y.xyz, rgb2) + color_vec_y.w,
|
||||
dot(color_vec_y.xyz, rgb3) + color_vec_y.w
|
||||
);
|
||||
|
||||
return transpose(out_val)[1];
|
||||
return out_val;
|
||||
} else {
|
||||
#ifdef DEBUGGING
|
||||
return float4(0.5, 0.2, 0.5, 0.2);
|
||||
@ -127,34 +155,44 @@ float4 PSNV12(VertInOut vert_in) : TARGET
|
||||
ch_u += width_i;
|
||||
ch_v += height_i;
|
||||
|
||||
sample_pos[0] = float2(ch_u, ch_v);
|
||||
sample_pos[1] = float2(ch_u + width_i2, ch_v);
|
||||
|
||||
float2 sample_pos0 = float2(ch_u, ch_v);
|
||||
float2 sample_pos1 = float2(ch_u + width_i2, ch_v);
|
||||
|
||||
float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
|
||||
float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
|
||||
|
||||
return float4(
|
||||
image.Sample(def_sampler, sample_pos[0]).rb,
|
||||
image.Sample(def_sampler, sample_pos[1]).rb
|
||||
);
|
||||
dot(color_vec_u.xyz, rgb0) + color_vec_u.w,
|
||||
dot(color_vec_v.xyz, rgb0) + color_vec_v.w,
|
||||
dot(color_vec_u.xyz, rgb1) + color_vec_u.w,
|
||||
dot(color_vec_v.xyz, rgb1) + color_vec_v.w
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
float PSNV12_Y(VertInOut vert_in) : TARGET
|
||||
float PSNV12_Y(FragPos frag_in) : TARGET
|
||||
{
|
||||
return image.Sample(def_sampler, vert_in.uv.xy).y;
|
||||
float3 rgb = image.Load(int3(frag_in.pos.xy, 0)).rgb;
|
||||
float y = dot(color_vec_y.xyz, rgb) + color_vec_y.w;
|
||||
return y;
|
||||
}
|
||||
|
||||
float2 PSNV12_UV(VertInOut vert_in) : TARGET
|
||||
float2 PSNV12_UV(FragTex frag_in) : TARGET
|
||||
{
|
||||
return image.Sample(def_sampler, vert_in.uv.xy).xz;
|
||||
float3 rgb = image.Sample(def_sampler, frag_in.uv).rgb;
|
||||
float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w;
|
||||
float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w;
|
||||
return float2(u, v);
|
||||
}
|
||||
|
||||
float4 PSPlanar420(VertInOut vert_in) : TARGET
|
||||
float4 PSPlanar420(FragTex frag_in) : TARGET
|
||||
{
|
||||
float v_mul = floor(vert_in.uv.y * input_height);
|
||||
float v_mul = floor(frag_in.uv.y * input_height);
|
||||
|
||||
float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
|
||||
float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0;
|
||||
byte_offset += PRECISION_OFFSET;
|
||||
|
||||
float2 sample_pos[4];
|
||||
float2 sample_pos0, sample_pos1, sample_pos2, sample_pos3;
|
||||
|
||||
if (byte_offset < u_plane_offset) {
|
||||
#ifdef DEBUGGING
|
||||
@ -168,10 +206,10 @@ float4 PSPlanar420(VertInOut vert_in) : TARGET
|
||||
lum_u += width_i * 0.5;
|
||||
lum_v += height_i * 0.5;
|
||||
|
||||
sample_pos[0] = float2(lum_u, lum_v);
|
||||
sample_pos[1] = float2(lum_u += width_i, lum_v);
|
||||
sample_pos[2] = float2(lum_u += width_i, lum_v);
|
||||
sample_pos[3] = float2(lum_u + width_i, lum_v);
|
||||
sample_pos0 = float2(lum_u, lum_v);
|
||||
sample_pos1 = float2(lum_u += width_i, lum_v);
|
||||
sample_pos2 = float2(lum_u += width_i, lum_v);
|
||||
sample_pos3 = float2(lum_u + width_i, lum_v);
|
||||
|
||||
} else {
|
||||
#ifdef DEBUGGING
|
||||
@ -200,42 +238,46 @@ float4 PSPlanar420(VertInOut vert_in) : TARGET
|
||||
float ch_u_n = 0. + width_i;
|
||||
float ch_v_n = ch_v + height_i * 3;
|
||||
|
||||
sample_pos[0] = float2(ch_u, ch_v);
|
||||
sample_pos[1] = float2(ch_u += width_i2, ch_v);
|
||||
sample_pos0 = float2(ch_u, ch_v);
|
||||
sample_pos1 = float2(ch_u += width_i2, ch_v);
|
||||
|
||||
ch_u += width_i2;
|
||||
// check if ch_u overflowed the current source and chroma line
|
||||
if (ch_u > 1.0) {
|
||||
sample_pos[2] = float2(ch_u_n, ch_v_n);
|
||||
sample_pos[2] = float2(ch_u_n + width_i2, ch_v_n);
|
||||
sample_pos2 = float2(ch_u_n, ch_v_n);
|
||||
sample_pos2 = float2(ch_u_n + width_i2, ch_v_n);
|
||||
} else {
|
||||
sample_pos[2] = float2(ch_u, ch_v);
|
||||
sample_pos[3] = float2(ch_u + width_i2, ch_v);
|
||||
sample_pos2 = float2(ch_u, ch_v);
|
||||
sample_pos3 = float2(ch_u + width_i2, ch_v);
|
||||
}
|
||||
}
|
||||
|
||||
float4x4 out_val = float4x4(
|
||||
image.Sample(def_sampler, sample_pos[0]),
|
||||
image.Sample(def_sampler, sample_pos[1]),
|
||||
image.Sample(def_sampler, sample_pos[2]),
|
||||
image.Sample(def_sampler, sample_pos[3])
|
||||
);
|
||||
|
||||
out_val = transpose(out_val);
|
||||
float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
|
||||
float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
|
||||
float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb;
|
||||
float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb;
|
||||
|
||||
float4 color_vec;
|
||||
if (byte_offset < u_plane_offset)
|
||||
return out_val[1];
|
||||
color_vec = color_vec_y;
|
||||
else if (byte_offset < v_plane_offset)
|
||||
return out_val[0];
|
||||
color_vec = color_vec_u;
|
||||
else
|
||||
return out_val[2];
|
||||
color_vec = color_vec_v;
|
||||
|
||||
return float4(
|
||||
dot(color_vec.xyz, rgb0) + color_vec.w,
|
||||
dot(color_vec.xyz, rgb1) + color_vec.w,
|
||||
dot(color_vec.xyz, rgb2) + color_vec.w,
|
||||
dot(color_vec.xyz, rgb3) + color_vec.w
|
||||
);
|
||||
}
|
||||
|
||||
float4 PSPlanar444(VertInOut vert_in) : TARGET
|
||||
float4 PSPlanar444(FragTex frag_in) : TARGET
|
||||
{
|
||||
float v_mul = floor(vert_in.uv.y * input_height);
|
||||
float v_mul = floor(frag_in.uv.y * input_height);
|
||||
|
||||
float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
|
||||
float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0;
|
||||
byte_offset += PRECISION_OFFSET;
|
||||
|
||||
float new_byte_offset = byte_offset;
|
||||
@ -245,8 +287,6 @@ float4 PSPlanar444(VertInOut vert_in) : TARGET
|
||||
else if (byte_offset >= u_plane_offset)
|
||||
new_byte_offset -= u_plane_offset;
|
||||
|
||||
float2 sample_pos[4];
|
||||
|
||||
float u_val = floor(fmod(new_byte_offset, width)) * width_i;
|
||||
float v_val = floor(new_byte_offset * width_i) * height_i;
|
||||
|
||||
@ -254,26 +294,30 @@ float4 PSPlanar444(VertInOut vert_in) : TARGET
|
||||
u_val += width_i * 0.5;
|
||||
v_val += height_i * 0.5;
|
||||
|
||||
sample_pos[0] = float2(u_val, v_val);
|
||||
sample_pos[1] = float2(u_val += width_i, v_val);
|
||||
sample_pos[2] = float2(u_val += width_i, v_val);
|
||||
sample_pos[3] = float2(u_val + width_i, v_val);
|
||||
float2 sample_pos0 = float2(u_val, v_val);
|
||||
float2 sample_pos1 = float2(u_val += width_i, v_val);
|
||||
float2 sample_pos2 = float2(u_val += width_i, v_val);
|
||||
float2 sample_pos3 = float2(u_val + width_i, v_val);
|
||||
|
||||
float4x4 out_val = float4x4(
|
||||
image.Sample(def_sampler, sample_pos[0]),
|
||||
image.Sample(def_sampler, sample_pos[1]),
|
||||
image.Sample(def_sampler, sample_pos[2]),
|
||||
image.Sample(def_sampler, sample_pos[3])
|
||||
);
|
||||
|
||||
out_val = transpose(out_val);
|
||||
float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
|
||||
float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
|
||||
float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb;
|
||||
float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb;
|
||||
|
||||
float4 color_vec;
|
||||
if (byte_offset < u_plane_offset)
|
||||
return out_val[1];
|
||||
color_vec = color_vec_y;
|
||||
else if (byte_offset < v_plane_offset)
|
||||
return out_val[0];
|
||||
color_vec = color_vec_u;
|
||||
else
|
||||
return out_val[2];
|
||||
color_vec = color_vec_v;
|
||||
|
||||
return float4(
|
||||
dot(color_vec.xyz, rgb0) + color_vec.w,
|
||||
dot(color_vec.xyz, rgb1) + color_vec.w,
|
||||
dot(color_vec.xyz, rgb2) + color_vec.w,
|
||||
dot(color_vec.xyz, rgb3) + color_vec.w
|
||||
);
|
||||
}
|
||||
|
||||
float GetIntOffsetColor(int offset)
|
||||
@ -283,12 +327,12 @@ float GetIntOffsetColor(int offset)
|
||||
0)).r;
|
||||
}
|
||||
|
||||
float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
|
||||
float4 PSPacked422_Reverse(FragTex frag_in, int u_pos, int v_pos,
|
||||
int y0_pos, int y1_pos) : TARGET
|
||||
{
|
||||
float y = vert_in.uv.y;
|
||||
float odd = floor(fmod(width * vert_in.uv.x + PRECISION_OFFSET, 2.0));
|
||||
float x = floor(width_d2 * vert_in.uv.x + PRECISION_OFFSET) *
|
||||
float y = frag_in.uv.y;
|
||||
float odd = floor(fmod(width * frag_in.uv.x + PRECISION_OFFSET, 2.0));
|
||||
float x = floor(width_d2 * frag_in.uv.x + PRECISION_OFFSET) *
|
||||
width_d2_i;
|
||||
|
||||
x += input_width_i_d2;
|
||||
@ -300,10 +344,10 @@ float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
|
||||
return saturate(mul(float4(yuv, 1.0), color_matrix));
|
||||
}
|
||||
|
||||
float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
|
||||
float4 PSPlanar420_Reverse(FragTex frag_in) : TARGET
|
||||
{
|
||||
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
|
||||
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
|
||||
int x = int(frag_in.uv.x * width + PRECISION_OFFSET);
|
||||
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
|
||||
|
||||
int lum_offset = y * int_width + x;
|
||||
int chroma_offset = (y / 2) * (int_width / 2) + x / 2;
|
||||
@ -319,10 +363,10 @@ float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
|
||||
return saturate(mul(float4(yuv, 1.0), color_matrix));
|
||||
}
|
||||
|
||||
float4 PSPlanar444_Reverse(VertInOut vert_in) : TARGET
|
||||
float4 PSPlanar444_Reverse(FragTex frag_in) : TARGET
|
||||
{
|
||||
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
|
||||
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
|
||||
int x = int(frag_in.uv.x * width + PRECISION_OFFSET);
|
||||
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
|
||||
|
||||
int lum_offset = y * int_width + x;
|
||||
int chroma_offset = y * int_width + x;
|
||||
@ -338,10 +382,10 @@ float4 PSPlanar444_Reverse(VertInOut vert_in) : TARGET
|
||||
return saturate(mul(float4(yuv, 1.0), color_matrix));
|
||||
}
|
||||
|
||||
float4 PSNV12_Reverse(VertInOut vert_in) : TARGET
|
||||
float4 PSNV12_Reverse(FragTex frag_in) : TARGET
|
||||
{
|
||||
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
|
||||
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
|
||||
int x = int(frag_in.uv.x * width + PRECISION_OFFSET);
|
||||
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
|
||||
|
||||
int lum_offset = y * int_width + x;
|
||||
int chroma_offset = (y / 2) * (int_width / 2) + x / 2;
|
||||
@ -356,39 +400,39 @@ float4 PSNV12_Reverse(VertInOut vert_in) : TARGET
|
||||
return saturate(mul(float4(yuv, 1.0), color_matrix));
|
||||
}
|
||||
|
||||
float4 PSY800_Limited(VertInOut vert_in) : TARGET
|
||||
float4 PSY800_Limited(FragTex frag_in) : TARGET
|
||||
{
|
||||
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
|
||||
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
|
||||
int x = int(frag_in.uv.x * width + PRECISION_OFFSET);
|
||||
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
|
||||
|
||||
float limited = image.Load(int3(x, y, 0)).x;
|
||||
float full = saturate((limited - (16.0 / 255.0)) * (255.0 / 219.0));
|
||||
return float4(full, full, full, 1.0);
|
||||
}
|
||||
|
||||
float4 PSY800_Full(VertInOut vert_in) : TARGET
|
||||
float4 PSY800_Full(FragTex frag_in) : TARGET
|
||||
{
|
||||
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
|
||||
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
|
||||
int x = int(frag_in.uv.x * width + PRECISION_OFFSET);
|
||||
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
|
||||
|
||||
float3 full = image.Load(int3(x, y, 0)).xxx;
|
||||
return float4(full, 1.0);
|
||||
}
|
||||
|
||||
float4 PSRGB_Limited(VertInOut vert_in) : TARGET
|
||||
float4 PSRGB_Limited(FragTex frag_in) : TARGET
|
||||
{
|
||||
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
|
||||
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
|
||||
int x = int(frag_in.uv.x * width + PRECISION_OFFSET);
|
||||
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
|
||||
|
||||
float4 rgba = image.Load(int3(x, y, 0));
|
||||
rgba.rgb = saturate((rgba.rgb - (16.0 / 255.0)) * (255.0 / 219.0));
|
||||
return rgba;
|
||||
}
|
||||
|
||||
float4 PSBGR3_Limited(VertInOut vert_in) : TARGET
|
||||
float4 PSBGR3_Limited(FragTex frag_in) : TARGET
|
||||
{
|
||||
int x = int(vert_in.uv.x * width * 3.0 + PRECISION_OFFSET);
|
||||
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
|
||||
int x = int(frag_in.uv.x * width * 3.0 + PRECISION_OFFSET);
|
||||
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
|
||||
|
||||
float b = image.Load(int3(x - 1, y, 0)).x;
|
||||
float g = image.Load(int3(x, y, 0)).x;
|
||||
@ -398,10 +442,10 @@ float4 PSBGR3_Limited(VertInOut vert_in) : TARGET
|
||||
return float4(rgb, 1.0);
|
||||
}
|
||||
|
||||
float4 PSBGR3_Full(VertInOut vert_in) : TARGET
|
||||
float4 PSBGR3_Full(FragTex frag_in) : TARGET
|
||||
{
|
||||
int x = int(vert_in.uv.x * width * 3.0 + PRECISION_OFFSET);
|
||||
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
|
||||
int x = int(frag_in.uv.x * width * 3.0 + PRECISION_OFFSET);
|
||||
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
|
||||
|
||||
float b = image.Load(int3(x - 1, y, 0)).x;
|
||||
float g = image.Load(int3(x, y, 0)).x;
|
||||
@ -414,8 +458,8 @@ technique Planar420
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(id);
|
||||
pixel_shader = PSPlanar420(vert_in);
|
||||
vertex_shader = VSPosTex(id);
|
||||
pixel_shader = PSPlanar420(frag_in);
|
||||
}
|
||||
}
|
||||
|
||||
@ -423,8 +467,8 @@ technique Planar444
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(id);
|
||||
pixel_shader = PSPlanar444(vert_in);
|
||||
vertex_shader = VSPosTex(id);
|
||||
pixel_shader = PSPlanar444(frag_in);
|
||||
}
|
||||
}
|
||||
|
||||
@ -432,8 +476,8 @@ technique NV12
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(id);
|
||||
pixel_shader = PSNV12(vert_in);
|
||||
vertex_shader = VSPosTex(id);
|
||||
pixel_shader = PSNV12(frag_in);
|
||||
}
|
||||
}
|
||||
|
||||
@ -441,8 +485,8 @@ technique NV12_Y
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(id);
|
||||
pixel_shader = PSNV12_Y(vert_in);
|
||||
vertex_shader = VSPos(id);
|
||||
pixel_shader = PSNV12_Y(frag_in);
|
||||
}
|
||||
}
|
||||
|
||||
@ -450,8 +494,8 @@ technique NV12_UV
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(id);
|
||||
pixel_shader = PSNV12_UV(vert_in);
|
||||
vertex_shader = VSPosTex(id);
|
||||
pixel_shader = PSNV12_UV(frag_in);
|
||||
}
|
||||
}
|
||||
|
||||
@ -459,8 +503,8 @@ technique UYVY_Reverse
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(id);
|
||||
pixel_shader = PSPacked422_Reverse(vert_in, 2, 0, 1, 3);
|
||||
vertex_shader = VSPosTex(id);
|
||||
pixel_shader = PSPacked422_Reverse(frag_in, 2, 0, 1, 3);
|
||||
}
|
||||
}
|
||||
|
||||
@ -468,8 +512,8 @@ technique YUY2_Reverse
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(id);
|
||||
pixel_shader = PSPacked422_Reverse(vert_in, 1, 3, 2, 0);
|
||||
vertex_shader = VSPosTex(id);
|
||||
pixel_shader = PSPacked422_Reverse(frag_in, 1, 3, 2, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -477,8 +521,8 @@ technique YVYU_Reverse
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(id);
|
||||
pixel_shader = PSPacked422_Reverse(vert_in, 3, 1, 2, 0);
|
||||
vertex_shader = VSPosTex(id);
|
||||
pixel_shader = PSPacked422_Reverse(frag_in, 3, 1, 2, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -486,8 +530,8 @@ technique I420_Reverse
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(id);
|
||||
pixel_shader = PSPlanar420_Reverse(vert_in);
|
||||
vertex_shader = VSPosTex(id);
|
||||
pixel_shader = PSPlanar420_Reverse(frag_in);
|
||||
}
|
||||
}
|
||||
|
||||
@ -495,8 +539,8 @@ technique I444_Reverse
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(id);
|
||||
pixel_shader = PSPlanar444_Reverse(vert_in);
|
||||
vertex_shader = VSPosTex(id);
|
||||
pixel_shader = PSPlanar444_Reverse(frag_in);
|
||||
}
|
||||
}
|
||||
|
||||
@ -504,8 +548,8 @@ technique NV12_Reverse
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(id);
|
||||
pixel_shader = PSNV12_Reverse(vert_in);
|
||||
vertex_shader = VSPosTex(id);
|
||||
pixel_shader = PSNV12_Reverse(frag_in);
|
||||
}
|
||||
}
|
||||
|
||||
@ -513,8 +557,8 @@ technique Y800_Limited
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(id);
|
||||
pixel_shader = PSY800_Limited(vert_in);
|
||||
vertex_shader = VSPosTex(id);
|
||||
pixel_shader = PSY800_Limited(frag_in);
|
||||
}
|
||||
}
|
||||
|
||||
@ -522,8 +566,8 @@ technique Y800_Full
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(id);
|
||||
pixel_shader = PSY800_Full(vert_in);
|
||||
vertex_shader = VSPosTex(id);
|
||||
pixel_shader = PSY800_Full(frag_in);
|
||||
}
|
||||
}
|
||||
|
||||
@ -531,8 +575,8 @@ technique RGB_Limited
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(id);
|
||||
pixel_shader = PSRGB_Limited(vert_in);
|
||||
vertex_shader = VSPosTex(id);
|
||||
pixel_shader = PSRGB_Limited(frag_in);
|
||||
}
|
||||
}
|
||||
|
||||
@ -540,8 +584,8 @@ technique BGR3_Limited
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(id);
|
||||
pixel_shader = PSBGR3_Limited(vert_in);
|
||||
vertex_shader = VSPosTex(id);
|
||||
pixel_shader = PSBGR3_Limited(frag_in);
|
||||
}
|
||||
}
|
||||
|
||||
@ -549,7 +593,7 @@ technique BGR3_Full
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(id);
|
||||
pixel_shader = PSBGR3_Full(vert_in);
|
||||
vertex_shader = VSPosTex(id);
|
||||
pixel_shader = PSBGR3_Full(frag_in);
|
||||
}
|
||||
}
|
||||
|
@ -6,7 +6,6 @@
|
||||
|
||||
uniform float4x4 ViewProj;
|
||||
uniform texture2d image;
|
||||
uniform float4x4 color_matrix;
|
||||
uniform float2 base_dimension_i;
|
||||
uniform float undistort_factor = 1.0;
|
||||
|
||||
@ -146,13 +145,6 @@ float4 PSDrawLanczosRGBADivide(FragData v_in) : TARGET
|
||||
return float4(rgba.rgb * multiplier, alpha);
|
||||
}
|
||||
|
||||
float4 PSDrawLanczosMatrix(FragData v_in) : TARGET
|
||||
{
|
||||
float3 rgb = DrawLanczos(v_in, false).rgb;
|
||||
float3 yuv = mul(float4(saturate(rgb), 1.0), color_matrix).xyz;
|
||||
return float4(yuv, 1.0);
|
||||
}
|
||||
|
||||
technique Draw
|
||||
{
|
||||
pass
|
||||
@ -179,12 +171,3 @@ technique DrawUndistort
|
||||
pixel_shader = PSDrawLanczosRGBA(v_in, true);
|
||||
}
|
||||
}
|
||||
|
||||
technique DrawMatrix
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(v_in);
|
||||
pixel_shader = PSDrawLanczosMatrix(v_in);
|
||||
}
|
||||
}
|
||||
|
@ -200,19 +200,12 @@ static inline gs_effect_t *get_scale_effect(struct obs_core_video *video,
|
||||
}
|
||||
|
||||
static const char *render_output_texture_name = "render_output_texture";
|
||||
static inline void render_output_texture(struct obs_core_video *video)
|
||||
static inline gs_texture_t *render_output_texture(struct obs_core_video *video)
|
||||
{
|
||||
profile_start(render_output_texture_name);
|
||||
|
||||
gs_texture_t *texture = video->render_texture;
|
||||
gs_texture_t *target = video->output_texture;
|
||||
uint32_t width = gs_texture_get_width(target);
|
||||
uint32_t height = gs_texture_get_height(target);
|
||||
struct vec2 base, base_i;
|
||||
|
||||
vec2_set(&base, (float)video->base_width, (float)video->base_height);
|
||||
vec2_set(&base_i, 1.0f / (float)video->base_width,
|
||||
1.0f / (float)video->base_height);
|
||||
|
||||
gs_effect_t *effect = get_scale_effect(video, width, height);
|
||||
gs_technique_t *tech;
|
||||
@ -220,12 +213,17 @@ static inline void render_output_texture(struct obs_core_video *video)
|
||||
if (video->ovi.output_format == VIDEO_FORMAT_RGBA) {
|
||||
tech = gs_effect_get_technique(effect, "DrawAlphaDivide");
|
||||
} else {
|
||||
tech = gs_effect_get_technique(effect, "DrawMatrix");
|
||||
if ((effect == video->default_effect) &&
|
||||
(width == video->base_width) &&
|
||||
(height == video->base_height))
|
||||
return texture;
|
||||
|
||||
tech = gs_effect_get_technique(effect, "Draw");
|
||||
}
|
||||
|
||||
profile_start(render_output_texture_name);
|
||||
|
||||
gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image");
|
||||
gs_eparam_t *matrix =
|
||||
gs_effect_get_param_by_name(effect, "color_matrix");
|
||||
gs_eparam_t *bres =
|
||||
gs_effect_get_param_by_name(effect, "base_dimension");
|
||||
gs_eparam_t *bres_i =
|
||||
@ -235,12 +233,20 @@ static inline void render_output_texture(struct obs_core_video *video)
|
||||
gs_set_render_target(target, NULL);
|
||||
set_render_size(width, height);
|
||||
|
||||
if (bres)
|
||||
if (bres) {
|
||||
struct vec2 base;
|
||||
vec2_set(&base, (float)video->base_width,
|
||||
(float)video->base_height);
|
||||
gs_effect_set_vec2(bres, &base);
|
||||
if (bres_i)
|
||||
gs_effect_set_vec2(bres_i, &base_i);
|
||||
}
|
||||
|
||||
if (bres_i) {
|
||||
struct vec2 base_i;
|
||||
vec2_set(&base_i, 1.0f / (float)video->base_width,
|
||||
1.0f / (float)video->base_height);
|
||||
gs_effect_set_vec2(bres_i, &base_i);
|
||||
}
|
||||
|
||||
gs_effect_set_val(matrix, video->color_matrix, sizeof(float) * 16);
|
||||
gs_effect_set_texture(image, texture);
|
||||
|
||||
gs_enable_blending(false);
|
||||
@ -254,6 +260,8 @@ static inline void render_output_texture(struct obs_core_video *video)
|
||||
gs_enable_blending(true);
|
||||
|
||||
profile_end(render_output_texture_name);
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
static inline void set_eparam(gs_effect_t *effect, const char *name, float val)
|
||||
@ -263,17 +271,23 @@ static inline void set_eparam(gs_effect_t *effect, const char *name, float val)
|
||||
}
|
||||
|
||||
static const char *render_convert_texture_name = "render_convert_texture";
|
||||
static void render_convert_texture(struct obs_core_video *video)
|
||||
static void render_convert_texture(struct obs_core_video *video,
|
||||
gs_texture_t *texture)
|
||||
{
|
||||
profile_start(render_convert_texture_name);
|
||||
|
||||
gs_texture_t *texture = video->output_texture;
|
||||
gs_texture_t *target = video->convert_texture;
|
||||
float fwidth = (float)video->output_width;
|
||||
float fheight = (float)video->output_height;
|
||||
size_t passes, i;
|
||||
|
||||
gs_effect_t *effect = video->conversion_effect;
|
||||
gs_eparam_t *color_vec_y =
|
||||
gs_effect_get_param_by_name(effect, "color_vec_y");
|
||||
gs_eparam_t *color_vec_u =
|
||||
gs_effect_get_param_by_name(effect, "color_vec_u");
|
||||
gs_eparam_t *color_vec_v =
|
||||
gs_effect_get_param_by_name(effect, "color_vec_v");
|
||||
gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image");
|
||||
gs_technique_t *tech =
|
||||
gs_effect_get_technique(effect, video->conversion_tech);
|
||||
@ -290,6 +304,17 @@ static void render_convert_texture(struct obs_core_video *video)
|
||||
set_eparam(effect, "height_d2_i", 1.0f / (fheight * 0.5f));
|
||||
set_eparam(effect, "input_height", (float)video->conversion_height);
|
||||
|
||||
struct vec4 vec_y, vec_u, vec_v;
|
||||
vec4_set(&vec_y, video->color_matrix[4], video->color_matrix[5],
|
||||
video->color_matrix[6], video->color_matrix[7]);
|
||||
vec4_set(&vec_u, video->color_matrix[0], video->color_matrix[1],
|
||||
video->color_matrix[2], video->color_matrix[3]);
|
||||
vec4_set(&vec_v, video->color_matrix[8], video->color_matrix[9],
|
||||
video->color_matrix[10], video->color_matrix[11]);
|
||||
gs_effect_set_vec4(color_vec_y, &vec_y);
|
||||
gs_effect_set_vec4(color_vec_u, &vec_u);
|
||||
gs_effect_set_vec4(color_vec_v, &vec_v);
|
||||
|
||||
gs_effect_set_texture(image, texture);
|
||||
|
||||
gs_set_render_target(target, NULL);
|
||||
@ -310,16 +335,32 @@ static void render_convert_texture(struct obs_core_video *video)
|
||||
profile_end(render_convert_texture_name);
|
||||
}
|
||||
|
||||
static void render_nv12(struct obs_core_video *video, gs_texture_t *target,
|
||||
const char *tech_name, uint32_t width, uint32_t height)
|
||||
static void render_nv12(struct obs_core_video *video, gs_texture_t *texture,
|
||||
gs_texture_t *target, const char *tech_name,
|
||||
uint32_t width, uint32_t height)
|
||||
{
|
||||
gs_texture_t *texture = video->output_texture;
|
||||
|
||||
gs_effect_t *effect = video->conversion_effect;
|
||||
gs_eparam_t *color_vec_y =
|
||||
gs_effect_get_param_by_name(effect, "color_vec_y");
|
||||
gs_eparam_t *color_vec_u =
|
||||
gs_effect_get_param_by_name(effect, "color_vec_u");
|
||||
gs_eparam_t *color_vec_v =
|
||||
gs_effect_get_param_by_name(effect, "color_vec_v");
|
||||
gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image");
|
||||
gs_technique_t *tech = gs_effect_get_technique(effect, tech_name);
|
||||
size_t passes, i;
|
||||
|
||||
struct vec4 vec_y, vec_u, vec_v;
|
||||
vec4_set(&vec_y, video->color_matrix[4], video->color_matrix[5],
|
||||
video->color_matrix[6], video->color_matrix[7]);
|
||||
vec4_set(&vec_u, video->color_matrix[0], video->color_matrix[1],
|
||||
video->color_matrix[2], video->color_matrix[3]);
|
||||
vec4_set(&vec_v, video->color_matrix[8], video->color_matrix[9],
|
||||
video->color_matrix[10], video->color_matrix[11]);
|
||||
gs_effect_set_vec4(color_vec_y, &vec_y);
|
||||
gs_effect_set_vec4(color_vec_u, &vec_u);
|
||||
gs_effect_set_vec4(color_vec_v, &vec_v);
|
||||
|
||||
gs_effect_set_texture(image, texture);
|
||||
|
||||
gs_set_render_target(target, NULL);
|
||||
@ -337,13 +378,14 @@ static void render_nv12(struct obs_core_video *video, gs_texture_t *target,
|
||||
}
|
||||
|
||||
static const char *render_convert_nv12_name = "render_convert_texture_nv12";
|
||||
static void render_convert_texture_nv12(struct obs_core_video *video)
|
||||
static void render_convert_texture_nv12(struct obs_core_video *video,
|
||||
gs_texture_t *texture)
|
||||
{
|
||||
profile_start(render_convert_nv12_name);
|
||||
|
||||
render_nv12(video, video->convert_texture, "NV12_Y",
|
||||
render_nv12(video, texture, video->convert_texture, "NV12_Y",
|
||||
video->output_width, video->output_height);
|
||||
render_nv12(video, video->convert_uv_texture, "NV12_UV",
|
||||
render_nv12(video, texture, video->convert_uv_texture, "NV12_UV",
|
||||
video->output_width / 2, video->output_height / 2);
|
||||
|
||||
video->texture_converted = true;
|
||||
@ -353,11 +395,10 @@ static void render_convert_texture_nv12(struct obs_core_video *video)
|
||||
|
||||
static const char *stage_output_texture_name = "stage_output_texture";
|
||||
static inline void stage_output_texture(struct obs_core_video *video,
|
||||
int cur_texture)
|
||||
gs_texture_t *texture, int cur_texture)
|
||||
{
|
||||
profile_start(stage_output_texture_name);
|
||||
|
||||
gs_texture_t *texture;
|
||||
bool texture_ready;
|
||||
gs_stagesurf_t *copy = video->copy_surfaces[cur_texture];
|
||||
|
||||
@ -365,7 +406,6 @@ static inline void stage_output_texture(struct obs_core_video *video,
|
||||
texture = video->convert_texture;
|
||||
texture_ready = video->texture_converted;
|
||||
} else {
|
||||
texture = video->output_texture;
|
||||
texture_ready = true;
|
||||
}
|
||||
|
||||
@ -486,7 +526,7 @@ static inline void render_video(struct obs_core_video *video, bool raw_active,
|
||||
render_main_texture(video);
|
||||
|
||||
if (raw_active || gpu_active) {
|
||||
render_output_texture(video);
|
||||
gs_texture_t *texture = render_output_texture(video);
|
||||
|
||||
#ifdef _WIN32
|
||||
if (gpu_active) {
|
||||
@ -496,9 +536,9 @@ static inline void render_video(struct obs_core_video *video, bool raw_active,
|
||||
|
||||
if (video->gpu_conversion) {
|
||||
if (video->using_nv12_tex)
|
||||
render_convert_texture_nv12(video);
|
||||
render_convert_texture_nv12(video, texture);
|
||||
else
|
||||
render_convert_texture(video);
|
||||
render_convert_texture(video, texture);
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
@ -508,7 +548,7 @@ static inline void render_video(struct obs_core_video *video, bool raw_active,
|
||||
}
|
||||
#endif
|
||||
if (raw_active)
|
||||
stage_output_texture(video, cur_texture);
|
||||
stage_output_texture(video, texture, cur_texture);
|
||||
}
|
||||
|
||||
gs_set_render_target(NULL, NULL);
|
||||
|
Loading…
x
Reference in New Issue
Block a user