The shaders to pack YUV information into the same texture were rather complicated and suffered from precision issues. Breaking them up into separate textures makes the shaders much simpler and avoids having to compute large integer offsets. Unfortunately, the code to handle multiple textures is not as pleasant, but at least the NV12 rendering path is no longer separate. In addition, write chroma samples to "standard" offsets. For I444, there's no difference, but I420/NV12 formats now have chroma shifted to the left as 4:2:0 is shown in the H.264 specification. Intel GPA, SetStablePowerState, Intel HD Graphics 530 Expect speed increase: I420: 844 us -> 493 us (254 us + 190 us + 274 us) I444: 837 us -> 747 us (258 us + 276 us + 272 us) NV12: 450 us -> 368 us (319 us + 168 us) Expect no change: NV12 (HW): 580 us (481 us + 166 us) -> 588 us (468 us + 247 us) RGB: 359 us -> 387 us Fixes https://obsproject.com/mantis/view.php?id=624 Fixes https://obsproject.com/mantis/view.php?id=1512
493 lines
11 KiB
Plaintext
493 lines
11 KiB
Plaintext
/******************************************************************************
|
|
Copyright (C) 2014 by Hugh Bailey <obs.jim@gmail.com>
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
******************************************************************************/
|
|
|
|
/* Floating-point dimension parameters.  The suffixes suggest "_i" means
 * inverse and "_d2" means divided by two -- NOTE(review): confirm against
 * the code that sets these values. */
uniform float     width;
uniform float     height;
uniform float     width_i;
uniform float     width_d2;
uniform float     width_d2_i;
uniform float     input_width_i_d2;

/* Integer parameters used for linear texel addressing in the planar and
 * NV12 reverse shaders. */
uniform int       int_width;
uniform int       int_input_width;
uniform int       int_u_plane_offset;
uniform int       int_v_plane_offset;

/* Matrix and clamp range applied to YUV samples by the *_Reverse shaders. */
uniform float4x4  color_matrix;
uniform float3    color_range_min = {0.0, 0.0, 0.0};
uniform float3    color_range_max = {1.0, 1.0, 1.0};

/* Per-channel vectors: xyz is dotted with the RGB sample, w is added. */
uniform float4    color_vec_y;
uniform float4    color_vec_u;
uniform float4    color_vec_v;

/* Source texture read by every pixel shader in this file. */
uniform texture2d image;
|
|
|
|
/* Sampler used for all image.Sample() calls: linear filtering with clamped
 * addressing on both U and V. */
sampler_state def_sampler {
	Filter   = Linear;
	AddressU = Clamp;
	AddressV = Clamp;
};
|
|
|
|
/* Position-only interface: returned by VSPos and consumed by PS_Y. */
struct FragPos {
	float4 pos : POSITION;
};
|
|
|
|
/* Output of VSTexPos: one texture coordinate plus the vertex position. */
struct VertTexPos {
	float2 uv  : TEXCOORD0;
	float4 pos : POSITION;
};
|
|
|
|
/* Output of VSTexPosLeft: uuv.x / uuv.y are two horizontal coordinates that
 * share the vertical coordinate uuv.z, plus the vertex position. */
struct VertTexPosWide {
	float3 uuv : TEXCOORD0;
	float4 pos : POSITION;
};
|
|
|
|
/* Pixel-shader input carrying a single texture coordinate. */
struct FragTex {
	float2 uv : TEXCOORD0;
};
|
|
|
|
/* Pixel-shader input for the "wide" shaders, which sample at uuv.xz and
 * uuv.yz. */
struct FragTexWide {
	float3 uuv : TEXCOORD0;
};
|
|
|
|
/*
 * Builds a vertex position purely from the vertex index: (id >> 1) drives x
 * and (id & 1) drives y.  For ids 0, 1, 2 this yields (-1,-1), (-1,3) and
 * (3,-1), i.e. one triangle enclosing the [-1,1] clip-space square.
 */
FragPos VSPos(uint id : VERTEXID)
{
	float hi_part = float(id >> 1);
	float lo_part = float(id & uint(1));

	float pos_x = hi_part * 4.0 - 1.0;
	float pos_y = lo_part * 4.0 - 1.0;

	FragPos result;
	result.pos = float4(pos_x, pos_y, 0.0, 1.0);
	return result;
}
|
|
|
|
/*
 * Same index-derived position as VSPos, with a matching texture coordinate:
 * u runs 0..2 along x.  v is (lo * 2) when obs_glsl_compile is true and
 * (1 - lo * 2) otherwise.
 */
VertTexPos VSTexPos(uint id : VERTEXID)
{
	float hi_part = float(id >> 1);
	float lo_part = float(id & uint(1));

	float pos_x = hi_part * 4.0 - 1.0;
	float pos_y = lo_part * 4.0 - 1.0;

	float tex_u = hi_part * 2.0;
	float tex_v = obs_glsl_compile ? (lo_part * 2.0)
	                               : (1.0 - lo_part * 2.0);

	VertTexPos result;
	result.uv = float2(tex_u, tex_v);
	result.pos = float4(pos_x, pos_y, 0.0, 1.0);
	return result;
}
|
|
|
|
/*
 * Variant of VSTexPos that outputs two horizontal coordinates: uuv.y is the
 * regular u, and uuv.x is that value minus width_i.  uuv.z holds v, which is
 * (lo * 2) when obs_glsl_compile is true and (1 - lo * 2) otherwise.
 */
VertTexPosWide VSTexPosLeft(uint id : VERTEXID)
{
	float hi_part = float(id >> 1);
	float lo_part = float(id & uint(1));

	float pos_x = hi_part * 4.0 - 1.0;
	float pos_y = lo_part * 4.0 - 1.0;

	float right_u = hi_part * 2.0;
	float left_u = right_u - width_i;
	float tex_v = obs_glsl_compile ? (lo_part * 2.0)
	                               : (1.0 - lo_part * 2.0);

	VertTexPosWide result;
	result.uuv = float3(left_u, right_u, tex_v);
	result.pos = float4(pos_x, pos_y, 0.0, 1.0);
	return result;
}
|
|
|
|
/* Small bias added to coordinates before floor()/fmod()/int() conversions;
 * used to prevent internal GPU precision issues (with fmod in particular). */
#define PRECISION_OFFSET 0.2
|
|
|
|
/*
 * Loads the texel of `image` at the fragment position and returns
 * dot(color_vec_y.xyz, rgb) + color_vec_y.w.
 */
float PS_Y(FragPos frag_in) : TARGET
{
	float3 src = image.Load(int3(frag_in.pos.xy, 0)).rgb;
	return dot(color_vec_y.xyz, src) + color_vec_y.w;
}
|
|
|
|
/*
 * Samples `image` at the two horizontal coordinates in uuv, averages the two
 * RGB values, and returns (u, v), each computed as
 * dot(color_vec_*.xyz, rgb) + color_vec_*.w.
 */
float2 PS_UV_Wide(FragTexWide frag_in) : TARGET
{
	float3 left_px = image.Sample(def_sampler, frag_in.uuv.xz).rgb;
	float3 right_px = image.Sample(def_sampler, frag_in.uuv.yz).rgb;
	float3 avg = (left_px + right_px) * 0.5;

	return float2(dot(color_vec_u.xyz, avg) + color_vec_u.w,
	              dot(color_vec_v.xyz, avg) + color_vec_v.w);
}
|
|
|
|
/*
 * Samples `image` at uv and returns
 * dot(color_vec_u.xyz, rgb) + color_vec_u.w.
 */
float PS_U(FragTex frag_in) : TARGET
{
	float3 src = image.Sample(def_sampler, frag_in.uv).rgb;
	return dot(color_vec_u.xyz, src) + color_vec_u.w;
}
|
|
|
|
/*
 * Samples `image` at uv and returns
 * dot(color_vec_v.xyz, rgb) + color_vec_v.w.
 */
float PS_V(FragTex frag_in) : TARGET
{
	float3 src = image.Sample(def_sampler, frag_in.uv).rgb;
	return dot(color_vec_v.xyz, src) + color_vec_v.w;
}
|
|
|
|
/*
 * Averages the two samples taken at uuv.xz and uuv.yz, then returns
 * dot(color_vec_u.xyz, rgb) + color_vec_u.w for the averaged colour.
 */
float PS_U_Wide(FragTexWide frag_in) : TARGET
{
	float3 left_px = image.Sample(def_sampler, frag_in.uuv.xz).rgb;
	float3 right_px = image.Sample(def_sampler, frag_in.uuv.yz).rgb;
	float3 avg = (left_px + right_px) * 0.5;
	return dot(color_vec_u.xyz, avg) + color_vec_u.w;
}
|
|
|
|
/*
 * Averages the two samples taken at uuv.xz and uuv.yz, then returns
 * dot(color_vec_v.xyz, rgb) + color_vec_v.w for the averaged colour.
 */
float PS_V_Wide(FragTexWide frag_in) : TARGET
{
	float3 left_px = image.Sample(def_sampler, frag_in.uuv.xz).rgb;
	float3 right_px = image.Sample(def_sampler, frag_in.uuv.yz).rgb;
	float3 avg = (left_px + right_px) * 0.5;
	return dot(color_vec_v.xyz, avg) + color_vec_v.w;
}
|
|
|
|
/*
 * Treats `image` as a linear buffer that is int_input_width texels wide and
 * returns the red channel of the texel at the given linear offset.
 */
float GetIntOffsetColor(int offset)
{
	int col = offset % int_input_width;
	int row = offset / int_input_width;
	return image.Load(int3(col, row, 0)).r;
}
|
|
|
|
/*
 * Converts one output pixel from a packed 4:2:2 texel.
 *
 * u_pos / v_pos / y0_pos / y1_pos are the component indices within the
 * sampled texel holding U, V and the two luma values.  The second luma
 * (y1_pos) is used when the output column is odd, the first (y0_pos)
 * otherwise.  The resulting YUV triple is clamped to
 * [color_range_min, color_range_max], multiplied by color_matrix and
 * saturated.
 */
float4 PSPacked422_Reverse(FragTex frag_in, int u_pos, int v_pos,
		int y0_pos, int y1_pos) : TARGET
{
	/* 1.0 for odd output columns, 0.0 for even ones */
	float is_odd = floor(fmod(width * frag_in.uv.x + PRECISION_OFFSET, 2.0));

	/* snap to the start of the column pair, then shift by
	 * input_width_i_d2 */
	float sample_x = floor(width_d2 * frag_in.uv.x + PRECISION_OFFSET) *
			width_d2_i;
	sample_x += input_width_i_d2;
	float sample_y = frag_in.uv.y;

	float4 packed = image.Sample(def_sampler, float2(sample_x, sample_y));

	float luma = (is_odd > 0.5) ? packed[y1_pos] : packed[y0_pos];
	float3 yuv = float3(luma, packed[u_pos], packed[v_pos]);
	yuv = clamp(yuv, color_range_min, color_range_max);
	return saturate(mul(float4(yuv, 1.0), color_matrix));
}
|
|
|
|
/*
 * Reads Y, U and V from three planes stored in `image` via linear offsets
 * and converts them with color_matrix.  Chroma is addressed at half
 * resolution in both directions: (row / 2) * (int_width / 2) + col / 2.
 */
float4 PSPlanar420_Reverse(FragTex frag_in) : TARGET
{
	int col = int(frag_in.uv.x * width + PRECISION_OFFSET);
	int row = int(frag_in.uv.y * height + PRECISION_OFFSET);

	int luma_idx = row * int_width + col;
	int chroma_idx = (row / 2) * (int_width / 2) + col / 2;

	float y_val = GetIntOffsetColor(luma_idx);
	float u_val = GetIntOffsetColor(int_u_plane_offset + chroma_idx);
	float v_val = GetIntOffsetColor(int_v_plane_offset + chroma_idx);

	float3 yuv = clamp(float3(y_val, u_val, v_val),
			color_range_min, color_range_max);
	return saturate(mul(float4(yuv, 1.0), color_matrix));
}
|
|
|
|
/*
 * Reads Y, U and V from three planes stored in `image` via linear offsets
 * and converts them with color_matrix.  Chroma is addressed at half
 * horizontal resolution only: row * (int_width / 2) + col / 2.
 */
float4 PSPlanar422_Reverse(FragTex frag_in) : TARGET
{
	int col = int(frag_in.uv.x * width + PRECISION_OFFSET);
	int row = int(frag_in.uv.y * height + PRECISION_OFFSET);

	int luma_idx = row * int_width + col;
	int chroma_idx = row * (int_width / 2) + col / 2;

	float y_val = GetIntOffsetColor(luma_idx);
	float u_val = GetIntOffsetColor(int_u_plane_offset + chroma_idx);
	float v_val = GetIntOffsetColor(int_v_plane_offset + chroma_idx);

	float3 yuv = clamp(float3(y_val, u_val, v_val),
			color_range_min, color_range_max);
	return saturate(mul(float4(yuv, 1.0), color_matrix));
}
|
|
|
|
/*
 * Reads Y, U and V from three planes stored in `image` via linear offsets
 * and converts them with color_matrix.  Chroma uses the same full-resolution
 * index as luma: row * int_width + col.
 */
float4 PSPlanar444_Reverse(FragTex frag_in) : TARGET
{
	int col = int(frag_in.uv.x * width + PRECISION_OFFSET);
	int row = int(frag_in.uv.y * height + PRECISION_OFFSET);

	int luma_idx = row * int_width + col;
	int chroma_idx = row * int_width + col;

	float y_val = GetIntOffsetColor(luma_idx);
	float u_val = GetIntOffsetColor(int_u_plane_offset + chroma_idx);
	float v_val = GetIntOffsetColor(int_v_plane_offset + chroma_idx);

	float3 yuv = clamp(float3(y_val, u_val, v_val),
			color_range_min, color_range_max);
	return saturate(mul(float4(yuv, 1.0), color_matrix));
}
|
|
|
|
/*
 * Reads Y from the luma plane and an adjacent pair of chroma values starting
 * at int_u_plane_offset, then converts with color_matrix.  The chroma pair
 * index is (row / 2) * (int_width / 2) + col / 2, doubled because each pair
 * occupies two consecutive offsets.
 */
float4 PSNV12_Reverse(FragTex frag_in) : TARGET
{
	int col = int(frag_in.uv.x * width + PRECISION_OFFSET);
	int row = int(frag_in.uv.y * height + PRECISION_OFFSET);

	int luma_idx = row * int_width + col;
	int pair_idx = (row / 2) * (int_width / 2) + col / 2;
	int chroma_base = int_u_plane_offset + pair_idx * 2;

	float y_val = GetIntOffsetColor(luma_idx);
	float u_val = GetIntOffsetColor(chroma_base);
	float v_val = GetIntOffsetColor(chroma_base + 1);

	float3 yuv = clamp(float3(y_val, u_val, v_val),
			color_range_min, color_range_max);
	return saturate(mul(float4(yuv, 1.0), color_matrix));
}
|
|
|
|
/*
 * Loads the x channel of the addressed texel, rescales it with
 * (v - 16/255) * (255/219), saturates, and replicates the result to RGB
 * with alpha 1.
 */
float4 PSY800_Limited(FragTex frag_in) : TARGET
{
	int col = int(frag_in.uv.x * width + PRECISION_OFFSET);
	int row = int(frag_in.uv.y * height + PRECISION_OFFSET);

	float src = image.Load(int3(col, row, 0)).x;
	float expanded = saturate((src - (16.0 / 255.0)) * (255.0 / 219.0));
	return float4(expanded, expanded, expanded, 1.0);
}
|
|
|
|
/*
 * Loads the x channel of the addressed texel and replicates it unchanged to
 * RGB with alpha 1.
 */
float4 PSY800_Full(FragTex frag_in) : TARGET
{
	int col = int(frag_in.uv.x * width + PRECISION_OFFSET);
	int row = int(frag_in.uv.y * height + PRECISION_OFFSET);

	float grey = image.Load(int3(col, row, 0)).x;
	return float4(grey, grey, grey, 1.0);
}
|
|
|
|
/*
 * Loads the addressed RGBA texel and rescales its RGB channels with
 * (c - 16/255) * (255/219), saturated.  Alpha is passed through untouched.
 */
float4 PSRGB_Limited(FragTex frag_in) : TARGET
{
	int col = int(frag_in.uv.x * width + PRECISION_OFFSET);
	int row = int(frag_in.uv.y * height + PRECISION_OFFSET);

	float4 src = image.Load(int3(col, row, 0));
	float3 expanded =
		saturate((src.rgb - (16.0 / 255.0)) * (255.0 / 219.0));
	return float4(expanded, src.a);
}
|
|
|
|
/*
 * Assembles one RGB pixel from three horizontally adjacent single-channel
 * texels (blue at centre - 1, green at centre, red at centre + 1), where the
 * centre column is derived from uv.x * width * 3.  The result is rescaled
 * with (c - 16/255) * (255/219), saturated, and returned with alpha 1.
 */
float4 PSBGR3_Limited(FragTex frag_in) : TARGET
{
	int centre = int(frag_in.uv.x * width * 3.0 + PRECISION_OFFSET);
	int row = int(frag_in.uv.y * height + PRECISION_OFFSET);

	float blue = image.Load(int3(centre - 1, row, 0)).x;
	float green = image.Load(int3(centre, row, 0)).x;
	float red = image.Load(int3(centre + 1, row, 0)).x;

	float3 expanded = saturate(
		(float3(red, green, blue) - (16.0 / 255.0)) * (255.0 / 219.0));
	return float4(expanded, 1.0);
}
|
|
|
|
/*
 * Assembles one RGB pixel from three horizontally adjacent single-channel
 * texels (blue at centre - 1, green at centre, red at centre + 1), where the
 * centre column is derived from uv.x * width * 3.  Values are returned
 * unscaled with alpha 1.
 */
float4 PSBGR3_Full(FragTex frag_in) : TARGET
{
	int centre = int(frag_in.uv.x * width * 3.0 + PRECISION_OFFSET);
	int row = int(frag_in.uv.y * height + PRECISION_OFFSET);

	float blue = image.Load(int3(centre - 1, row, 0)).x;
	float green = image.Load(int3(centre, row, 0)).x;
	float red = image.Load(int3(centre + 1, row, 0)).x;

	return float4(float3(red, green, blue), 1.0);
}
|
|
|
|
/* Single pass: VSPos vertices, PS_Y pixel shader. */
technique Planar_Y
{
	pass
	{
		vertex_shader = VSPos(id);
		pixel_shader  = PS_Y(frag_in);
	}
}
|
|
|
|
/* Single pass: VSTexPos vertices, PS_U pixel shader. */
technique Planar_U
{
	pass
	{
		vertex_shader = VSTexPos(id);
		pixel_shader  = PS_U(frag_in);
	}
}
|
|
|
|
/* Single pass: VSTexPos vertices, PS_V pixel shader. */
technique Planar_V
{
	pass
	{
		vertex_shader = VSTexPos(id);
		pixel_shader  = PS_V(frag_in);
	}
}
|
|
|
|
/* Single pass: VSTexPosLeft vertices, PS_U_Wide pixel shader (two-sample
 * average). */
technique Planar_U_Left
{
	pass
	{
		vertex_shader = VSTexPosLeft(id);
		pixel_shader  = PS_U_Wide(frag_in);
	}
}
|
|
|
|
/* Single pass: VSTexPosLeft vertices, PS_V_Wide pixel shader (two-sample
 * average). */
technique Planar_V_Left
{
	pass
	{
		vertex_shader = VSTexPosLeft(id);
		pixel_shader  = PS_V_Wide(frag_in);
	}
}
|
|
|
|
/* Single pass: VSPos vertices, PS_Y pixel shader (same shaders as
 * Planar_Y). */
technique NV12_Y
{
	pass
	{
		vertex_shader = VSPos(id);
		pixel_shader  = PS_Y(frag_in);
	}
}
|
|
|
|
/* Single pass: VSTexPosLeft vertices, PS_UV_Wide pixel shader producing
 * both chroma values per output texel. */
technique NV12_UV
{
	pass
	{
		vertex_shader = VSTexPosLeft(id);
		pixel_shader  = PS_UV_Wide(frag_in);
	}
}
|
|
|
|
/* Packed 4:2:2 decode with component indices u=2, v=0, y0=1, y1=3. */
technique UYVY_Reverse
{
	pass
	{
		vertex_shader = VSTexPos(id);
		pixel_shader  = PSPacked422_Reverse(frag_in, 2, 0, 1, 3);
	}
}
|
|
|
|
/* Packed 4:2:2 decode with component indices u=1, v=3, y0=2, y1=0. */
technique YUY2_Reverse
{
	pass
	{
		vertex_shader = VSTexPos(id);
		pixel_shader  = PSPacked422_Reverse(frag_in, 1, 3, 2, 0);
	}
}
|
|
|
|
/* Packed 4:2:2 decode with component indices u=3, v=1, y0=2, y1=0. */
technique YVYU_Reverse
{
	pass
	{
		vertex_shader = VSTexPos(id);
		pixel_shader  = PSPacked422_Reverse(frag_in, 3, 1, 2, 0);
	}
}
|
|
|
|
/* Single pass: VSTexPos vertices, PSPlanar420_Reverse pixel shader. */
technique I420_Reverse
{
	pass
	{
		vertex_shader = VSTexPos(id);
		pixel_shader  = PSPlanar420_Reverse(frag_in);
	}
}
|
|
|
|
/* Single pass: VSTexPos vertices, PSPlanar422_Reverse pixel shader. */
technique I422_Reverse
{
	pass
	{
		vertex_shader = VSTexPos(id);
		pixel_shader  = PSPlanar422_Reverse(frag_in);
	}
}
|
|
|
|
/* Single pass: VSTexPos vertices, PSPlanar444_Reverse pixel shader. */
technique I444_Reverse
{
	pass
	{
		vertex_shader = VSTexPos(id);
		pixel_shader  = PSPlanar444_Reverse(frag_in);
	}
}
|
|
|
|
/* Single pass: VSTexPos vertices, PSNV12_Reverse pixel shader. */
technique NV12_Reverse
{
	pass
	{
		vertex_shader = VSTexPos(id);
		pixel_shader  = PSNV12_Reverse(frag_in);
	}
}
|
|
|
|
/* Single pass: VSTexPos vertices, PSY800_Limited pixel shader. */
technique Y800_Limited
{
	pass
	{
		vertex_shader = VSTexPos(id);
		pixel_shader  = PSY800_Limited(frag_in);
	}
}
|
|
|
|
/* Single pass: VSTexPos vertices, PSY800_Full pixel shader. */
technique Y800_Full
{
	pass
	{
		vertex_shader = VSTexPos(id);
		pixel_shader  = PSY800_Full(frag_in);
	}
}
|
|
|
|
/* Single pass: VSTexPos vertices, PSRGB_Limited pixel shader. */
technique RGB_Limited
{
	pass
	{
		vertex_shader = VSTexPos(id);
		pixel_shader  = PSRGB_Limited(frag_in);
	}
}
|
|
|
|
/* Single pass: VSTexPos vertices, PSBGR3_Limited pixel shader. */
technique BGR3_Limited
{
	pass
	{
		vertex_shader = VSTexPos(id);
		pixel_shader  = PSBGR3_Limited(frag_in);
	}
}
|
|
|
|
/* Single pass: VSTexPos vertices, PSBGR3_Full pixel shader. */
technique BGR3_Full
{
	pass
	{
		vertex_shader = VSTexPos(id);
		pixel_shader  = PSBGR3_Full(frag_in);
	}
}
|