libobs: Simplify YUV conversion

Currently several shaders need "DrawMatrix" techniques to handle the
possibility that the input texture is in a "YUV" format. Also,
"DrawMatrix" is overloaded to convert in both directions even though it
is written for RGB to "YUV" only.

A cleaner solution is to handle "YUV" to RGB up-front as part of format
conversion, and ensure only RGB inputs reach the other shaders. This is
necessary to someday perform correct scale filtering without the cost of
redundant "YUV" conversions per texture tap.

A prerequisite for this is conversion support for VIDEO_FORMAT_I444,
which this commit adds. There was already a hack in place to cover
VIDEO_FORMAT_Y800. All other "YUV" formats already have conversion
functions.

"DrawMatrix" has been removed from shaders that only supported "YUV" to
RGB conversions. It still exists in shaders that perform RGB to "YUV"
conversions, and the implementations have been sanitized accordingly.
master
James Park 2019-04-02 09:03:57 -07:00 committed by jp9000
parent 3031a11762
commit 69c215345a
19 changed files with 112 additions and 207 deletions

View File

@ -1,7 +1,4 @@
uniform float4x4 ViewProj;
uniform float4x4 color_matrix;
uniform float3 color_range_min = {0.0, 0.0, 0.0};
uniform float3 color_range_max = {1.0, 1.0, 1.0};
uniform float2 base_dimension_i;
uniform texture2d image;
@ -57,51 +54,6 @@ float4 PSDrawAreaRGBA(VertInOut vert_in) : TARGET
return float4(totalcolor.rgb / totalcolor.a, totalcolor.a);
}
float3 ConvertFromYuv(float3 yuv)
{
yuv = clamp(yuv, color_range_min, color_range_max);
return saturate(mul(float4(yuv, 1.0), color_matrix)).rgb;
}
float4 PSDrawAreaMatrix(VertInOut vert_in) : TARGET
{
float3 totalcolor = float3(0.0, 0.0, 0.0);
float2 uv = vert_in.uv;
float2 uvdelta = float2(ddx(uv.x), ddy(uv.y));
// Handle potential OpenGL flip.
uvdelta.y = abs(uvdelta.y);
float2 uvhalfdelta = 0.5 * uvdelta;
float2 uvmin = uv - uvhalfdelta;
float2 uvmax = uv + uvhalfdelta;
int2 loadindexmin = int2(uvmin / base_dimension_i);
int2 loadindexmax = int2(uvmax / base_dimension_i);
float2 targetpos = uv / uvdelta;
float2 targetposmin = targetpos - 0.5;
float2 targetposmax = targetpos + 0.5;
float2 scale = base_dimension_i / uvdelta;
for (int loadindexy = loadindexmin.y; loadindexy <= loadindexmax.y; ++loadindexy)
{
for (int loadindexx = loadindexmin.x; loadindexx <= loadindexmax.x; ++loadindexx)
{
int2 loadindex = int2(loadindexx, loadindexy);
float2 potentialtargetmin = float2(loadindex) * scale;
float2 potentialtargetmax = potentialtargetmin + scale;
float2 targetmin = max(potentialtargetmin, targetposmin);
float2 targetmax = min(potentialtargetmax, targetposmax);
float area = (targetmax.x - targetmin.x) * (targetmax.y - targetmin.y);
float3 yuv = image.Load(int3(loadindex, 0)).xyz;
totalcolor += area * ConvertFromYuv(yuv);
}
}
return float4(totalcolor, 1.0);
}
technique Draw
{
pass
@ -110,12 +62,3 @@ technique Draw
pixel_shader = PSDrawAreaRGBA(vert_in);
}
}
technique DrawMatrix
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSDrawAreaMatrix(vert_in);
}
}

View File

@ -7,8 +7,6 @@
uniform float4x4 ViewProj;
uniform texture2d image;
uniform float4x4 color_matrix;
uniform float3 color_range_min = {0.0, 0.0, 0.0};
uniform float3 color_range_max = {1.0, 1.0, 1.0};
uniform float2 base_dimension_i;
uniform float undistort_factor = 1.0;
@ -134,11 +132,9 @@ float4 PSDrawBicubicRGBA(VertData v_in, bool undistort) : TARGET
float4 PSDrawBicubicMatrix(VertData v_in) : TARGET
{
float4 rgba = DrawBicubic(v_in, false);
float4 yuv;
yuv.xyz = clamp(rgba.xyz, color_range_min, color_range_max);
return saturate(mul(float4(yuv.xyz, 1.0), color_matrix));
float3 rgb = DrawBicubic(v_in, false).rgb;
float3 yuv = mul(float4(saturate(rgb), 1.0), color_matrix).xyz;
return float4(yuv, 1.0);
}
technique Draw

View File

@ -6,8 +6,6 @@
uniform float4x4 ViewProj;
uniform texture2d image;
uniform float4x4 color_matrix;
uniform float3 color_range_min = {0.0, 0.0, 0.0};
uniform float3 color_range_max = {1.0, 1.0, 1.0};
uniform float2 base_dimension_i;
sampler_state textureSampler {
@ -58,10 +56,9 @@ float4 PSDrawLowresBilinearRGBA(VertData v_in) : TARGET
float4 PSDrawLowresBilinearMatrix(VertData v_in) : TARGET
{
float4 yuv = DrawLowresBilinear(v_in);
yuv.xyz = clamp(yuv.xyz, color_range_min, color_range_max);
return saturate(mul(float4(yuv.xyz, 1.0), color_matrix));
float3 rgb = DrawLowresBilinear(v_in);
float3 yuv = mul(float4(saturate(rgb), 1.0), color_matrix).xyz;
return float4(yuv, 1.0);
}
technique Draw

View File

@ -1,7 +1,5 @@
uniform float4x4 ViewProj;
uniform float4x4 color_matrix;
uniform float3 color_range_min = {0.0, 0.0, 0.0};
uniform float3 color_range_max = {1.0, 1.0, 1.0};
uniform texture2d image;
sampler_state def_sampler {
@ -30,9 +28,9 @@ float4 PSDrawBare(VertInOut vert_in) : TARGET
float4 PSDrawMatrix(VertInOut vert_in) : TARGET
{
float4 yuv = image.Sample(def_sampler, vert_in.uv);
yuv.xyz = clamp(yuv.xyz, color_range_min, color_range_max);
return saturate(mul(float4(yuv.xyz, 1.0), color_matrix));
float3 rgb = image.Sample(def_sampler, vert_in.uv).rgb;
float3 yuv = mul(float4(saturate(rgb), 1.0), color_matrix).xyz;
return float4(yuv, 1.0);
}
technique Draw

View File

@ -18,9 +18,6 @@
uniform float4x4 ViewProj;
uniform texture2d image;
uniform float4x4 color_matrix;
uniform float3 color_range_min = {0.0, 0.0, 0.0};
uniform float3 color_range_max = {1.0, 1.0, 1.0};
uniform texture2d previous_image;
uniform float2 dimensions;
@ -267,7 +264,7 @@ VertData VSDefault(VertData v_in)
return vert_out;
}
#define TECHNIQUE(rgba_ps, matrix_ps) \
#define TECHNIQUE(rgba_ps) \
technique Draw \
{ \
pass \
@ -275,19 +272,4 @@ technique Draw \
vertex_shader = VSDefault(v_in); \
pixel_shader = rgba_ps(v_in); \
} \
} \
float4 matrix_ps(VertData v_in) : TARGET \
{ \
float4 yuv = rgba_ps(v_in); \
yuv.xyz = clamp(yuv.xyz, color_range_min, color_range_max); \
return saturate(mul(float4(yuv.xyz, 1.0), color_matrix)); \
} \
\
technique DrawMatrix \
{ \
pass \
{ \
vertex_shader = VSDefault(v_in); \
pixel_shader = matrix_ps(v_in); \
} \
}

View File

@ -18,4 +18,4 @@
#include "deinterlace_base.effect"
TECHNIQUE( PSBlendRGBA, PSBlendMatrix);
TECHNIQUE(PSBlendRGBA);

View File

@ -18,4 +18,4 @@
#include "deinterlace_base.effect"
TECHNIQUE(PSBlendRGBA_2x, PSBlendMatrix_2x);
TECHNIQUE(PSBlendRGBA_2x);

View File

@ -18,4 +18,4 @@
#include "deinterlace_base.effect"
TECHNIQUE(PSDiscardRGBA, PSDiscardMatrix);
TECHNIQUE(PSDiscardRGBA);

View File

@ -18,4 +18,4 @@
#include "deinterlace_base.effect"
TECHNIQUE(PSDiscardRGBA_2x, PSDiscardMatrix_2x);
TECHNIQUE(PSDiscardRGBA_2x);

View File

@ -18,4 +18,4 @@
#include "deinterlace_base.effect"
TECHNIQUE(PSLinearRGBA, PSLinearMatrix);
TECHNIQUE(PSLinearRGBA);

View File

@ -18,4 +18,4 @@
#include "deinterlace_base.effect"
TECHNIQUE(PSLinearRGBA_2x, PSLinearxMatrixA_2x);
TECHNIQUE(PSLinearRGBA_2x);

View File

@ -18,4 +18,4 @@
#include "deinterlace_base.effect"
TECHNIQUE(PSYadifMode0RGBA, PSYadifMode0Matrix);
TECHNIQUE(PSYadifMode0RGBA);

View File

@ -18,4 +18,4 @@
#include "deinterlace_base.effect"
TECHNIQUE(PSYadifMode0RGBA_2x, PSYadifMode0Matrix_2x);
TECHNIQUE(PSYadifMode0RGBA_2x);

View File

@ -42,6 +42,10 @@ uniform int int_input_width;
uniform int int_u_plane_offset;
uniform int int_v_plane_offset;
uniform float4x4 color_matrix;
uniform float3 color_range_min = {0.0, 0.0, 0.0};
uniform float3 color_range_max = {1.0, 1.0, 1.0};
uniform texture2d image;
sampler_state def_sampler {
@ -283,8 +287,10 @@ float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
x += input_width_i_d2;
float4 texel = image.Sample(def_sampler, float2(x, y));
return float4(odd > 0.5 ? texel[y1_pos] : texel[y0_pos],
texel[u_pos], texel[v_pos], 1.0);
float3 yuv = float3(odd > 0.5 ? texel[y1_pos] : texel[y0_pos],
texel[u_pos], texel[v_pos]);
yuv = clamp(yuv, color_range_min, color_range_max);
return saturate(mul(float4(yuv, 1.0), color_matrix));
}
float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
@ -297,12 +303,32 @@ float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
int chroma1 = int_u_plane_offset + chroma_offset;
int chroma2 = int_v_plane_offset + chroma_offset;
return float4(
float3 yuv = float3(
GetIntOffsetColor(lum_offset),
GetIntOffsetColor(chroma1),
GetIntOffsetColor(chroma2),
1.0
GetIntOffsetColor(chroma2)
);
yuv = clamp(yuv, color_range_min, color_range_max);
return saturate(mul(float4(yuv, 1.0), color_matrix));
}
/*
 * Pixel shader for the "I444_Reverse" technique: reads one Y, U and V
 * value for the output pixel from a planar 4:4:4 source and converts the
 * triple with color_matrix.
 *
 * vert_in.uv is scaled by width/height to get integer pixel coordinates.
 * The same linear offset (y * int_width + x) is used for the luma plane and
 * for both chroma planes, i.e. chroma is addressed at full resolution; the
 * chroma planes are reached by adding int_u_plane_offset and
 * int_v_plane_offset.
 *
 * NOTE(review): GetIntOffsetColor is defined outside this view; it is
 * presumably a single-channel fetch at a linear texel offset -- confirm.
 */
float4 PSPlanar444_Reverse(VertInOut vert_in) : TARGET
{
/* PRECISION_OFFSET is added before truncating to int; it presumably guards
 * against landing just below a pixel boundary -- TODO confirm. */
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
/* Luma and chroma share the same per-plane offset (no subsampling). */
int lum_offset = y * int_width + x;
int chroma_offset = y * int_width + x;
int chroma1 = int_u_plane_offset + chroma_offset;
int chroma2 = int_v_plane_offset + chroma_offset;
float3 yuv = float3(
GetIntOffsetColor(lum_offset),
GetIntOffsetColor(chroma1),
GetIntOffsetColor(chroma2)
);
/* Clamp to the configured range first, then apply the matrix and
 * saturate the result to [0, 1]. */
yuv = clamp(yuv, color_range_min, color_range_max);
return saturate(mul(float4(yuv, 1.0), color_matrix));
}
float4 PSNV12_Reverse(VertInOut vert_in) : TARGET
@ -314,12 +340,13 @@ float4 PSNV12_Reverse(VertInOut vert_in) : TARGET
int chroma_offset = (y / 2) * (int_width / 2) + x / 2;
int chroma = int_u_plane_offset + chroma_offset * 2;
return float4(
float3 yuv = float3(
GetIntOffsetColor(lum_offset),
GetIntOffsetColor(chroma),
GetIntOffsetColor(chroma + 1),
1.0
GetIntOffsetColor(chroma + 1)
);
yuv = clamp(yuv, color_range_min, color_range_max);
return saturate(mul(float4(yuv, 1.0), color_matrix));
}
technique Planar420
@ -403,6 +430,15 @@ technique I420_Reverse
}
}
technique I444_Reverse
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSPlanar444_Reverse(vert_in);
}
}
technique NV12_Reverse
{
pass

View File

@ -7,8 +7,6 @@
uniform float4x4 ViewProj;
uniform texture2d image;
uniform float4x4 color_matrix;
uniform float3 color_range_min = {0.0, 0.0, 0.0};
uniform float3 color_range_max = {1.0, 1.0, 1.0};
uniform float2 base_dimension_i;
uniform float undistort_factor = 1.0;
@ -142,11 +140,9 @@ float4 PSDrawLanczosRGBA(FragData v_in, bool undistort) : TARGET
float4 PSDrawLanczosMatrix(FragData v_in) : TARGET
{
float4 rgba = DrawLanczos(v_in, false);
float4 yuv;
yuv.xyz = clamp(rgba.xyz, color_range_min, color_range_max);
return saturate(mul(float4(yuv.xyz, 1.0), color_matrix));
float3 rgb = DrawLanczos(v_in, false).rgb;
float3 yuv = mul(float4(saturate(rgb), 1.0), color_matrix).xyz;
return float4(yuv, 1.0);
}
technique Draw

View File

@ -1,7 +1,4 @@
uniform float4x4 ViewProj;
uniform float4x4 color_matrix;
uniform float3 color_range_min = {0.0, 0.0, 0.0};
uniform float3 color_range_max = {1.0, 1.0, 1.0};
uniform texture2d image;
uniform float2 scale;
@ -29,13 +26,6 @@ float4 PSDrawBare(VertInOut vert_in) : TARGET
return image.Sample(def_sampler, vert_in.uv);
}
float4 PSDrawMatrix(VertInOut vert_in) : TARGET
{
float4 yuv = image.Sample(def_sampler, vert_in.uv);
yuv.xyz = clamp(yuv.xyz, color_range_min, color_range_max);
return saturate(mul(float4(yuv.xyz, 1.0), color_matrix));
}
technique Draw
{
pass
@ -44,12 +34,3 @@ technique Draw
pixel_shader = PSDrawBare(vert_in);
}
}
technique DrawMatrix
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSDrawMatrix(vert_in);
}
}

View File

@ -645,10 +645,6 @@ struct obs_source {
enum video_format async_format;
enum video_format async_cache_format;
enum gs_color_format async_texture_format;
float async_color_matrix[16];
bool async_full_range;
float async_color_range_min[3];
float async_color_range_max[3];
int async_plane_offset[2];
bool async_flip;
bool async_active;

View File

@ -315,9 +315,6 @@ void deinterlace_render(obs_source_t *s)
gs_eparam_t *dimensions = gs_effect_get_param_by_name(effect,
"dimensions");
struct vec2 size = {(float)s->async_width, (float)s->async_height};
bool yuv = format_is_yuv(s->async_format);
bool limited_range = yuv && !s->async_full_range;
const char *tech = yuv ? "DrawMatrix" : "Draw";
gs_texture_t *cur_tex = s->async_texrender ?
gs_texrender_get_texture(s->async_texrender) :
@ -334,30 +331,12 @@ void deinterlace_render(obs_source_t *s)
gs_effect_set_int(field, s->deinterlace_top_first);
gs_effect_set_vec2(dimensions, &size);
if (yuv) {
gs_eparam_t *color_matrix = gs_effect_get_param_by_name(
effect, "color_matrix");
gs_effect_set_val(color_matrix, s->async_color_matrix,
sizeof(float) * 16);
}
if (limited_range) {
const size_t size = sizeof(float) * 3;
gs_eparam_t *color_range_min = gs_effect_get_param_by_name(
effect, "color_range_min");
gs_eparam_t *color_range_max = gs_effect_get_param_by_name(
effect, "color_range_max");
gs_effect_set_val(color_range_min, s->async_color_range_min,
size);
gs_effect_set_val(color_range_max, s->async_color_range_max,
size);
}
frame2_ts = s->deinterlace_frame_ts + s->deinterlace_offset +
s->deinterlace_half_duration - TWOX_TOLERANCE;
gs_effect_set_bool(frame2, obs->video.video_time >= frame2_ts);
while (gs_effect_loop(effect, tech))
while (gs_effect_loop(effect, "Draw"))
gs_draw_sprite(NULL, s->async_flip ? GS_FLIP_V : 0,
s->async_width, s->async_height);
}

View File

@ -1329,6 +1329,7 @@ enum convert_type {
CONVERT_420,
CONVERT_422_U,
CONVERT_422_Y,
CONVERT_444,
};
static inline enum convert_type get_convert_type(enum video_format format)
@ -1338,6 +1339,8 @@ static inline enum convert_type get_convert_type(enum video_format format)
return CONVERT_420;
case VIDEO_FORMAT_NV12:
return CONVERT_NV12;
case VIDEO_FORMAT_I444:
return CONVERT_444;
case VIDEO_FORMAT_YVYU:
case VIDEO_FORMAT_YUY2:
@ -1346,7 +1349,6 @@ static inline enum convert_type get_convert_type(enum video_format format)
return CONVERT_422_U;
case VIDEO_FORMAT_Y800:
case VIDEO_FORMAT_I444:
case VIDEO_FORMAT_NONE:
case VIDEO_FORMAT_RGBA:
case VIDEO_FORMAT_BGRA:
@ -1360,12 +1362,23 @@ static inline enum convert_type get_convert_type(enum video_format format)
static inline bool set_packed422_sizes(struct obs_source *source,
const struct obs_source_frame *frame)
{
source->async_convert_height = frame->height;
source->async_convert_width = frame->width / 2;
source->async_convert_height = frame->height;
source->async_texture_format = GS_BGRA;
return true;
}
/*
 * Configures the GPU-conversion input texture for a planar 4:4:4 frame.
 *
 * Sets the conversion texture to frame->width by frame->height * 3 texels
 * in GS_R8 format, and records the byte distance from plane 0 to planes 1
 * and 2 in async_plane_offset[0] and async_plane_offset[1].
 *
 * Always returns true.
 *
 * NOTE(review): the height * 3 sizing presumably stacks the Y, U and V
 * planes one below another in a single texture -- confirm against the
 * upload path.
 * NOTE(review): the pointer subtractions assume data[0], data[1] and
 * data[2] point into one contiguous buffer; the differences are also
 * narrowed to int -- verify for all frame producers.
 */
static inline bool set_planar444_sizes(struct obs_source *source,
const struct obs_source_frame *frame)
{
source->async_convert_width = frame->width;
/* Three times the frame height: one frame-sized region per plane. */
source->async_convert_height = frame->height * 3;
/* GS_R8 format; presumably one byte per sample -- TODO confirm. */
source->async_texture_format = GS_R8;
/* Offsets of planes 1 and 2 relative to the start of plane 0. */
source->async_plane_offset[0] = (int)(frame->data[1] - frame->data[0]);
source->async_plane_offset[1] = (int)(frame->data[2] - frame->data[0]);
return true;
}
static inline bool set_planar420_sizes(struct obs_source *source,
const struct obs_source_frame *frame)
{
@ -1406,7 +1419,9 @@ static inline bool init_gpu_conversion(struct obs_source *source,
case CONVERT_NV12:
return set_nv12_sizes(source, frame);
break;
case CONVERT_444:
return set_planar444_sizes(source, frame);
case CONVERT_NONE:
assert(false && "No conversion requested");
@ -1491,6 +1506,11 @@ static void upload_raw_frame(gs_texture_t *tex,
frame->width, false);
break;
case CONVERT_444:
gs_texture_set_image(tex, frame->data[0],
frame->width, false);
break;
case CONVERT_NONE:
assert(false && "No conversion requested");
break;
@ -1514,14 +1534,15 @@ static const char *select_conversion_technique(enum video_format format)
case VIDEO_FORMAT_NV12:
return "NV12_Reverse";
break;
case VIDEO_FORMAT_I444:
return "I444_Reverse";
case VIDEO_FORMAT_Y800:
case VIDEO_FORMAT_BGRA:
case VIDEO_FORMAT_BGRX:
case VIDEO_FORMAT_RGBA:
case VIDEO_FORMAT_NONE:
case VIDEO_FORMAT_I444:
assert(false && "No conversion requested");
break;
}
@ -1581,6 +1602,19 @@ static bool update_async_texrender(struct obs_source *source,
set_eparami(conv, "int_v_plane_offset",
(int)source->async_plane_offset[1]);
gs_effect_set_val(gs_effect_get_param_by_name(conv, "color_matrix"),
frame->color_matrix, sizeof(float) * 16);
if (!frame->full_range) {
gs_eparam_t *min_param = gs_effect_get_param_by_name(
conv, "color_range_min");
gs_effect_set_val(min_param, frame->color_range_min,
sizeof(float) * 3);
gs_eparam_t *max_param = gs_effect_get_param_by_name(
conv, "color_range_max");
gs_effect_set_val(max_param, frame->color_range_max,
sizeof(float) * 3);
}
gs_ortho(0.f, (float)cx, 0.f, (float)cy, -100.f, 100.f);
gs_draw_sprite(tex, 0, cx, cy);
@ -1603,13 +1637,6 @@ bool update_async_texture(struct obs_source *source,
uint32_t linesize;
source->async_flip = frame->flip;
source->async_full_range = frame->full_range;
memcpy(source->async_color_matrix, frame->color_matrix,
sizeof(frame->color_matrix));
memcpy(source->async_color_range_min, frame->color_range_min,
sizeof frame->color_range_min);
memcpy(source->async_color_range_max, frame->color_range_max,
sizeof frame->color_range_max);
if (source->async_gpu_conversion && texrender)
return update_async_texrender(source, frame, tex, texrender);
@ -1624,13 +1651,11 @@ bool update_async_texture(struct obs_source *source,
return false;
if (type == CONVERT_420)
decompress_420((const uint8_t* const*)frame->data,
frame->linesize,
decompress_420(frame->data, frame->linesize,
0, frame->height, ptr, linesize);
else if (type == CONVERT_NV12)
decompress_nv12((const uint8_t* const*)frame->data,
frame->linesize,
decompress_nv12(frame->data, frame->linesize,
0, frame->height, ptr, linesize);
else if (type == CONVERT_422_Y)
@ -1646,8 +1671,7 @@ bool update_async_texture(struct obs_source *source,
}
static inline void obs_source_draw_texture(struct obs_source *source,
gs_effect_t *effect, float *color_matrix,
float const *color_range_min, float const *color_range_max)
gs_effect_t *effect)
{
gs_texture_t *tex = source->async_texture;
gs_eparam_t *param;
@ -1655,23 +1679,6 @@ static inline void obs_source_draw_texture(struct obs_source *source,
if (source->async_texrender)
tex = gs_texrender_get_texture(source->async_texrender);
if (color_range_min) {
size_t const size = sizeof(float) * 3;
param = gs_effect_get_param_by_name(effect, "color_range_min");
gs_effect_set_val(param, color_range_min, size);
}
if (color_range_max) {
size_t const size = sizeof(float) * 3;
param = gs_effect_get_param_by_name(effect, "color_range_max");
gs_effect_set_val(param, color_range_max, size);
}
if (color_matrix) {
param = gs_effect_get_param_by_name(effect, "color_matrix");
gs_effect_set_val(param, color_matrix, sizeof(float) * 16);
}
param = gs_effect_get_param_by_name(effect, "image");
gs_effect_set_texture(param, tex);
@ -1680,24 +1687,18 @@ static inline void obs_source_draw_texture(struct obs_source *source,
static void obs_source_draw_async_texture(struct obs_source *source)
{
gs_effect_t *effect = gs_get_effect();
bool yuv = format_is_yuv(source->async_format);
bool limited_range = yuv && !source->async_full_range;
const char *type = yuv ? "DrawMatrix" : "Draw";
gs_effect_t *effect = gs_get_effect();
bool def_draw = (!effect);
gs_technique_t *tech = NULL;
gs_technique_t *tech = NULL;
if (def_draw) {
effect = obs_get_base_effect(OBS_EFFECT_DEFAULT);
tech = gs_effect_get_technique(effect, type);
tech = gs_effect_get_technique(effect, "Draw");
gs_technique_begin(tech);
gs_technique_begin_pass(tech, 0);
}
obs_source_draw_texture(source, effect,
yuv ? source->async_color_matrix : NULL,
limited_range ? source->async_color_range_min : NULL,
limited_range ? source->async_color_range_max : NULL);
obs_source_draw_texture(source, effect);
if (def_draw) {
gs_technique_end_pass(tech);