From 85cc7c84bc38a8d3311437890b87fcb41c3efe75 Mon Sep 17 00:00:00 2001 From: jpark37 Date: Wed, 17 Jul 2019 21:11:18 -0700 Subject: [PATCH] libobs: obs-filters: Area upscale shader Add a separate shader for area upscaling to take advantage of bilinear filtering. Iterating over texels is unnecessary in the upscale case because a target pixel can only overlap 1 or 2 texels in X and Y directions. When only overlapping one texel, adjust UVs to sample texel center to avoid filtering. Also add "base_dimension" uniform to avoid unnecessary division. Intel HD Graphics 530, 644x478 -> 1323x1080: ~836 us -> ~232 us --- libobs/data/area.effect | 130 +++++++++++++++++++++-------- libobs/obs-scene.c | 19 ++++- libobs/obs-video.c | 7 +- plugins/obs-filters/scale-filter.c | 20 ++++- 4 files changed, 135 insertions(+), 41 deletions(-) diff --git a/libobs/data/area.effect b/libobs/data/area.effect index 04e3c3ab0..1b420e432 100644 --- a/libobs/data/area.effect +++ b/libobs/data/area.effect @@ -1,13 +1,29 @@ uniform float4x4 ViewProj; +uniform float2 base_dimension; uniform float2 base_dimension_i; uniform texture2d image; -struct VertInOut { +sampler_state textureSampler { + Filter = Linear; + AddressU = Clamp; + AddressV = Clamp; +}; + +struct VertData { float4 pos : POSITION; float2 uv : TEXCOORD0; }; -VertInOut VSDefault(VertInOut vert_in) +struct VertInOut { + float2 uv : TEXCOORD0; + float4 pos : POSITION; +}; + +struct FragData { + float2 uv : TEXCOORD0; +}; + +VertInOut VSDefault(VertData vert_in) { VertInOut vert_out; vert_out.pos = mul(float4(vert_in.pos.xyz, 1.0), ViewProj); @@ -15,50 +31,85 @@ VertInOut VSDefault(VertInOut vert_in) return vert_out; } -float4 PSDrawAreaRGBA(VertInOut vert_in) : TARGET +float4 PSDrawAreaRGBA(FragData frag_in) : TARGET { - float4 totalcolor = float4(0.0, 0.0, 0.0, 0.0); - - float2 uv = vert_in.uv; - float2 uvdelta = float2(ddx(uv.x), ddy(uv.y)); + float2 uv = frag_in.uv; + float2 uv_delta = float2(ddx(uv.x), ddy(uv.y)); // Handle potential OpenGL flip. - uvdelta.y = abs(uvdelta.y); + if (obs_glsl_compile) + uv_delta.y = abs(uv_delta.y); - float2 uvhalfdelta = 0.5 * uvdelta; - float2 uvmin = uv - uvhalfdelta; - float2 uvmax = uv + uvhalfdelta; + float2 uv_min = uv - 0.5 * uv_delta; + float2 uv_max = uv_min + uv_delta; - float2 imagesize = 1.0 / base_dimension_i; - float2 loadindexmin = floor(uvmin * imagesize); - float2 loadindexmax = floor(uvmax * imagesize); + float2 load_index_begin = floor(uv_min * base_dimension); + float2 load_index_end = ceil(uv_max * base_dimension); - float2 targetsize = 1.0 / uvdelta; - float2 targetpos = uv * targetsize; - float2 targetposmin = targetpos - 0.5; - float2 targetposmax = targetpos + 0.5; - float2 scale = base_dimension_i * targetsize; + float2 target_dimension = 1.0 / uv_delta; + float2 target_pos = uv * target_dimension; + float2 target_pos_min = target_pos - 0.5; + float2 target_pos_max = target_pos + 0.5; + float2 scale = base_dimension_i * target_dimension; - float loadindexy = loadindexmin.y; + float4 total_color = float4(0.0, 0.0, 0.0, 0.0); + + float load_index_y = load_index_begin.y; do { - float loadindexx = loadindexmin.x; + float source_y_min = load_index_y * scale.y; + float source_y_max = source_y_min + scale.y; + float y_min = max(source_y_min, target_pos_min.y); + float y_max = min(source_y_max, target_pos_max.y); + float height = y_max - y_min; + + float load_index_x = load_index_begin.x; do { - float2 loadindex = float2(loadindexx, loadindexy); - float2 potentialtargetmin = loadindex * scale; - float2 potentialtargetmax = potentialtargetmin + scale; - float2 targetmin = max(potentialtargetmin, targetposmin); - float2 targetmax = min(potentialtargetmax, targetposmax); - float area = (targetmax.x - targetmin.x) * (targetmax.y - targetmin.y); - float4 sample = image.Load(int3(loadindex, 0)); - totalcolor += area * sample; + float source_x_min = load_index_x * scale.x; + float source_x_max = source_x_min + scale.x; + float x_min = max(source_x_min, target_pos_min.x); + float x_max = min(source_x_max, target_pos_max.x); + float width = x_max - x_min; + float area = width * height; - ++loadindexx; - } while (loadindexx <= loadindexmax.x); + float4 color = image.Load(int3(load_index_x, load_index_y, 0)); + total_color += area * color; - ++loadindexy; - } while (loadindexy <= loadindexmax.y); + ++load_index_x; + } while (load_index_x < load_index_end.x); - return totalcolor; + ++load_index_y; + } while (load_index_y < load_index_end.y); + + return total_color; +} + +float4 PSDrawAreaUpscaleRGBA(FragData frag_in) : TARGET +{ + float2 uv = frag_in.uv; + float2 uv_delta = float2(ddx(uv.x), ddy(uv.y)); + + // Handle potential OpenGL flip. + if (obs_glsl_compile) + uv_delta.y = abs(uv_delta.y); + + float2 uv_min = uv - 0.5 * uv_delta; + float2 uv_max = uv_min + uv_delta; + + float2 load_index_first = floor(uv_min * base_dimension); + float2 load_index_last = ceil(uv_max * base_dimension) - 1.0; + + if (load_index_first.x < load_index_last.x) { + float uv_boundary_x = load_index_last.x * base_dimension_i.x; + uv.x = ((uv.x - uv_boundary_x) / uv_delta.x) * base_dimension_i.x + uv_boundary_x; + } else + uv.x = (load_index_first.x + 0.5) * base_dimension_i.x; + if (load_index_first.y < load_index_last.y) { + float uv_boundary_y = load_index_last.y * base_dimension_i.y; + uv.y = ((uv.y - uv_boundary_y) / uv_delta.y) * base_dimension_i.y + uv_boundary_y; + } else + uv.y = (load_index_first.y + 0.5) * base_dimension_i.y; + + return image.Sample(textureSampler, uv); } technique Draw @@ -66,6 +117,15 @@ technique Draw pass { vertex_shader = VSDefault(vert_in); - pixel_shader = PSDrawAreaRGBA(vert_in); + pixel_shader = PSDrawAreaRGBA(frag_in); + } +} + +technique DrawUpscale +{ + pass + { + vertex_shader = VSDefault(vert_in); + pixel_shader = PSDrawAreaUpscaleRGBA(frag_in); } } diff --git a/libobs/obs-scene.c b/libobs/obs-scene.c index 3a9d9233a..d4b1a8e1a 100644 --- a/libobs/obs-scene.c +++ b/libobs/obs-scene.c @@ -470,6 +470,7 @@ static void render_item_texture(struct obs_scene_item *item) enum obs_scale_type type = item->scale_filter; uint32_t cx = gs_texture_get_width(tex); uint32_t cy = gs_texture_get_height(tex); + const char *tech = "Draw"; if (type != OBS_SCALE_DISABLE) { if (type == OBS_SCALE_POINT) { @@ -481,6 +482,7 @@ static void render_item_texture(struct obs_scene_item *item) } else if (!close_float(item->output_scale.x, 1.0f, EPSILON) || !close_float(item->output_scale.y, 1.0f, EPSILON)) { gs_eparam_t *scale_param; + gs_eparam_t *scale_i_param; if (item->output_scale.x < 0.5f || item->output_scale.y < 0.5f) { @@ -491,15 +493,26 @@ static void render_item_texture(struct obs_scene_item *item) effect = obs->video.lanczos_effect; } else if (type == OBS_SCALE_AREA) { effect = obs->video.area_effect; + if ((item->output_scale.x >= 1.0f) && + (item->output_scale.y >= 1.0f)) + tech = "DrawUpscale"; } scale_param = gs_effect_get_param_by_name( - effect, "base_dimension_i"); + effect, "base_dimension"); if (scale_param) { + struct vec2 base_res_i = {(float)cx, (float)cy}; + + gs_effect_set_vec2(scale_param, &base_res_i); + } + + scale_i_param = gs_effect_get_param_by_name( + effect, "base_dimension_i"); + if (scale_i_param) { struct vec2 base_res_i = {1.0f / (float)cx, 1.0f / (float)cy}; - gs_effect_set_vec2(scale_param, &base_res_i); + gs_effect_set_vec2(scale_i_param, &base_res_i); } } } @@ -507,7 +520,7 @@ static void render_item_texture(struct obs_scene_item *item) gs_blend_state_push(); gs_blend_function(GS_BLEND_ONE, GS_BLEND_INVSRCALPHA); - while (gs_effect_loop(effect, "Draw")) + while (gs_effect_loop(effect, tech)) obs_source_draw(tex, 0, 0, 0, 0, 0); gs_blend_state_pop(); diff --git a/libobs/obs-video.c b/libobs/obs-video.c index bf29c4ad4..55ee81cae 100644 --- a/libobs/obs-video.c +++ b/libobs/obs-video.c @@ -208,8 +208,9 @@ static inline void render_output_texture(struct obs_core_video *video) gs_texture_t *target = video->output_texture; uint32_t width = gs_texture_get_width(target); uint32_t height = gs_texture_get_height(target); - struct vec2 base_i; + struct vec2 base, base_i; + vec2_set(&base, (float)video->base_width, (float)video->base_height); vec2_set(&base_i, 1.0f / (float)video->base_width, 1.0f / (float)video->base_height); @@ -225,6 +226,8 @@ static inline void render_output_texture(struct obs_core_video *video) gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image"); gs_eparam_t *matrix = gs_effect_get_param_by_name(effect, "color_matrix"); + gs_eparam_t *bres = + gs_effect_get_param_by_name(effect, "base_dimension"); gs_eparam_t *bres_i = gs_effect_get_param_by_name(effect, "base_dimension_i"); size_t passes, i; @@ -232,6 +235,8 @@ static inline void render_output_texture(struct obs_core_video *video) gs_set_render_target(target, NULL); set_render_size(width, height); + if (bres) + gs_effect_set_vec2(bres, &base); if (bres_i) gs_effect_set_vec2(bres_i, &base_i); diff --git a/plugins/obs-filters/scale-filter.c b/plugins/obs-filters/scale-filter.c index df659bf86..59726b7bf 100644 --- a/plugins/obs-filters/scale-filter.c +++ b/plugins/obs-filters/scale-filter.c @@ -36,7 +36,9 @@ struct scale_filter_data { gs_effect_t *effect; gs_eparam_t *image_param; gs_eparam_t *dimension_param; + gs_eparam_t *dimension_i_param; gs_eparam_t *undistort_factor_param; + struct vec2 dimension; struct vec2 dimension_i; double undistort_factor; int cx_in; @@ -49,6 +51,7 @@ struct scale_filter_data { bool target_valid; bool valid; bool undistort; + bool upscale; bool base_canvas_resolution; }; @@ -203,6 +206,7 @@ static void scale_filter_tick(void *data, float seconds) filter->cy_out = filter->cy_in; } + vec2_set(&filter->dimension, (float)cx, (float)cy); vec2_set(&filter->dimension_i, 1.0f / (float)cx, 1.0f / (float)cy); if (filter->undistort) { @@ -211,6 +215,8 @@ static void scale_filter_tick(void *data, float seconds) filter->undistort_factor = 1.0; } + filter->upscale = false; + /* ------------------------- */ lower_than_2x = filter->cx_out < cx / 2 || filter->cy_out < cy / 2; @@ -232,6 +238,8 @@ static void scale_filter_tick(void *data, float seconds) break; case OBS_SCALE_AREA: type = OBS_EFFECT_AREA; + if ((filter->cx_out >= cx) && (filter->cy_out >= cy)) + filter->upscale = true; break; } } @@ -242,9 +250,12 @@ static void scale_filter_tick(void *data, float seconds) if (type != OBS_EFFECT_DEFAULT) { filter->dimension_param = gs_effect_get_param_by_name( + filter->effect, "base_dimension"); + filter->dimension_i_param = gs_effect_get_param_by_name( filter->effect, "base_dimension_i"); } else { filter->dimension_param = NULL; + filter->dimension_i_param = NULL; } if (type == OBS_EFFECT_BICUBIC || type == OBS_EFFECT_LANCZOS) { @@ -260,7 +271,9 @@ static void scale_filter_tick(void *data, float seconds) static void scale_filter_render(void *data, gs_effect_t *effect) { struct scale_filter_data *filter = data; - const char *technique = filter->undistort ? "DrawUndistort" : "Draw"; + const char *technique = + filter->undistort ? "DrawUndistort" + : (filter->upscale ? "DrawUpscale" : "Draw"); if (!filter->valid || !filter->target_valid) { obs_source_skip_video_filter(filter->context); @@ -272,7 +285,10 @@ static void scale_filter_render(void *data, gs_effect_t *effect) return; if (filter->dimension_param) - gs_effect_set_vec2(filter->dimension_param, + gs_effect_set_vec2(filter->dimension_param, &filter->dimension); + + if (filter->dimension_i_param) + gs_effect_set_vec2(filter->dimension_i_param, &filter->dimension_i); if (filter->undistort_factor_param)