diff --git a/libobs/data/format_conversion.effect b/libobs/data/format_conversion.effect index f1d284144..0f096cd21 100644 --- a/libobs/data/format_conversion.effect +++ b/libobs/data/format_conversion.effect @@ -15,25 +15,12 @@ along with this program. If not, see . ******************************************************************************/ -//#define DEBUGGING - -uniform float u_plane_offset; -uniform float v_plane_offset; - uniform float width; uniform float height; uniform float width_i; -uniform float height_i; uniform float width_d2; -uniform float height_d2; uniform float width_d2_i; -uniform float height_d2_i; -uniform float input_width; -uniform float input_height; -uniform float input_width_i; -uniform float input_height_i; uniform float input_width_i_d2; -uniform float input_height_i_d2; uniform int int_width; uniform int int_input_width; @@ -65,8 +52,17 @@ struct VertTexPos { float4 pos : POSITION; }; +struct VertTexPosWide { + float3 uuv : TEXCOORD0; + float4 pos : POSITION; +}; + struct FragTex { - float2 uv : TEXCOORD0; + float2 uv : TEXCOORD0; +}; + +struct FragTexWide { + float3 uuv : TEXCOORD0; }; FragPos VSPos(uint id : VERTEXID) @@ -82,7 +78,7 @@ FragPos VSPos(uint id : VERTEXID) return vert_out; } -VertTexPos VSPosTex(uint id : VERTEXID) +VertTexPos VSTexPos(uint id : VERTEXID) { float idHigh = float(id >> 1); float idLow = float(id & uint(1)); @@ -99,225 +95,76 @@ VertTexPos VSPosTex(uint id : VERTEXID) return vert_out; } +VertTexPosWide VSTexPosLeft(uint id : VERTEXID) +{ + float idHigh = float(id >> 1); + float idLow = float(id & uint(1)); + + float x = idHigh * 4.0 - 1.0; + float y = idLow * 4.0 - 1.0; + + float u_right = idHigh * 2.0; + float u_left = u_right - width_i; + float v = obs_glsl_compile ? (idLow * 2.0) : (1.0 - idLow * 2.0); + + VertTexPosWide vert_out; + vert_out.uuv.x = u_left; + vert_out.uuv.y = u_right; + vert_out.uuv.z = v; + vert_out.pos = float4(x, y, 0.0, 1.0); + return vert_out; +} + /* used to prevent internal GPU precision issues width fmod in particular */ #define PRECISION_OFFSET 0.2 -float4 PSNV12(FragTex frag_in) : TARGET -{ - float v_mul = floor(frag_in.uv.y * input_height); - - float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0; - byte_offset += PRECISION_OFFSET; - - if (byte_offset < u_plane_offset) { -#ifdef DEBUGGING - return float4(1.0, 1.0, 1.0, 1.0); -#endif - - float lum_u = floor(fmod(byte_offset, width)) * width_i; - float lum_v = floor(byte_offset * width_i) * height_i; - - /* move to texel centers to sample the 4 pixels properly */ - lum_u += width_i * 0.5; - lum_v += height_i * 0.5; - - float2 sample_pos0 = float2(lum_u, lum_v); - float2 sample_pos1 = float2(lum_u += width_i, lum_v); - float2 sample_pos2 = float2(lum_u += width_i, lum_v); - float2 sample_pos3 = float2(lum_u + width_i, lum_v); - - float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb; - float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb; - float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb; - float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb; - - float4 out_val = float4( - dot(color_vec_y.xyz, rgb0) + color_vec_y.w, - dot(color_vec_y.xyz, rgb1) + color_vec_y.w, - dot(color_vec_y.xyz, rgb2) + color_vec_y.w, - dot(color_vec_y.xyz, rgb3) + color_vec_y.w - ); - - return out_val; - } else { -#ifdef DEBUGGING - return float4(0.5, 0.2, 0.5, 0.2); -#endif - - float new_offset = byte_offset - u_plane_offset; - - float ch_u = floor(fmod(new_offset, width)) * width_i; - float ch_v = floor(new_offset * width_i) * height_d2_i; - float width_i2 = width_i*2.0; - - /* move to the borders of each set of 4 pixels to force it - * to do bilinear averaging */ - ch_u += width_i; - ch_v += height_i; - - float2 sample_pos0 = float2(ch_u, ch_v); - float2 sample_pos1 = float2(ch_u + width_i2, ch_v); - - float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb; - float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb; - - return float4( - dot(color_vec_u.xyz, rgb0) + color_vec_u.w, - dot(color_vec_v.xyz, rgb0) + color_vec_v.w, - dot(color_vec_u.xyz, rgb1) + color_vec_u.w, - dot(color_vec_v.xyz, rgb1) + color_vec_v.w - ); - } -} - -float PSNV12_Y(FragPos frag_in) : TARGET +float PS_Y(FragPos frag_in) : TARGET { float3 rgb = image.Load(int3(frag_in.pos.xy, 0)).rgb; float y = dot(color_vec_y.xyz, rgb) + color_vec_y.w; return y; } -float2 PSNV12_UV(FragTex frag_in) : TARGET +float2 PS_UV_Wide(FragTexWide frag_in) : TARGET { - float3 rgb = image.Sample(def_sampler, frag_in.uv).rgb; + float3 rgb_left = image.Sample(def_sampler, frag_in.uuv.xz).rgb; + float3 rgb_right = image.Sample(def_sampler, frag_in.uuv.yz).rgb; + float3 rgb = (rgb_left + rgb_right) * 0.5; float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w; float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w; return float2(u, v); } -float4 PSPlanar420(FragTex frag_in) : TARGET +float PS_U(FragTex frag_in) : TARGET { - float v_mul = floor(frag_in.uv.y * input_height); - - float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0; - byte_offset += PRECISION_OFFSET; - - float2 sample_pos0, sample_pos1, sample_pos2, sample_pos3; - - if (byte_offset < u_plane_offset) { -#ifdef DEBUGGING - return float4(1.0, 1.0, 1.0, 1.0); -#endif - - float lum_u = floor(fmod(byte_offset, width)) * width_i; - float lum_v = floor(byte_offset * width_i) * height_i; - - /* move to texel centers to sample the 4 pixels properly */ - lum_u += width_i * 0.5; - lum_v += height_i * 0.5; - - sample_pos0 = float2(lum_u, lum_v); - sample_pos1 = float2(lum_u += width_i, lum_v); - sample_pos2 = float2(lum_u += width_i, lum_v); - sample_pos3 = float2(lum_u + width_i, lum_v); - - } else { -#ifdef DEBUGGING - return ((byte_offset < v_plane_offset) ? - float4(0.5, 0.5, 0.5, 0.5) : - float4(0.2, 0.2, 0.2, 0.2)); -#endif - - float new_offset = byte_offset - - ((byte_offset < v_plane_offset) ? - u_plane_offset : v_plane_offset); - - float ch_u = floor(fmod(new_offset, width_d2)) * width_d2_i; - float ch_v = floor(new_offset * width_d2_i) * height_d2_i; - float width_i2 = width_i*2.0; - - /* move to the borders of each set of 4 pixels to force it - * to do bilinear averaging */ - ch_u += width_i; - ch_v += height_i; - - /* set up coordinates for next chroma line, in case - * (width / 2) % 4 == 2, i.e. the current set of 4 pixels is split - * between the current and the next chroma line; do note that the next - * chroma line is two source lines below the current source line */ - float ch_u_n = 0. + width_i; - float ch_v_n = ch_v + height_i * 3; - - sample_pos0 = float2(ch_u, ch_v); - sample_pos1 = float2(ch_u += width_i2, ch_v); - - ch_u += width_i2; - // check if ch_u overflowed the current source and chroma line - if (ch_u > 1.0) { - sample_pos2 = float2(ch_u_n, ch_v_n); - sample_pos2 = float2(ch_u_n + width_i2, ch_v_n); - } else { - sample_pos2 = float2(ch_u, ch_v); - sample_pos3 = float2(ch_u + width_i2, ch_v); - } - } - - float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb; - float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb; - float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb; - float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb; - - float4 color_vec; - if (byte_offset < u_plane_offset) - color_vec = color_vec_y; - else if (byte_offset < v_plane_offset) - color_vec = color_vec_u; - else - color_vec = color_vec_v; - - return float4( - dot(color_vec.xyz, rgb0) + color_vec.w, - dot(color_vec.xyz, rgb1) + color_vec.w, - dot(color_vec.xyz, rgb2) + color_vec.w, - dot(color_vec.xyz, rgb3) + color_vec.w - ); + float3 rgb = image.Sample(def_sampler, frag_in.uv).rgb; + float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w; + return u; } -float4 PSPlanar444(FragTex frag_in) : TARGET +float PS_V(FragTex frag_in) : TARGET { - float v_mul = floor(frag_in.uv.y * input_height); + float3 rgb = image.Sample(def_sampler, frag_in.uv).rgb; + float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w; + return v; +} - float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0; - byte_offset += PRECISION_OFFSET; +float PS_U_Wide(FragTexWide frag_in) : TARGET +{ + float3 rgb_left = image.Sample(def_sampler, frag_in.uuv.xz).rgb; + float3 rgb_right = image.Sample(def_sampler, frag_in.uuv.yz).rgb; + float3 rgb = (rgb_left + rgb_right) * 0.5; + float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w; + return u; +} - float new_byte_offset = byte_offset; - - if (byte_offset >= v_plane_offset) - new_byte_offset -= v_plane_offset; - else if (byte_offset >= u_plane_offset) - new_byte_offset -= u_plane_offset; - - float u_val = floor(fmod(new_byte_offset, width)) * width_i; - float v_val = floor(new_byte_offset * width_i) * height_i; - - /* move to texel centers to sample the 4 pixels properly */ - u_val += width_i * 0.5; - v_val += height_i * 0.5; - - float2 sample_pos0 = float2(u_val, v_val); - float2 sample_pos1 = float2(u_val += width_i, v_val); - float2 sample_pos2 = float2(u_val += width_i, v_val); - float2 sample_pos3 = float2(u_val + width_i, v_val); - - float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb; - float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb; - float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb; - float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb; - - float4 color_vec; - if (byte_offset < u_plane_offset) - color_vec = color_vec_y; - else if (byte_offset < v_plane_offset) - color_vec = color_vec_u; - else - color_vec = color_vec_v; - - return float4( - dot(color_vec.xyz, rgb0) + color_vec.w, - dot(color_vec.xyz, rgb1) + color_vec.w, - dot(color_vec.xyz, rgb2) + color_vec.w, - dot(color_vec.xyz, rgb3) + color_vec.w - ); +float PS_V_Wide(FragTexWide frag_in) : TARGET +{ + float3 rgb_left = image.Sample(def_sampler, frag_in.uuv.xz).rgb; + float3 rgb_right = image.Sample(def_sampler, frag_in.uuv.yz).rgb; + float3 rgb = (rgb_left + rgb_right) * 0.5; + float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w; + return v; } float GetIntOffsetColor(int offset) @@ -473,30 +320,48 @@ float4 PSBGR3_Full(FragTex frag_in) : TARGET return float4(rgb, 1.0); } -technique Planar420 +technique Planar_Y { pass { - vertex_shader = VSPosTex(id); - pixel_shader = PSPlanar420(frag_in); + vertex_shader = VSPos(id); + pixel_shader = PS_Y(frag_in); } } -technique Planar444 +technique Planar_U { pass { - vertex_shader = VSPosTex(id); - pixel_shader = PSPlanar444(frag_in); + vertex_shader = VSTexPos(id); + pixel_shader = PS_U(frag_in); } } -technique NV12 +technique Planar_V { pass { - vertex_shader = VSPosTex(id); - pixel_shader = PSNV12(frag_in); + vertex_shader = VSTexPos(id); + pixel_shader = PS_V(frag_in); + } +} + +technique Planar_U_Left +{ + pass + { + vertex_shader = VSTexPosLeft(id); + pixel_shader = PS_U_Wide(frag_in); + } +} + +technique Planar_V_Left +{ + pass + { + vertex_shader = VSTexPosLeft(id); + pixel_shader = PS_V_Wide(frag_in); } } @@ -505,7 +370,7 @@ technique NV12_Y pass { vertex_shader = VSPos(id); - pixel_shader = PSNV12_Y(frag_in); + pixel_shader = PS_Y(frag_in); } } @@ -513,8 +378,8 @@ technique NV12_UV { pass { - vertex_shader = VSPosTex(id); - pixel_shader = PSNV12_UV(frag_in); + vertex_shader = VSTexPosLeft(id); + pixel_shader = PS_UV_Wide(frag_in); } } @@ -522,7 +387,7 @@ technique UYVY_Reverse { pass { - vertex_shader = VSPosTex(id); + vertex_shader = VSTexPos(id); pixel_shader = PSPacked422_Reverse(frag_in, 2, 0, 1, 3); } } @@ -531,7 +396,7 @@ technique YUY2_Reverse { pass { - vertex_shader = VSPosTex(id); + vertex_shader = VSTexPos(id); pixel_shader = PSPacked422_Reverse(frag_in, 1, 3, 2, 0); } } @@ -540,7 +405,7 @@ technique YVYU_Reverse { pass { - vertex_shader = VSPosTex(id); + vertex_shader = VSTexPos(id); pixel_shader = PSPacked422_Reverse(frag_in, 3, 1, 2, 0); } } @@ -549,7 +414,7 @@ technique I420_Reverse { pass { - vertex_shader = VSPosTex(id); + vertex_shader = VSTexPos(id); pixel_shader = PSPlanar420_Reverse(frag_in); } } @@ -558,7 +423,7 @@ technique I422_Reverse { pass { - vertex_shader = VSPosTex(id); + vertex_shader = VSTexPos(id); pixel_shader = PSPlanar422_Reverse(frag_in); } } @@ -567,7 +432,7 @@ technique I444_Reverse { pass { - vertex_shader = VSPosTex(id); + vertex_shader = VSTexPos(id); pixel_shader = PSPlanar444_Reverse(frag_in); } } @@ -576,7 +441,7 @@ technique NV12_Reverse { pass { - vertex_shader = VSPosTex(id); + vertex_shader = VSTexPos(id); pixel_shader = PSNV12_Reverse(frag_in); } } @@ -585,7 +450,7 @@ technique Y800_Limited { pass { - vertex_shader = VSPosTex(id); + vertex_shader = VSTexPos(id); pixel_shader = PSY800_Limited(frag_in); } } @@ -594,7 +459,7 @@ technique Y800_Full { pass { - vertex_shader = VSPosTex(id); + vertex_shader = VSTexPos(id); pixel_shader = PSY800_Full(frag_in); } } @@ -603,7 +468,7 @@ technique RGB_Limited { pass { - vertex_shader = VSPosTex(id); + vertex_shader = VSTexPos(id); pixel_shader = PSRGB_Limited(frag_in); } } @@ -612,7 +477,7 @@ technique BGR3_Limited { pass { - vertex_shader = VSPosTex(id); + vertex_shader = VSTexPos(id); pixel_shader = PSBGR3_Limited(frag_in); } } @@ -621,7 +486,7 @@ technique BGR3_Full { pass { - vertex_shader = VSPosTex(id); + vertex_shader = VSTexPos(id); pixel_shader = PSBGR3_Full(frag_in); } } diff --git a/libobs/obs-internal.h b/libobs/obs-internal.h index 06377c25f..4efb93469 100644 --- a/libobs/obs-internal.h +++ b/libobs/obs-internal.h @@ -37,6 +37,7 @@ #include "obs.h" #define NUM_TEXTURES 2 +#define NUM_CHANNELS 3 #define MICROSECOND_DEN 1000000 #define NUM_ENCODE_TEXTURES 3 #define NUM_ENCODE_TEXTURE_FRAMES_TO_WAIT 1 @@ -235,11 +236,10 @@ struct obs_tex_frame { struct obs_core_video { graphics_t *graphics; - gs_stagesurf_t *copy_surfaces[NUM_TEXTURES]; + gs_stagesurf_t *copy_surfaces[NUM_TEXTURES][NUM_CHANNELS]; gs_texture_t *render_texture; gs_texture_t *output_texture; - gs_texture_t *convert_texture; - gs_texture_t *convert_uv_texture; + gs_texture_t *convert_textures[NUM_CHANNELS]; bool texture_rendered; bool textures_copied[NUM_TEXTURES]; bool texture_converted; @@ -258,7 +258,7 @@ struct obs_core_video { gs_effect_t *bilinear_lowres_effect; gs_effect_t *premultiplied_alpha_effect; gs_samplerstate_t *point_sampler; - gs_stagesurf_t *mapped_surface; + gs_stagesurf_t *mapped_surfaces[NUM_CHANNELS]; int cur_texture; long raw_active; long gpu_encoder_active; @@ -283,11 +283,9 @@ struct obs_core_video { bool thread_initialized; bool gpu_conversion; - const char *conversion_tech; - uint32_t conversion_height; - uint32_t plane_offsets[3]; - uint32_t plane_sizes[3]; - uint32_t plane_linewidth[3]; + const char *conversion_techs[NUM_CHANNELS]; + bool conversion_needed; + float conversion_width_i; uint32_t output_width; uint32_t output_height; diff --git a/libobs/obs-video.c b/libobs/obs-video.c index b402c3be8..99bf4f9fe 100644 --- a/libobs/obs-video.c +++ b/libobs/obs-video.c @@ -109,9 +109,11 @@ static inline void set_render_size(uint32_t width, uint32_t height) static inline void unmap_last_surface(struct obs_core_video *video) { - if (video->mapped_surface) { - gs_stagesurface_unmap(video->mapped_surface); - video->mapped_surface = NULL; + for (int c = 0; c < NUM_CHANNELS; ++c) { + if (video->mapped_surfaces[c]) { + gs_stagesurface_unmap(video->mapped_surfaces[c]); + video->mapped_surfaces[c] = NULL; + } } } @@ -264,10 +266,24 @@ static inline gs_texture_t *render_output_texture(struct obs_core_video *video) return target; } -static inline void set_eparam(gs_effect_t *effect, const char *name, float val) +static void render_convert_plane(gs_effect_t *effect, gs_texture_t *texture, + gs_texture_t *target, const char *tech_name) { - gs_eparam_t *param = gs_effect_get_param_by_name(effect, name); - gs_effect_set_float(param, val); + gs_technique_t *tech = gs_effect_get_technique(effect, tech_name); + + const uint32_t width = gs_texture_get_width(target); + const uint32_t height = gs_texture_get_height(target); + + gs_set_render_target(target, NULL); + set_render_size(width, height); + + size_t passes = gs_technique_begin(tech); + for (size_t i = 0; i < passes; i++) { + gs_technique_begin_pass(tech, i); + gs_draw(GS_TRIS, 0, 3); + gs_technique_end_pass(tech); + } + gs_technique_end(tech); } static const char *render_convert_texture_name = "render_convert_texture"; @@ -276,11 +292,6 @@ static void render_convert_texture(struct obs_core_video *video, { profile_start(render_convert_texture_name); - gs_texture_t *target = video->convert_texture; - float fwidth = (float)video->output_width; - float fheight = (float)video->output_height; - size_t passes, i; - gs_effect_t *effect = video->conversion_effect; gs_eparam_t *color_vec_y = gs_effect_get_param_by_name(effect, "color_vec_y"); @@ -289,20 +300,7 @@ static void render_convert_texture(struct obs_core_video *video, gs_eparam_t *color_vec_v = gs_effect_get_param_by_name(effect, "color_vec_v"); gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image"); - gs_technique_t *tech = - gs_effect_get_technique(effect, video->conversion_tech); - - set_eparam(effect, "u_plane_offset", (float)video->plane_offsets[1]); - set_eparam(effect, "v_plane_offset", (float)video->plane_offsets[2]); - set_eparam(effect, "width", fwidth); - set_eparam(effect, "height", fheight); - set_eparam(effect, "width_i", 1.0f / fwidth); - set_eparam(effect, "height_i", 1.0f / fheight); - set_eparam(effect, "width_d2", fwidth * 0.5f); - set_eparam(effect, "height_d2", fheight * 0.5f); - set_eparam(effect, "width_d2_i", 1.0f / (fwidth * 0.5f)); - set_eparam(effect, "height_d2_i", 1.0f / (fheight * 0.5f)); - set_eparam(effect, "input_height", (float)video->conversion_height); + gs_eparam_t *width_i = gs_effect_get_param_by_name(effect, "width_i"); struct vec4 vec_y, vec_u, vec_v; vec4_set(&vec_y, video->color_matrix[4], video->color_matrix[5], @@ -311,23 +309,39 @@ static void render_convert_texture(struct obs_core_video *video, video->color_matrix[2], video->color_matrix[3]); vec4_set(&vec_v, video->color_matrix[8], video->color_matrix[9], video->color_matrix[10], video->color_matrix[11]); - gs_effect_set_vec4(color_vec_y, &vec_y); - gs_effect_set_vec4(color_vec_u, &vec_u); - gs_effect_set_vec4(color_vec_v, &vec_v); - - gs_effect_set_texture(image, texture); - - gs_set_render_target(target, NULL); - set_render_size(video->output_width, video->conversion_height); gs_enable_blending(false); - passes = gs_technique_begin(tech); - for (i = 0; i < passes; i++) { - gs_technique_begin_pass(tech, i); - gs_draw(GS_TRIS, 0, 3); - gs_technique_end_pass(tech); + + if (video->convert_textures[0]) { + gs_effect_set_texture(image, texture); + gs_effect_set_vec4(color_vec_y, &vec_y); + render_convert_plane(effect, texture, + video->convert_textures[0], + video->conversion_techs[0]); + + if (video->convert_textures[1]) { + gs_effect_set_texture(image, texture); + gs_effect_set_vec4(color_vec_u, &vec_u); + if (!video->convert_textures[2]) + gs_effect_set_vec4(color_vec_v, &vec_v); + gs_effect_set_float(width_i, video->conversion_width_i); + render_convert_plane(effect, texture, + video->convert_textures[1], + video->conversion_techs[1]); + + if (video->convert_textures[2]) { + gs_effect_set_texture(image, texture); + gs_effect_set_vec4(color_vec_v, &vec_v); + gs_effect_set_float(width_i, + video->conversion_width_i); + render_convert_plane( + effect, texture, + video->convert_textures[2], + video->conversion_techs[2]); + } + } } - gs_technique_end(tech); + gs_enable_blending(true); video->texture_converted = true; @@ -335,90 +349,32 @@ static void render_convert_texture(struct obs_core_video *video, profile_end(render_convert_texture_name); } -static void render_nv12(struct obs_core_video *video, gs_texture_t *texture, - gs_texture_t *target, const char *tech_name, - uint32_t width, uint32_t height) -{ - gs_effect_t *effect = video->conversion_effect; - gs_eparam_t *color_vec_y = - gs_effect_get_param_by_name(effect, "color_vec_y"); - gs_eparam_t *color_vec_u = - gs_effect_get_param_by_name(effect, "color_vec_u"); - gs_eparam_t *color_vec_v = - gs_effect_get_param_by_name(effect, "color_vec_v"); - gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image"); - gs_technique_t *tech = gs_effect_get_technique(effect, tech_name); - size_t passes, i; - - struct vec4 vec_y, vec_u, vec_v; - vec4_set(&vec_y, video->color_matrix[4], video->color_matrix[5], - video->color_matrix[6], video->color_matrix[7]); - vec4_set(&vec_u, video->color_matrix[0], video->color_matrix[1], - video->color_matrix[2], video->color_matrix[3]); - vec4_set(&vec_v, video->color_matrix[8], video->color_matrix[9], - video->color_matrix[10], video->color_matrix[11]); - gs_effect_set_vec4(color_vec_y, &vec_y); - gs_effect_set_vec4(color_vec_u, &vec_u); - gs_effect_set_vec4(color_vec_v, &vec_v); - - gs_effect_set_texture(image, texture); - - gs_set_render_target(target, NULL); - set_render_size(width, height); - - gs_enable_blending(false); - passes = gs_technique_begin(tech); - for (i = 0; i < passes; i++) { - gs_technique_begin_pass(tech, i); - gs_draw(GS_TRIS, 0, 3); - gs_technique_end_pass(tech); - } - gs_technique_end(tech); - gs_enable_blending(true); -} - -static const char *render_convert_nv12_name = "render_convert_texture_nv12"; -static void render_convert_texture_nv12(struct obs_core_video *video, - gs_texture_t *texture) -{ - profile_start(render_convert_nv12_name); - - render_nv12(video, texture, video->convert_texture, "NV12_Y", - video->output_width, video->output_height); - render_nv12(video, texture, video->convert_uv_texture, "NV12_UV", - video->output_width / 2, video->output_height / 2); - - video->texture_converted = true; - - profile_end(render_convert_nv12_name); -} - static const char *stage_output_texture_name = "stage_output_texture"; static inline void stage_output_texture(struct obs_core_video *video, - gs_texture_t *texture, int cur_texture) + int cur_texture) { profile_start(stage_output_texture_name); - bool texture_ready; - gs_stagesurf_t *copy = video->copy_surfaces[cur_texture]; - - if (video->gpu_conversion) { - texture = video->convert_texture; - texture_ready = video->texture_converted; - } else { - texture_ready = true; - } - unmap_last_surface(video); - if (!texture_ready) - goto end; + if (!video->gpu_conversion) { + gs_stagesurf_t *copy = video->copy_surfaces[cur_texture][0]; + if (copy) + gs_stage_texture(copy, video->output_texture); - gs_stage_texture(copy, texture); + video->textures_copied[cur_texture] = true; + } else if (video->texture_converted) { + for (int i = 0; i < NUM_CHANNELS; i++) { + gs_stagesurf_t *copy = + video->copy_surfaces[cur_texture][i]; + if (copy) + gs_stage_texture(copy, + video->convert_textures[i]); + } - video->textures_copied[cur_texture] = true; + video->textures_copied[cur_texture] = true; + } -end: profile_end(stage_output_texture_name); } @@ -458,13 +414,13 @@ static inline bool queue_frame(struct obs_core_video *video, bool raw_active, * reason. otherwise, it goes to the 'duplicate' case above, which * will ensure better performance. */ if (raw_active || vframe_info->count > 1) { - gs_copy_texture(tf.tex, video->convert_texture); + gs_copy_texture(tf.tex, video->convert_textures[0]); } else { - gs_texture_t *tex = video->convert_texture; - gs_texture_t *tex_uv = video->convert_uv_texture; + gs_texture_t *tex = video->convert_textures[0]; + gs_texture_t *tex_uv = video->convert_textures[1]; - video->convert_texture = tf.tex; - video->convert_uv_texture = tf.tex_uv; + video->convert_textures[0] = tf.tex; + video->convert_textures[1] = tf.tex_uv; tf.tex = tex; tf.tex_uv = tex_uv; @@ -529,17 +485,12 @@ static inline void render_video(struct obs_core_video *video, bool raw_active, gs_texture_t *texture = render_output_texture(video); #ifdef _WIN32 - if (gpu_active) { + if (gpu_active) gs_flush(); - } #endif - if (video->gpu_conversion) { - if (video->using_nv12_tex) - render_convert_texture_nv12(video, texture); - else - render_convert_texture(video, texture); - } + if (video->gpu_conversion) + render_convert_texture(video, texture); #ifdef _WIN32 if (gpu_active) { @@ -547,8 +498,9 @@ static inline void render_video(struct obs_core_video *video, bool raw_active, output_gpu_encoders(video, raw_active); } #endif + if (raw_active) - stage_output_texture(video, texture, cur_texture); + stage_output_texture(video, cur_texture); } gs_set_render_target(NULL, NULL); @@ -560,73 +512,41 @@ static inline void render_video(struct obs_core_video *video, bool raw_active, static inline bool download_frame(struct obs_core_video *video, int prev_texture, struct video_data *frame) { - gs_stagesurf_t *surface = video->copy_surfaces[prev_texture]; - if (!video->textures_copied[prev_texture]) return false; - if (!gs_stagesurface_map(surface, &frame->data[0], &frame->linesize[0])) - return false; + for (int channel = 0; channel < NUM_CHANNELS; ++channel) { + gs_stagesurf_t *surface = + video->copy_surfaces[prev_texture][channel]; + if (surface) { + if (!gs_stagesurface_map(surface, &frame->data[channel], + &frame->linesize[channel])) + return false; - video->mapped_surface = surface; + video->mapped_surfaces[channel] = surface; + } + } return true; } -static inline uint32_t calc_linesize(uint32_t pos, uint32_t linesize) +static const uint8_t *set_gpu_converted_plane(uint32_t width, uint32_t height, + uint32_t linesize_input, + uint32_t linesize_output, + const uint8_t *in, uint8_t *out) { - uint32_t size = pos % linesize; - return size ? size : linesize; -} - -static void copy_dealign(uint8_t *dst, uint32_t dst_pos, uint32_t dst_linesize, - const uint8_t *src, uint32_t src_pos, - uint32_t src_linesize, uint32_t remaining) -{ - while (remaining) { - uint32_t src_remainder = src_pos % src_linesize; - uint32_t dst_offset = dst_linesize - src_remainder; - uint32_t src_offset = src_linesize - src_remainder; - - if (remaining < dst_offset) { - memcpy(dst + dst_pos, src + src_pos, remaining); - src_pos += remaining; - dst_pos += remaining; - remaining = 0; - } else { - memcpy(dst + dst_pos, src + src_pos, dst_offset); - src_pos += src_offset; - dst_pos += dst_offset; - remaining -= dst_offset; + if ((width == linesize_input) && (width == linesize_output)) { + size_t total = width * height; + memcpy(out, in, total); + in += total; + } else { + for (size_t y = 0; y < height; y++) { + memcpy(out, in, width); + out += linesize_output; + in += linesize_input; } } -} -static inline uint32_t make_aligned_linesize_offset(uint32_t offset, - uint32_t dst_linesize, - uint32_t src_linesize) -{ - uint32_t remainder = offset % dst_linesize; - return (offset / dst_linesize) * src_linesize + remainder; -} - -static void fix_gpu_converted_alignment(struct obs_core_video *video, - struct video_frame *output, - const struct video_data *input) -{ - uint32_t src_linesize = input->linesize[0]; - uint32_t dst_linesize = output->linesize[0] * 4; - uint32_t src_pos = 0; - - for (size_t i = 0; i < 3; i++) { - if (video->plane_linewidth[i] == 0) - break; - - src_pos = make_aligned_linesize_offset( - video->plane_offsets[i], dst_linesize, src_linesize); - - copy_dealign(output->data[i], 0, dst_linesize, input->data[0], - src_pos, src_linesize, video->plane_sizes[i]); - } + return in; } static void set_gpu_converted_data(struct obs_core_video *video, @@ -634,41 +554,91 @@ static void set_gpu_converted_data(struct obs_core_video *video, const struct video_data *input, const struct video_output_info *info) { - if (input->linesize[0] == video->output_width * 4) { - struct video_frame frame; + if (video->using_nv12_tex) { + const uint32_t width = info->width; + const uint32_t height = info->height; - for (size_t i = 0; i < 3; i++) { - if (video->plane_linewidth[i] == 0) - break; - - frame.linesize[i] = video->plane_linewidth[i]; - frame.data[i] = - input->data[0] + video->plane_offsets[i]; - } - - video_frame_copy(output, &frame, info->format, info->height); - - } else if (video->using_nv12_tex) { - size_t width = info->width; - size_t height = info->height; - size_t height_d2 = height / 2; - uint8_t *out_y = output->data[0]; - uint8_t *out_uv = output->data[1]; - uint8_t *in = input->data[0]; - - for (size_t y = 0; y < height; y++) { - memcpy(out_y, in, width); - out_y += output->linesize[0]; - in += input->linesize[0]; - } - for (size_t y = 0; y < height_d2; y++) { - memcpy(out_uv, in, width); - out_uv += output->linesize[0]; - in += input->linesize[0]; - } + const uint8_t *const in_uv = set_gpu_converted_plane( + width, height, input->linesize[0], output->linesize[0], + input->data[0], output->data[0]); + const uint32_t height_d2 = height / 2; + set_gpu_converted_plane(width, height_d2, input->linesize[0], + output->linesize[1], in_uv, + output->data[1]); } else { - fix_gpu_converted_alignment(video, output, input); + switch (info->format) { + case VIDEO_FORMAT_I420: { + const uint32_t width = info->width; + const uint32_t height = info->height; + + set_gpu_converted_plane(width, height, + input->linesize[0], + output->linesize[0], + input->data[0], + output->data[0]); + + const uint32_t width_d2 = width / 2; + const uint32_t height_d2 = height / 2; + + set_gpu_converted_plane(width_d2, height_d2, + input->linesize[1], + output->linesize[1], + input->data[1], + output->data[1]); + + set_gpu_converted_plane(width_d2, height_d2, + input->linesize[2], + output->linesize[2], + input->data[2], + output->data[2]); + + break; + } + case VIDEO_FORMAT_NV12: { + const uint32_t width = info->width; + const uint32_t height = info->height; + + set_gpu_converted_plane(width, height, + input->linesize[0], + output->linesize[0], + input->data[0], + output->data[0]); + + const uint32_t height_d2 = height / 2; + set_gpu_converted_plane(width, height_d2, + input->linesize[1], + output->linesize[1], + input->data[1], + output->data[1]); + + break; + } + case VIDEO_FORMAT_I444: { + const uint32_t width = info->width; + const uint32_t height = info->height; + + set_gpu_converted_plane(width, height, + input->linesize[0], + output->linesize[0], + input->data[0], + output->data[0]); + + set_gpu_converted_plane(width, height, + input->linesize[1], + output->linesize[1], + input->data[1], + output->data[1]); + + set_gpu_converted_plane(width, height, + input->linesize[2], + output->linesize[2], + input->data[2], + output->data[2]); + + break; + } + } } } diff --git a/libobs/obs.c b/libobs/obs.c index 1d33fcfb1..8f694f2a9 100644 --- a/libobs/obs.c +++ b/libobs/obs.c @@ -42,117 +42,35 @@ static inline void make_video_info(struct video_output_info *vi, vi->cache_size = 6; } -#define PIXEL_SIZE 4 - -#define GET_ALIGN(val, align) (((val) + (align - 1)) & ~(align - 1)) - -static inline void set_420p_sizes(const struct obs_video_info *ovi) -{ - struct obs_core_video *video = &obs->video; - uint32_t chroma_pixels; - uint32_t total_bytes; - - chroma_pixels = (ovi->output_width * ovi->output_height / 4); - chroma_pixels = GET_ALIGN(chroma_pixels, PIXEL_SIZE); - - video->plane_offsets[0] = 0; - video->plane_offsets[1] = ovi->output_width * ovi->output_height; - video->plane_offsets[2] = video->plane_offsets[1] + chroma_pixels; - - video->plane_linewidth[0] = ovi->output_width; - video->plane_linewidth[1] = ovi->output_width / 2; - video->plane_linewidth[2] = ovi->output_width / 2; - - video->plane_sizes[0] = video->plane_offsets[1]; - video->plane_sizes[1] = video->plane_sizes[0] / 4; - video->plane_sizes[2] = video->plane_sizes[1]; - - total_bytes = video->plane_offsets[2] + chroma_pixels; - - video->conversion_height = - (total_bytes / PIXEL_SIZE + ovi->output_width - 1) / - ovi->output_width; - - video->conversion_height = GET_ALIGN(video->conversion_height, 2); - video->conversion_tech = "Planar420"; -} - -static inline void set_nv12_sizes(const struct obs_video_info *ovi) -{ - struct obs_core_video *video = &obs->video; - uint32_t chroma_pixels; - uint32_t total_bytes; - - chroma_pixels = (ovi->output_width * ovi->output_height / 2); - chroma_pixels = GET_ALIGN(chroma_pixels, PIXEL_SIZE); - - video->plane_offsets[0] = 0; - video->plane_offsets[1] = ovi->output_width * ovi->output_height; - - video->plane_linewidth[0] = ovi->output_width; - video->plane_linewidth[1] = ovi->output_width; - - video->plane_sizes[0] = video->plane_offsets[1]; - video->plane_sizes[1] = video->plane_sizes[0] / 2; - - total_bytes = video->plane_offsets[1] + chroma_pixels; - - video->conversion_height = - (total_bytes / PIXEL_SIZE + ovi->output_width - 1) / - ovi->output_width; - - video->conversion_height = GET_ALIGN(video->conversion_height, 2); - video->conversion_tech = "NV12"; -} - -static inline void set_444p_sizes(const struct obs_video_info *ovi) -{ - struct obs_core_video *video = &obs->video; - uint32_t chroma_pixels; - uint32_t total_bytes; - - chroma_pixels = (ovi->output_width * ovi->output_height); - chroma_pixels = GET_ALIGN(chroma_pixels, PIXEL_SIZE); - - video->plane_offsets[0] = 0; - video->plane_offsets[1] = chroma_pixels; - video->plane_offsets[2] = chroma_pixels + chroma_pixels; - - video->plane_linewidth[0] = ovi->output_width; - video->plane_linewidth[1] = ovi->output_width; - video->plane_linewidth[2] = ovi->output_width; - - video->plane_sizes[0] = chroma_pixels; - video->plane_sizes[1] = chroma_pixels; - video->plane_sizes[2] = chroma_pixels; - - total_bytes = video->plane_offsets[2] + chroma_pixels; - - video->conversion_height = - (total_bytes / PIXEL_SIZE + ovi->output_width - 1) / - ovi->output_width; - - video->conversion_height = GET_ALIGN(video->conversion_height, 2); - video->conversion_tech = "Planar444"; -} - static inline void calc_gpu_conversion_sizes(const struct obs_video_info *ovi) { - obs->video.conversion_height = 0; - memset(obs->video.plane_offsets, 0, sizeof(obs->video.plane_offsets)); - memset(obs->video.plane_sizes, 0, sizeof(obs->video.plane_sizes)); - memset(obs->video.plane_linewidth, 0, - sizeof(obs->video.plane_linewidth)); + struct obs_core_video *video = &obs->video; + + video->conversion_needed = false; + video->conversion_techs[0] = NULL; + video->conversion_techs[1] = NULL; + video->conversion_techs[2] = NULL; + video->conversion_width_i = 0.f; switch ((uint32_t)ovi->output_format) { case VIDEO_FORMAT_I420: - set_420p_sizes(ovi); + video->conversion_needed = true; + video->conversion_techs[0] = "Planar_Y"; + video->conversion_techs[1] = "Planar_U_Left"; + video->conversion_techs[2] = "Planar_V_Left"; + video->conversion_width_i = 1.f / (float)ovi->output_width; break; case VIDEO_FORMAT_NV12: - set_nv12_sizes(ovi); + video->conversion_needed = true; + video->conversion_techs[0] = "NV12_Y"; + video->conversion_techs[1] = "NV12_UV"; + video->conversion_width_i = 1.f / (float)ovi->output_width; break; case VIDEO_FORMAT_I444: - set_444p_sizes(ovi); + video->conversion_needed = true; + video->conversion_techs[0] = "Planar_Y"; + video->conversion_techs[1] = "Planar_U"; + video->conversion_techs[2] = "Planar_V"; break; } } @@ -167,7 +85,7 @@ static bool obs_init_gpu_conversion(struct obs_video_info *ovi) ? gs_nv12_available() : false; - if (!video->conversion_height) { + if (!video->conversion_needed) { blog(LOG_INFO, "GPU conversion not available for format: %u", (unsigned int)ovi->output_format); video->gpu_conversion = false; @@ -183,23 +101,96 @@ static bool obs_init_gpu_conversion(struct obs_video_info *ovi) #ifdef _WIN32 if (video->using_nv12_tex) { - gs_texture_create_nv12(&video->convert_texture, - &video->convert_uv_texture, + gs_texture_create_nv12(&video->convert_textures[0], + &video->convert_textures[1], ovi->output_width, ovi->output_height, GS_RENDER_TARGET | GS_SHARED_KM_TEX); - if (!video->convert_uv_texture) - return false; } else { #endif - video->convert_texture = gs_texture_create( - ovi->output_width, video->conversion_height, GS_RGBA, 1, - NULL, GS_RENDER_TARGET); + video->convert_textures[0] = + gs_texture_create(ovi->output_width, ovi->output_height, + GS_R8, 1, NULL, GS_RENDER_TARGET); + + const struct video_output_info *info = + video_output_get_info(video->video); + switch (info->format) { + case VIDEO_FORMAT_I420: + video->convert_textures[1] = gs_texture_create( + ovi->output_width / 2, ovi->output_height / 2, + GS_R8, 1, NULL, GS_RENDER_TARGET); + video->convert_textures[2] = gs_texture_create( + ovi->output_width / 2, ovi->output_height / 2, + GS_R8, 1, NULL, GS_RENDER_TARGET); + if (!video->convert_textures[2]) + return false; + break; + case VIDEO_FORMAT_NV12: + video->convert_textures[1] = gs_texture_create( + ovi->output_width / 2, ovi->output_height / 2, + GS_R8G8, 1, NULL, GS_RENDER_TARGET); + break; + case VIDEO_FORMAT_I444: + video->convert_textures[1] = gs_texture_create( + ovi->output_width, ovi->output_height, GS_R8, 1, + NULL, GS_RENDER_TARGET); + video->convert_textures[2] = gs_texture_create( + ovi->output_width, ovi->output_height, GS_R8, 1, + NULL, GS_RENDER_TARGET); + if (!video->convert_textures[2]) + return false; + break; + } #ifdef _WIN32 } #endif - if (!video->convert_texture) + if (!video->convert_textures[0]) return false; + if (!video->convert_textures[1]) + return false; + + return true; +} + +static bool obs_init_gpu_copy_surfaces(struct obs_video_info *ovi, size_t i) +{ + struct obs_core_video *video = &obs->video; + + video->copy_surfaces[i][0] = gs_stagesurface_create( + ovi->output_width, ovi->output_height, GS_R8); + if (!video->copy_surfaces[i][0]) + return false; + + const struct video_output_info *info = + video_output_get_info(video->video); + switch (info->format) { + case VIDEO_FORMAT_I420: + video->copy_surfaces[i][1] = gs_stagesurface_create( + ovi->output_width / 2, ovi->output_height / 2, GS_R8); + if (!video->copy_surfaces[i][1]) + return false; + video->copy_surfaces[i][2] = gs_stagesurface_create( + ovi->output_width / 2, ovi->output_height / 2, GS_R8); + if (!video->copy_surfaces[i][2]) + return false; + break; + case VIDEO_FORMAT_NV12: + video->copy_surfaces[i][1] = gs_stagesurface_create( + ovi->output_width / 2, ovi->output_height / 2, GS_R8G8); + if (!video->copy_surfaces[i][1]) + return false; + break; + case VIDEO_FORMAT_I444: + video->copy_surfaces[i][1] = gs_stagesurface_create( + ovi->output_width, ovi->output_height, GS_R8); + if (!video->copy_surfaces[i][1]) + return false; + video->copy_surfaces[i][2] = gs_stagesurface_create( + ovi->output_width, ovi->output_height, GS_R8); + if (!video->copy_surfaces[i][2]) + return false; + break; + } return true; } @@ -207,25 +198,29 @@ static bool obs_init_gpu_conversion(struct obs_video_info *ovi) static bool obs_init_textures(struct obs_video_info *ovi) { struct obs_core_video *video = &obs->video; - uint32_t output_height = video->gpu_conversion - ? video->conversion_height - : ovi->output_height; - size_t i; - for (i = 0; i < NUM_TEXTURES; i++) { + for (size_t i = 0; i < NUM_TEXTURES; i++) { #ifdef _WIN32 if (video->using_nv12_tex) { - video->copy_surfaces[i] = gs_stagesurface_create_nv12( - ovi->output_width, ovi->output_height); - if (!video->copy_surfaces[i]) + video->copy_surfaces[i][0] = + gs_stagesurface_create_nv12(ovi->output_width, + ovi->output_height); + if (!video->copy_surfaces[i][0]) return false; } else { #endif - video->copy_surfaces[i] = gs_stagesurface_create( - ovi->output_width, output_height, GS_RGBA); - if (!video->copy_surfaces[i]) - return false; + if (video->gpu_conversion) { + if (!obs_init_gpu_copy_surfaces(ovi, i)) + return false; + } else { + video->copy_surfaces[i][0] = + gs_stagesurface_create( + ovi->output_width, + ovi->output_height, GS_RGBA); + if (!video->copy_surfaces[i][0]) + return false; + } #ifdef _WIN32 } #endif @@ -465,23 +460,45 @@ static void obs_free_video(void) gs_enter_context(video->graphics); - if (video->mapped_surface) { - gs_stagesurface_unmap(video->mapped_surface); - video->mapped_surface = NULL; + for (size_t c = 0; c < NUM_CHANNELS; c++) { + if (video->mapped_surfaces[c]) { + gs_stagesurface_unmap( + video->mapped_surfaces[c]); + video->mapped_surfaces[c] = NULL; + } } for (size_t i = 0; i < NUM_TEXTURES; i++) { - gs_stagesurface_destroy(video->copy_surfaces[i]); - video->copy_surfaces[i] = NULL; + for (size_t c = 0; c < NUM_CHANNELS; c++) { + if (video->copy_surfaces[i][c]) { + gs_stagesurface_destroy( + video->copy_surfaces[i][c]); + video->copy_surfaces[i][c] = NULL; + } + } } gs_texture_destroy(video->render_texture); - gs_texture_destroy(video->convert_texture); - gs_texture_destroy(video->convert_uv_texture); + + for (size_t c = 0; c < NUM_CHANNELS; c++) { + if (video->convert_textures[c]) { + gs_texture_destroy(video->convert_textures[c]); + video->convert_textures[c] = NULL; + } + } + + for (size_t i = 0; i < NUM_TEXTURES; i++) { + for (size_t c = 0; c < NUM_CHANNELS; c++) { + if (video->copy_surfaces[i][c]) { + gs_stagesurface_destroy( + video->copy_surfaces[i][c]); + video->copy_surfaces[i][c] = NULL; + } + } + } + gs_texture_destroy(video->output_texture); video->render_texture = NULL; - video->convert_texture = NULL; - video->convert_uv_texture = NULL; video->output_texture = NULL; gs_leave_context();