From c4fb34897e40bad271c070f21eaafb3d545f6812 Mon Sep 17 00:00:00 2001 From: jpark37 Date: Wed, 2 Mar 2022 22:19:51 -0800 Subject: [PATCH] libobs: NV12 textures only for active GPU encoders Intel GPUs in particular are slow to copy NV12/P010 textures. We can use ordinary UNORM textures for CPU encoders. --- libobs/obs-internal.h | 7 +- libobs/obs-video.c | 215 ++++++++++++++++++++------------------- libobs/obs.c | 231 +++++++++++++++++++++++++++--------------- 3 files changed, 263 insertions(+), 190 deletions(-) diff --git a/libobs/obs-internal.h b/libobs/obs-internal.h index 79f024e6f..f7f7e3468 100644 --- a/libobs/obs-internal.h +++ b/libobs/obs-internal.h @@ -246,10 +246,15 @@ struct obs_task_info { struct obs_core_video { graphics_t *graphics; + gs_stagesurf_t *active_copy_surfaces[NUM_TEXTURES][NUM_CHANNELS]; gs_stagesurf_t *copy_surfaces[NUM_TEXTURES][NUM_CHANNELS]; + gs_texture_t *convert_textures[NUM_CHANNELS]; +#ifdef _WIN32 + gs_stagesurf_t *copy_surfaces_encode[NUM_TEXTURES]; + gs_texture_t *convert_textures_encode[NUM_CHANNELS]; +#endif gs_texture_t *render_texture; gs_texture_t *output_texture; - gs_texture_t *convert_textures[NUM_CHANNELS]; bool texture_rendered; bool textures_copied[NUM_TEXTURES]; bool texture_converted; diff --git a/libobs/obs-video.c b/libobs/obs-video.c index e98422261..a966ed0d5 100644 --- a/libobs/obs-video.c +++ b/libobs/obs-video.c @@ -298,6 +298,7 @@ static void render_convert_plane(gs_effect_t *effect, gs_texture_t *target, static const char *render_convert_texture_name = "render_convert_texture"; static void render_convert_texture(struct obs_core_video *video, + gs_texture_t *const *const convert_textures, gs_texture_t *texture) { profile_start(render_convert_texture_name); @@ -322,28 +323,28 @@ static void render_convert_texture(struct obs_core_video *video, gs_enable_blending(false); - if (video->convert_textures[0]) { + if (convert_textures[0]) { gs_effect_set_texture(image, texture); gs_effect_set_vec4(color_vec0, &vec0); - render_convert_plane(effect, video->convert_textures[0], + render_convert_plane(effect, convert_textures[0], video->conversion_techs[0]); - if (video->convert_textures[1]) { + if (convert_textures[1]) { gs_effect_set_texture(image, texture); gs_effect_set_vec4(color_vec1, &vec1); - if (!video->convert_textures[2]) + if (!convert_textures[2]) gs_effect_set_vec4(color_vec2, &vec2); gs_effect_set_float(width_i, video->conversion_width_i); - render_convert_plane(effect, video->convert_textures[1], + render_convert_plane(effect, convert_textures[1], video->conversion_techs[1]); - if (video->convert_textures[2]) { + if (convert_textures[2]) { gs_effect_set_texture(image, texture); gs_effect_set_vec4(color_vec2, &vec2); gs_effect_set_float(width_i, video->conversion_width_i); render_convert_plane( - effect, video->convert_textures[2], + effect, convert_textures[2], video->conversion_techs[2]); } } @@ -357,26 +358,32 @@ static void render_convert_texture(struct obs_core_video *video, } static const char *stage_output_texture_name = "stage_output_texture"; -static inline void stage_output_texture(struct obs_core_video *video, - int cur_texture) +static inline void +stage_output_texture(struct obs_core_video *video, int cur_texture, + gs_texture_t *const *const convert_textures, + gs_stagesurf_t *const *const copy_surfaces, + size_t channel_count) { profile_start(stage_output_texture_name); unmap_last_surface(video); if (!video->gpu_conversion) { - gs_stagesurf_t *copy = video->copy_surfaces[cur_texture][0]; - if (copy) + gs_stagesurf_t *copy = copy_surfaces[0]; + if (copy) { gs_stage_texture(copy, video->output_texture); + video->active_copy_surfaces[cur_texture][0] = copy; + } video->textures_copied[cur_texture] = true; } else if (video->texture_converted) { - for (int i = 0; i < NUM_CHANNELS; i++) { - gs_stagesurf_t *copy = - video->copy_surfaces[cur_texture][i]; - if (copy) - gs_stage_texture(copy, - video->convert_textures[i]); + for (int i = 0; i < channel_count; i++) { + gs_stagesurf_t *copy = copy_surfaces[i]; + if (copy) { + gs_stage_texture(copy, convert_textures[i]); + video->active_copy_surfaces[cur_texture][i] = + copy; + } } video->textures_copied[cur_texture] = true; @@ -421,13 +428,13 @@ static inline bool queue_frame(struct obs_core_video *video, bool raw_active, * reason. otherwise, it goes to the 'duplicate' case above, which * will ensure better performance. */ if (raw_active || vframe_info->count > 1) { - gs_copy_texture(tf.tex, video->convert_textures[0]); + gs_copy_texture(tf.tex, video->convert_textures_encode[0]); } else { - gs_texture_t *tex = video->convert_textures[0]; - gs_texture_t *tex_uv = video->convert_textures[1]; + gs_texture_t *tex = video->convert_textures_encode[0]; + gs_texture_t *tex_uv = video->convert_textures_encode[1]; - video->convert_textures[0] = tf.tex; - video->convert_textures[1] = tf.tex_uv; + video->convert_textures_encode[0] = tf.tex; + video->convert_textures_encode[1] = tf.tex_uv; tf.tex = tex; tf.tex_uv = tex_uv; @@ -489,15 +496,24 @@ static inline void render_video(struct obs_core_video *video, bool raw_active, render_main_texture(video); if (raw_active || gpu_active) { + gs_texture_t *const *convert_textures = video->convert_textures; + gs_stagesurf_t *const *copy_surfaces = + video->copy_surfaces[cur_texture]; + size_t channel_count = NUM_CHANNELS; gs_texture_t *texture = render_output_texture(video); #ifdef _WIN32 - if (gpu_active) + if (gpu_active) { + convert_textures = video->convert_textures_encode; + copy_surfaces = video->copy_surfaces_encode; + channel_count = 1; gs_flush(); + } #endif if (video->gpu_conversion) - render_convert_texture(video, texture); + render_convert_texture(video, convert_textures, + texture); #ifdef _WIN32 if (gpu_active) { @@ -507,7 +523,9 @@ static inline void render_video(struct obs_core_video *video, bool raw_active, #endif if (raw_active) - stage_output_texture(video, cur_texture); + stage_output_texture(video, cur_texture, + convert_textures, copy_surfaces, + channel_count); } gs_set_render_target(NULL, NULL); @@ -524,7 +542,7 @@ static inline bool download_frame(struct obs_core_video *video, for (int channel = 0; channel < NUM_CHANNELS; ++channel) { gs_stagesurf_t *surface = - video->copy_surfaces[prev_texture][channel]; + video->active_copy_surfaces[prev_texture][channel]; if (surface) { if (!gs_stagesurface_map(surface, &frame->data[channel], &frame->linesize[channel])) @@ -561,108 +579,91 @@ static void set_gpu_converted_data(struct obs_core_video *video, const struct video_data *input, const struct video_output_info *info) { - if (video->using_nv12_tex) { + switch (info->format) { + case VIDEO_FORMAT_I420: { const uint32_t width = info->width; const uint32_t height = info->height; - const uint8_t *const in_uv = set_gpu_converted_plane( - width, height, input->linesize[0], output->linesize[0], - input->data[0], output->data[0]); + set_gpu_converted_plane(width, height, input->linesize[0], + output->linesize[0], input->data[0], + output->data[0]); + const uint32_t width_d2 = width / 2; const uint32_t height_d2 = height / 2; - set_gpu_converted_plane(width, height_d2, input->linesize[0], - output->linesize[1], in_uv, + + set_gpu_converted_plane(width_d2, height_d2, input->linesize[1], + output->linesize[1], input->data[1], output->data[1]); - } else { - switch (info->format) { - case VIDEO_FORMAT_I420: { - const uint32_t width = info->width; - const uint32_t height = info->height; + set_gpu_converted_plane(width_d2, height_d2, input->linesize[2], + output->linesize[2], input->data[2], + output->data[2]); + + break; + } + case VIDEO_FORMAT_NV12: { + const uint32_t width = info->width; + const uint32_t height = info->height; + const uint32_t height_d2 = height / 2; + if (input->linesize[1]) { set_gpu_converted_plane(width, height, input->linesize[0], output->linesize[0], input->data[0], output->data[0]); - - const uint32_t width_d2 = width / 2; - const uint32_t height_d2 = height / 2; - - set_gpu_converted_plane(width_d2, height_d2, - input->linesize[1], - output->linesize[1], - input->data[1], - output->data[1]); - - set_gpu_converted_plane(width_d2, height_d2, - input->linesize[2], - output->linesize[2], - input->data[2], - output->data[2]); - - break; - } - case VIDEO_FORMAT_NV12: { - const uint32_t width = info->width; - const uint32_t height = info->height; - - set_gpu_converted_plane(width, height, - input->linesize[0], - output->linesize[0], - input->data[0], - output->data[0]); - - const uint32_t height_d2 = height / 2; set_gpu_converted_plane(width, height_d2, input->linesize[1], output->linesize[1], input->data[1], output->data[1]); - - break; - } - case VIDEO_FORMAT_I444: { - const uint32_t width = info->width; - const uint32_t height = info->height; - - set_gpu_converted_plane(width, height, + } else { + const uint8_t *const in_uv = set_gpu_converted_plane( + width, height, input->linesize[0], + output->linesize[0], input->data[0], + output->data[0]); + set_gpu_converted_plane(width, height_d2, input->linesize[0], - output->linesize[0], - input->data[0], - output->data[0]); - - set_gpu_converted_plane(width, height, - input->linesize[1], - output->linesize[1], - input->data[1], + output->linesize[1], in_uv, output->data[1]); - - set_gpu_converted_plane(width, height, - input->linesize[2], - output->linesize[2], - input->data[2], - output->data[2]); - - break; } - case VIDEO_FORMAT_NONE: - case VIDEO_FORMAT_YVYU: - case VIDEO_FORMAT_YUY2: - case VIDEO_FORMAT_UYVY: - case VIDEO_FORMAT_RGBA: - case VIDEO_FORMAT_BGRA: - case VIDEO_FORMAT_BGRX: - case VIDEO_FORMAT_Y800: - case VIDEO_FORMAT_BGR3: - case VIDEO_FORMAT_I422: - case VIDEO_FORMAT_I40A: - case VIDEO_FORMAT_I42A: - case VIDEO_FORMAT_YUVA: - case VIDEO_FORMAT_AYUV: - /* unimplemented */ - ; - } + break; + } + case VIDEO_FORMAT_I444: { + const uint32_t width = info->width; + const uint32_t height = info->height; + + set_gpu_converted_plane(width, height, input->linesize[0], + output->linesize[0], input->data[0], + output->data[0]); + + set_gpu_converted_plane(width, height, input->linesize[1], + output->linesize[1], input->data[1], + output->data[1]); + + set_gpu_converted_plane(width, height, input->linesize[2], + output->linesize[2], input->data[2], + output->data[2]); + + break; + } + + case VIDEO_FORMAT_NONE: + case VIDEO_FORMAT_YVYU: + case VIDEO_FORMAT_YUY2: + case VIDEO_FORMAT_UYVY: + case VIDEO_FORMAT_RGBA: + case VIDEO_FORMAT_BGRA: + case VIDEO_FORMAT_BGRX: + case VIDEO_FORMAT_Y800: + case VIDEO_FORMAT_BGR3: + case VIDEO_FORMAT_I422: + case VIDEO_FORMAT_I40A: + case VIDEO_FORMAT_I42A: + case VIDEO_FORMAT_YUVA: + case VIDEO_FORMAT_AYUV: + /* unimplemented */ + ; } } diff --git a/libobs/obs.c b/libobs/obs.c index 0b4c9b05e..7f5fd25bb 100644 --- a/libobs/obs.c +++ b/libobs/obs.c @@ -101,74 +101,99 @@ static bool obs_init_gpu_conversion(struct obs_video_info *ovi) else blog(LOG_INFO, "NV12 texture support not available"); + video->convert_textures[0] = NULL; + video->convert_textures[1] = NULL; + video->convert_textures[2] = NULL; #ifdef _WIN32 + video->convert_textures_encode[0] = NULL; + video->convert_textures_encode[1] = NULL; + video->convert_textures_encode[2] = NULL; if (video->using_nv12_tex) { - gs_texture_create_nv12(&video->convert_textures[0], - &video->convert_textures[1], - ovi->output_width, ovi->output_height, - GS_RENDER_TARGET | GS_SHARED_KM_TEX); - } else { -#endif - video->convert_textures[0] = - gs_texture_create(ovi->output_width, ovi->output_height, - GS_R8, 1, NULL, GS_RENDER_TARGET); - - const struct video_output_info *info = - video_output_get_info(video->video); - switch (info->format) { - case VIDEO_FORMAT_I420: - video->convert_textures[1] = gs_texture_create( - ovi->output_width / 2, ovi->output_height / 2, - GS_R8, 1, NULL, GS_RENDER_TARGET); - video->convert_textures[2] = gs_texture_create( - ovi->output_width / 2, ovi->output_height / 2, - GS_R8, 1, NULL, GS_RENDER_TARGET); - if (!video->convert_textures[2]) - return false; - break; - case VIDEO_FORMAT_NV12: - video->convert_textures[1] = gs_texture_create( - ovi->output_width / 2, ovi->output_height / 2, - GS_R8G8, 1, NULL, GS_RENDER_TARGET); - break; - case VIDEO_FORMAT_I444: - video->convert_textures[1] = gs_texture_create( - ovi->output_width, ovi->output_height, GS_R8, 1, - NULL, GS_RENDER_TARGET); - video->convert_textures[2] = gs_texture_create( - ovi->output_width, ovi->output_height, GS_R8, 1, - NULL, GS_RENDER_TARGET); - if (!video->convert_textures[2]) - return false; - break; - default: - break; + if (!gs_texture_create_nv12( + &video->convert_textures_encode[0], + &video->convert_textures_encode[1], + ovi->output_width, ovi->output_height, + GS_RENDER_TARGET | GS_SHARED_KM_TEX)) { + return false; } -#ifdef _WIN32 } #endif - if (!video->convert_textures[0]) - return false; - if (!video->convert_textures[1]) - return false; + bool success = true; - return true; + const struct video_output_info *info = + video_output_get_info(video->video); + switch (info->format) { + case VIDEO_FORMAT_I420: + video->convert_textures[0] = + gs_texture_create(ovi->output_width, ovi->output_height, + GS_R8, 1, NULL, GS_RENDER_TARGET); + video->convert_textures[1] = gs_texture_create( + ovi->output_width / 2, ovi->output_height / 2, GS_R8, 1, + NULL, GS_RENDER_TARGET); + video->convert_textures[2] = gs_texture_create( + ovi->output_width / 2, ovi->output_height / 2, GS_R8, 1, + NULL, GS_RENDER_TARGET); + if (!video->convert_textures[0] || + !video->convert_textures[1] || !video->convert_textures[2]) + success = false; + break; + case VIDEO_FORMAT_NV12: + video->convert_textures[0] = + gs_texture_create(ovi->output_width, ovi->output_height, + GS_R8, 1, NULL, GS_RENDER_TARGET); + video->convert_textures[1] = gs_texture_create( + ovi->output_width / 2, ovi->output_height / 2, GS_R8G8, + 1, NULL, GS_RENDER_TARGET); + if (!video->convert_textures[0] || !video->convert_textures[1]) + success = false; + break; + case VIDEO_FORMAT_I444: + video->convert_textures[0] = + gs_texture_create(ovi->output_width, ovi->output_height, + GS_R8, 1, NULL, GS_RENDER_TARGET); + video->convert_textures[1] = + gs_texture_create(ovi->output_width, ovi->output_height, + GS_R8, 1, NULL, GS_RENDER_TARGET); + video->convert_textures[2] = + gs_texture_create(ovi->output_width, ovi->output_height, + GS_R8, 1, NULL, GS_RENDER_TARGET); + if (!video->convert_textures[0] || + !video->convert_textures[1] || !video->convert_textures[2]) + success = false; + } + + if (!success) { + for (size_t c = 0; c < NUM_CHANNELS; c++) { + if (video->convert_textures[c]) { + gs_texture_destroy(video->convert_textures[c]); + video->convert_textures[c] = NULL; + } +#ifdef _WIN32 + if (video->convert_textures_encode[c]) { + gs_texture_destroy( + video->convert_textures_encode[c]); + video->convert_textures_encode[c] = NULL; + } +#endif + } + } + + return success; } static bool obs_init_gpu_copy_surfaces(struct obs_video_info *ovi, size_t i) { struct obs_core_video *video = &obs->video; - video->copy_surfaces[i][0] = gs_stagesurface_create( - ovi->output_width, ovi->output_height, GS_R8); - if (!video->copy_surfaces[i][0]) - return false; - const struct video_output_info *info = video_output_get_info(video->video); switch (info->format) { case VIDEO_FORMAT_I420: + video->copy_surfaces[i][0] = gs_stagesurface_create( + ovi->output_width, ovi->output_height, GS_R8); + if (!video->copy_surfaces[i][0]) + return false; video->copy_surfaces[i][1] = gs_stagesurface_create( ovi->output_width / 2, ovi->output_height / 2, GS_R8); if (!video->copy_surfaces[i][1]) @@ -179,12 +204,20 @@ static bool obs_init_gpu_copy_surfaces(struct obs_video_info *ovi, size_t i) return false; break; case VIDEO_FORMAT_NV12: + video->copy_surfaces[i][0] = gs_stagesurface_create( + ovi->output_width, ovi->output_height, GS_R8); + if (!video->copy_surfaces[i][0]) + return false; video->copy_surfaces[i][1] = gs_stagesurface_create( ovi->output_width / 2, ovi->output_height / 2, GS_R8G8); if (!video->copy_surfaces[i][1]) return false; break; case VIDEO_FORMAT_I444: + video->copy_surfaces[i][0] = gs_stagesurface_create( + ovi->output_width, ovi->output_height, GS_R8); + if (!video->copy_surfaces[i][0]) + return false; video->copy_surfaces[i][1] = gs_stagesurface_create( ovi->output_width, ovi->output_height, GS_R8); if (!video->copy_surfaces[i][1]) @@ -205,48 +238,78 @@ static bool obs_init_textures(struct obs_video_info *ovi) { struct obs_core_video *video = &obs->video; + bool success = true; + for (size_t i = 0; i < NUM_TEXTURES; i++) { #ifdef _WIN32 if (video->using_nv12_tex) { - video->copy_surfaces[i][0] = + video->copy_surfaces_encode[i] = gs_stagesurface_create_nv12(ovi->output_width, ovi->output_height); - if (!video->copy_surfaces[i][0]) - return false; - - } else { -#endif - if (video->gpu_conversion) { - if (!obs_init_gpu_copy_surfaces(ovi, i)) - return false; - } else { - video->copy_surfaces[i][0] = - gs_stagesurface_create( - ovi->output_width, - ovi->output_height, GS_RGBA); - if (!video->copy_surfaces[i][0]) - return false; + if (!video->copy_surfaces_encode[i]) { + success = false; + break; } -#ifdef _WIN32 } #endif + + if (video->gpu_conversion) { + if (!obs_init_gpu_copy_surfaces(ovi, i)) { + success = false; + break; + } + } else { + video->copy_surfaces[i][0] = gs_stagesurface_create( + ovi->output_width, ovi->output_height, GS_RGBA); + if (!video->copy_surfaces[i][0]) { + success = false; + break; + } + } } video->render_texture = gs_texture_create(ovi->base_width, ovi->base_height, GS_RGBA, 1, NULL, GS_RENDER_TARGET); - if (!video->render_texture) - return false; + success = false; video->output_texture = gs_texture_create(ovi->output_width, ovi->output_height, GS_RGBA, 1, NULL, GS_RENDER_TARGET); - if (!video->output_texture) - return false; + success = false; - return true; + if (!success) { + for (size_t i = 0; i < NUM_TEXTURES; i++) { + for (size_t c = 0; c < NUM_CHANNELS; c++) { + if (video->copy_surfaces[i][c]) { + gs_stagesurface_destroy( + video->copy_surfaces[i][c]); + video->copy_surfaces[i][c] = NULL; + } + } +#ifdef _WIN32 + if (video->copy_surfaces_encode[i]) { + gs_stagesurface_destroy( + video->copy_surfaces_encode[i]); + video->copy_surfaces_encode[i] = NULL; + } +#endif + } + + if (video->render_texture) { + gs_texture_destroy(video->render_texture); + video->render_texture = NULL; + } + + if (video->output_texture) { + gs_texture_destroy(video->output_texture); + video->output_texture = NULL; + } + } + + return success; } gs_effect_t *obs_load_effect(gs_effect_t **effect, const char *file) @@ -484,6 +547,13 @@ static void obs_free_video(void) video->copy_surfaces[i][c] = NULL; } } +#ifdef _WIN32 + if (video->copy_surfaces_encode[i]) { + gs_stagesurface_destroy( + video->copy_surfaces_encode[i]); + video->copy_surfaces_encode[i] = NULL; + } +#endif } gs_texture_destroy(video->render_texture); @@ -493,16 +563,13 @@ static void obs_free_video(void) gs_texture_destroy(video->convert_textures[c]); video->convert_textures[c] = NULL; } - } - - for (size_t i = 0; i < NUM_TEXTURES; i++) { - for (size_t c = 0; c < NUM_CHANNELS; c++) { - if (video->copy_surfaces[i][c]) { - gs_stagesurface_destroy( - video->copy_surfaces[i][c]); - video->copy_surfaces[i][c] = NULL; - } +#ifdef _WIN32 + if (video->convert_textures_encode[c]) { + gs_texture_destroy( + video->convert_textures_encode[c]); + video->convert_textures_encode[c] = NULL; } +#endif } gs_texture_destroy(video->output_texture);