From 11106c2fce391d522c5bed9a3379c09a3a964b2b Mon Sep 17 00:00:00 2001 From: jp9000 Date: Wed, 31 Dec 2014 01:53:13 -0800 Subject: [PATCH] libobs: Redesign/optimize frame encoding handling Previously, the design for the interaction between the encoder thread and the graphics thread was that the encoder thread would signal to the graphics thread when to start drawing each frame. The original idea behind this was to prevent mutually cascading stalls of encoding or graphics rendering (i.e., if rendering took too long, then encoding would have to catch up, then rendering would have to catch up again, and so on, cascading upon each other). The ultimate goal was to prevent encoding from impacting graphics and vice versa. However, eventually it was realized that there were some fundamental flaws with this design. 1. Stray frame duplication. You could not guarantee that a frame would render on time, so sometimes frames would unintentionally be lost if there was any sort of minor hiccup or if the thread took too long to be scheduled, I'm guessing. 2. Frame timing in the rendering thread was less accurate. The only place where frame timing was accurate was in the encoder thread, and the graphics thread was at the whim of thread scheduling. On higher end computers it was typically fine, but it was just generally not guaranteed that a frame would be rendered when it was supposed to be rendered. So the solution (originally proposed by r1ch and paibox) is to instead keep the encoding and graphics threads separate as usual, but instead of the encoder thread controlling the graphics thread, the graphics thread now controls the encoder thread. The encoder thread keeps a limited cache of frames, then the graphics thread copies frames into the cache and increments a semaphore to schedule the encoder thread to encode that data. In the cache, each frame has an encode counter. 
If the frame cache is full (e.g., the encoder taking too long to return frames), it will not cache a new frame, but instead will just increment the counter on the last frame in the cache to schedule that frame to encode again, ensuring that frames are on time and reducing CPU usage by lowering video complexity. If the graphics thread takes too long to render a frame, then it will add that frame with the count value set to the total number of frames that were missed (actual legitimately duplicated frames). Because the cache gives many frames of breathing room for the encoder to encode frames, this design helps improve results, especially when using encoding presets that have higher complexity and CPU usage, minimizing the risk of needlessly skipped or duplicated frames. I also managed to sneak in what should be a bit of an optimization to reduce copying of frame data, though how much of an optimization it ultimately ends up being is debatable. So to sum it up, this commit increases accuracy of frame timing, completely removes stray frame duplication, gives better results for higher complexity encoding presets, and potentially optimizes the frame pipeline a tiny bit. 
--- libobs/media-io/video-io.c | 205 ++++++++++++++++++++++--------------- libobs/media-io/video-io.h | 10 +- libobs/obs-internal.h | 8 +- libobs/obs-video.c | 143 ++++++++++++++------------ libobs/obs.c | 10 +- 5 files changed, 216 insertions(+), 160 deletions(-) diff --git a/libobs/media-io/video-io.c b/libobs/media-io/video-io.c index 01eb9c444..4aedd0cd6 100644 --- a/libobs/media-io/video-io.c +++ b/libobs/media-io/video-io.c @@ -27,6 +27,12 @@ #include "video-scaler.h" #define MAX_CONVERT_BUFFERS 3 +#define MAX_CACHE_SIZE 16 + +struct cached_frame_info { + struct video_data frame; + int count; +}; struct video_input { struct video_scale_info conversion; @@ -50,35 +56,26 @@ struct video_output { pthread_t thread; pthread_mutex_t data_mutex; - os_event_t *stop_event; + bool stop; - struct video_data cur_frame; - struct video_data next_frame; - bool new_frame; - - os_event_t *update_event; + os_sem_t *update_semaphore; uint64_t frame_time; - volatile uint64_t cur_video_time; uint32_t skipped_frames; uint32_t total_frames; - uint64_t last_ts; bool initialized; pthread_mutex_t input_mutex; DARRAY(struct video_input) inputs; + + size_t available_frames; + size_t first_added; + size_t last_added; + struct cached_frame_info cache[MAX_CACHE_SIZE]; }; /* ------------------------------------------------------------------------- */ -static inline void video_swapframes(struct video_output *video) -{ - if (video->new_frame) { - video->cur_frame = video->next_frame; - video->new_frame = false; - } -} - static inline bool scale_video_output(struct video_input *input, struct video_data *data) { @@ -110,71 +107,68 @@ static inline bool scale_video_output(struct video_input *input, return success; } -static inline void video_output_cur_frame(struct video_output *video) +static inline bool video_output_cur_frame(struct video_output *video) { - if (!video->cur_frame.data[0]) - return; + struct cached_frame_info *frame_info; + bool complete; + + /* -------------------------------- 
*/ + + pthread_mutex_lock(&video->data_mutex); + + frame_info = &video->cache[video->first_added]; + + pthread_mutex_unlock(&video->data_mutex); + + /* -------------------------------- */ pthread_mutex_lock(&video->input_mutex); for (size_t i = 0; i < video->inputs.num; i++) { struct video_input *input = video->inputs.array+i; - struct video_data frame = video->cur_frame; + struct video_data frame = frame_info->frame; - if (scale_video_output(input, &frame)) { - if (frame.timestamp <= video->last_ts) - video->last_ts += video->frame_time; - else - video->last_ts = frame.timestamp; - - frame.timestamp = video->last_ts; + if (scale_video_output(input, &frame)) input->callback(input->param, &frame); - } } pthread_mutex_unlock(&video->input_mutex); -} -#define MAX_MISSED_TIMINGS 8 + /* -------------------------------- */ -static inline bool safe_sleepto(uint64_t t, uint32_t *missed_timings) -{ - if (!os_sleepto_ns(t)) - (*missed_timings)++; - else - *missed_timings = 0; + pthread_mutex_lock(&video->data_mutex); - return *missed_timings <= MAX_MISSED_TIMINGS; + frame_info->frame.timestamp += video->frame_time; + complete = --frame_info->count == 0; + + if (complete) { + if (++video->first_added == video->info.cache_size) + video->first_added = 0; + + if (++video->available_frames == video->info.cache_size) + video->last_added = video->first_added; + } + + pthread_mutex_unlock(&video->data_mutex); + + /* -------------------------------- */ + + return complete; } static void *video_thread(void *param) { - struct video_output *video = param; - uint64_t cur_time = os_gettime_ns(); - uint32_t missed_timings = 0; + struct video_output *video = param; - while (os_event_try(video->stop_event) == EAGAIN) { - /* wait half a frame, update frame */ - cur_time += (video->frame_time/2); + while (os_sem_wait(video->update_semaphore) == 0) { + if (video->stop) + break; - if (safe_sleepto(cur_time, &missed_timings)) { - video->cur_video_time = cur_time; - 
os_event_signal(video->update_event); - } else { + while (!video->stop && !video_output_cur_frame(video)) { + video->total_frames++; video->skipped_frames++; } - /* wait another half a frame, swap and output frames */ - cur_time += (video->frame_time/2); - safe_sleepto(cur_time, &missed_timings); - - pthread_mutex_lock(&video->data_mutex); - - video_swapframes(video); - video_output_cur_frame(video); - - pthread_mutex_unlock(&video->data_mutex); - video->total_frames++; } @@ -189,6 +183,22 @@ static inline bool valid_video_params(const struct video_output_info *info) info->fps_num != 0; } +static inline void init_cache(struct video_output *video) +{ + if (video->info.cache_size > MAX_CACHE_SIZE) + video->info.cache_size = MAX_CACHE_SIZE; + + for (size_t i = 0; i < video->info.cache_size; i++) { + struct video_frame *frame; + frame = (struct video_frame*)&video->cache[i]; + + video_frame_init(frame, video->info.format, + video->info.width, video->info.height); + } + + video->available_frames = video->info.cache_size; +} + int video_output_open(video_t **video, struct video_output_info *info) { struct video_output *out; @@ -214,13 +224,13 @@ int video_output_open(video_t **video, struct video_output_info *info) goto fail; if (pthread_mutex_init(&out->input_mutex, &attr) != 0) goto fail; - if (os_event_init(&out->stop_event, OS_EVENT_TYPE_MANUAL) != 0) - goto fail; - if (os_event_init(&out->update_event, OS_EVENT_TYPE_AUTO) != 0) + if (os_sem_init(&out->update_semaphore, 0) != 0) goto fail; if (pthread_create(&out->thread, NULL, video_thread, out) != 0) goto fail; + init_cache(out); + out->initialized = true; *video = out; return VIDEO_OUTPUT_SUCCESS; @@ -241,8 +251,10 @@ void video_output_close(video_t *video) video_input_free(&video->inputs.array[i]); da_free(video->inputs); - os_event_destroy(video->update_event); - os_event_destroy(video->stop_event); + for (size_t i = 0; i < video->info.cache_size; i++) + video_frame_free((struct video_frame*)&video->cache[i]); + 
+ os_sem_destroy(video->update_semaphore); pthread_mutex_destroy(&video->data_mutex); pthread_mutex_destroy(&video->input_mutex); bfree(video); @@ -368,34 +380,57 @@ const struct video_output_info *video_output_get_info(const video_t *video) return video ? &video->info : NULL; } -void video_output_swap_frame(video_t *video, struct video_data *frame) +bool video_output_lock_frame(video_t *video, struct video_frame *frame, + int count, uint64_t timestamp) +{ + struct cached_frame_info *cfi; + bool locked; + + if (!video) return false; + + pthread_mutex_lock(&video->data_mutex); + + if (video->available_frames == 0) { + video->cache[video->last_added].count += count; + locked = false; + + } else { + if (video->available_frames != video->info.cache_size) { + if (++video->last_added == video->info.cache_size) + video->last_added = 0; + } + + cfi = &video->cache[video->last_added]; + cfi->frame.timestamp = timestamp; + cfi->count = count; + + memcpy(frame, &cfi->frame, sizeof(*frame)); + + locked = true; + } + + pthread_mutex_unlock(&video->data_mutex); + + return locked; +} + +void video_output_unlock_frame(video_t *video) { if (!video) return; pthread_mutex_lock(&video->data_mutex); - video->next_frame = *frame; - video->new_frame = true; + + video->available_frames--; + os_sem_post(video->update_semaphore); + pthread_mutex_unlock(&video->data_mutex); } -bool video_output_wait(video_t *video) -{ - if (!video) return false; - - os_event_wait(video->update_event); - return os_event_try(video->stop_event) == EAGAIN; -} - uint64_t video_output_get_frame_time(const video_t *video) { return video ? video->frame_time : 0; } -uint64_t video_output_get_time(const video_t *video) -{ - return video ? 
video->cur_video_time : 0; -} - void video_output_stop(video_t *video) { void *thread_ret; @@ -405,12 +440,20 @@ void video_output_stop(video_t *video) if (video->initialized) { video->initialized = false; - os_event_signal(video->stop_event); + video->stop = true; + os_sem_post(video->update_semaphore); pthread_join(video->thread, &thread_ret); - os_event_signal(video->update_event); } } +bool video_output_stopped(video_t *video) +{ + if (!video) + return true; + + return video->stop; +} + enum video_format video_output_get_format(const video_t *video) { return video ? video->info.format : VIDEO_FORMAT_NONE; diff --git a/libobs/media-io/video-io.h b/libobs/media-io/video-io.h index e4136e3a8..04ca64b7d 100644 --- a/libobs/media-io/video-io.h +++ b/libobs/media-io/video-io.h @@ -23,6 +23,8 @@ extern "C" { #endif +struct video_frame; + /* Base video output component. Use this to create a video output track. */ struct video_output; @@ -72,6 +74,7 @@ struct video_output_info { uint32_t fps_den; uint32_t width; uint32_t height; + size_t cache_size; enum video_colorspace colorspace; enum video_range_type range; @@ -137,11 +140,12 @@ EXPORT bool video_output_active(const video_t *video); EXPORT const struct video_output_info *video_output_get_info( const video_t *video); -EXPORT void video_output_swap_frame(video_t *video, struct video_data *frame); -EXPORT bool video_output_wait(video_t *video); +EXPORT bool video_output_lock_frame(video_t *video, struct video_frame *frame, + int count, uint64_t timestamp); +EXPORT void video_output_unlock_frame(video_t *video); EXPORT uint64_t video_output_get_frame_time(const video_t *video); -EXPORT uint64_t video_output_get_time(const video_t *video); EXPORT void video_output_stop(video_t *video); +EXPORT bool video_output_stopped(video_t *video); EXPORT enum video_format video_output_get_format(const video_t *video); EXPORT uint32_t video_output_get_width(const video_t *video); diff --git a/libobs/obs-internal.h 
b/libobs/obs-internal.h index 772b7ffbc..fd87c7b99 100644 --- a/libobs/obs-internal.h +++ b/libobs/obs-internal.h @@ -129,6 +129,11 @@ extern void obs_display_free(struct obs_display *display); /* ------------------------------------------------------------------------- */ /* core */ +struct obs_vframe_info { + uint64_t timestamp; + int count; +}; + struct obs_core_video { graphics_t *graphics; gs_stagesurf_t *copy_surfaces[NUM_TEXTURES]; @@ -139,8 +144,7 @@ struct obs_core_video { bool textures_output[NUM_TEXTURES]; bool textures_copied[NUM_TEXTURES]; bool textures_converted[NUM_TEXTURES]; - struct obs_source_frame convert_frames[NUM_TEXTURES]; - struct circlebuf timestamp_buffer; + struct circlebuf vframe_info_buffer; gs_effect_t *default_effect; gs_effect_t *default_rect_effect; gs_effect_t *solid_effect; diff --git a/libobs/obs-video.c b/libobs/obs-video.c index b04b3d109..d66afdc46 100644 --- a/libobs/obs-video.c +++ b/libobs/obs-video.c @@ -19,6 +19,7 @@ #include "obs-internal.h" #include "graphics/vec4.h" #include "media-io/format-conversion.h" +#include "media-io/video-frame.h" static inline void calculate_base_volume(struct obs_core_data *data, struct obs_view *view, obs_source_t *target) @@ -317,16 +318,13 @@ static inline void stage_output_texture(struct obs_core_video *video, } static inline void render_video(struct obs_core_video *video, int cur_texture, - int prev_texture, uint64_t timestamp) + int prev_texture) { gs_begin_scene(); gs_enable_depth_test(false); gs_set_cull_mode(GS_NEITHER); - circlebuf_push_back(&video->timestamp_buffer, ×tamp, - sizeof(timestamp)); - render_main_texture(video, cur_texture); render_output_texture(video, cur_texture, prev_texture); if (video->gpu_conversion) @@ -393,12 +391,10 @@ static inline uint32_t make_aligned_linesize_offset(uint32_t offset, } static void fix_gpu_converted_alignment(struct obs_core_video *video, - struct video_data *frame, int cur_texture) + struct video_frame *output, const struct video_data 
*input) { - struct obs_source_frame *new_frame = - &video->convert_frames[cur_texture]; - uint32_t src_linesize = frame->linesize[0]; - uint32_t dst_linesize = video->output_width * 4; + uint32_t src_linesize = input->linesize[0]; + uint32_t dst_linesize = output->linesize[0]; uint32_t src_pos = 0; for (size_t i = 0; i < 3; i++) { @@ -408,89 +404,103 @@ static void fix_gpu_converted_alignment(struct obs_core_video *video, src_pos = make_aligned_linesize_offset(video->plane_offsets[i], dst_linesize, src_linesize); - copy_dealign(new_frame->data[i], 0, dst_linesize, - frame->data[0], src_pos, src_linesize, + copy_dealign(output->data[i], 0, dst_linesize, + input->data[0], src_pos, src_linesize, video->plane_sizes[i]); } - - /* replace with cached frames */ - for (size_t i = 0; i < MAX_AV_PLANES; i++) { - frame->data[i] = new_frame->data[i]; - frame->linesize[i] = new_frame->linesize[i]; - } } -static bool set_gpu_converted_data(struct obs_core_video *video, - struct video_data *frame, int cur_texture) +static void set_gpu_converted_data(struct obs_core_video *video, + struct video_frame *output, const struct video_data *input, + const struct video_output_info *info) { - if (frame->linesize[0] == video->output_width*4) { + if (input->linesize[0] == video->output_width*4) { + struct video_frame frame; + for (size_t i = 0; i < 3; i++) { if (video->plane_linewidth[i] == 0) break; - frame->linesize[i] = video->plane_linewidth[i]; - frame->data[i] = - frame->data[0] + video->plane_offsets[i]; + frame.linesize[i] = video->plane_linewidth[i]; + frame.data[i] = + input->data[0] + video->plane_offsets[i]; } - } else { - fix_gpu_converted_alignment(video, frame, cur_texture); - } + video_frame_copy(output, &frame, info->format, info->height); - return true; + } else { + fix_gpu_converted_alignment(video, output, input); + } } -static bool convert_frame(struct obs_core_video *video, - struct video_data *frame, - const struct video_output_info *info, int cur_texture) +static void 
convert_frame( + struct video_frame *output, const struct video_data *input, + const struct video_output_info *info) { - struct obs_source_frame *new_frame = - &video->convert_frames[cur_texture]; - if (info->format == VIDEO_FORMAT_I420) { compress_uyvx_to_i420( - frame->data[0], frame->linesize[0], + input->data[0], input->linesize[0], 0, info->height, - new_frame->data, new_frame->linesize); + output->data, output->linesize); } else if (info->format == VIDEO_FORMAT_NV12) { compress_uyvx_to_nv12( - frame->data[0], frame->linesize[0], + input->data[0], input->linesize[0], 0, info->height, - new_frame->data, new_frame->linesize); + output->data, output->linesize); } else { blog(LOG_ERROR, "convert_frame: unsupported texture format"); - return false; } - - for (size_t i = 0; i < MAX_AV_PLANES; i++) { - frame->data[i] = new_frame->data[i]; - frame->linesize[i] = new_frame->linesize[i]; - } - - return true; } static inline void output_video_data(struct obs_core_video *video, - struct video_data *frame, int cur_texture) + struct video_data *input_frame, int count) { const struct video_output_info *info; + struct video_frame output_frame; + bool locked; + info = video_output_get_info(video->video); - if (video->gpu_conversion) { - if (!set_gpu_converted_data(video, frame, cur_texture)) - return; + locked = video_output_lock_frame(video->video, &output_frame, count, + input_frame->timestamp); + if (locked) { + if (video->gpu_conversion) { + set_gpu_converted_data(video, &output_frame, + input_frame, info); - } else if (format_is_yuv(info->format)) { - if (!convert_frame(video, frame, info, cur_texture)) - return; + } else if (format_is_yuv(info->format)) { + convert_frame(&output_frame, input_frame, info); + } + + video_output_unlock_frame(video->video); } - - video_output_swap_frame(video->video, frame); } -static inline void output_frame(uint64_t timestamp) +static inline void video_sleep(struct obs_core_video *video, + uint64_t *p_time, uint64_t interval_ns) +{ + 
struct obs_vframe_info vframe_info; + uint64_t cur_time = *p_time; + uint64_t t = cur_time + interval_ns; + int count; + + if (!os_sleepto_ns(t)) { + *p_time = t; + count = 1; + } else { + count = (int)((os_gettime_ns() - cur_time) / interval_ns); + *p_time = cur_time + interval_ns * count; + } + + vframe_info.timestamp = cur_time; + vframe_info.count = count; + circlebuf_push_back(&video->vframe_info_buffer, &vframe_info, + sizeof(vframe_info)); +} + +static inline void output_frame(uint64_t *cur_time, uint64_t interval) { struct obs_core_video *video = &obs->video; int cur_texture = video->cur_texture; @@ -501,37 +511,38 @@ static inline void output_frame(uint64_t timestamp) memset(&frame, 0, sizeof(struct video_data)); gs_enter_context(video->graphics); - - render_video(video, cur_texture, prev_texture, timestamp); + render_video(video, cur_texture, prev_texture); frame_ready = download_frame(video, prev_texture, &frame); - gs_flush(); - gs_leave_context(); if (frame_ready) { - circlebuf_pop_front(&video->timestamp_buffer, &frame.timestamp, - sizeof(frame.timestamp)); + struct obs_vframe_info vframe_info; + circlebuf_pop_front(&video->vframe_info_buffer, &vframe_info, + sizeof(vframe_info)); - output_video_data(video, &frame, cur_texture); + frame.timestamp = vframe_info.timestamp; + output_video_data(video, &frame, vframe_info.count); } if (++video->cur_texture == NUM_TEXTURES) video->cur_texture = 0; + + video_sleep(video, cur_time, interval); } void *obs_video_thread(void *param) { uint64_t last_time = 0; + uint64_t cur_time = os_gettime_ns(); + uint64_t interval = video_output_get_frame_time(obs->video.video); - while (video_output_wait(obs->video.video)) { - uint64_t cur_time = video_output_get_time(obs->video.video); - + while (!video_output_stopped(obs->video.video)) { last_time = tick_sources(cur_time, last_time); render_displays(); - output_frame(cur_time); + output_frame(&cur_time, interval); } UNUSED_PARAMETER(param); diff --git a/libobs/obs.c 
b/libobs/obs.c index c9becbb73..a79ecc428 100644 --- a/libobs/obs.c +++ b/libobs/obs.c @@ -51,6 +51,7 @@ static inline void make_video_info(struct video_output_info *vi, vi->height = ovi->output_height; vi->range = ovi->range; vi->colorspace = ovi->colorspace; + vi->cache_size = 6; } #define PIXEL_SIZE 4 @@ -163,7 +164,6 @@ static bool obs_init_gpu_conversion(struct obs_video_info *ovi) static bool obs_init_textures(struct obs_video_info *ovi) { struct obs_core_video *video = &obs->video; - bool yuv = format_is_yuv(ovi->output_format); uint32_t output_height = video->gpu_conversion ? video->conversion_height : ovi->output_height; size_t i; @@ -188,11 +188,6 @@ static bool obs_init_textures(struct obs_video_info *ovi) if (!video->output_textures[i]) return false; - - if (yuv) - obs_source_frame_init(&video->convert_frames[i], - ovi->output_format, - ovi->output_width,ovi->output_height); } return true; @@ -383,7 +378,6 @@ static void obs_free_video(void) gs_texture_destroy(video->render_textures[i]); gs_texture_destroy(video->convert_textures[i]); gs_texture_destroy(video->output_textures[i]); - obs_source_frame_free(&video->convert_frames[i]); video->copy_surfaces[i] = NULL; video->render_textures[i] = NULL; @@ -393,7 +387,7 @@ static void obs_free_video(void) gs_leave_context(); - circlebuf_free(&video->timestamp_buffer); + circlebuf_free(&video->vframe_info_buffer); memset(&video->textures_rendered, 0, sizeof(video->textures_rendered));