libobs: Add frame caching for async video sources

The temporary unoptimized code we were using before allocated a new
copy of each frame every single time the source plugin output a new
async frame.  This change instead keeps a cache of frames for the
current format/width/height to minimize allocation and deallocation.
If new frames come in with a different format/width/height, the cache
is simply cleared.  This is a fairly important optimization.
Author: jp9000
Date:   2015-01-04 00:18:36 -08:00
Parent: ae39e5bb77
Commit: c3e498d25f

2 changed files with 94 additions and 31 deletions
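As a minimal, self-contained sketch of the approach described above (simplified stand-in types, no locking, not the actual libobs structures): reuse an unused cached frame while the format/width/height stays the same, allocate and remember a new one when the cache is exhausted, and flush the cache whenever the format or geometry changes.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

struct frame {
        uint32_t format, width, height;
        uint8_t *data;
};

struct cache_entry {
        struct frame *frame;
        bool used;                      /* currently handed out to a consumer */
};

struct frame_cache {
        struct cache_entry *entries;
        size_t num, capacity;
        uint32_t format, width, height; /* geometry the cache was built for */
};

/* Allocate a frame big enough for the requested size (worst case for the sketch). */
static struct frame *frame_create(uint32_t format, uint32_t w, uint32_t h)
{
        struct frame *f = calloc(1, sizeof(*f));
        f->format = format;
        f->width  = w;
        f->height = h;
        f->data   = malloc((size_t)w * h * 4);
        return f;
}

/* Drop every cached frame; called when format/width/height changes. */
static void cache_flush(struct frame_cache *c)
{
        for (size_t i = 0; i < c->num; i++) {
                free(c->entries[i].frame->data);
                free(c->entries[i].frame);
        }
        c->num = 0;
}

/* Get a frame to copy new data into: reuse an unused cached frame if the
 * geometry still matches, otherwise allocate one and remember it. */
static struct frame *cache_get(struct frame_cache *c, uint32_t format,
                uint32_t w, uint32_t h)
{
        if (c->format != format || c->width != w || c->height != h) {
                cache_flush(c);
                c->format = format;
                c->width  = w;
                c->height = h;
        }

        for (size_t i = 0; i < c->num; i++) {
                if (!c->entries[i].used) {
                        c->entries[i].used = true;
                        return c->entries[i].frame;
                }
        }

        if (c->num == c->capacity) {
                c->capacity = c->capacity ? c->capacity * 2 : 4;
                c->entries  = realloc(c->entries,
                                c->capacity * sizeof(*c->entries));
        }

        struct frame *f = frame_create(format, w, h);
        c->entries[c->num].frame = f;
        c->entries[c->num].used  = true;
        c->num++;
        return f;
}

/* Consumer is done with a frame: mark its entry reusable instead of freeing it. */
static void cache_release(struct frame_cache *c, struct frame *f)
{
        for (size_t i = 0; i < c->num; i++) {
                if (c->entries[i].frame == f) {
                        c->entries[i].used = false;
                        break;
                }
        }
}

In the actual change below, the cache lives on struct obs_source as a DARRAY of struct async_frame, is guarded by async_mutex, and entries are returned to the cache via remove_async_frame() when a frame is released or dropped.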

libobs/obs-internal.h

@@ -274,6 +274,11 @@ extern void obs_context_data_setname(struct obs_context_data *context,
/* ------------------------------------------------------------------------- */
/* sources */
struct async_frame {
struct obs_source_frame *frame;
bool used;
};
struct obs_source {
struct obs_context_data context;
struct obs_source_info info;
@@ -334,6 +339,8 @@ struct obs_source {
int async_plane_offset[2];
bool async_flip;
bool async_active;
bool async_reset_texture;
DARRAY(struct async_frame) async_cache;
DARRAY(struct obs_source_frame*)async_frames;
pthread_mutex_t async_mutex;
uint32_t async_width;

libobs/obs-source.c

@@ -249,8 +249,8 @@ void obs_source_destroy(struct obs_source *source)
for (i = 0; i < source->filters.num; i++)
obs_source_release(source->filters.array[i]);
for (i = 0; i < source->async_frames.num; i++)
obs_source_frame_destroy(source->async_frames.array[i]);
for (i = 0; i < source->async_cache.num; i++)
obs_source_frame_destroy(source->async_cache.array[i].frame);
gs_enter_context(obs->video.graphics);
gs_texrender_destroy(source->async_convert_texrender);
@@ -264,6 +264,7 @@ void obs_source_destroy(struct obs_source *source)
audio_resampler_destroy(source->resampler);
gs_texrender_destroy(source->filter_texrender);
da_free(source->async_cache);
da_free(source->async_frames);
da_free(source->filters);
pthread_mutex_destroy(&source->filter_mutex);
@@ -726,7 +727,7 @@ static inline enum convert_type get_convert_type(enum video_format format)
}
static inline bool set_packed422_sizes(struct obs_source *source,
struct obs_source_frame *frame)
const struct obs_source_frame *frame)
{
source->async_convert_height = frame->height;
source->async_convert_width = frame->width / 2;
@@ -735,7 +736,7 @@ static inline bool set_packed422_sizes(struct obs_source *source,
}
static inline bool set_planar420_sizes(struct obs_source *source,
struct obs_source_frame *frame)
const struct obs_source_frame *frame)
{
uint32_t size = frame->width * frame->height;
size += size/2;
@@ -750,7 +751,7 @@ static inline bool set_planar420_sizes(struct obs_source *source,
}
static inline bool set_nv12_sizes(struct obs_source *source,
struct obs_source_frame *frame)
const struct obs_source_frame *frame)
{
uint32_t size = frame->width * frame->height;
size += size/2;
@@ -763,7 +764,7 @@ static inline bool set_nv12_sizes(struct obs_source *source,
}
static inline bool init_gpu_conversion(struct obs_source *source,
struct obs_source_frame *frame)
const struct obs_source_frame *frame)
{
switch (get_convert_type(frame->format)) {
case CONVERT_422_Y:
@@ -797,17 +798,14 @@ static inline enum gs_color_format convert_video_format(
}
static inline bool set_async_texture_size(struct obs_source *source,
struct obs_source_frame *frame)
const struct obs_source_frame *frame)
{
enum convert_type prev, cur;
prev = get_convert_type(source->async_format);
cur = get_convert_type(frame->format);
if (source->async_texture) {
if (source->async_width == frame->width &&
source->async_height == frame->height &&
prev == cur)
return true;
}
enum convert_type cur = get_convert_type(frame->format);
if (!source->async_reset_texture)
return true;
source->async_reset_texture = false;
gs_texture_destroy(source->async_texture);
gs_texrender_destroy(source->async_convert_texrender);
@@ -835,12 +833,7 @@ static inline bool set_async_texture_size(struct obs_source *source,
format, 1, NULL, GS_DYNAMIC);
}
if (!source->async_texture)
return false;
source->async_width = frame->width;
source->async_height = frame->height;
return true;
return !!source->async_texture;
}
static void upload_raw_frame(gs_texture_t *tex,
@@ -971,7 +964,6 @@ static bool update_async_texture(struct obs_source *source,
uint8_t *ptr;
uint32_t linesize;
source->async_format = frame->format;
source->async_flip = frame->flip;
source->async_full_range = frame->full_range;
memcpy(source->async_color_matrix, frame->color_matrix,
@@ -1359,12 +1351,62 @@ static void copy_frame_data(struct obs_source_frame *dst,
}
}
static inline struct obs_source_frame *cache_video(
static inline bool async_texture_changed(struct obs_source *source,
const struct obs_source_frame *frame)
{
/* TODO: use an actual cache */
struct obs_source_frame *new_frame = obs_source_frame_create(
frame->format, frame->width, frame->height);
enum convert_type prev, cur;
prev = get_convert_type(source->async_format);
cur = get_convert_type(frame->format);
return source->async_width != frame->width ||
source->async_height != frame->height ||
prev != cur;
}
static inline void free_async_cache(struct obs_source *source)
{
for (size_t i = 0; i < source->async_cache.num; i++)
obs_source_frame_destroy(source->async_cache.array[i].frame);
da_resize(source->async_cache, 0);
da_resize(source->async_frames, 0);
}
static inline struct obs_source_frame *cache_video(struct obs_source *source,
const struct obs_source_frame *frame)
{
struct obs_source_frame *new_frame = NULL;
pthread_mutex_lock(&source->async_mutex);
if (async_texture_changed(source, frame)) {
source->async_width = frame->width;
source->async_height = frame->height;
source->async_format = frame->format;
source->async_reset_texture = true;
free_async_cache(source);
}
for (size_t i = 0; i < source->async_cache.num; i++) {
struct async_frame *af = &source->async_cache.array[i];
if (!af->used) {
new_frame = af->frame;
af->used = true;
break;
}
}
if (!new_frame) {
struct async_frame new_af;
new_frame = obs_source_frame_create(frame->format,
frame->width, frame->height);
new_af.frame = new_frame;
new_af.used = true;
da_push_back(source->async_cache, &new_af);
}
pthread_mutex_unlock(&source->async_mutex);
copy_frame_data(new_frame, frame);
return new_frame;
@@ -1384,7 +1426,8 @@ void obs_source_output_video(obs_source_t *source,
if (!source)
return;
struct obs_source_frame *output = !!frame ? cache_video(frame) : NULL;
struct obs_source_frame *output = !!frame ?
cache_video(source, frame) : NULL;
pthread_mutex_lock(&source->filter_mutex);
output = filter_async_video(source, output);
@@ -1610,6 +1653,19 @@ static inline bool frame_out_of_bounds(const obs_source_t *source, uint64_t ts)
return ((ts - source->last_frame_ts) > MAX_TS_VAR);
}
static void remove_async_frame(obs_source_t *source,
struct obs_source_frame *frame)
{
for (size_t i = 0; i < source->async_cache.num; i++) {
struct async_frame *f = &source->async_cache.array[i];
if (f->frame == frame) {
f->used = false;
break;
}
}
}
/* #define DEBUG_ASYNC_FRAMES 1 */
static bool ready_async_frame(obs_source_t *source, uint64_t sys_time)
@@ -1623,7 +1679,7 @@ static bool ready_async_frame(obs_source_t *source, uint64_t sys_time)
if ((source->flags & OBS_SOURCE_FLAG_UNBUFFERED) != 0) {
while (source->async_frames.num > 1) {
da_erase(source->async_frames, 0);
obs_source_frame_destroy(next_frame);
remove_async_frame(source, next_frame);
next_frame = source->async_frames.array[0];
}
@@ -1671,7 +1727,7 @@ static bool ready_async_frame(obs_source_t *source, uint64_t sys_time)
next_frame->timestamp);
#endif
obs_source_frame_destroy(frame);
remove_async_frame(source, frame);
if (source->async_frames.num == 1)
return true;
@@ -1763,7 +1819,7 @@ void obs_source_release_frame(obs_source_t *source,
struct obs_source_frame *frame)
{
if (source && frame) {
obs_source_frame_destroy(frame);
remove_async_frame(source, frame);
obs_source_release(source);
}
}
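For context, here is a hedged sketch (not part of this commit) of how an async video source plugin typically feeds frames into the path changed above, assuming a hypothetical capture callback that owns an NV12 buffer.  The plugin fills a struct obs_source_frame that points at its own memory and calls obs_source_output_video(); with this commit, libobs copies the planes into a cached frame of the matching format/width/height instead of allocating a fresh copy on every call.

#include <obs-module.h>

/* Hypothetical helper: nv12_data, width, height and timestamp_ns come from
 * the plugin's own capture code; only the obs_source_frame fields and
 * obs_source_output_video() are actual libobs API. */
static void output_captured_frame(obs_source_t *source, uint8_t *nv12_data,
                uint32_t width, uint32_t height, uint64_t timestamp_ns)
{
        struct obs_source_frame frame = {0};

        frame.format    = VIDEO_FORMAT_NV12;
        frame.width     = width;
        frame.height    = height;
        frame.timestamp = timestamp_ns;

        /* NV12: full-resolution Y plane followed by an interleaved UV plane */
        frame.data[0]     = nv12_data;
        frame.linesize[0] = width;
        frame.data[1]     = nv12_data + (size_t)width * height;
        frame.linesize[1] = width;

        /* The plugin keeps ownership of nv12_data; libobs copies it into
         * (or reuses) a cached frame under async_mutex. */
        obs_source_output_video(source, &frame);
}

On the consumption side, frames handed back through obs_source_release_frame() are now returned to the cache by remove_async_frame() rather than destroyed, as shown in the last hunk above.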