libobs: Ensure paired encoders start up at the same time

With the new audio subsystem, audio buffering is minimal at all times. However, when the audio buffering is too small or non-existent, it would cause the audio encoders to start with a timestamp that was actually higher than the first video frame timestamp. Video would have some inherent buffering/delay, but then audio could return and encode almost immediately. This created a possible window of empty time between the first encoded video packet and the first encoded audio packet, whereas audio buffering would cause the first audio packet's timestamp to always be well before the first video packet's timestamp. The code would then incorrectly assume the two starting points were in sync. So instead of assuming the audio data is always first, this patch makes video wait until audio data comes in, and conversely buffers audio data until video comes in, and tries to find a starting point within that video data instead, ensuring a synced starting point whether audio buffering is active or not.
2016-01-30 12:50:51 -08:00 · 2016-01-30 12:50:51 -08:00 · 9aa18d3de5
commit 9aa18d3de5
parent a7067906f3
2 changed files with 105 additions and 37 deletions
--- a/libobs/obs-encoder.c
+++ b/libobs/obs-encoder.c
@ -830,8 +830,16 @@ static void receive_video(void *param, struct video_data *frame)
 	profile_start(receive_video_name);

 	struct obs_encoder    *encoder  = param;
+	struct obs_encoder    *pair     = encoder->paired_encoder;
 	struct encoder_frame  enc_frame;

+	if (!encoder->first_received && pair) {
+		if (!pair->first_received ||
+		    pair->first_raw_ts > frame->timestamp) {
+			goto wait_for_audio;
+		}
+	}
+
 	memset(&enc_frame, 0, sizeof(struct encoder_frame));

 	for (size_t i = 0; i < MAX_AV_PLANES; i++) {
@ -849,45 +857,19 @@ static void receive_video(void *param, struct video_data *frame)

 	encoder->cur_pts += encoder->timebase_num;

+wait_for_audio:
 	profile_end(receive_video_name);
 }

-static const char *buffer_audio_name = "buffer_audio";
-static bool buffer_audio(struct obs_encoder *encoder, struct audio_data *data)
+static void clear_audio(struct obs_encoder *encoder)
 {
-	profile_start(buffer_audio_name);
-
-	size_t samplerate = encoder->samplerate;
-	size_t size = data->frames * encoder->blocksize;
-	size_t offset_size = 0;
-
-	if (!encoder->start_ts && encoder->paired_encoder) {
-		uint64_t end_ts     = data->timestamp;
-		uint64_t v_start_ts = encoder->paired_encoder->start_ts;
-
-		/* no video yet, so don't start audio */
-		if (!v_start_ts)
-			goto fail;
-
-		/* audio starting point still not synced with video starting
-		 * point, so don't start audio */
-		end_ts += (uint64_t)data->frames * 1000000000ULL / samplerate;
-		if (end_ts <= v_start_ts)
-			goto fail;
-
-		/* ready to start audio, truncate if necessary */
-		if (data->timestamp < v_start_ts) {
-			uint64_t offset = v_start_ts - data->timestamp;
-			offset = (int)(offset * samplerate / 1000000000);
-			offset_size = (size_t)offset * encoder->blocksize;
-		}
-
-		encoder->start_ts = v_start_ts;
-
-	} else if (!encoder->start_ts && !encoder->paired_encoder) {
-		encoder->start_ts = data->timestamp;
-	}
+	for (size_t i = 0; i < encoder->planes; i++)
+		circlebuf_free(&encoder->audio_input_buffer[i]);
+}

+static inline void push_back_audio(struct obs_encoder *encoder,
+		struct audio_data *data, size_t size, size_t offset_size)
+{
 	size -= offset_size;

 	/* push in to the circular buffer */
@ -895,13 +877,92 @@ static bool buffer_audio(struct obs_encoder *encoder, struct audio_data *data)
 		for (size_t i = 0; i < encoder->planes; i++)
 			circlebuf_push_back(&encoder->audio_input_buffer[i],
 					data->data[i] + offset_size, size);
+}

-	profile_end(buffer_audio_name);
-	return true;
+static inline size_t calc_offset_size(struct obs_encoder *encoder,
+		uint64_t v_start_ts, uint64_t a_start_ts)
+{
+	uint64_t offset = v_start_ts - a_start_ts;
+	offset = (uint64_t)offset * (uint64_t)encoder->samplerate /
+		1000000000ULL;
+	return (size_t)offset * encoder->blocksize;
+}
+
+static void start_from_buffer(struct obs_encoder *encoder, uint64_t v_start_ts)
+{
+	size_t size = encoder->audio_input_buffer[0].size;
+	struct audio_data audio = {0};
+	size_t offset_size = 0;
+
+	for (size_t i = 0; i < MAX_AV_PLANES; i++) {
+		audio.data[i] = encoder->audio_input_buffer[i].data;
+		memset(&encoder->audio_input_buffer[i], 0,
+				sizeof(struct circlebuf));
+	}
+
+	if (encoder->first_raw_ts < v_start_ts)
+		offset_size = calc_offset_size(encoder, v_start_ts,
+				encoder->first_raw_ts);
+
+	push_back_audio(encoder, &audio, size, offset_size);
+
+	for (size_t i = 0; i < MAX_AV_PLANES; i++)
+		bfree(audio.data[i]);
+}
+
+static const char *buffer_audio_name = "buffer_audio";
+static bool buffer_audio(struct obs_encoder *encoder, struct audio_data *data)
+{
+	profile_start(buffer_audio_name);
+
+	size_t size = data->frames * encoder->blocksize;
+	size_t offset_size = 0;
+	bool success = true;
+
+	if (!encoder->start_ts && encoder->paired_encoder) {
+		uint64_t end_ts     = data->timestamp;
+		uint64_t v_start_ts = encoder->paired_encoder->start_ts;
+
+		/* no video yet, so don't start audio */
+		if (!v_start_ts) {
+			success = false;
+			goto fail;
+		}
+
+		/* audio starting point still not synced with video starting
+		 * point, so don't start audio */
+		end_ts += (uint64_t)data->frames * 1000000000ULL /
+			(uint64_t)encoder->samplerate;
+		if (end_ts <= v_start_ts) {
+			success = false;
+			goto fail;
+		}
+
+		/* ready to start audio, truncate if necessary */
+		if (data->timestamp < v_start_ts)
+			offset_size = calc_offset_size(encoder, v_start_ts,
+					data->timestamp);
+		if (data->timestamp <= v_start_ts)
+			clear_audio(encoder);
+
+		encoder->start_ts = v_start_ts;
+
+		/* use currently buffered audio instead */
+		if (v_start_ts < data->timestamp) {
+			start_from_buffer(encoder, v_start_ts);
+			goto skip_push;
+		}
+
+	} else if (!encoder->start_ts && !encoder->paired_encoder) {
+		encoder->start_ts = data->timestamp;
+	}

 fail:
+	push_back_audio(encoder, data, size, offset_size);
+
+skip_push:
 	profile_end(buffer_audio_name);
-	return false;
+	return success;
 }

 static void send_audio_data(struct obs_encoder *encoder)
@ -934,6 +995,12 @@ static void receive_audio(void *param, size_t mix_idx, struct audio_data *data)

 	struct obs_encoder *encoder = param;

+	if (!encoder->first_received) {
+		encoder->first_raw_ts = data->timestamp;
+		encoder->first_received = true;
+		clear_audio(encoder);
+	}
+
 	if (!buffer_audio(encoder, data))
 		goto end;

--- a/libobs/obs-internal.h
+++ b/libobs/obs-internal.h
@ -847,6 +847,7 @@ struct obs_encoder {
 	bool                            first_received;
 	struct obs_encoder              *paired_encoder;
 	int64_t                         offset_usec;
+	uint64_t                        first_raw_ts;
 	uint64_t                        start_ts;

 	pthread_mutex_t                 outputs_mutex;