libobs: Remove unnecessary frame pipelining

Remove three instances of unnecessary double-buffering. They are not needed to avoid stalls, and cause increased memory traffic when measured on Intel HD 530, presumably because texture data will remain in cache if sampled immediately after write. (Note: GPU timings from Intel GPA are volatile.) NV12, 3 Draws: RGBA -> UYVX: 628 us -> 543 us UYVX -> Y: 522 us -> 507 us UYVX -> UV: 315 us -> 187 us Total, Duration: 1594 us -> 1153 us Total, GTI Read Throughput: 25.2 MB -> 15.9 MB
2019-05-24 01:03:21 -07:00 · 2019-05-24 01:03:21 -07:00 · 8d6ed988e6
commit 8d6ed988e6
parent f130e8755b
3 changed files with 100 additions and 145 deletions
--- a/libobs/obs-internal.h
+++ b/libobs/obs-internal.h
@ -240,14 +240,13 @@ struct obs_tex_frame {
 struct obs_core_video {
 	graphics_t                      *graphics;
 	gs_stagesurf_t                  *copy_surfaces[NUM_TEXTURES];
-	gs_texture_t                    *render_textures[NUM_TEXTURES];
-	gs_texture_t                    *output_textures[NUM_TEXTURES];
-	gs_texture_t                    *convert_textures[NUM_TEXTURES];
-	gs_texture_t                    *convert_uv_textures[NUM_TEXTURES];
-	bool                            textures_rendered[NUM_TEXTURES];
-	bool                            textures_output[NUM_TEXTURES];
+	gs_texture_t                    *render_texture;
+	gs_texture_t                    *output_texture;
+	gs_texture_t                    *convert_texture;
+	gs_texture_t                    *convert_uv_texture;
+	bool                            texture_rendered;
 	bool                            textures_copied[NUM_TEXTURES];
-	bool                            textures_converted[NUM_TEXTURES];
+	bool                            texture_converted;
 	bool                            using_nv12_tex;
 	struct circlebuf                vframe_info_buffer;
 	struct circlebuf                vframe_info_buffer_gpu;
--- a/libobs/obs-video.c
+++ b/libobs/obs-video.c
@ -116,8 +116,7 @@ static inline void unmap_last_surface(struct obs_core_video *video)
 }

 static const char *render_main_texture_name = "render_main_texture";
-static inline void render_main_texture(struct obs_core_video *video,
-		int cur_texture)
+static inline void render_main_texture(struct obs_core_video *video)
 {
 	profile_start(render_main_texture_name);
 	GS_DEBUG_MARKER_BEGIN(GS_DEBUG_COLOR_MAIN_TEXTURE,
@ -126,7 +125,7 @@ static inline void render_main_texture(struct obs_core_video *video,
 	struct vec4 clear_color;
 	vec4_set(&clear_color, 0.0f, 0.0f, 0.0f, 0.0f);

-	gs_set_render_target(video->render_textures[cur_texture], NULL);
+	gs_set_render_target(video->render_texture, NULL);
 	gs_clear(GS_CLEAR_COLOR, &clear_color, 1.0f, 0);

 	set_render_size(video->base_width, video->base_height);
@ -145,7 +144,7 @@ static inline void render_main_texture(struct obs_core_video *video,

 	obs_view_render(&obs->data.main_view);

-	video->textures_rendered[cur_texture] = true;
+	video->texture_rendered = true;

 	GS_DEBUG_MARKER_END();
 	profile_end(render_main_texture_name);
@ -199,13 +198,12 @@ static inline gs_effect_t *get_scale_effect(struct obs_core_video *video,
 }

 static const char *render_output_texture_name = "render_output_texture";
-static inline void render_output_texture(struct obs_core_video *video,
-		int cur_texture, int prev_texture)
+static inline void render_output_texture(struct obs_core_video *video)
 {
 	profile_start(render_output_texture_name);

-	gs_texture_t *texture = video->render_textures[prev_texture];
-	gs_texture_t *target  = video->output_textures[cur_texture];
+	gs_texture_t *texture = video->render_texture;
+	gs_texture_t *target  = video->output_texture;
 	uint32_t     width   = gs_texture_get_width(target);
 	uint32_t     height  = gs_texture_get_height(target);
 	struct vec2  base_i;
@ -230,9 +228,6 @@ static inline void render_output_texture(struct obs_core_video *video,
 			"base_dimension_i");
 	size_t      passes, i;

-	if (!video->textures_rendered[prev_texture])
-		goto end;
-
 	gs_set_render_target(target, NULL);
 	set_render_size(width, height);

@ -252,9 +247,6 @@ static inline void render_output_texture(struct obs_core_video *video,
 	gs_technique_end(tech);
 	gs_enable_blending(true);

-	video->textures_output[cur_texture] = true;
-
-end:
 	profile_end(render_output_texture_name);
 }

@ -265,13 +257,12 @@ static inline void set_eparam(gs_effect_t *effect, const char *name, float val)
 }

 static const char *render_convert_texture_name = "render_convert_texture";
-static void render_convert_texture(struct obs_core_video *video,
-		int cur_texture, int prev_texture)
+static void render_convert_texture(struct obs_core_video *video)
 {
 	profile_start(render_convert_texture_name);

-	gs_texture_t *texture = video->output_textures[prev_texture];
-	gs_texture_t *target  = video->convert_textures[cur_texture];
+	gs_texture_t *texture = video->output_texture;
+	gs_texture_t *target  = video->convert_texture;
 	float        fwidth  = (float)video->output_width;
 	float        fheight = (float)video->output_height;
 	size_t       passes, i;
@ -281,9 +272,6 @@ static void render_convert_texture(struct obs_core_video *video,
 	gs_technique_t *tech    = gs_effect_get_technique(effect,
 			video->conversion_tech);

-	if (!video->textures_output[prev_texture])
-		goto end;
-
 	set_eparam(effect, "u_plane_offset", (float)video->plane_offsets[1]);
 	set_eparam(effect, "v_plane_offset", (float)video->plane_offsets[2]);
 	set_eparam(effect, "width",  fwidth);
@ -312,17 +300,15 @@ static void render_convert_texture(struct obs_core_video *video,
 	gs_technique_end(tech);
 	gs_enable_blending(true);

-	video->textures_converted[cur_texture] = true;
+	video->texture_converted = true;

-end:
 	profile_end(render_convert_texture_name);
 }

 static void render_nv12(struct obs_core_video *video, gs_texture_t *target,
-		int cur_texture, int prev_texture, const char *tech_name,
-		uint32_t width, uint32_t height)
+		const char *tech_name, uint32_t width, uint32_t height)
 {
-	gs_texture_t *texture = video->output_textures[prev_texture];
+	gs_texture_t *texture = video->output_texture;

 	gs_effect_t    *effect  = video->conversion_effect;
 	gs_eparam_t    *image   = gs_effect_get_param_by_name(effect, "image");
@ -343,35 +329,26 @@ static void render_nv12(struct obs_core_video *video, gs_texture_t *target,
 	}
 	gs_technique_end(tech);
 	gs_enable_blending(true);
-
-	UNUSED_PARAMETER(cur_texture);
 }

 static const char *render_convert_nv12_name = "render_convert_texture_nv12";
-static void render_convert_texture_nv12(struct obs_core_video *video,
-		int cur_texture, int prev_texture)
+static void render_convert_texture_nv12(struct obs_core_video *video)
 {
 	profile_start(render_convert_nv12_name);

-	if (!video->textures_output[prev_texture])
-		goto end;
-
-	render_nv12(video, video->convert_textures[cur_texture],
-			cur_texture, prev_texture, "NV12_Y",
+	render_nv12(video, video->convert_texture, "NV12_Y",
 			video->output_width, video->output_height);
-	render_nv12(video, video->convert_uv_textures[cur_texture],
-			cur_texture, prev_texture, "NV12_UV",
+	render_nv12(video, video->convert_uv_texture, "NV12_UV",
 			video->output_width / 2, video->output_height / 2);

-	video->textures_converted[cur_texture] = true;
+	video->texture_converted = true;

-end:
 	profile_end(render_convert_nv12_name);
 }

 static const char *stage_output_texture_name = "stage_output_texture";
 static inline void stage_output_texture(struct obs_core_video *video,
-		int cur_texture, int prev_texture)
+		int cur_texture)
 {
 	profile_start(stage_output_texture_name);

@ -380,11 +357,11 @@ static inline void stage_output_texture(struct obs_core_video *video,
 	gs_stagesurf_t *copy = video->copy_surfaces[cur_texture];

 	if (video->gpu_conversion) {
-		texture = video->convert_textures[prev_texture];
-		texture_ready = video->textures_converted[prev_texture];
+		texture = video->convert_texture;
+		texture_ready = video->texture_converted;
 	} else {
-		texture = video->output_textures[prev_texture];
-		texture_ready = video->textures_output[prev_texture];
+		texture = video->output_texture;
+		texture_ready = true;
 	}

 	unmap_last_surface(video);
@ -402,7 +379,7 @@ end:

 #ifdef _WIN32
 static inline bool queue_frame(struct obs_core_video *video, bool raw_active,
-		struct obs_vframe_info *vframe_info, int prev_texture)
+		struct obs_vframe_info *vframe_info)
 {
 	bool duplicate = !video->gpu_encoder_avail_queue.size ||
 		(video->gpu_encoder_queue.size && vframe_info->count > 1);
@ -435,13 +412,13 @@ static inline bool queue_frame(struct obs_core_video *video, bool raw_active,
 	 * reason.  otherwise, it goes to the 'duplicate' case above, which
 	 * will ensure better performance. */
 	if (raw_active || vframe_info->count > 1) {
-		gs_copy_texture(tf.tex, video->convert_textures[prev_texture]);
+		gs_copy_texture(tf.tex, video->convert_texture);
 	} else {
-		gs_texture_t *tex = video->convert_textures[prev_texture];
-		gs_texture_t *tex_uv = video->convert_uv_textures[prev_texture];
+		gs_texture_t *tex = video->convert_texture;
+		gs_texture_t *tex_uv = video->convert_uv_texture;

-		video->convert_textures[prev_texture] = tf.tex;
-		video->convert_uv_textures[prev_texture] = tf.tex_uv;
+		video->convert_texture = tf.tex;
+		video->convert_uv_texture = tf.tex_uv;

 		tf.tex = tex;
 		tf.tex_uv = tex_uv;
@ -463,18 +440,17 @@ finish:
 extern void full_stop(struct obs_encoder *encoder);

 static inline void encode_gpu(struct obs_core_video *video, bool raw_active,
-		struct obs_vframe_info *vframe_info, int prev_texture)
+		struct obs_vframe_info *vframe_info)
 {
-	while (queue_frame(video, raw_active, vframe_info, prev_texture));
+	while (queue_frame(video, raw_active, vframe_info));
 }

 static const char *output_gpu_encoders_name = "output_gpu_encoders";
-static void output_gpu_encoders(struct obs_core_video *video, bool raw_active,
-		int prev_texture)
+static void output_gpu_encoders(struct obs_core_video *video, bool raw_active)
 {
 	profile_start(output_gpu_encoders_name);

-	if (!video->textures_converted[prev_texture])
+	if (!video->texture_converted)
 		goto end;
 	if (!video->vframe_info_buffer_gpu.size)
 		goto end;
@ -484,7 +460,7 @@ static void output_gpu_encoders(struct obs_core_video *video, bool raw_active,
 			sizeof(vframe_info));

 	pthread_mutex_lock(&video->gpu_encoder_mutex);
-	encode_gpu(video, raw_active, &vframe_info, prev_texture);
+	encode_gpu(video, raw_active, &vframe_info);
 	pthread_mutex_unlock(&video->gpu_encoder_mutex);

 end:
@ -492,45 +468,40 @@ end:
 }
 #endif

-static inline void render_video(struct obs_core_video *video,
-		bool raw_active, const bool gpu_active,
-		int cur_texture, int prev_texture)
+static inline void render_video(struct obs_core_video *video, bool raw_active,
+		const bool gpu_active, int cur_texture)
 {
 	gs_begin_scene();

 	gs_enable_depth_test(false);
 	gs_set_cull_mode(GS_NEITHER);

-	render_main_texture(video, cur_texture);
+	render_main_texture(video);

 	if (raw_active || gpu_active) {
-		render_output_texture(video, cur_texture, prev_texture);
+		render_output_texture(video);

 #ifdef _WIN32
 		if (gpu_active) {
 			gs_flush();
 		}
 #endif
-	}

-	if (raw_active || gpu_active) {
 		if (video->gpu_conversion) {
 			if (video->using_nv12_tex)
-				render_convert_texture_nv12(video,
-						cur_texture, prev_texture);
+				render_convert_texture_nv12(video);
 			else
-				render_convert_texture(video,
-						cur_texture, prev_texture);
+				render_convert_texture(video);
 		}

 #ifdef _WIN32
 		if (gpu_active) {
 			gs_flush();
-			output_gpu_encoders(video, raw_active, prev_texture);
+			output_gpu_encoders(video, raw_active);
 		}
 #endif
 		if (raw_active)
-			stage_output_texture(video, cur_texture, prev_texture);
+			stage_output_texture(video, cur_texture);
 	}

 	gs_set_render_target(NULL, NULL);
@ -777,7 +748,7 @@ static inline void output_frame(bool raw_active, const bool gpu_active)
 	profile_start(output_frame_render_video_name);
 	GS_DEBUG_MARKER_BEGIN(GS_DEBUG_COLOR_RENDER_VIDEO,
 			output_frame_render_video_name);
-	render_video(video, raw_active, gpu_active, cur_texture, prev_texture);
+	render_video(video, raw_active, gpu_active, cur_texture);
 	GS_DEBUG_MARKER_END();
 	profile_end(output_frame_render_video_name);

@ -814,9 +785,8 @@ static inline void output_frame(bool raw_active, const bool gpu_active)
 static void clear_base_frame_data(void)
 {
 	struct obs_core_video *video = &obs->video;
-	memset(video->textures_rendered, 0, sizeof(video->textures_rendered));
-	memset(video->textures_output, 0, sizeof(video->textures_output));
-	memset(video->textures_converted, 0, sizeof(video->textures_converted));
+	video->texture_rendered = false;
+	video->texture_converted = false;
 	circlebuf_free(&video->vframe_info_buffer);
 	video->cur_texture = 0;
 }
--- a/libobs/obs.c
+++ b/libobs/obs.c
@ -181,29 +181,27 @@ static bool obs_init_gpu_conversion(struct obs_video_info *ovi)
 	else
 		blog(LOG_INFO, "NV12 texture support not available");

-	for (size_t i = 0; i < NUM_TEXTURES; i++) {
 #ifdef _WIN32
-		if (video->using_nv12_tex) {
-			gs_texture_create_nv12(
-					&video->convert_textures[i],
-					&video->convert_uv_textures[i],
-					ovi->output_width, ovi->output_height,
-					GS_RENDER_TARGET | GS_SHARED_KM_TEX);
-			if (!video->convert_uv_textures[i])
-				return false;
-		} else {
+	if (video->using_nv12_tex) {
+		gs_texture_create_nv12(
+				&video->convert_texture,
+				&video->convert_uv_texture,
+				ovi->output_width, ovi->output_height,
+				GS_RENDER_TARGET | GS_SHARED_KM_TEX);
+		if (!video->convert_uv_texture)
+			return false;
+	} else {
 #endif
-			video->convert_textures[i] = gs_texture_create(
-					ovi->output_width,
-					video->conversion_height,
-					GS_RGBA, 1, NULL, GS_RENDER_TARGET);
+		video->convert_texture = gs_texture_create(
+				ovi->output_width,
+				video->conversion_height,
+				GS_RGBA, 1, NULL, GS_RENDER_TARGET);
 #ifdef _WIN32
-		}
+	}
 #endif

-		if (!video->convert_textures[i])
-			return false;
-	}
+	if (!video->convert_texture)
+		return false;

 	return true;
 }
@ -233,22 +231,22 @@ static bool obs_init_textures(struct obs_video_info *ovi)
 #ifdef _WIN32
 		}
 #endif
-
-		video->render_textures[i] = gs_texture_create(
-				ovi->base_width, ovi->base_height,
-				GS_RGBA, 1, NULL, GS_RENDER_TARGET);
-
-		if (!video->render_textures[i])
-			return false;
-
-		video->output_textures[i] = gs_texture_create(
-				ovi->output_width, ovi->output_height,
-				GS_RGBA, 1, NULL, GS_RENDER_TARGET);
-
-		if (!video->output_textures[i])
-			return false;
 	}

+	video->render_texture = gs_texture_create(
+		ovi->base_width, ovi->base_height,
+		GS_RGBA, 1, NULL, GS_RENDER_TARGET);
+
+	if (!video->render_texture)
+		return false;
+
+	video->output_texture = gs_texture_create(
+		ovi->output_width, ovi->output_height,
+		GS_RGBA, 1, NULL, GS_RENDER_TARGET);
+
+	if (!video->output_texture)
+		return false;
+
 	return true;
 }

@ -485,31 +483,27 @@ static void obs_free_video(void)

 		for (size_t i = 0; i < NUM_TEXTURES; i++) {
 			gs_stagesurface_destroy(video->copy_surfaces[i]);
-			gs_texture_destroy(video->render_textures[i]);
-			gs_texture_destroy(video->convert_textures[i]);
-			gs_texture_destroy(video->convert_uv_textures[i]);
-			gs_texture_destroy(video->output_textures[i]);
-
-			video->copy_surfaces[i]       = NULL;
-			video->render_textures[i]     = NULL;
-			video->convert_textures[i]    = NULL;
-			video->convert_uv_textures[i] = NULL;
-			video->output_textures[i]     = NULL;
+			video->copy_surfaces[i] = NULL;
 		}

+		gs_texture_destroy(video->render_texture);
+		gs_texture_destroy(video->convert_texture);
+		gs_texture_destroy(video->convert_uv_texture);
+		gs_texture_destroy(video->output_texture);
+		video->render_texture     = NULL;
+		video->convert_texture    = NULL;
+		video->convert_uv_texture = NULL;
+		video->output_texture     = NULL;
+
 		gs_leave_context();

 		circlebuf_free(&video->vframe_info_buffer);
 		circlebuf_free(&video->vframe_info_buffer_gpu);

-		memset(&video->textures_rendered, 0,
-				sizeof(video->textures_rendered));
-		memset(&video->textures_output, 0,
-				sizeof(video->textures_output));
-		memset(&video->textures_copied, 0,
+		video->texture_rendered = false;;
+		memset(video->textures_copied, 0,
 				sizeof(video->textures_copied));
-		memset(&video->textures_converted, 0,
-				sizeof(video->textures_converted));
+		video->texture_converted = false;;

 		pthread_mutex_destroy(&video->gpu_encoder_mutex);
 		pthread_mutex_init_value(&video->gpu_encoder_mutex);
@ -1631,22 +1625,18 @@ void obs_render_main_view(void)

 void obs_render_main_texture(void)
 {
-	struct obs_core_video *video = &obs->video;
+	struct obs_core_video *video;
 	gs_texture_t *tex;
 	gs_effect_t *effect;
 	gs_eparam_t *param;
-	int last_tex;

 	if (!obs) return;

-	last_tex = video->cur_texture == 0
-		? NUM_TEXTURES - 1
-		: video->cur_texture - 1;
-
-	if (!video->textures_rendered[last_tex])
+	video = &obs->video;
+	if (!video->texture_rendered)
 		return;

-	tex = video->render_textures[last_tex];
+	tex = video->render_texture;
 	effect = obs_get_base_effect(OBS_EFFECT_DEFAULT);
 	param = gs_effect_get_param_by_name(effect, "image");
 	gs_effect_set_texture(param, tex);
@ -1662,19 +1652,15 @@ void obs_render_main_texture(void)

 gs_texture_t *obs_get_main_texture(void)
 {
-	struct obs_core_video *video = &obs->video;
-	int last_tex;
+	struct obs_core_video *video;

 	if (!obs) return NULL;

-	last_tex = video->cur_texture == 0
-		? NUM_TEXTURES - 1
-		: video->cur_texture - 1;
-
-	if (!video->textures_rendered[last_tex])
+	video = &obs->video;
+	if (!video->texture_rendered)
 		return NULL;

-	return video->render_textures[last_tex];
+	return video->render_texture;
 }

 void obs_set_master_volume(float volume)