libobs: Use tex.Load for reverse NV12/I420 funcs

Eventually, most texture sampling should be replaced with Load where applicable (though in some cases sub-pixel sampling is desired, so Sample has to stay there). This commit also fixes a bug where NV12 async sources wouldn't render correctly.
2017-05-06 01:22:51 -07:00 · 2017-05-06 01:22:51 -07:00 · e7f754df97
commit e7f754df97
parent e04ab3da7f
2 changed files with 41 additions and 52 deletions
--- a/libobs/data/format_conversion.effect
+++ b/libobs/data/format_conversion.effect
@ -37,6 +37,11 @@ uniform float     input_height_i;
 uniform float     input_width_i_d2;
 uniform float     input_height_i_d2;

+uniform int       int_width;            /* integer copy of "width"; obs-source.c sets both from cx */
+uniform int       int_input_width;      /* row length GetIntOffsetColor uses to split a linear offset into x/y */
+uniform int       int_u_plane_offset;   /* base offset added to the chroma index for the first chroma fetch */
+uniform int       int_v_plane_offset;   /* base offset for the second chroma fetch in PSPlanar420_Reverse */
+
 uniform texture2d image;

 sampler_state def_sampler {
@ -235,6 +240,12 @@ float4 PSPlanar444(VertInOut vert_in) : TARGET
 		return out_val[2];
 }

+float GetIntOffsetColor(int offset) /* returns the red channel of the texel of "image" at linear index "offset" */
+{ /* NOTE(review): divides by int_input_width; assumes the caller always sets it to a non-zero value -- confirm */
+	return image.Load(int2(offset % int_input_width, /* x: position within the row */
+	                       offset / int_input_width)).r; /* y: row number (integer division) */
+}
+
 float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
 		int y0_pos, int y1_pos) : TARGET
 {
@ -250,59 +261,37 @@ float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
 			texel[u_pos], texel[v_pos], 1.0);
 }

-float GetOffsetColor(float offset)
-{
-	float2 uv;
-
-	offset += PRECISION_OFFSET;
-	uv.x = floor(fmod(offset, input_width)) * input_width_i;
-	uv.y = floor(offset * input_width_i)    * input_height_i;
-
-	uv.xy += float2(input_width_i_d2, input_height_i_d2);
-
-	return image.Sample(def_sampler, uv).r;
-}
-
 float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET /* builds one output texel from a luminance value plus one value from each of two chroma planes, alpha fixed at 1.0 */
 {
-	float x = vert_in.uv.x;
-	float y = vert_in.uv.y;
-	float x_offset   = floor(x * width  + PRECISION_OFFSET);
-	float y_offset   = floor(y * height + PRECISION_OFFSET);
+	int x = int(vert_in.uv.x * width  + PRECISION_OFFSET); /* output column: uv.x scaled by width, converted to int */
+	int y = int(vert_in.uv.y * height + PRECISION_OFFSET); /* output row: uv.y scaled by height, converted to int */

-	float lum_offset = y_offset * width + x_offset + PRECISION_OFFSET;
-	lum_offset       = floor(lum_offset);
-
-	float ch_offset  = floor(y_offset * 0.5 + PRECISION_OFFSET) * width_d2 +
-		(x_offset * 0.5) + PRECISION_OFFSET;
-	ch_offset        = floor(ch_offset);
+	int lum_offset = y * int_width + x; /* linear index of the luminance value, int_width values per row */
+	int chroma_offset = (y / 2) * (int_width / 2) + x / 2; /* x and y are halved, so a 2x2 group of output pixels shares one chroma index */
+	int chroma1    = int_u_plane_offset + chroma_offset; /* same chroma index, relative to the first plane offset */
+	int chroma2    = int_v_plane_offset + chroma_offset; /* same chroma index, relative to the second plane offset */

 	return float4(
-		GetOffsetColor(lum_offset),
-		GetOffsetColor(u_plane_offset + ch_offset),
-		GetOffsetColor(v_plane_offset + ch_offset),
+		GetIntOffsetColor(lum_offset), /* component 0: luminance */
+		GetIntOffsetColor(chroma1), /* component 1: value from the plane at int_u_plane_offset */
+		GetIntOffsetColor(chroma2), /* component 2: value from the plane at int_v_plane_offset */
 		1.0
 	);
 }

 float4 PSNV12_Reverse(VertInOut vert_in) : TARGET /* builds one output texel from a luminance value plus two adjacent values read after int_u_plane_offset, alpha fixed at 1.0 */
 {
-	float x = vert_in.uv.x;
-	float y = vert_in.uv.y;
-	float x_offset   = floor(x * width  + PRECISION_OFFSET);
-	float y_offset   = floor(y * height + PRECISION_OFFSET);
+	int x = int(vert_in.uv.x * width  + PRECISION_OFFSET); /* output column: uv.x scaled by width, converted to int */
+	int y = int(vert_in.uv.y * height + PRECISION_OFFSET); /* output row: uv.y scaled by height, converted to int */

-	float lum_offset = y_offset * width + x_offset + PRECISION_OFFSET;
-	lum_offset       = floor(lum_offset);
-
-	float ch_offset  = floor(y_offset * 0.5 + PRECISION_OFFSET) * width_d2 +
-		(x_offset * 0.5);
-	ch_offset        = floor(ch_offset * 2.0 + PRECISION_OFFSET);
+	int lum_offset    = y * int_width + x; /* linear index of the luminance value, int_width values per row */
+	int chroma_offset = (y / 2) * (int_width / 2) + x / 2; /* x and y are halved, so a 2x2 group of output pixels shares one chroma index */
+	int chroma        = int_u_plane_offset + chroma_offset * 2; /* index doubled: each chroma entry occupies two consecutive values */

 	return float4(
-		GetOffsetColor(lum_offset),
-		GetOffsetColor(u_plane_offset + ch_offset),
-		GetOffsetColor(u_plane_offset + ch_offset + 1.0),
+		GetIntOffsetColor(lum_offset), /* component 0: luminance */
+		GetIntOffsetColor(chroma), /* component 1: first value of the chroma pair */
+		GetIntOffsetColor(chroma + 1), /* component 2: the value immediately after it */
 		1.0
 	);
 }
--- a/libobs/obs-source.c
+++ b/libobs/obs-source.c
@ -1490,6 +1490,12 @@ static inline void set_eparam(gs_effect_t *effect, const char *name, float val)
 	gs_effect_set_float(param, val);
 }

+static inline void set_eparami(gs_effect_t *effect, const char *name, int val) /* int counterpart of set_eparam: sets the effect parameter called "name" to val */
+{
+	gs_eparam_t *param = gs_effect_get_param_by_name(effect, name); /* look the parameter up by name */
+	gs_effect_set_int(param, val); /* NOTE(review): param is not checked here -- confirm gs_effect_set_int tolerates a failed lookup */
+}
+
 static bool update_async_texrender(struct obs_source *source,
 		const struct obs_source_frame *frame,
 		gs_texture_t *tex, gs_texrender_t *texrender)
@ -1517,22 +1523,16 @@ static bool update_async_texrender(struct obs_source *source,
 	gs_effect_set_texture(gs_effect_get_param_by_name(conv, "image"), tex);
 	set_eparam(conv, "width",  (float)cx);
 	set_eparam(conv, "height", (float)cy);
-	set_eparam(conv, "width_i",  1.0f / cx);
-	set_eparam(conv, "height_i", 1.0f / cy);
 	set_eparam(conv, "width_d2",  cx * 0.5f);
-	set_eparam(conv, "height_d2", cy * 0.5f);
 	set_eparam(conv, "width_d2_i",  1.0f / (cx * 0.5f));
-	set_eparam(conv, "height_d2_i", 1.0f / (cy * 0.5f));
-	set_eparam(conv, "input_width",  convert_width);
-	set_eparam(conv, "input_height", convert_height);
-	set_eparam(conv, "input_width_i",  1.0f / convert_width);
-	set_eparam(conv, "input_height_i", 1.0f / convert_height);
 	set_eparam(conv, "input_width_i_d2",  (1.0f / convert_width)  * 0.5f);
-	set_eparam(conv, "input_height_i_d2", (1.0f / convert_height) * 0.5f);
-	set_eparam(conv, "u_plane_offset",
-			(float)source->async_plane_offset[0]);
-	set_eparam(conv, "v_plane_offset",
-			(float)source->async_plane_offset[1]);
+
+	set_eparami(conv, "int_width", (int)cx);
+	set_eparami(conv, "int_input_width", (int)source->async_convert_width);
+	set_eparami(conv, "int_u_plane_offset",
+			(int)source->async_plane_offset[0]);
+	set_eparami(conv, "int_v_plane_offset",
+			(int)source->async_plane_offset[1]);

 	gs_ortho(0.f, (float)cx, 0.f, (float)cy, -100.f, 100.f);