libobs: Use tex.Load for reverse NV12/I420 funcs

Eventually, most things should be replaced with Load where applicable
(though in some cases sub-pixel sampling is desired).

This commit also fixes a bug where NV12 async sources wouldn't render
correctly.
This commit is contained in:
jp9000 2017-05-06 01:22:51 -07:00
parent e04ab3da7f
commit e7f754df97
2 changed files with 41 additions and 52 deletions

View File

@ -37,6 +37,11 @@ uniform float input_height_i;
uniform float input_width_i_d2;
uniform float input_height_i_d2;
uniform int int_width;
uniform int int_input_width;
uniform int int_u_plane_offset;
uniform int int_v_plane_offset;
uniform texture2d image;
sampler_state def_sampler {
@ -235,6 +240,12 @@ float4 PSPlanar444(VertInOut vert_in) : TARGET
return out_val[2];
}
/*
 * Returns the red channel of the texel in `image` addressed by a linear
 * integer index.
 *
 * The index is split into a 2D position using int_input_width as the row
 * length: x = offset % int_input_width, y = offset / int_input_width.
 * The texel is read with Load (integer coordinates), not Sample.
 */
float GetIntOffsetColor(int offset)
{
	int texel_x = offset % int_input_width;
	int texel_y = offset / int_input_width;

	return image.Load(int2(texel_x, texel_y)).r;
}
float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
int y0_pos, int y1_pos) : TARGET
{
@ -250,59 +261,37 @@ float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
texel[u_pos], texel[v_pos], 1.0);
}
/*
 * Returns the red channel of `image` sampled at the position addressed by a
 * linear floating-point index.
 *
 * The index is biased by PRECISION_OFFSET, split into a column
 * (fmod by input_width) and a row (multiply by input_width_i), each floored,
 * then scaled to UV space by input_width_i / input_height_i. The
 * (input_width_i_d2, input_height_i_d2) pair is added to the UV before the
 * texture is sampled through def_sampler.
 */
float GetOffsetColor(float offset)
{
	float biased = offset + PRECISION_OFFSET;

	float2 coord;
	coord.x = floor(fmod(biased, input_width)) * input_width_i;
	coord.y = floor(biased * input_width_i) * input_height_i;

	float2 texel_center = float2(input_width_i_d2, input_height_i_d2);
	return image.Sample(def_sampler, coord + texel_center).r;
}
/*
 * Pixel shader: for the output pixel at vert_in.uv, reads one luma value and
 * two chroma values from `image` by linear index and returns them in a float4
 * whose last component is 1.0. The two chroma values are addressed relative to
 * separate U and V plane offsets.
 *
 * NOTE(review): this listing appears to interleave the older float-based
 * lines (GetOffsetColor) with their integer-based replacements
 * (GetIntOffsetColor) -- x, y, lum_offset and the return components each
 * appear twice. Confirm which set belongs to the current file.
 */
float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
{
/* Float path: pixel coordinates from uv scaled by width/height, biased by
 * PRECISION_OFFSET before floor(). */
float x = vert_in.uv.x;
float y = vert_in.uv.y;
float x_offset = floor(x * width + PRECISION_OFFSET);
float y_offset = floor(y * height + PRECISION_OFFSET);
/* Integer path: the same scaled-and-biased coordinates converted to int. */
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
/* Float path: linear luma index (row * width + column), floored. */
float lum_offset = y_offset * width + x_offset + PRECISION_OFFSET;
lum_offset = floor(lum_offset);
/* Float path: chroma index built from the halved row (times width_d2) plus
 * the halved column, floored. */
float ch_offset = floor(y_offset * 0.5 + PRECISION_OFFSET) * width_d2 +
(x_offset * 0.5) + PRECISION_OFFSET;
ch_offset = floor(ch_offset);
/* Integer path: linear luma index (row * int_width + column). */
int lum_offset = y * int_width + x;
/* Integer path: chroma index uses halved row, halved row length and halved
 * column; the same index is applied to both plane offsets. */
int chroma_offset = (y / 2) * (int_width / 2) + x / 2;
int chroma1 = int_u_plane_offset + chroma_offset;
int chroma2 = int_v_plane_offset + chroma_offset;
return float4(
/* Float path components. */
GetOffsetColor(lum_offset),
GetOffsetColor(u_plane_offset + ch_offset),
GetOffsetColor(v_plane_offset + ch_offset),
/* Integer path components. */
GetIntOffsetColor(lum_offset),
GetIntOffsetColor(chroma1),
GetIntOffsetColor(chroma2),
1.0
);
}
/*
 * Pixel shader: for the output pixel at vert_in.uv, reads one luma value and
 * two chroma values from `image` by linear index and returns them in a float4
 * whose last component is 1.0. Both chroma values are addressed relative to
 * the U plane offset, at a doubled chroma index and the index right after it
 * (presumably the interleaved U/V pair of NV12 -- confirm).
 *
 * NOTE(review): this listing appears to interleave the older float-based
 * lines (GetOffsetColor) with their integer-based replacements
 * (GetIntOffsetColor) -- x, y, lum_offset and the return components each
 * appear twice. Confirm which set belongs to the current file.
 */
float4 PSNV12_Reverse(VertInOut vert_in) : TARGET
{
/* Float path: pixel coordinates from uv scaled by width/height, biased by
 * PRECISION_OFFSET before floor(). */
float x = vert_in.uv.x;
float y = vert_in.uv.y;
float x_offset = floor(x * width + PRECISION_OFFSET);
float y_offset = floor(y * height + PRECISION_OFFSET);
/* Integer path: the same scaled-and-biased coordinates converted to int. */
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
/* Float path: linear luma index (row * width + column), floored. */
float lum_offset = y_offset * width + x_offset + PRECISION_OFFSET;
lum_offset = floor(lum_offset);
/* Float path: chroma index from the halved row (times width_d2) plus the
 * halved column, then doubled and floored. */
float ch_offset = floor(y_offset * 0.5 + PRECISION_OFFSET) * width_d2 +
(x_offset * 0.5);
ch_offset = floor(ch_offset * 2.0 + PRECISION_OFFSET);
/* Integer path: linear luma index (row * int_width + column). */
int lum_offset = y * int_width + x;
/* Integer path: chroma index uses halved row, halved row length and halved
 * column; it is doubled when added to the U plane offset. */
int chroma_offset = (y / 2) * (int_width / 2) + x / 2;
int chroma = int_u_plane_offset + chroma_offset * 2;
return float4(
/* Float path components. */
GetOffsetColor(lum_offset),
GetOffsetColor(u_plane_offset + ch_offset),
GetOffsetColor(u_plane_offset + ch_offset + 1.0),
/* Integer path components: two consecutive values starting at `chroma`. */
GetIntOffsetColor(lum_offset),
GetIntOffsetColor(chroma),
GetIntOffsetColor(chroma + 1),
1.0
);
}

View File

@ -1490,6 +1490,12 @@ static inline void set_eparam(gs_effect_t *effect, const char *name, float val)
gs_effect_set_float(param, val);
}
/* Looks up the effect parameter called `name` on `effect` and sets it to the
 * integer `val`. */
static inline void set_eparami(gs_effect_t *effect, const char *name, int val)
{
	gs_effect_set_int(gs_effect_get_param_by_name(effect, name), val);
}
static bool update_async_texrender(struct obs_source *source,
const struct obs_source_frame *frame,
gs_texture_t *tex, gs_texrender_t *texrender)
@ -1517,22 +1523,16 @@ static bool update_async_texrender(struct obs_source *source,
gs_effect_set_texture(gs_effect_get_param_by_name(conv, "image"), tex);
set_eparam(conv, "width", (float)cx);
set_eparam(conv, "height", (float)cy);
set_eparam(conv, "width_i", 1.0f / cx);
set_eparam(conv, "height_i", 1.0f / cy);
set_eparam(conv, "width_d2", cx * 0.5f);
set_eparam(conv, "height_d2", cy * 0.5f);
set_eparam(conv, "width_d2_i", 1.0f / (cx * 0.5f));
set_eparam(conv, "height_d2_i", 1.0f / (cy * 0.5f));
set_eparam(conv, "input_width", convert_width);
set_eparam(conv, "input_height", convert_height);
set_eparam(conv, "input_width_i", 1.0f / convert_width);
set_eparam(conv, "input_height_i", 1.0f / convert_height);
set_eparam(conv, "input_width_i_d2", (1.0f / convert_width) * 0.5f);
set_eparam(conv, "input_height_i_d2", (1.0f / convert_height) * 0.5f);
set_eparam(conv, "u_plane_offset",
(float)source->async_plane_offset[0]);
set_eparam(conv, "v_plane_offset",
(float)source->async_plane_offset[1]);
set_eparami(conv, "int_width", (int)cx);
set_eparami(conv, "int_input_width", (int)source->async_convert_width);
set_eparami(conv, "int_u_plane_offset",
(int)source->async_plane_offset[0]);
set_eparami(conv, "int_v_plane_offset",
(int)source->async_plane_offset[1]);
gs_ortho(0.f, (float)cx, 0.f, (float)cy, -100.f, 100.f);