obs-studio/libobs/data/format_conversion.effect
jp9000 f9b5da513a libobs: Fix tex.Load lookup (needs int3, not int2)
libobs' shader language is basically HLSL, and tex.Load uses an int3 for
2D textures, with texture mipmap index for the last component.  This bug
bypassed testing because the front-end automatically switches to OpenGL
if D3D11 initialization fails, and when converted to GLSL, works fine
because texelFetch only requires two components.  This also means
there's a bug in GLSL shader conversion code, because it's essentially
ignoring the third component when it shouldn't be.
2017-05-06 10:39:42 -07:00

371 lines
9.2 KiB
Plaintext

/******************************************************************************
Copyright (C) 2014 by Hugh Bailey <obs.jim@gmail.com>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
//#define DEBUGGING
uniform float4x4 ViewProj;
uniform float u_plane_offset;
uniform float v_plane_offset;
uniform float width;
uniform float height;
uniform float width_i;
uniform float height_i;
uniform float width_d2;
uniform float height_d2;
uniform float width_d2_i;
uniform float height_d2_i;
uniform float input_width;
uniform float input_height;
uniform float input_width_i;
uniform float input_height_i;
uniform float input_width_i_d2;
uniform float input_height_i_d2;
uniform int int_width;
uniform int int_input_width;
uniform int int_u_plane_offset;
uniform int int_v_plane_offset;
uniform texture2d image;
sampler_state def_sampler {
Filter = Linear;
AddressU = Clamp;
AddressV = Clamp;
};
struct VertInOut {
float4 pos : POSITION;
float2 uv : TEXCOORD0;
};
VertInOut VSDefault(VertInOut vert_in)
{
VertInOut vert_out;
vert_out.pos = mul(float4(vert_in.pos.xyz, 1.0), ViewProj);
vert_out.uv = vert_in.uv;
return vert_out;
}
/* used to prevent internal GPU precision issues width fmod in particular */
#define PRECISION_OFFSET 0.2
float4 PSNV12(VertInOut vert_in) : TARGET
{
float v_mul = floor(vert_in.uv.y * input_height);
float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
byte_offset += PRECISION_OFFSET;
float2 sample_pos[4];
if (byte_offset < u_plane_offset) {
#ifdef DEBUGGING
return float4(1.0, 1.0, 1.0, 1.0);
#endif
float lum_u = floor(fmod(byte_offset, width)) * width_i;
float lum_v = floor(byte_offset * width_i) * height_i;
/* move to texel centers to sample the 4 pixels properly */
lum_u += width_i * 0.5;
lum_v += height_i * 0.5;
sample_pos[0] = float2(lum_u, lum_v);
sample_pos[1] = float2(lum_u += width_i, lum_v);
sample_pos[2] = float2(lum_u += width_i, lum_v);
sample_pos[3] = float2(lum_u + width_i, lum_v);
float4x4 out_val = float4x4(
image.Sample(def_sampler, sample_pos[0]),
image.Sample(def_sampler, sample_pos[1]),
image.Sample(def_sampler, sample_pos[2]),
image.Sample(def_sampler, sample_pos[3])
);
return transpose(out_val)[1];
} else {
#ifdef DEBUGGING
return float4(0.5, 0.2, 0.5, 0.2);
#endif
float new_offset = byte_offset - u_plane_offset;
float ch_u = floor(fmod(new_offset, width)) * width_i;
float ch_v = floor(new_offset * width_i) * height_d2_i;
float width_i2 = width_i*2.0;
/* move to the borders of each set of 4 pixels to force it
* to do bilinear averaging */
ch_u += width_i;
ch_v += height_i;
sample_pos[0] = float2(ch_u, ch_v);
sample_pos[1] = float2(ch_u + width_i2, ch_v);
return float4(
image.Sample(def_sampler, sample_pos[0]).rb,
image.Sample(def_sampler, sample_pos[1]).rb
);
}
}
float4 PSPlanar420(VertInOut vert_in) : TARGET
{
float v_mul = floor(vert_in.uv.y * input_height);
float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
byte_offset += PRECISION_OFFSET;
float2 sample_pos[4];
if (byte_offset < u_plane_offset) {
#ifdef DEBUGGING
return float4(1.0, 1.0, 1.0, 1.0);
#endif
float lum_u = floor(fmod(byte_offset, width)) * width_i;
float lum_v = floor(byte_offset * width_i) * height_i;
/* move to texel centers to sample the 4 pixels properly */
lum_u += width_i * 0.5;
lum_v += height_i * 0.5;
sample_pos[0] = float2(lum_u, lum_v);
sample_pos[1] = float2(lum_u += width_i, lum_v);
sample_pos[2] = float2(lum_u += width_i, lum_v);
sample_pos[3] = float2(lum_u + width_i, lum_v);
} else {
#ifdef DEBUGGING
return ((byte_offset < v_plane_offset) ?
float4(0.5, 0.5, 0.5, 0.5) :
float4(0.2, 0.2, 0.2, 0.2));
#endif
float new_offset = byte_offset -
((byte_offset < v_plane_offset) ?
u_plane_offset : v_plane_offset);
float ch_u = floor(fmod(new_offset, width_d2)) * width_d2_i;
float ch_v = floor(new_offset * width_d2_i) * height_d2_i;
float width_i2 = width_i*2.0;
/* move to the borders of each set of 4 pixels to force it
* to do bilinear averaging */
ch_u += width_i;
ch_v += height_i;
sample_pos[0] = float2(ch_u, ch_v);
sample_pos[1] = float2(ch_u += width_i2, ch_v);
sample_pos[2] = float2(ch_u += width_i2, ch_v);
sample_pos[3] = float2(ch_u + width_i2, ch_v);
}
float4x4 out_val = float4x4(
image.Sample(def_sampler, sample_pos[0]),
image.Sample(def_sampler, sample_pos[1]),
image.Sample(def_sampler, sample_pos[2]),
image.Sample(def_sampler, sample_pos[3])
);
out_val = transpose(out_val);
if (byte_offset < u_plane_offset)
return out_val[1];
else if (byte_offset < v_plane_offset)
return out_val[0];
else
return out_val[2];
}
float4 PSPlanar444(VertInOut vert_in) : TARGET
{
float v_mul = floor(vert_in.uv.y * input_height);
float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
byte_offset += PRECISION_OFFSET;
float new_byte_offset = byte_offset;
if (byte_offset >= v_plane_offset)
new_byte_offset -= v_plane_offset;
else if (byte_offset >= u_plane_offset)
new_byte_offset -= u_plane_offset;
float2 sample_pos[4];
float u_val = floor(fmod(new_byte_offset, width)) * width_i;
float v_val = floor(new_byte_offset * width_i) * height_i;
/* move to texel centers to sample the 4 pixels properly */
u_val += width_i * 0.5;
v_val += height_i * 0.5;
sample_pos[0] = float2(u_val, v_val);
sample_pos[1] = float2(u_val += width_i, v_val);
sample_pos[2] = float2(u_val += width_i, v_val);
sample_pos[3] = float2(u_val + width_i, v_val);
float4x4 out_val = float4x4(
image.Sample(def_sampler, sample_pos[0]),
image.Sample(def_sampler, sample_pos[1]),
image.Sample(def_sampler, sample_pos[2]),
image.Sample(def_sampler, sample_pos[3])
);
out_val = transpose(out_val);
if (byte_offset < u_plane_offset)
return out_val[1];
else if (byte_offset < v_plane_offset)
return out_val[0];
else
return out_val[2];
}
float GetIntOffsetColor(int offset)
{
return image.Load(int3(offset % int_input_width,
offset / int_input_width,
0)).r;
}
float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
int y0_pos, int y1_pos) : TARGET
{
float y = vert_in.uv.y;
float odd = floor(fmod(width * vert_in.uv.x + PRECISION_OFFSET, 2.0));
float x = floor(width_d2 * vert_in.uv.x + PRECISION_OFFSET) *
width_d2_i;
x += input_width_i_d2;
float4 texel = image.Sample(def_sampler, float2(x, y));
return float4(odd > 0.5 ? texel[y1_pos] : texel[y0_pos],
texel[u_pos], texel[v_pos], 1.0);
}
float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
{
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
int lum_offset = y * int_width + x;
int chroma_offset = (y / 2) * (int_width / 2) + x / 2;
int chroma1 = int_u_plane_offset + chroma_offset;
int chroma2 = int_v_plane_offset + chroma_offset;
return float4(
GetIntOffsetColor(lum_offset),
GetIntOffsetColor(chroma1),
GetIntOffsetColor(chroma2),
1.0
);
}
float4 PSNV12_Reverse(VertInOut vert_in) : TARGET
{
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
int lum_offset = y * int_width + x;
int chroma_offset = (y / 2) * (int_width / 2) + x / 2;
int chroma = int_u_plane_offset + chroma_offset * 2;
return float4(
GetIntOffsetColor(lum_offset),
GetIntOffsetColor(chroma),
GetIntOffsetColor(chroma + 1),
1.0
);
}
technique Planar420
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSPlanar420(vert_in);
}
}
technique Planar444
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSPlanar444(vert_in);
}
}
technique NV12
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSNV12(vert_in);
}
}
technique UYVY_Reverse
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSPacked422_Reverse(vert_in, 2, 0, 1, 3);
}
}
technique YUY2_Reverse
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSPacked422_Reverse(vert_in, 1, 3, 2, 0);
}
}
technique YVYU_Reverse
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSPacked422_Reverse(vert_in, 3, 1, 2, 0);
}
}
technique I420_Reverse
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSPlanar420_Reverse(vert_in);
}
}
technique NV12_Reverse
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSNV12_Reverse(vert_in);
}
}