69c215345a
Currently several shaders need "DrawMatrix" techniques to support the possibility that the input texture is a "YUV" format. Also, "DrawMatrix" is overloaded for translation in both directions when it is written for RGB to "YUV" only. A cleaner solution is to handle "YUV" to RGB up-front as part of format conversion, and ensure only RGB inputs reach the other shaders. This is necessary to someday perform correct scale filtering without the cost of redundant "YUV" conversions per texture tap. A necessary prerequisite for this is to add conversion support for VIDEO_FORMAT_I444, and that is now in place. There was already a hack in place to cover VIDEO_FORMAT_Y800. All other "YUV" formats already have conversion functions. "DrawMatrix" has been removed from shaders that only supported "YUV" to RGB conversions. It still exists in shaders that perform RGB to "YUV" conversions, and the implementations have been sanitized accordingly.
450 lines
11 KiB
Plaintext
450 lines
11 KiB
Plaintext
/******************************************************************************
|
|
Copyright (C) 2014 by Hugh Bailey <obs.jim@gmail.com>
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
******************************************************************************/
|
|
|
|
//#define DEBUGGING
|
|
|
|
/* Transform applied to vertex positions in VSDefault. */
uniform float4x4 ViewProj;

/* Offsets at which the U and V planes begin, in the same units as the
 * byte_offset values computed by the packing shaders. */
uniform float u_plane_offset;
uniform float v_plane_offset;

/* Dimension helpers.  NOTE(review): the "_i" variants are used as
 * multipliers wherever a division by the base value would be expected, so
 * they are presumably reciprocals, and "_d2" presumably means "divided by
 * two" -- confirm against the code that sets these parameters. */
uniform float width;
uniform float height;
uniform float width_i;
uniform float height_i;
uniform float width_d2;
uniform float height_d2;
uniform float width_d2_i;
uniform float height_d2_i;
uniform float input_width;
uniform float input_height;
uniform float input_width_i;
uniform float input_height_i;
uniform float input_width_i_d2;
uniform float input_height_i_d2;

/* Integer values used by the *_Reverse shaders for texel addressing. */
uniform int int_width;
uniform int int_input_width;
uniform int int_u_plane_offset;
uniform int int_v_plane_offset;

/* Matrix and clamp range applied by the *_Reverse shaders. */
uniform float4x4 color_matrix;
uniform float3 color_range_min = {0.0, 0.0, 0.0};
uniform float3 color_range_max = {1.0, 1.0, 1.0};

/* Source texture read by every pixel shader in this file. */
uniform texture2d image;
|
|
|
|
/* Sampler shared by all pixel shaders: linear filtering with both
 * texture axes clamped. */
sampler_state def_sampler {
	Filter   = Linear;
	AddressU = Clamp;
	AddressV = Clamp;
};
|
|
|
|
/* Vertex layout used both as vertex shader input/output and as pixel
 * shader input. */
struct VertInOut {
	float4 pos : POSITION;  /* vertex position */
	float2 uv  : TEXCOORD0; /* texture coordinate */
};
|
|
|
|
/* Default vertex shader: transforms the position by ViewProj (forcing
 * w to 1.0) and forwards the texture coordinate unchanged. */
VertInOut VSDefault(VertInOut vert_in)
{
	VertInOut result;
	result.uv  = vert_in.uv;
	result.pos = mul(float4(vert_in.pos.xyz, 1.0), ViewProj);
	return result;
}
|
|
|
|
/* used to prevent internal GPU precision issues with fmod in particular */
|
|
#define PRECISION_OFFSET 0.2
|
|
|
|
/* Single-pass NV12 packing shader.
 *
 * Every output texel carries four values.  Its linear byte offset decides
 * which region it belongs to:
 *   - below u_plane_offset: four horizontally adjacent .y components of
 *     the source image (sampled at texel centers);
 *   - otherwise: the .rb components of two samples taken on texel
 *     borders so that the linear sampler averages neighbouring texels. */
float4 PSNV12(VertInOut vert_in) : TARGET
{
	float row = floor(vert_in.uv.y * input_height);

	float byte_offset = floor((row + vert_in.uv.x) * width) * 4.0;
	byte_offset += PRECISION_OFFSET;

	if (byte_offset < u_plane_offset) {
#ifdef DEBUGGING
		return float4(1.0, 1.0, 1.0, 1.0);
#endif

		float tap_x = floor(fmod(byte_offset, width)) * width_i;
		float tap_y = floor(byte_offset * width_i) * height_i;

		/* shift onto texel centers so each tap reads exactly one texel */
		tap_x += width_i * 0.5;
		tap_y += height_i * 0.5;

		/* four consecutive texels on the same row */
		float4 s0 = image.Sample(def_sampler, float2(tap_x, tap_y));
		tap_x += width_i;
		float4 s1 = image.Sample(def_sampler, float2(tap_x, tap_y));
		tap_x += width_i;
		float4 s2 = image.Sample(def_sampler, float2(tap_x, tap_y));
		float4 s3 = image.Sample(def_sampler,
				float2(tap_x + width_i, tap_y));

		return float4(s0.y, s1.y, s2.y, s3.y);
	}

#ifdef DEBUGGING
	return float4(0.5, 0.2, 0.5, 0.2);
#endif

	float chroma_offset = byte_offset - u_plane_offset;

	float ch_x = floor(fmod(chroma_offset, width)) * width_i;
	float ch_y = floor(chroma_offset * width_i) * height_d2_i;
	float two_texels = width_i * 2.0;

	/* sit on the border shared by each 2x2 group of texels so the
	 * linear filter averages them */
	ch_x += width_i;
	ch_y += height_i;

	float4 first  = image.Sample(def_sampler, float2(ch_x, ch_y));
	float4 second = image.Sample(def_sampler,
			float2(ch_x + two_texels, ch_y));

	return float4(first.rb, second.rb);
}
|
|
|
|
/* Returns the .y component of the source texel at the given coordinate. */
float PSNV12_Y(VertInOut vert_in) : TARGET
{
	float4 texel = image.Sample(def_sampler, vert_in.uv.xy);
	return texel.y;
}
|
|
|
|
/* Returns the .x and .z components of the source texel at the given
 * coordinate, in that order. */
float2 PSNV12_UV(VertInOut vert_in) : TARGET
{
	float4 texel = image.Sample(def_sampler, vert_in.uv.xy);
	return texel.xz;
}
|
|
|
|
/* Planar 4:2:0 packing shader.
 *
 * Every output texel carries four values.  The texel's linear byte offset
 * selects the plane it belongs to:
 *   - below u_plane_offset:              .y component of four taps
 *   - below v_plane_offset:              .x component of four taps
 *   - at or beyond v_plane_offset:       .z component of four taps
 *
 * Taps for the first plane are taken at texel centers; taps for the other
 * two planes are taken on texel borders so the linear sampler averages
 * the neighbouring texels. */
float4 PSPlanar420(VertInOut vert_in) : TARGET
{
	float v_mul = floor(vert_in.uv.y * input_height);

	float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
	byte_offset += PRECISION_OFFSET;

	float2 sample_pos[4];

	if (byte_offset < u_plane_offset) {
#ifdef DEBUGGING
		return float4(1.0, 1.0, 1.0, 1.0);
#endif

		float lum_u = floor(fmod(byte_offset, width)) * width_i;
		float lum_v = floor(byte_offset * width_i)    * height_i;

		/* move to texel centers to sample the 4 pixels properly */
		lum_u += width_i  * 0.5;
		lum_v += height_i * 0.5;

		sample_pos[0] = float2(lum_u,            lum_v);
		sample_pos[1] = float2(lum_u += width_i, lum_v);
		sample_pos[2] = float2(lum_u += width_i, lum_v);
		sample_pos[3] = float2(lum_u +  width_i, lum_v);

	} else {
#ifdef DEBUGGING
		return ((byte_offset < v_plane_offset) ?
				float4(0.5, 0.5, 0.5, 0.5) :
				float4(0.2, 0.2, 0.2, 0.2));
#endif

		/* offset relative to the start of the plane being written */
		float new_offset = byte_offset -
				((byte_offset < v_plane_offset) ?
				u_plane_offset : v_plane_offset);

		float ch_u = floor(fmod(new_offset, width_d2)) * width_d2_i;
		float ch_v = floor(new_offset * width_d2_i)    * height_d2_i;
		float width_i2 = width_i*2.0;

		/* move to the borders of each set of 4 pixels to force it
		 * to do bilinear averaging */
		ch_u += width_i;
		ch_v += height_i;

		/* set up coordinates for next chroma line, in case
		 * (width / 2) % 4 == 2, i.e. the current set of 4 pixels is
		 * split between the current and the next chroma line; do note
		 * that the next chroma line is two source lines below the
		 * current source line */
		float ch_u_n = 0. + width_i;
		float ch_v_n = ch_v + height_i * 3;

		sample_pos[0] = float2(ch_u,             ch_v);
		sample_pos[1] = float2(ch_u += width_i2, ch_v);

		ch_u += width_i2;
		// check if ch_u overflowed the current source and chroma line
		if (ch_u > 1.0) {
			/* the last two taps wrap to the start of the next
			 * chroma line; both slots must be written (the second
			 * assignment previously overwrote slot 2 and left
			 * slot 3 uninitialized) */
			sample_pos[2] = float2(ch_u_n,            ch_v_n);
			sample_pos[3] = float2(ch_u_n + width_i2, ch_v_n);
		} else {
			sample_pos[2] = float2(ch_u,            ch_v);
			sample_pos[3] = float2(ch_u + width_i2, ch_v);
		}
	}

	float4x4 out_val = float4x4(
		image.Sample(def_sampler, sample_pos[0]),
		image.Sample(def_sampler, sample_pos[1]),
		image.Sample(def_sampler, sample_pos[2]),
		image.Sample(def_sampler, sample_pos[3])
	);

	/* after the transpose, row N holds component N of all four taps */
	out_val = transpose(out_val);

	if (byte_offset < u_plane_offset)
		return out_val[1];
	else if (byte_offset < v_plane_offset)
		return out_val[0];
	else
		return out_val[2];
}
|
|
|
|
/* Planar 4:4:4 packing shader.
 *
 * Every output texel carries one component of four horizontally adjacent
 * source texels.  The texel's linear byte offset selects the plane and
 * therefore the component: .y below u_plane_offset, .x below
 * v_plane_offset, .z otherwise. */
float4 PSPlanar444(VertInOut vert_in) : TARGET
{
	float row = floor(vert_in.uv.y * input_height);

	float byte_offset = floor((row + vert_in.uv.x) * width) * 4.0;
	byte_offset += PRECISION_OFFSET;

	/* offset relative to the start of the plane this texel lies in */
	float plane_offset = byte_offset;
	if (byte_offset >= v_plane_offset) {
		plane_offset -= v_plane_offset;
	} else if (byte_offset >= u_plane_offset) {
		plane_offset -= u_plane_offset;
	}

	float tap_x = floor(fmod(plane_offset, width)) * width_i;
	float tap_y = floor(plane_offset * width_i) * height_i;

	/* shift onto texel centers so each tap reads exactly one texel */
	tap_x += width_i * 0.5;
	tap_y += height_i * 0.5;

	/* four consecutive texels on the same row */
	float4 s0 = image.Sample(def_sampler, float2(tap_x, tap_y));
	tap_x += width_i;
	float4 s1 = image.Sample(def_sampler, float2(tap_x, tap_y));
	tap_x += width_i;
	float4 s2 = image.Sample(def_sampler, float2(tap_x, tap_y));
	float4 s3 = image.Sample(def_sampler, float2(tap_x + width_i, tap_y));

	if (byte_offset < u_plane_offset)
		return float4(s0.y, s1.y, s2.y, s3.y);

	if (byte_offset < v_plane_offset)
		return float4(s0.x, s1.x, s2.x, s3.x);

	return float4(s0.z, s1.z, s2.z, s3.z);
}
|
|
|
|
/* Loads the .r component of the texel found at a linear offset into the
 * source image, treating the image as int_input_width texels wide. */
float GetIntOffsetColor(int offset)
{
	int col   = offset % int_input_width;
	int row_i = offset / int_input_width;
	return image.Load(int3(col, row_i, 0)).r;
}
|
|
|
|
/* Converts one pixel of a packed 4:2:2 source to the output color space.
 *
 * Each source texel holds two luma values and one chroma pair; the
 * u_pos/v_pos/y0_pos/y1_pos arguments give the component index of each
 * value inside the sampled texel.  The selected values are clamped to
 * [color_range_min, color_range_max], multiplied by color_matrix and
 * saturated. */
float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
		int y0_pos, int y1_pos) : TARGET
{
	/* 1.0 for odd output columns, 0.0 for even ones */
	float is_odd = floor(fmod(width * vert_in.uv.x + PRECISION_OFFSET, 2.0));

	/* horizontal coordinate of the source texel covering this pixel */
	float src_x = floor(width_d2 * vert_in.uv.x + PRECISION_OFFSET) *
			width_d2_i;
	src_x += input_width_i_d2;

	float4 texel = image.Sample(def_sampler, float2(src_x, vert_in.uv.y));

	float luma;
	if (is_odd > 0.5)
		luma = texel[y1_pos];
	else
		luma = texel[y0_pos];

	float3 yuv = float3(luma, texel[u_pos], texel[v_pos]);
	yuv = clamp(yuv, color_range_min, color_range_max);

	return saturate(mul(float4(yuv, 1.0), color_matrix));
}
|
|
|
|
/* Converts one pixel of a planar 4:2:0 source (three separate planes) to
 * the output color space.  The two chroma planes are addressed at half
 * resolution in both directions, starting at int_u_plane_offset and
 * int_v_plane_offset respectively. */
float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
{
	int px = int(vert_in.uv.x * width  + PRECISION_OFFSET);
	int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

	/* index shared by both half-resolution chroma planes */
	int chroma_index = (py / 2) * (int_width / 2) + px / 2;

	float lum = GetIntOffsetColor(py * int_width + px);
	float cb  = GetIntOffsetColor(int_u_plane_offset + chroma_index);
	float cr  = GetIntOffsetColor(int_v_plane_offset + chroma_index);

	float3 yuv = clamp(float3(lum, cb, cr),
			color_range_min, color_range_max);

	return saturate(mul(float4(yuv, 1.0), color_matrix));
}
|
|
|
|
/* Converts one pixel of a planar 4:4:4 source (three full-resolution
 * planes) to the output color space.  All three planes use the same
 * per-plane index; the chroma planes start at int_u_plane_offset and
 * int_v_plane_offset respectively. */
float4 PSPlanar444_Reverse(VertInOut vert_in) : TARGET
{
	int px = int(vert_in.uv.x * width  + PRECISION_OFFSET);
	int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

	/* same index in every plane since nothing is subsampled */
	int plane_index = py * int_width + px;

	float lum = GetIntOffsetColor(plane_index);
	float cb  = GetIntOffsetColor(int_u_plane_offset + plane_index);
	float cr  = GetIntOffsetColor(int_v_plane_offset + plane_index);

	float3 yuv = clamp(float3(lum, cb, cr),
			color_range_min, color_range_max);

	return saturate(mul(float4(yuv, 1.0), color_matrix));
}
|
|
|
|
/* Converts one pixel of an NV12 source (a luma plane followed by one
 * interleaved chroma plane) to the output color space.  The two chroma
 * values for a pixel sit next to each other, starting at
 * int_u_plane_offset. */
float4 PSNV12_Reverse(VertInOut vert_in) : TARGET
{
	int px = int(vert_in.uv.x * width  + PRECISION_OFFSET);
	int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

	/* half-resolution chroma index; doubled below because the two
	 * chroma values are interleaved */
	int chroma_index = (py / 2) * (int_width / 2) + px / 2;
	int chroma_base  = int_u_plane_offset + chroma_index * 2;

	float lum = GetIntOffsetColor(py * int_width + px);
	float cb  = GetIntOffsetColor(chroma_base);
	float cr  = GetIntOffsetColor(chroma_base + 1);

	float3 yuv = clamp(float3(lum, cb, cr),
			color_range_min, color_range_max);

	return saturate(mul(float4(yuv, 1.0), color_matrix));
}
|
|
|
|
/* Planar 4:2:0 packing: VSDefault + PSPlanar420. */
technique Planar420
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPlanar420(vert_in);
	}
}
|
|
|
|
/* Planar 4:4:4 packing: VSDefault + PSPlanar444. */
technique Planar444
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPlanar444(vert_in);
	}
}
|
|
|
|
/* Single-pass NV12 packing: VSDefault + PSNV12. */
technique NV12
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSNV12(vert_in);
	}
}
|
|
|
|
/* NV12 first-plane output: VSDefault + PSNV12_Y. */
technique NV12_Y
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSNV12_Y(vert_in);
	}
}
|
|
|
|
/* NV12 second-plane output: VSDefault + PSNV12_UV. */
technique NV12_UV
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSNV12_UV(vert_in);
	}
}
|
|
|
|
/* UYVY unpacking: PSPacked422_Reverse with component indices
 * u = 2, v = 0, y0 = 1, y1 = 3. */
technique UYVY_Reverse
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPacked422_Reverse(vert_in, 2, 0, 1, 3);
	}
}
|
|
|
|
/* YUY2 unpacking: PSPacked422_Reverse with component indices
 * u = 1, v = 3, y0 = 2, y1 = 0. */
technique YUY2_Reverse
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPacked422_Reverse(vert_in, 1, 3, 2, 0);
	}
}
|
|
|
|
/* YVYU unpacking: PSPacked422_Reverse with component indices
 * u = 3, v = 1, y0 = 2, y1 = 0. */
technique YVYU_Reverse
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPacked422_Reverse(vert_in, 3, 1, 2, 0);
	}
}
|
|
|
|
/* I420 unpacking: VSDefault + PSPlanar420_Reverse. */
technique I420_Reverse
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPlanar420_Reverse(vert_in);
	}
}
|
|
|
|
/* I444 unpacking: VSDefault + PSPlanar444_Reverse. */
technique I444_Reverse
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSPlanar444_Reverse(vert_in);
	}
}
|
|
|
|
/* NV12 unpacking: VSDefault + PSNV12_Reverse. */
technique NV12_Reverse
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSNV12_Reverse(vert_in);
	}
}
|