/******************************************************************************
    Copyright (C) 2014 by Hugh Bailey

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/

/* Uncomment to make the packing shaders return a flat colour per plane
 * instead of converted data. */
//#define DEBUGGING

/*
 * Parameters supplied by the host application.
 *
 * NOTE(review): judging by how they are used below, the `_i` suffix is
 * presumably the reciprocal (1 / value) and `_d2` the value divided by two;
 * confirm against the code that sets these uniforms.
 */
uniform float     u_plane_offset;
uniform float     v_plane_offset;

uniform float     width;
uniform float     height;
uniform float     width_i;
uniform float     height_i;
uniform float     width_d2;
uniform float     height_d2;
uniform float     width_d2_i;
uniform float     height_d2_i;
uniform float     input_width;
uniform float     input_height;
uniform float     input_width_i;
uniform float     input_height_i;
uniform float     input_width_i_d2;
uniform float     input_height_i_d2;

/* integer variants used by the *_Reverse shaders for texel addressing */
uniform int       int_width;
uniform int       int_input_width;
uniform int       int_u_plane_offset;
uniform int       int_v_plane_offset;

/* YUV -> RGB matrix and clamp range used by the *_Reverse shaders */
uniform float4x4  color_matrix;
uniform float3    color_range_min = {0.0, 0.0, 0.0};
uniform float3    color_range_max = {1.0, 1.0, 1.0};

/* RGB -> Y/U/V rows: xyz is dotted with the RGB sample, w is added */
uniform float4    color_vec_y;
uniform float4    color_vec_u;
uniform float4    color_vec_v;

uniform texture2d image;

sampler_state def_sampler {
	Filter   = Linear;
	AddressU = Clamp;
	AddressV = Clamp;
};

struct FragPos {
	float4 pos : POSITION;
};

struct VertTexPos {
	float2 uv  : TEXCOORD0;
	float4 pos : POSITION;
};

struct FragTex {
	float2 uv : TEXCOORD0;
};

/*
 * Position-only vertex shader driven purely by the vertex id.
 * Ids 0, 1, 2 produce (-1,-1), (-1,3), (3,-1): one triangle that covers the
 * whole [-1,1] clip-space square.
 */
FragPos VSPos(uint id : VERTEXID)
{
	float idHigh = float(id >> 1);
	float idLow = float(id & uint(1));

	float x = idHigh * 4.0 - 1.0;
	float y = idLow * 4.0 - 1.0;

	FragPos vert_out;
	vert_out.pos = float4(x, y, 0.0, 1.0);
	return vert_out;
}

/*
 * Same triangle as VSPos, additionally emitting texture coordinates.
 * The v coordinate is flipped unless obs_glsl_compile is set.
 */
VertTexPos VSPosTex(uint id : VERTEXID)
{
	float idHigh = float(id >> 1);
	float idLow = float(id & uint(1));

	float x = idHigh * 4.0 - 1.0;
	float y = idLow * 4.0 - 1.0;

	float u = idHigh * 2.0;
	float v = obs_glsl_compile ? (idLow * 2.0) : (1.0 - idLow * 2.0);

	VertTexPos vert_out;
	vert_out.uv = float2(u, v);
	vert_out.pos = float4(x, y, 0.0, 1.0);
	return vert_out;
}

/* used to prevent internal GPU precision issues with fmod in particular */
#define PRECISION_OFFSET 0.2

/*
 * Packs the RGB `image` into NV12 data, four values per output texel.
 *
 * The output texel's position is turned into a linear offset. Offsets below
 * u_plane_offset produce four luma values; offsets at or above it produce
 * interleaved chroma (U, V, U, V).
 */
float4 PSNV12(FragTex frag_in) : TARGET
{
	float v_mul = floor(frag_in.uv.y * input_height);

	/* each output texel holds four values, hence the * 4.0 */
	float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0;
	byte_offset += PRECISION_OFFSET;

	if (byte_offset < u_plane_offset) {
#ifdef DEBUGGING
		return float4(1.0, 1.0, 1.0, 1.0);
#endif

		float lum_u = floor(fmod(byte_offset, width)) * width_i;
		float lum_v = floor(byte_offset * width_i)    * height_i;

		/* move to texel centers to sample the 4 pixels properly */
		lum_u += width_i  * 0.5;
		lum_v += height_i * 0.5;

		/* four horizontally adjacent source pixels */
		float2 sample_pos0 = float2(lum_u,            lum_v);
		float2 sample_pos1 = float2(lum_u += width_i, lum_v);
		float2 sample_pos2 = float2(lum_u += width_i, lum_v);
		float2 sample_pos3 = float2(lum_u +  width_i, lum_v);

		float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
		float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
		float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb;
		float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb;

		float4 out_val = float4(
			dot(color_vec_y.xyz, rgb0) + color_vec_y.w,
			dot(color_vec_y.xyz, rgb1) + color_vec_y.w,
			dot(color_vec_y.xyz, rgb2) + color_vec_y.w,
			dot(color_vec_y.xyz, rgb3) + color_vec_y.w
		);

		return out_val;
	} else {
#ifdef DEBUGGING
		return float4(0.5, 0.2, 0.5, 0.2);
#endif

		/* offset relative to the start of the chroma data */
		float new_offset = byte_offset - u_plane_offset;

		float ch_u = floor(fmod(new_offset, width)) * width_i;
		float ch_v = floor(new_offset * width_i)    * height_d2_i;
		float width_i2 = width_i*2.0;

		/* move to the borders of each set of 4 pixels to force it
		 * to do bilinear averaging */
		ch_u += width_i;
		ch_v += height_i;

		float2 sample_pos0 = float2(ch_u,            ch_v);
		float2 sample_pos1 = float2(ch_u + width_i2, ch_v);

		float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
		float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;

		/* two interleaved U/V pairs per output texel */
		return float4(
			dot(color_vec_u.xyz, rgb0) + color_vec_u.w,
			dot(color_vec_v.xyz, rgb0) + color_vec_v.w,
			dot(color_vec_u.xyz, rgb1) + color_vec_u.w,
			dot(color_vec_v.xyz, rgb1) + color_vec_v.w
		);
	}
}

/*
 * Luma pass: loads the texel at the fragment's position and returns its Y
 * value.
 */
float PSNV12_Y(FragPos frag_in) : TARGET
{
	float3 rgb = image.Load(int3(frag_in.pos.xy, 0)).rgb;
	float y = dot(color_vec_y.xyz, rgb) + color_vec_y.w;
	return y;
}

/*
 * Chroma pass: samples `image` through def_sampler (linear filtering) at the
 * fragment's uv and returns the (U, V) pair.
 */
float2 PSNV12_UV(FragTex frag_in) : TARGET
{
	float3 rgb = image.Sample(def_sampler, frag_in.uv).rgb;
	float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w;
	float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w;
	return float2(u, v);
}

/*
 * Packs the RGB `image` into three-plane 4:2:0 data, four values per output
 * texel.
 *
 * Offsets below u_plane_offset produce luma, offsets below v_plane_offset
 * produce U, and everything else produces V. The four sample positions are
 * chosen per plane; the plane's colour vector is then applied to all four
 * samples.
 */
float4 PSPlanar420(FragTex frag_in) : TARGET
{
	float v_mul = floor(frag_in.uv.y * input_height);

	/* each output texel holds four values, hence the * 4.0 */
	float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0;
	byte_offset += PRECISION_OFFSET;

	float2 sample_pos0, sample_pos1, sample_pos2, sample_pos3;

	if (byte_offset < u_plane_offset) {
#ifdef DEBUGGING
		return float4(1.0, 1.0, 1.0, 1.0);
#endif

		float lum_u = floor(fmod(byte_offset, width)) * width_i;
		float lum_v = floor(byte_offset * width_i)    * height_i;

		/* move to texel centers to sample the 4 pixels properly */
		lum_u += width_i  * 0.5;
		lum_v += height_i * 0.5;

		sample_pos0 = float2(lum_u,            lum_v);
		sample_pos1 = float2(lum_u += width_i, lum_v);
		sample_pos2 = float2(lum_u += width_i, lum_v);
		sample_pos3 = float2(lum_u +  width_i, lum_v);

	} else {
#ifdef DEBUGGING
		return ((byte_offset < v_plane_offset) ?
				float4(0.5, 0.5, 0.5, 0.5) :
				float4(0.2, 0.2, 0.2, 0.2));
#endif

		/* offset relative to the start of whichever chroma plane
		 * this texel falls in */
		float new_offset = byte_offset -
			((byte_offset < v_plane_offset) ?
			 u_plane_offset : v_plane_offset);

		float ch_u = floor(fmod(new_offset, width_d2)) * width_d2_i;
		float ch_v = floor(new_offset * width_d2_i)    * height_d2_i;
		float width_i2 = width_i*2.0;

		/* move to the borders of each set of 4 pixels to force it
		 * to do bilinear averaging */
		ch_u += width_i;
		ch_v += height_i;

		/* set up coordinates for next chroma line, in case
		 * (width / 2) % 4 == 2, i.e. the current set of 4 pixels is
		 * split between the current and the next chroma line; do note
		 * that the next chroma line is two source lines below the
		 * current source line */
		float ch_u_n = 0. + width_i;
		float ch_v_n = ch_v + height_i * 3;

		sample_pos0 = float2(ch_u,             ch_v);
		sample_pos1 = float2(ch_u += width_i2, ch_v);

		ch_u += width_i2;

		// check if ch_u overflowed the current source and chroma line
		if (ch_u > 1.0) {
			/* the last two samples come from the start of the
			 * next chroma line; both positions must be set */
			sample_pos2 = float2(ch_u_n,            ch_v_n);
			sample_pos3 = float2(ch_u_n + width_i2, ch_v_n);
		} else {
			sample_pos2 = float2(ch_u,            ch_v);
			sample_pos3 = float2(ch_u + width_i2, ch_v);
		}
	}

	float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
	float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
	float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb;
	float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb;

	/* pick the conversion row that matches the plane being written */
	float4 color_vec;
	if (byte_offset < u_plane_offset)
		color_vec = color_vec_y;
	else if (byte_offset < v_plane_offset)
		color_vec = color_vec_u;
	else
		color_vec = color_vec_v;

	return float4(
		dot(color_vec.xyz, rgb0) + color_vec.w,
		dot(color_vec.xyz, rgb1) + color_vec.w,
		dot(color_vec.xyz, rgb2) + color_vec.w,
		dot(color_vec.xyz, rgb3) + color_vec.w
	);
}

/*
 * Packs the RGB `image` into three-plane 4:4:4 data, four values per output
 * texel.
 *
 * All three planes use the same full-resolution addressing; only the offset
 * subtracted and the colour vector applied differ per plane.
 */
float4 PSPlanar444(FragTex frag_in) : TARGET
{
	float v_mul = floor(frag_in.uv.y * input_height);

	/* each output texel holds four values, hence the * 4.0 */
	float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0;
	byte_offset += PRECISION_OFFSET;

	/* offset relative to the start of the plane this texel falls in */
	float new_byte_offset = byte_offset;

	if (byte_offset >= v_plane_offset)
		new_byte_offset -= v_plane_offset;
	else if (byte_offset >= u_plane_offset)
		new_byte_offset -= u_plane_offset;

	float u_val = floor(fmod(new_byte_offset, width)) * width_i;
	float v_val = floor(new_byte_offset * width_i)    * height_i;

	/* move to texel centers to sample the 4 pixels properly */
	u_val += width_i  * 0.5;
	v_val += height_i * 0.5;

	float2 sample_pos0 = float2(u_val,            v_val);
	float2 sample_pos1 = float2(u_val += width_i, v_val);
	float2 sample_pos2 = float2(u_val += width_i, v_val);
	float2 sample_pos3 = float2(u_val +  width_i, v_val);

	float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
	float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
	float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb;
	float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb;

	/* pick the conversion row that matches the plane being written */
	float4 color_vec;
	if (byte_offset < u_plane_offset)
		color_vec = color_vec_y;
	else if (byte_offset < v_plane_offset)
		color_vec = color_vec_u;
	else
		color_vec = color_vec_v;

	return float4(
		dot(color_vec.xyz, rgb0) + color_vec.w,
		dot(color_vec.xyz, rgb1) + color_vec.w,
		dot(color_vec.xyz, rgb2) + color_vec.w,
		dot(color_vec.xyz, rgb3) + color_vec.w
	);
}

/*
 * Returns the red channel of the texel found at linear index `offset`, with
 * the index unfolded into (column, row) using int_input_width as row length.
 */
float GetIntOffsetColor(int offset)
{
	return image.Load(int3(offset % int_input_width,
	                       offset / int_input_width,
	                       0)).r;
}

/*
 * Converts packed 4:2:2 data to RGB.
 *
 * One input texel carries two luma values plus one U and one V. u_pos, v_pos,
 * y0_pos and y1_pos are the component indices (0..3) of those values inside
 * the texel, which lets one shader serve several packed layouts.
 */
float4 PSPacked422_Reverse(FragTex frag_in, int u_pos, int v_pos,
		int y0_pos, int y1_pos) : TARGET
{
	float y = frag_in.uv.y;

	/* 1.0 for odd output columns, 0.0 for even ones */
	float odd = floor(fmod(width * frag_in.uv.x + PRECISION_OFFSET, 2.0));

	/* snap x to the input texel shared by this pair of output pixels */
	float x = floor(width_d2 * frag_in.uv.x + PRECISION_OFFSET) *
			width_d2_i;

	x += input_width_i_d2;

	float4 texel = image.Sample(def_sampler, float2(x, y));

	/* odd columns take the second luma value, even ones the first */
	float3 yuv = float3(odd > 0.5 ? texel[y1_pos] : texel[y0_pos],
			texel[u_pos], texel[v_pos]);
	yuv = clamp(yuv, color_range_min, color_range_max);
	return saturate(mul(float4(yuv, 1.0), color_matrix));
}

/*
 * Converts three-plane 4:2:0 data to RGB.
 * Luma is fetched per pixel; the chroma index is built from halved x, y and
 * width, and read from the U and V planes at their integer offsets.
 */
float4 PSPlanar420_Reverse(FragTex frag_in) : TARGET
{
	int x = int(frag_in.uv.x * width  + PRECISION_OFFSET);
	int y = int(frag_in.uv.y * height + PRECISION_OFFSET);

	int lum_offset = y * int_width + x;
	int chroma_offset = (y / 2) * (int_width / 2) + x / 2;
	int chroma1 = int_u_plane_offset + chroma_offset;
	int chroma2 = int_v_plane_offset + chroma_offset;

	float3 yuv = float3(
		GetIntOffsetColor(lum_offset),
		GetIntOffsetColor(chroma1),
		GetIntOffsetColor(chroma2)
	);
	yuv = clamp(yuv, color_range_min, color_range_max);
	return saturate(mul(float4(yuv, 1.0), color_matrix));
}

/*
 * Converts three-plane 4:4:4 data to RGB.
 * Luma and chroma share the same per-pixel index; only the plane offsets
 * differ.
 */
float4 PSPlanar444_Reverse(FragTex frag_in) : TARGET
{
	int x = int(frag_in.uv.x * width  + PRECISION_OFFSET);
	int y = int(frag_in.uv.y * height + PRECISION_OFFSET);

	int lum_offset = y * int_width + x;
	int chroma_offset = y * int_width + x;
	int chroma1 = int_u_plane_offset + chroma_offset;
	int chroma2 = int_v_plane_offset + chroma_offset;

	float3 yuv = float3(
		GetIntOffsetColor(lum_offset),
		GetIntOffsetColor(chroma1),
		GetIntOffsetColor(chroma2)
	);
	yuv = clamp(yuv, color_range_min, color_range_max);
	return saturate(mul(float4(yuv, 1.0), color_matrix));
}

/*
 * Converts NV12 data to RGB.
 * Chroma is interleaved: U is read at the doubled chroma index past
 * int_u_plane_offset and V from the value right after it.
 */
float4 PSNV12_Reverse(FragTex frag_in) : TARGET
{
	int x = int(frag_in.uv.x * width  + PRECISION_OFFSET);
	int y = int(frag_in.uv.y * height + PRECISION_OFFSET);

	int lum_offset = y * int_width + x;
	int chroma_offset = (y / 2) * (int_width / 2) + x / 2;
	int chroma = int_u_plane_offset + chroma_offset * 2;

	float3 yuv = float3(
		GetIntOffsetColor(lum_offset),
		GetIntOffsetColor(chroma),
		GetIntOffsetColor(chroma + 1)
	);
	yuv = clamp(yuv, color_range_min, color_range_max);
	return saturate(mul(float4(yuv, 1.0), color_matrix));
}

/*
 * Expands a single-channel limited-range image to full-range grey:
 * (value - 16/255) * 255/219, saturated, replicated to RGB with alpha 1.
 */
float4 PSY800_Limited(FragTex frag_in) : TARGET
{
	int x = int(frag_in.uv.x * width  + PRECISION_OFFSET);
	int y = int(frag_in.uv.y * height + PRECISION_OFFSET);

	float limited = image.Load(int3(x, y, 0)).x;
	float full = saturate((limited - (16.0 / 255.0)) * (255.0 / 219.0));
	return float4(full, full, full, 1.0);
}

/*
 * Replicates the first channel of the loaded texel to RGB with alpha 1; no
 * range conversion is applied.
 */
float4 PSY800_Full(FragTex frag_in) : TARGET
{
	int x = int(frag_in.uv.x * width  + PRECISION_OFFSET);
	int y = int(frag_in.uv.y * height + PRECISION_OFFSET);

	float3 full = image.Load(int3(x, y, 0)).xxx;
	return float4(full, 1.0);
}

/*
 * Expands the RGB channels of the loaded texel from limited to full range;
 * alpha is passed through unchanged.
 */
float4 PSRGB_Limited(FragTex frag_in) : TARGET
{
	int x = int(frag_in.uv.x * width  + PRECISION_OFFSET);
	int y = int(frag_in.uv.y * height + PRECISION_OFFSET);

	float4 rgba = image.Load(int3(x, y, 0));
	rgba.rgb = saturate((rgba.rgb - (16.0 / 255.0)) * (255.0 / 219.0));
	return rgba;
}

/*
 * Rebuilds an RGB pixel from three neighbouring single-channel texels
 * (x - 1, x, x + 1 give B, G, R) and expands it from limited to full range.
 */
float4 PSBGR3_Limited(FragTex frag_in) : TARGET
{
	/* horizontal index is scaled by 3.0: three input texels per pixel */
	int x = int(frag_in.uv.x * width * 3.0 + PRECISION_OFFSET);
	int y = int(frag_in.uv.y * height      + PRECISION_OFFSET);

	float b = image.Load(int3(x - 1, y, 0)).x;
	float g = image.Load(int3(x,     y, 0)).x;
	float r = image.Load(int3(x + 1, y, 0)).x;
	float3 rgb = float3(r, g, b);
	rgb = saturate((rgb - (16.0 / 255.0)) * (255.0 / 219.0));
	return float4(rgb, 1.0);
}

/*
 * Rebuilds an RGB pixel from three neighbouring single-channel texels
 * (x - 1, x, x + 1 give B, G, R) without any range conversion.
 */
float4 PSBGR3_Full(FragTex frag_in) : TARGET
{
	/* horizontal index is scaled by 3.0: three input texels per pixel */
	int x = int(frag_in.uv.x * width * 3.0 + PRECISION_OFFSET);
	int y = int(frag_in.uv.y * height      + PRECISION_OFFSET);

	float b = image.Load(int3(x - 1, y, 0)).x;
	float g = image.Load(int3(x,     y, 0)).x;
	float r = image.Load(int3(x + 1, y, 0)).x;
	float3 rgb = float3(r, g, b);
	return float4(rgb, 1.0);
}

/* RGB -> three-plane 4:2:0 */
technique Planar420
{
	pass
	{
		vertex_shader = VSPosTex(id);
		pixel_shader  = PSPlanar420(frag_in);
	}
}

/* RGB -> three-plane 4:4:4 */
technique Planar444
{
	pass
	{
		vertex_shader = VSPosTex(id);
		pixel_shader  = PSPlanar444(frag_in);
	}
}

/* RGB -> NV12 in a single pass */
technique NV12
{
	pass
	{
		vertex_shader = VSPosTex(id);
		pixel_shader  = PSNV12(frag_in);
	}
}

/* RGB -> Y only; uses the position-only vertex shader */
technique NV12_Y
{
	pass
	{
		vertex_shader = VSPos(id);
		pixel_shader  = PSNV12_Y(frag_in);
	}
}

/* RGB -> (U, V) pairs */
technique NV12_UV
{
	pass
	{
		vertex_shader = VSPosTex(id);
		pixel_shader  = PSNV12_UV(frag_in);
	}
}

/* packed 4:2:2 -> RGB with u_pos = 2, v_pos = 0, y0_pos = 1, y1_pos = 3 */
technique UYVY_Reverse
{
	pass
	{
		vertex_shader = VSPosTex(id);
		pixel_shader  = PSPacked422_Reverse(frag_in, 2, 0, 1, 3);
	}
}
/* packed 4:2:2 -> RGB with u_pos = 1, v_pos = 3, y0_pos = 2, y1_pos = 0 */
technique YUY2_Reverse
{
	pass
	{
		vertex_shader = VSPosTex(id);
		pixel_shader  = PSPacked422_Reverse(frag_in, 1, 3, 2, 0);
	}
}

/* packed 4:2:2 -> RGB with u_pos = 3, v_pos = 1, y0_pos = 2, y1_pos = 0 */
technique YVYU_Reverse
{
	pass
	{
		vertex_shader = VSPosTex(id);
		pixel_shader  = PSPacked422_Reverse(frag_in, 3, 1, 2, 0);
	}
}

/* three-plane 4:2:0 -> RGB */
technique I420_Reverse
{
	pass
	{
		vertex_shader = VSPosTex(id);
		pixel_shader  = PSPlanar420_Reverse(frag_in);
	}
}

/* three-plane 4:4:4 -> RGB */
technique I444_Reverse
{
	pass
	{
		vertex_shader = VSPosTex(id);
		pixel_shader  = PSPlanar444_Reverse(frag_in);
	}
}

/* NV12 (luma plane + interleaved chroma) -> RGB */
technique NV12_Reverse
{
	pass
	{
		vertex_shader = VSPosTex(id);
		pixel_shader  = PSNV12_Reverse(frag_in);
	}
}

/* single-channel grey, limited range -> full-range RGB */
technique Y800_Limited
{
	pass
	{
		vertex_shader = VSPosTex(id);
		pixel_shader  = PSY800_Limited(frag_in);
	}
}

/* single-channel grey -> RGB, no range conversion */
technique Y800_Full
{
	pass
	{
		vertex_shader = VSPosTex(id);
		pixel_shader  = PSY800_Full(frag_in);
	}
}

/* limited-range RGB -> full-range RGB */
technique RGB_Limited
{
	pass
	{
		vertex_shader = VSPosTex(id);
		pixel_shader  = PSRGB_Limited(frag_in);
	}
}

/* three single-channel texels per pixel (B, G, R), limited -> full range */
technique BGR3_Limited
{
	pass
	{
		vertex_shader = VSPosTex(id);
		pixel_shader  = PSBGR3_Limited(frag_in);
	}
}

/* three single-channel texels per pixel (B, G, R), no range conversion */
technique BGR3_Full
{
	pass
	{
		vertex_shader = VSPosTex(id);
		pixel_shader  = PSBGR3_Full(frag_in);
	}
}