/*
 * lanczos sharper
 * note - this shader is adapted from the GPL bsnes shader, very good stuff
 * there.
 */

uniform float4x4 ViewProj;
uniform texture2d image;
/* Used below to convert UVs into texel coordinates for image.Load. */
uniform float2 base_dimension;
/* Used below as the UV step between adjacent texels.
 * NOTE(review): presumably 1.0 / base_dimension -- confirm with the host. */
uniform float2 base_dimension_i;
/* Blend factor for AspectUndistortX; 1.0 makes that mapping the identity. */
uniform float undistort_factor = 1.0;

sampler_state textureSampler
{
	AddressU = Clamp;
	AddressV = Clamp;
	Filter = Linear;
};

struct VertData {
	float4 pos : POSITION;
	float2 uv : TEXCOORD0;
};

struct VertOut {
	float2 uv : TEXCOORD0;
	float4 pos : POSITION;
};

struct FragData {
	float2 uv : TEXCOORD0;
};

/* Vertex shader: passes the UV through and transforms the position by
 * ViewProj (w is forced to 1.0). */
VertOut VSDefault(VertData v_in)
{
	VertOut vert_out;
	vert_out.uv = v_in.uv;
	vert_out.pos = mul(float4(v_in.pos.xyz, 1.0), ViewProj);
	return vert_out;
}

/* Lanczos kernel with radius 3:
 *   3 * sin(pi * x) * sin(pi * x / 3) / (pi * x)^2   for 0 < |x| < 3,
 *   1 at x == 0, and 0 for |x| >= 3. */
float weight(float x)
{
	/* Handled separately: the general formula below would divide 0 by 0. */
	if (x == 0.0)
		return 1.0;

	float radius = 3.0;
	if (abs(x) < radius) {
		float PIval = 3.14159265358979323846;
		float x_pi = x * PIval;
		float radius_i = 1.0 / 3.0;
		return radius * sin(x_pi) * sin(x_pi * radius_i) /
		       (x_pi * x_pi);
	}

	return 0.0;
}

/* Evaluates weight() at 2x - 3, 2x - 1 and 2x + 1, i.e. three kernel
 * positions spaced two apart. */
float3 weight3(float x)
{
	return float3(
		weight(x * 2.0 - 3.0),
		weight(x * 2.0 - 1.0),
		weight(x * 2.0 + 1.0));
}

/* Blends x^5 with x: returns (1 - a) * x^5 + a * x. */
float AspectUndistortX(float x, float a)
{
	/* The higher the power, the longer the linear part will be. */
	return (1.0 - a) * (x * x * x * x * x) + a * x;
}

/* Applies AspectUndistortX to a texture coordinate using undistort_factor. */
float AspectUndistortU(float u)
{
	/* Normalize texture coord to -1.0 to 1.0 range, and back. */
	return AspectUndistortX((u - 0.5) * 2.0, undistort_factor) * 0.5 + 0.5;
}

/* Remaps only the horizontal coordinate; ypos is returned unchanged. */
float2 undistort_coord(float xpos, float ypos)
{
	return float2(AspectUndistortU(xpos), ypos);
}

/* Samples the image at the undistorted coordinate. */
float4 undistort_pixel(float xpos, float ypos)
{
	return image.Sample(textureSampler, undistort_coord(xpos, ypos));
}

/* Filters one row of six horizontal taps at height ypos.
 * xpos012 / xpos345 hold the six tap positions in order; rowtap024 and
 * rowtap135 hold the weights of the even and odd taps respectively. */
float4 undistort_line(float3 xpos012, float3 xpos345, float ypos,
		      float3 rowtap024, float3 rowtap135)
{
	return undistort_pixel(xpos012.x, ypos) * rowtap024.x +
	       undistort_pixel(xpos012.y, ypos) * rowtap135.x +
	       undistort_pixel(xpos012.z, ypos) * rowtap024.y +
	       undistort_pixel(xpos345.x, ypos) * rowtap135.y +
	       undistort_pixel(xpos345.y, ypos) * rowtap024.z +
	       undistort_pixel(xpos345.z, ypos) * rowtap135.z;
}

/* 6x6-tap Lanczos filter around f_in.uv.
 *
 * undistort == true : every tap is sampled through undistort_pixel().
 * undistort == false: the outer taps (0, 1, 4, 5) are fetched with
 *                     image.Load, and the two middle taps (2, 3) on each
 *                     axis are merged into one linearly filtered Sample
 *                     placed at their weighted position. */
float4 DrawLanczos(FragData f_in, bool undistort)
{
	float2 stepxy = base_dimension_i;
	float2 pos = f_in.uv + stepxy * 0.5;
	float2 f = frac(pos * base_dimension);

	float2 f_rev_half = (-0.5) * f + 0.5;
	float3 rowtap024 = weight3(f_rev_half.x);
	float3 rowtap135 = weight3(f_rev_half.x + 0.5);
	float3 coltap024 = weight3(f_rev_half.y);
	float3 coltap135 = weight3(f_rev_half.y + 0.5);

	/* Normalize so the six weights on each axis sum to 1. */
	float rowsum = rowtap024.x + rowtap024.y + rowtap024.z +
		       rowtap135.x + rowtap135.y + rowtap135.z;
	float rowsum_i = 1.0 / rowsum;
	rowtap024 = rowtap024 * rowsum_i;
	rowtap135 = rowtap135 * rowsum_i;

	float colsum = coltap024.x + coltap024.y + coltap024.z +
		       coltap135.x + coltap135.y + coltap135.z;
	float colsum_i = 1.0 / colsum;
	coltap024 = coltap024 * colsum_i;
	coltap135 = coltap135 * colsum_i;

	/* Six consecutive tap positions, one texel step apart. */
	float2 uv0 = (-2.5 - f) * stepxy + pos;
	float2 uv1 = uv0 + stepxy;
	float2 uv2 = uv1 + stepxy;
	float2 uv3 = uv2 + stepxy;
	float2 uv4 = uv3 + stepxy;
	float2 uv5 = uv4 + stepxy;

	if (undistort) {
		float3 xpos012 = float3(uv0.x, uv1.x, uv2.x);
		float3 xpos345 = float3(uv3.x, uv4.x, uv5.x);
		return undistort_line(xpos012, xpos345, uv0.y, rowtap024, rowtap135) * coltap024.x +
		       undistort_line(xpos012, xpos345, uv1.y, rowtap024, rowtap135) * coltap135.x +
		       undistort_line(xpos012, xpos345, uv2.y, rowtap024, rowtap135) * coltap024.y +
		       undistort_line(xpos012, xpos345, uv3.y, rowtap024, rowtap135) * coltap135.y +
		       undistort_line(xpos012, xpos345, uv4.y, rowtap024, rowtap135) * coltap024.z +
		       undistort_line(xpos012, xpos345, uv5.y, rowtap024, rowtap135) * coltap135.z;
	}

	/* Merge taps 2 and 3 into a single linear sample on each axis: the
	 * sample sits between them, offset toward tap 3 by its share of the
	 * combined weight. */
	float u_weight_sum = rowtap024.y + rowtap135.y;
	float u_middle_offset = rowtap135.y * stepxy.x / u_weight_sum;
	float u_middle = uv2.x + u_middle_offset;

	float v_weight_sum = coltap024.y + coltap135.y;
	float v_middle_offset = coltap135.y * stepxy.y / v_weight_sum;
	float v_middle = uv2.y + v_middle_offset;

	/* Texel coordinates for the Load-based outer taps. Each tap is clamped
	 * on its own from the unclamped base. Deriving taps 1/4/5 from an
	 * already clamped tap 0 would shift them inward whenever tap 0 falls
	 * off the left/top edge, disagreeing with the clamp-addressed Sample
	 * taps. min(max()) is used rather than clamp() so that only
	 * (vector, scalar) and (vector, vector) overloads are required. */
	float2 coord_limit = base_dimension - 0.5;
	float2 coord_base = uv0 * base_dimension;
	float2 coord0_f = min(max(coord_base, 0.5), coord_limit);
	float2 coord1_f = min(max(coord_base + 1.0, 0.5), coord_limit);
	float2 coord4_f = min(max(coord_base + 4.0, 0.5), coord_limit);
	float2 coord5_f = min(max(coord_base + 5.0, 0.5), coord_limit);

	int2 coord0 = int2(coord0_f);
	int2 coord1 = int2(coord1_f);
	int2 coord4 = int2(coord4_f);
	int2 coord5 = int2(coord5_f);

	/* Row 0 */
	float4 row0 = image.Load(int3(coord0, 0)) * rowtap024.x;
	row0 += image.Load(int3(coord1.x, coord0.y, 0)) * rowtap135.x;
	row0 += image.Sample(textureSampler, float2(u_middle, uv0.y)) * u_weight_sum;
	row0 += image.Load(int3(coord4.x, coord0.y, 0)) * rowtap024.z;
	row0 += image.Load(int3(coord5.x, coord0.y, 0)) * rowtap135.z;
	float4 total = row0 * coltap024.x;

	/* Row 1 */
	float4 row1 = image.Load(int3(coord0.x, coord1.y, 0)) * rowtap024.x;
	row1 += image.Load(int3(coord1.x, coord1.y, 0)) * rowtap135.x;
	row1 += image.Sample(textureSampler, float2(u_middle, uv1.y)) * u_weight_sum;
	row1 += image.Load(int3(coord4.x, coord1.y, 0)) * rowtap024.z;
	row1 += image.Load(int3(coord5.x, coord1.y, 0)) * rowtap135.z;
	total += row1 * coltap135.x;

	/* Rows 2 and 3 combined: every tap is a Sample at v_middle so the
	 * linear filter blends the two rows vertically. */
	float4 row23 = image.Sample(textureSampler, float2(uv0.x, v_middle)) * rowtap024.x;
	row23 += image.Sample(textureSampler, float2(uv1.x, v_middle)) * rowtap135.x;
	row23 += image.Sample(textureSampler, float2(u_middle, v_middle)) * u_weight_sum;
	row23 += image.Sample(textureSampler, float2(uv4.x, v_middle)) * rowtap024.z;
	row23 += image.Sample(textureSampler, float2(uv5.x, v_middle)) * rowtap135.z;
	total += row23 * v_weight_sum;

	/* Row 4 */
	float4 row4 = image.Load(int3(coord0.x, coord4.y, 0)) * rowtap024.x;
	row4 += image.Load(int3(coord1.x, coord4.y, 0)) * rowtap135.x;
	row4 += image.Sample(textureSampler, float2(u_middle, uv4.y)) * u_weight_sum;
	row4 += image.Load(int3(coord4.x, coord4.y, 0)) * rowtap024.z;
	row4 += image.Load(int3(coord5.x, coord4.y, 0)) * rowtap135.z;
	total += row4 * coltap024.z;

	/* Row 5 */
	float4 row5 = image.Load(int3(coord0.x, coord5.y, 0)) * rowtap024.x;
	row5 += image.Load(int3(coord1.x, coord5.y, 0)) * rowtap135.x;
	row5 += image.Sample(textureSampler, float2(u_middle, uv5.y)) * u_weight_sum;
	row5 += image.Load(int3(coord4.x, coord5.y, 0)) * rowtap024.z;
	row5 += image.Load(int3(coord5, 0)) * rowtap135.z;
	total += row5 * coltap135.z;

	return total;
}

/* Pixel shader: returns the filtered RGBA value as-is. */
float4 PSDrawLanczosRGBA(FragData f_in, bool undistort) : TARGET
{
	return DrawLanczos(f_in, undistort);
}

/* Pixel shader: filters without undistortion, then divides RGB by alpha.
 * RGB is set to zero when alpha is not positive, avoiding a division by
 * zero. */
float4 PSDrawLanczosRGBADivide(FragData f_in) : TARGET
{
	float4 rgba = DrawLanczos(f_in, false);
	float alpha = rgba.a;
	float multiplier = (alpha > 0.0) ? (1.0 / alpha) : 0.0;
	return float4(rgba.rgb * multiplier, alpha);
}

technique Draw
{
	pass
	{
		vertex_shader = VSDefault(v_in);
		pixel_shader = PSDrawLanczosRGBA(f_in, false);
	}
}

technique DrawAlphaDivide
{
	pass
	{
		vertex_shader = VSDefault(v_in);
		pixel_shader = PSDrawLanczosRGBADivide(f_in);
	}
}

technique DrawUndistort
{
	pass
	{
		vertex_shader = VSDefault(v_in);
		pixel_shader = PSDrawLanczosRGBA(f_in, true);
	}
}