Merge pull request #2551 from jpark37/cube-lut-enhance
Implement CUBE LUT domain properly, tetrahedral interpolation for 3D LUTs
master
commit
04e6a39de9
|
@ -427,7 +427,9 @@ static bool gl_write_texture_code(struct gl_shader_parser *glsp,
|
|||
else if (cf_token_is(cfp, "Load")) {
|
||||
written = gl_write_texture_call(glsp, var, "texelFetch", false);
|
||||
dstr_cat(&glsp->gl_string, "(");
|
||||
function_end = ").xy, 0)";
|
||||
function_end = (strcmp(var->type, "texture3d") == 0)
|
||||
? ").xyz, 0)"
|
||||
: ").xy, 0)";
|
||||
}
|
||||
|
||||
if (!written)
|
||||
|
|
|
@ -16,6 +16,11 @@
|
|||
|
||||
static const uint32_t LUT_WIDTH = 64;
|
||||
|
||||
/*
 * Dimensionality of the loaded colour look-up table. The filter uses it to
 * choose between a width x 1 2D texture (CLUT_1D) and a volume texture
 * (CLUT_3D), and to pick the matching shader technique at render time.
 */
enum clut_dimension {
|
||||
/* LUT declared with LUT_1D_SIZE in a .cube file */
CLUT_1D,
|
||||
/* LUT declared with LUT_3D_SIZE in a .cube file, or loaded from an image */
CLUT_3D,
|
||||
};
|
||||
|
||||
struct lut_filter_data {
|
||||
obs_source_t *context;
|
||||
gs_effect_t *effect;
|
||||
|
@ -28,8 +33,11 @@ struct lut_filter_data {
|
|||
|
||||
char *file;
|
||||
float clut_amount;
|
||||
float clut_scale;
|
||||
float clut_offset;
|
||||
enum clut_dimension clut_dim;
|
||||
struct vec3 clut_scale;
|
||||
struct vec3 clut_offset;
|
||||
struct vec3 domain_min;
|
||||
struct vec3 domain_max;
|
||||
};
|
||||
|
||||
static const char *color_grade_filter_get_name(void *unused)
|
||||
|
@ -109,6 +117,7 @@ static void *load_1d_lut(FILE *const file, const uint32_t width, float red,
|
|||
4 * width * width * width * sizeof(struct half);
|
||||
struct half *values = bmalloc(data_size);
|
||||
|
||||
size_t offset = 0;
|
||||
bool data_found = true;
|
||||
for (uint32_t index = 0; index < width; ++index) {
|
||||
if (!data_found) {
|
||||
|
@ -117,38 +126,10 @@ static void *load_1d_lut(FILE *const file, const uint32_t width, float red,
|
|||
break;
|
||||
}
|
||||
|
||||
for (uint32_t z = 0; z < width; ++z) {
|
||||
const uint32_t z_offset = z * width * width;
|
||||
for (uint32_t y = 0; y < width; ++y) {
|
||||
const uint32_t y_offset = y * width;
|
||||
const uint32_t offset =
|
||||
4 * (index + y_offset + z_offset);
|
||||
values[offset] = half_from_float(red);
|
||||
values[offset + 3] =
|
||||
half_from_bits(0x3C00); // 1.0
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t z = 0; z < width; ++z) {
|
||||
const uint32_t z_offset = z * width * width;
|
||||
for (uint32_t x = 0; x < width; ++x) {
|
||||
const uint32_t offset =
|
||||
4 * (x + (index * width) + z_offset) +
|
||||
1;
|
||||
values[offset] = half_from_float(green);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t y = 0; y < width; ++y) {
|
||||
const uint32_t y_offset = y * width;
|
||||
for (uint32_t x = 0; x < width; ++x) {
|
||||
const uint32_t offset =
|
||||
4 * (x + y_offset +
|
||||
(index * width * width)) +
|
||||
2;
|
||||
values[offset] = half_from_float(blue);
|
||||
}
|
||||
}
|
||||
values[offset++] = half_from_float(red);
|
||||
values[offset++] = half_from_float(green);
|
||||
values[offset++] = half_from_float(blue);
|
||||
values[offset++] = half_from_bits(0x3c00); // 1.0
|
||||
|
||||
data_found = get_cube_entry(file, &red, &green, &blue);
|
||||
}
|
||||
|
@ -189,14 +170,14 @@ static void *load_3d_lut(FILE *const file, const uint32_t width, float red,
|
|||
return values;
|
||||
}
|
||||
|
||||
static void *load_cube_file(const char *const path, uint32_t *const width)
|
||||
static void *load_cube_file(const char *const path, uint32_t *const width,
|
||||
struct vec3 *domain_min, struct vec3 *domain_max,
|
||||
enum clut_dimension *dim)
|
||||
{
|
||||
void *data = NULL;
|
||||
|
||||
FILE *const file = os_fopen(path, "rb");
|
||||
if (file) {
|
||||
float min_value[] = {0.0f, 0.0f, 0.0f};
|
||||
float max_value[] = {1.0f, 1.0f, 1.0f};
|
||||
float red, green, blue;
|
||||
unsigned width_1d = 0;
|
||||
unsigned width_3d = 0;
|
||||
|
@ -214,14 +195,10 @@ static void *load_cube_file(const char *const path, uint32_t *const width)
|
|||
break;
|
||||
} else if (sscanf(line, "DOMAIN_MIN %f %f %f", &f[0],
|
||||
&f[1], &f[2]) == 3) {
|
||||
min_value[0] = f[0];
|
||||
min_value[1] = f[1];
|
||||
min_value[2] = f[2];
|
||||
vec3_set(domain_min, f[0], f[1], f[2]);
|
||||
} else if (sscanf(line, "DOMAIN_MAX %f %f %f", &f[0],
|
||||
&f[1], &f[2]) == 3) {
|
||||
max_value[0] = f[0];
|
||||
max_value[1] = f[1];
|
||||
max_value[2] = f[2];
|
||||
vec3_set(domain_max, f[0], f[1], f[2]);
|
||||
} else if (sscanf(line, "LUT_1D_SIZE %u", &u) == 1) {
|
||||
width_1d = u;
|
||||
} else if (sscanf(line, "LUT_3D_SIZE %u", &u) == 1) {
|
||||
|
@ -229,17 +206,28 @@ static void *load_cube_file(const char *const path, uint32_t *const width)
|
|||
}
|
||||
}
|
||||
|
||||
if (data_found) {
|
||||
if (domain_min->x >= domain_max->x ||
|
||||
domain_min->y >= domain_max->y ||
|
||||
domain_min->z >= domain_max->z) {
|
||||
blog(LOG_WARNING,
|
||||
"Invalid CUBE LUT domain: [%f, %f], [%f, %f], [%f, %f]",
|
||||
domain_min->x, domain_max->x, domain_min->y,
|
||||
domain_max->y, domain_min->z, domain_max->z);
|
||||
} else if (data_found) {
|
||||
if (width_1d > 0) {
|
||||
data = load_1d_lut(file, width_1d, red, green,
|
||||
blue);
|
||||
if (data)
|
||||
if (data) {
|
||||
*width = width_1d;
|
||||
*dim = CLUT_1D;
|
||||
}
|
||||
} else if (width_3d > 0) {
|
||||
data = load_3d_lut(file, width_3d, red, green,
|
||||
blue);
|
||||
if (data)
|
||||
if (data) {
|
||||
*width = width_3d;
|
||||
*dim = CLUT_3D;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -276,12 +264,18 @@ static void color_grade_filter_update(void *data, obs_data_t *settings)
|
|||
obs_leave_graphics();
|
||||
|
||||
if (path) {
|
||||
vec3_set(&filter->domain_min, 0.0f, 0.0f, 0.0f);
|
||||
vec3_set(&filter->domain_max, 1.0f, 1.0f, 1.0f);
|
||||
|
||||
const char *const ext = os_get_path_extension(path);
|
||||
if (ext && astrcmpi(ext, ".cube") == 0) {
|
||||
filter->cube_data =
|
||||
load_cube_file(path, &filter->cube_width);
|
||||
filter->cube_data = load_cube_file(
|
||||
path, &filter->cube_width, &filter->domain_min,
|
||||
&filter->domain_max, &filter->clut_dim);
|
||||
} else {
|
||||
gs_image_file_init(&filter->image, path);
|
||||
filter->cube_width = LUT_WIDTH;
|
||||
filter->clut_dim = CLUT_3D;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -292,16 +286,48 @@ static void color_grade_filter_update(void *data, obs_data_t *settings)
|
|||
filter->target = make_clut_texture_png(
|
||||
filter->image.format, filter->image.cx,
|
||||
filter->image.cy, filter->image.texture_data);
|
||||
filter->clut_scale =
|
||||
(float)(LUT_WIDTH - 1) / (float)LUT_WIDTH;
|
||||
filter->clut_offset = 0.5f / (float)LUT_WIDTH;
|
||||
const float clut_scale = (float)(LUT_WIDTH - 1);
|
||||
vec3_set(&filter->clut_scale, clut_scale, clut_scale,
|
||||
clut_scale);
|
||||
vec3_set(&filter->clut_offset, 0.f, 0.f, 0.f);
|
||||
} else if (filter->cube_data) {
|
||||
const uint32_t width = filter->cube_width;
|
||||
filter->target = gs_voltexture_create(
|
||||
width, width, width, GS_RGBA16F, 1,
|
||||
(uint8_t **)&filter->cube_data, 0);
|
||||
filter->clut_scale = (float)(width - 1) / (float)width;
|
||||
filter->clut_offset = 0.5f / (float)width;
|
||||
if (filter->clut_dim == CLUT_1D) {
|
||||
filter->target = gs_texture_create(
|
||||
width, 1, GS_RGBA16F, 1,
|
||||
(const uint8_t **)&filter->cube_data,
|
||||
0);
|
||||
} else {
|
||||
filter->target = gs_voltexture_create(
|
||||
width, width, width, GS_RGBA16F, 1,
|
||||
(const uint8_t **)&filter->cube_data,
|
||||
0);
|
||||
}
|
||||
|
||||
struct vec3 domain_scale;
|
||||
vec3_sub(&domain_scale, &filter->domain_max,
|
||||
&filter->domain_min);
|
||||
|
||||
const float width_minus_one = (float)(width - 1);
|
||||
vec3_set(&filter->clut_scale, width_minus_one,
|
||||
width_minus_one, width_minus_one);
|
||||
vec3_div(&filter->clut_scale, &filter->clut_scale,
|
||||
&domain_scale);
|
||||
|
||||
vec3_neg(&filter->clut_offset, &filter->domain_min);
|
||||
vec3_mul(&filter->clut_offset, &filter->clut_offset,
|
||||
&filter->clut_scale);
|
||||
|
||||
/* 1D shader wants normalized UVW */
|
||||
if (filter->clut_dim == CLUT_1D) {
|
||||
vec3_divf(&filter->clut_scale,
|
||||
&filter->clut_scale, (float)width);
|
||||
|
||||
vec3_addf(&filter->clut_offset,
|
||||
&filter->clut_offset, 0.5f);
|
||||
vec3_divf(&filter->clut_offset,
|
||||
&filter->clut_offset, (float)width);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -398,21 +424,36 @@ static void color_grade_filter_render(void *data, gs_effect_t *effect)
|
|||
OBS_ALLOW_DIRECT_RENDERING))
|
||||
return;
|
||||
|
||||
param = gs_effect_get_param_by_name(filter->effect, "clut");
|
||||
const char *clut_texture_name = "clut_3d";
|
||||
const char *tech_name = "Draw3D";
|
||||
if (filter->clut_dim == CLUT_1D) {
|
||||
clut_texture_name = "clut_1d";
|
||||
tech_name = "Draw1D";
|
||||
}
|
||||
|
||||
param = gs_effect_get_param_by_name(filter->effect, clut_texture_name);
|
||||
gs_effect_set_texture(param, filter->target);
|
||||
|
||||
param = gs_effect_get_param_by_name(filter->effect, "clut_amount");
|
||||
gs_effect_set_float(param, filter->clut_amount);
|
||||
|
||||
param = gs_effect_get_param_by_name(filter->effect, "clut_scale");
|
||||
gs_effect_set_float(param, filter->clut_scale);
|
||||
gs_effect_set_vec3(param, &filter->clut_scale);
|
||||
|
||||
param = gs_effect_get_param_by_name(filter->effect, "clut_offset");
|
||||
gs_effect_set_float(param, filter->clut_offset);
|
||||
gs_effect_set_vec3(param, &filter->clut_offset);
|
||||
|
||||
obs_source_process_filter_end(filter->context, filter->effect, 0, 0);
|
||||
param = gs_effect_get_param_by_name(filter->effect, "domain_min");
|
||||
gs_effect_set_vec3(param, &filter->domain_min);
|
||||
|
||||
UNUSED_PARAMETER(effect);
|
||||
param = gs_effect_get_param_by_name(filter->effect, "domain_max");
|
||||
gs_effect_set_vec3(param, &filter->domain_max);
|
||||
|
||||
param = gs_effect_get_param_by_name(filter->effect, "cube_width_i");
|
||||
gs_effect_set_float(param, 1.0f / filter->cube_width);
|
||||
|
||||
obs_source_process_filter_tech_end(filter->context, filter->effect, 0,
|
||||
0, tech_name);
|
||||
}
|
||||
|
||||
struct obs_source_info color_grade_filter = {
|
||||
|
|
|
@ -1,10 +1,14 @@
|
|||
uniform float4x4 ViewProj;
|
||||
uniform texture2d image;
|
||||
|
||||
uniform texture3d clut;
|
||||
uniform texture2d clut_1d;
|
||||
uniform texture3d clut_3d;
|
||||
uniform float clut_amount;
|
||||
uniform float clut_scale;
|
||||
uniform float clut_offset;
|
||||
uniform float3 clut_scale;
|
||||
uniform float3 clut_offset;
|
||||
uniform float3 domain_min;
|
||||
uniform float3 domain_max;
|
||||
uniform float cube_width_i;
|
||||
|
||||
sampler_state textureSampler {
|
||||
Filter = Linear;
|
||||
|
@ -31,22 +35,133 @@ VertDataOut VSDefault(VertDataIn v_in)
|
|||
return vert_out;
|
||||
}
|
||||
|
||||
float4 LUT(VertDataOut v_in) : TARGET
|
||||
float4 LUT1D(VertDataOut v_in) : TARGET
|
||||
{
|
||||
float4 textureColor = image.Sample(textureSampler, v_in.uv);
|
||||
|
||||
float3 clut_uvw = textureColor.rgb * clut_scale + clut_offset;
|
||||
float3 luttedColor = clut.Sample(textureSampler, clut_uvw).rgb;
|
||||
if (textureColor.r >= domain_min.r && textureColor.r <= domain_max.r) {
|
||||
float u = textureColor.r * clut_scale.r + clut_offset.r;
|
||||
float channel = clut_1d.Sample(textureSampler, float2(u, 0.5)).r;
|
||||
textureColor.r = lerp(textureColor.r, channel, clut_amount);
|
||||
}
|
||||
|
||||
float3 final_color = lerp(textureColor.rgb, luttedColor, clut_amount);
|
||||
return float4(final_color.rgb, textureColor.a);
|
||||
if (textureColor.g >= domain_min.g && textureColor.g <= domain_max.g) {
|
||||
float u = textureColor.g * clut_scale.g + clut_offset.g;
|
||||
float channel = clut_1d.Sample(textureSampler, float2(u, 0.5)).g;
|
||||
textureColor.g = lerp(textureColor.g, channel, clut_amount);
|
||||
}
|
||||
|
||||
if (textureColor.b >= domain_min.b && textureColor.b <= domain_max.b) {
|
||||
float u = textureColor.b * clut_scale.b + clut_offset.b;
|
||||
float channel = clut_1d.Sample(textureSampler, float2(u, 0.5)).b;
|
||||
textureColor.b = lerp(textureColor.b, channel, clut_amount);
|
||||
}
|
||||
|
||||
return textureColor;
|
||||
}
|
||||
|
||||
technique Draw
|
||||
/*
 * Pixel shader for 3D LUTs using tetrahedral interpolation.
 *
 * A pixel whose R, G or B lies outside [domain_min, domain_max] is returned
 * unchanged. Otherwise the colour is mapped into LUT lattice coordinates via
 * clut_scale / clut_offset, the enclosing lattice cell is split into one of
 * six tetrahedra according to the ordering of the fractional coordinates,
 * and the four tetrahedron vertices c0..c3 are blended as
 *
 *     (1 - L) * c0 + (L - M) * c1 + (M - S) * c2 + S * c3
 *
 * where L >= M >= S are the largest, middle and smallest fractional parts.
 * The blended colour is mixed with the input by clut_amount; alpha is
 * passed through untouched.
 */
float4 LUT3D(VertDataOut v_in) : TARGET
|
||||
{
|
||||
float4 textureColor = image.Sample(textureSampler, v_in.uv);
|
||||
float r = textureColor.r;
|
||||
float g = textureColor.g;
|
||||
float b = textureColor.b;
|
||||
// Only colours inside the LUT's declared domain are remapped.
if (r >= domain_min.r && r <= domain_max.r &&
|
||||
g >= domain_min.g && g <= domain_max.g &&
|
||||
b >= domain_min.b && b <= domain_max.b)
|
||||
{
|
||||
// Position in lattice (texel index) units; assumes the host supplies
// clut_scale / clut_offset so that the domain maps onto 0 .. width-1.
float3 clut_pos = textureColor.rgb * clut_scale + clut_offset;
|
||||
float3 floor_pos = floor(clut_pos);
|
||||
|
||||
// Fractional position inside the lattice cell, each component in [0, 1).
float3 fracRGB = clut_pos - floor_pos;
|
||||
|
||||
// Normalised texel-centre coordinates of the cell's lower corner (uvw0)
// and of the diagonally opposite corner (uvw3). cube_width_i is expected
// to be 1 / LUT width.
float3 uvw0 = (floor_pos + 0.5) * cube_width_i;
|
||||
float3 uvw3 = (floor_pos + 1.5) * cube_width_i;
|
||||
|
||||
// fracL >= fracM >= fracS: the sorted fractional parts.
float fracL, fracM, fracS;
|
||||
// uvw1 steps from uvw0 along the axis with the largest fraction;
// uvw2 additionally steps along the axis with the middle fraction.
float3 uvw1, uvw2;
|
||||
if (fracRGB.r < fracRGB.g) {
|
||||
if (fracRGB.r < fracRGB.b) {
|
||||
if (fracRGB.g < fracRGB.b) {
|
||||
// f(R) < f(G) < f(B)
|
||||
fracL = fracRGB.b;
|
||||
fracM = fracRGB.g;
|
||||
fracS = fracRGB.r;
|
||||
uvw1 = float3(uvw0.x, uvw0.y, uvw3.z);
|
||||
uvw2 = float3(uvw0.x, uvw3.y, uvw3.z);
|
||||
} else {
|
||||
// f(R) < f(B) <= f(G)
|
||||
fracL = fracRGB.g;
|
||||
fracM = fracRGB.b;
|
||||
fracS = fracRGB.r;
|
||||
uvw1 = float3(uvw0.x, uvw3.y, uvw0.z);
|
||||
uvw2 = float3(uvw0.x, uvw3.y, uvw3.z);
|
||||
}
|
||||
} else {
|
||||
// f(B) <= f(R) < f(G)
|
||||
fracL = fracRGB.g;
|
||||
fracM = fracRGB.r;
|
||||
fracS = fracRGB.b;
|
||||
uvw1 = float3(uvw0.x, uvw3.y, uvw0.z);
|
||||
uvw2 = float3(uvw3.x, uvw3.y, uvw0.z);
|
||||
}
|
||||
} else if (fracRGB.r < fracRGB.b) {
|
||||
// f(G) <= f(R) < f(B)
|
||||
fracL = fracRGB.b;
|
||||
fracM = fracRGB.r;
|
||||
fracS = fracRGB.g;
|
||||
uvw1 = float3(uvw0.x, uvw0.y, uvw3.z);
|
||||
uvw2 = float3(uvw3.x, uvw0.y, uvw3.z);
|
||||
} else if (fracRGB.g < fracRGB.b) {
|
||||
// f(G) < f(B) <= f(R)
|
||||
fracL = fracRGB.r;
|
||||
fracM = fracRGB.b;
|
||||
fracS = fracRGB.g;
|
||||
uvw1 = float3(uvw3.x, uvw0.y, uvw0.z);
|
||||
uvw2 = float3(uvw3.x, uvw0.y, uvw3.z);
|
||||
} else {
|
||||
// f(B) <= f(G) <= f(R)
|
||||
fracL = fracRGB.r;
|
||||
fracM = fracRGB.g;
|
||||
fracS = fracRGB.b;
|
||||
uvw1 = float3(uvw3.x, uvw0.y, uvw0.z);
|
||||
uvw2 = float3(uvw3.x, uvw3.y, uvw0.z);
|
||||
}
|
||||
|
||||
/* use filtering to collapse 4 taps to 2 */
|
||||
/* use max to kill potential zero-divide NaN */
|
||||
|
||||
// Tap 1: uvw0 and uvw1 differ along one axis only, so a linearly filtered
// sample at weight01 between them yields
// ((1-L)*c0 + (L-M)*c1) / (1-M); multiplying by coeff01 restores the sum.
float coeff01 = (1.0 - fracM);
|
||||
float weight01 = max((fracL - fracM) / coeff01, 0.0);
|
||||
float3 uvw01 = lerp(uvw0, uvw1, weight01);
|
||||
float3 sample01 = clut_3d.Sample(textureSampler, uvw01).rgb;
|
||||
|
||||
// Tap 2: likewise uvw2 and uvw3 differ along one axis; the sample yields
// ((M-S)*c2 + S*c3) / M, rescaled by coeff23 = M below.
float coeff23 = fracM;
|
||||
float weight23 = max(fracS / coeff23, 0.0);
|
||||
float3 uvw23 = lerp(uvw2, uvw3, weight23);
|
||||
float3 sample23 = clut_3d.Sample(textureSampler, uvw23).rgb;
|
||||
|
||||
float3 luttedColor = (coeff01 * sample01) + (coeff23 * sample23);
|
||||
// clut_amount blends between the original (0) and the graded colour (1).
textureColor.rgb = lerp(textureColor.rgb, luttedColor, clut_amount);
|
||||
}
|
||||
|
||||
return textureColor;
|
||||
}
|
||||
|
||||
technique Draw1D
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(v_in);
|
||||
pixel_shader = LUT(v_in);
|
||||
pixel_shader = LUT1D(v_in);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Technique for three-dimensional LUTs: a single pass that runs the default
 * vertex shader and the tetrahedral LUT3D pixel shader. The filter's render
 * code selects it by the name "Draw3D".
 */
technique Draw3D
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSDefault(v_in);
|
||||
pixel_shader = LUT3D(v_in);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue