Merge pull request #2551 from jpark37/cube-lut-enhance

Implement CUBE LUT domain properly, tetrahedral interpolation for 3D LUTs
master
Jim 2020-05-20 18:09:56 -07:00 committed by GitHub
commit 04e6a39de9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 230 additions and 72 deletions

View File

@ -427,7 +427,9 @@ static bool gl_write_texture_code(struct gl_shader_parser *glsp,
else if (cf_token_is(cfp, "Load")) { else if (cf_token_is(cfp, "Load")) {
written = gl_write_texture_call(glsp, var, "texelFetch", false); written = gl_write_texture_call(glsp, var, "texelFetch", false);
dstr_cat(&glsp->gl_string, "("); dstr_cat(&glsp->gl_string, "(");
function_end = ").xy, 0)"; function_end = (strcmp(var->type, "texture3d") == 0)
? ").xyz, 0)"
: ").xy, 0)";
} }
if (!written) if (!written)

View File

@ -16,6 +16,11 @@
static const uint32_t LUT_WIDTH = 64; static const uint32_t LUT_WIDTH = 64;
/* Kind of color lookup table currently held by the filter.  Reported by
 * load_cube_file() for .cube data and forced to CLUT_3D for image-based
 * LUTs; the render path uses it to pick the texture parameter and the
 * effect technique. */
enum clut_dimension {
/* Table loaded through load_1d_lut() (LUT_1D_SIZE in the .cube file). */
CLUT_1D,
/* Table loaded through load_3d_lut() (LUT_3D_SIZE), or an image LUT. */
CLUT_3D,
};
struct lut_filter_data { struct lut_filter_data {
obs_source_t *context; obs_source_t *context;
gs_effect_t *effect; gs_effect_t *effect;
@ -28,8 +33,11 @@ struct lut_filter_data {
char *file; char *file;
float clut_amount; float clut_amount;
float clut_scale; enum clut_dimension clut_dim;
float clut_offset; struct vec3 clut_scale;
struct vec3 clut_offset;
struct vec3 domain_min;
struct vec3 domain_max;
}; };
static const char *color_grade_filter_get_name(void *unused) static const char *color_grade_filter_get_name(void *unused)
@ -109,6 +117,7 @@ static void *load_1d_lut(FILE *const file, const uint32_t width, float red,
4 * width * width * width * sizeof(struct half); 4 * width * width * width * sizeof(struct half);
struct half *values = bmalloc(data_size); struct half *values = bmalloc(data_size);
size_t offset = 0;
bool data_found = true; bool data_found = true;
for (uint32_t index = 0; index < width; ++index) { for (uint32_t index = 0; index < width; ++index) {
if (!data_found) { if (!data_found) {
@ -117,38 +126,10 @@ static void *load_1d_lut(FILE *const file, const uint32_t width, float red,
break; break;
} }
for (uint32_t z = 0; z < width; ++z) { values[offset++] = half_from_float(red);
const uint32_t z_offset = z * width * width; values[offset++] = half_from_float(green);
for (uint32_t y = 0; y < width; ++y) { values[offset++] = half_from_float(blue);
const uint32_t y_offset = y * width; values[offset++] = half_from_bits(0x3c00); // 1.0
const uint32_t offset =
4 * (index + y_offset + z_offset);
values[offset] = half_from_float(red);
values[offset + 3] =
half_from_bits(0x3C00); // 1.0
}
}
for (uint32_t z = 0; z < width; ++z) {
const uint32_t z_offset = z * width * width;
for (uint32_t x = 0; x < width; ++x) {
const uint32_t offset =
4 * (x + (index * width) + z_offset) +
1;
values[offset] = half_from_float(green);
}
}
for (uint32_t y = 0; y < width; ++y) {
const uint32_t y_offset = y * width;
for (uint32_t x = 0; x < width; ++x) {
const uint32_t offset =
4 * (x + y_offset +
(index * width * width)) +
2;
values[offset] = half_from_float(blue);
}
}
data_found = get_cube_entry(file, &red, &green, &blue); data_found = get_cube_entry(file, &red, &green, &blue);
} }
@ -189,14 +170,14 @@ static void *load_3d_lut(FILE *const file, const uint32_t width, float red,
return values; return values;
} }
static void *load_cube_file(const char *const path, uint32_t *const width) static void *load_cube_file(const char *const path, uint32_t *const width,
struct vec3 *domain_min, struct vec3 *domain_max,
enum clut_dimension *dim)
{ {
void *data = NULL; void *data = NULL;
FILE *const file = os_fopen(path, "rb"); FILE *const file = os_fopen(path, "rb");
if (file) { if (file) {
float min_value[] = {0.0f, 0.0f, 0.0f};
float max_value[] = {1.0f, 1.0f, 1.0f};
float red, green, blue; float red, green, blue;
unsigned width_1d = 0; unsigned width_1d = 0;
unsigned width_3d = 0; unsigned width_3d = 0;
@ -214,14 +195,10 @@ static void *load_cube_file(const char *const path, uint32_t *const width)
break; break;
} else if (sscanf(line, "DOMAIN_MIN %f %f %f", &f[0], } else if (sscanf(line, "DOMAIN_MIN %f %f %f", &f[0],
&f[1], &f[2]) == 3) { &f[1], &f[2]) == 3) {
min_value[0] = f[0]; vec3_set(domain_min, f[0], f[1], f[2]);
min_value[1] = f[1];
min_value[2] = f[2];
} else if (sscanf(line, "DOMAIN_MAX %f %f %f", &f[0], } else if (sscanf(line, "DOMAIN_MAX %f %f %f", &f[0],
&f[1], &f[2]) == 3) { &f[1], &f[2]) == 3) {
max_value[0] = f[0]; vec3_set(domain_max, f[0], f[1], f[2]);
max_value[1] = f[1];
max_value[2] = f[2];
} else if (sscanf(line, "LUT_1D_SIZE %u", &u) == 1) { } else if (sscanf(line, "LUT_1D_SIZE %u", &u) == 1) {
width_1d = u; width_1d = u;
} else if (sscanf(line, "LUT_3D_SIZE %u", &u) == 1) { } else if (sscanf(line, "LUT_3D_SIZE %u", &u) == 1) {
@ -229,17 +206,28 @@ static void *load_cube_file(const char *const path, uint32_t *const width)
} }
} }
if (data_found) { if (domain_min->x >= domain_max->x ||
domain_min->y >= domain_max->y ||
domain_min->z >= domain_max->z) {
blog(LOG_WARNING,
"Invalid CUBE LUT domain: [%f, %f], [%f, %f], [%f, %f]",
domain_min->x, domain_max->x, domain_min->y,
domain_max->y, domain_min->z, domain_max->z);
} else if (data_found) {
if (width_1d > 0) { if (width_1d > 0) {
data = load_1d_lut(file, width_1d, red, green, data = load_1d_lut(file, width_1d, red, green,
blue); blue);
if (data) if (data) {
*width = width_1d; *width = width_1d;
*dim = CLUT_1D;
}
} else if (width_3d > 0) { } else if (width_3d > 0) {
data = load_3d_lut(file, width_3d, red, green, data = load_3d_lut(file, width_3d, red, green,
blue); blue);
if (data) if (data) {
*width = width_3d; *width = width_3d;
*dim = CLUT_3D;
}
} }
} }
@ -276,12 +264,18 @@ static void color_grade_filter_update(void *data, obs_data_t *settings)
obs_leave_graphics(); obs_leave_graphics();
if (path) { if (path) {
vec3_set(&filter->domain_min, 0.0f, 0.0f, 0.0f);
vec3_set(&filter->domain_max, 1.0f, 1.0f, 1.0f);
const char *const ext = os_get_path_extension(path); const char *const ext = os_get_path_extension(path);
if (ext && astrcmpi(ext, ".cube") == 0) { if (ext && astrcmpi(ext, ".cube") == 0) {
filter->cube_data = filter->cube_data = load_cube_file(
load_cube_file(path, &filter->cube_width); path, &filter->cube_width, &filter->domain_min,
&filter->domain_max, &filter->clut_dim);
} else { } else {
gs_image_file_init(&filter->image, path); gs_image_file_init(&filter->image, path);
filter->cube_width = LUT_WIDTH;
filter->clut_dim = CLUT_3D;
} }
} }
@ -292,16 +286,48 @@ static void color_grade_filter_update(void *data, obs_data_t *settings)
filter->target = make_clut_texture_png( filter->target = make_clut_texture_png(
filter->image.format, filter->image.cx, filter->image.format, filter->image.cx,
filter->image.cy, filter->image.texture_data); filter->image.cy, filter->image.texture_data);
filter->clut_scale = const float clut_scale = (float)(LUT_WIDTH - 1);
(float)(LUT_WIDTH - 1) / (float)LUT_WIDTH; vec3_set(&filter->clut_scale, clut_scale, clut_scale,
filter->clut_offset = 0.5f / (float)LUT_WIDTH; clut_scale);
vec3_set(&filter->clut_offset, 0.f, 0.f, 0.f);
} else if (filter->cube_data) { } else if (filter->cube_data) {
const uint32_t width = filter->cube_width; const uint32_t width = filter->cube_width;
if (filter->clut_dim == CLUT_1D) {
filter->target = gs_texture_create(
width, 1, GS_RGBA16F, 1,
(const uint8_t **)&filter->cube_data,
0);
} else {
filter->target = gs_voltexture_create( filter->target = gs_voltexture_create(
width, width, width, GS_RGBA16F, 1, width, width, width, GS_RGBA16F, 1,
(uint8_t **)&filter->cube_data, 0); (const uint8_t **)&filter->cube_data,
filter->clut_scale = (float)(width - 1) / (float)width; 0);
filter->clut_offset = 0.5f / (float)width; }
struct vec3 domain_scale;
vec3_sub(&domain_scale, &filter->domain_max,
&filter->domain_min);
const float width_minus_one = (float)(width - 1);
vec3_set(&filter->clut_scale, width_minus_one,
width_minus_one, width_minus_one);
vec3_div(&filter->clut_scale, &filter->clut_scale,
&domain_scale);
vec3_neg(&filter->clut_offset, &filter->domain_min);
vec3_mul(&filter->clut_offset, &filter->clut_offset,
&filter->clut_scale);
/* 1D shader wants normalized UVW */
if (filter->clut_dim == CLUT_1D) {
vec3_divf(&filter->clut_scale,
&filter->clut_scale, (float)width);
vec3_addf(&filter->clut_offset,
&filter->clut_offset, 0.5f);
vec3_divf(&filter->clut_offset,
&filter->clut_offset, (float)width);
}
} }
} }
@ -398,21 +424,36 @@ static void color_grade_filter_render(void *data, gs_effect_t *effect)
OBS_ALLOW_DIRECT_RENDERING)) OBS_ALLOW_DIRECT_RENDERING))
return; return;
param = gs_effect_get_param_by_name(filter->effect, "clut"); const char *clut_texture_name = "clut_3d";
const char *tech_name = "Draw3D";
if (filter->clut_dim == CLUT_1D) {
clut_texture_name = "clut_1d";
tech_name = "Draw1D";
}
param = gs_effect_get_param_by_name(filter->effect, clut_texture_name);
gs_effect_set_texture(param, filter->target); gs_effect_set_texture(param, filter->target);
param = gs_effect_get_param_by_name(filter->effect, "clut_amount"); param = gs_effect_get_param_by_name(filter->effect, "clut_amount");
gs_effect_set_float(param, filter->clut_amount); gs_effect_set_float(param, filter->clut_amount);
param = gs_effect_get_param_by_name(filter->effect, "clut_scale"); param = gs_effect_get_param_by_name(filter->effect, "clut_scale");
gs_effect_set_float(param, filter->clut_scale); gs_effect_set_vec3(param, &filter->clut_scale);
param = gs_effect_get_param_by_name(filter->effect, "clut_offset"); param = gs_effect_get_param_by_name(filter->effect, "clut_offset");
gs_effect_set_float(param, filter->clut_offset); gs_effect_set_vec3(param, &filter->clut_offset);
obs_source_process_filter_end(filter->context, filter->effect, 0, 0); param = gs_effect_get_param_by_name(filter->effect, "domain_min");
gs_effect_set_vec3(param, &filter->domain_min);
UNUSED_PARAMETER(effect); param = gs_effect_get_param_by_name(filter->effect, "domain_max");
gs_effect_set_vec3(param, &filter->domain_max);
param = gs_effect_get_param_by_name(filter->effect, "cube_width_i");
gs_effect_set_float(param, 1.0f / filter->cube_width);
obs_source_process_filter_tech_end(filter->context, filter->effect, 0,
0, tech_name);
} }
struct obs_source_info color_grade_filter = { struct obs_source_info color_grade_filter = {

View File

@ -1,10 +1,14 @@
uniform float4x4 ViewProj; uniform float4x4 ViewProj;
uniform texture2d image; uniform texture2d image;
uniform texture3d clut; uniform texture2d clut_1d;
uniform texture3d clut_3d;
uniform float clut_amount; uniform float clut_amount;
uniform float clut_scale; uniform float3 clut_scale;
uniform float clut_offset; uniform float3 clut_offset;
uniform float3 domain_min;
uniform float3 domain_max;
uniform float cube_width_i;
sampler_state textureSampler { sampler_state textureSampler {
Filter = Linear; Filter = Linear;
@ -31,22 +35,133 @@ VertDataOut VSDefault(VertDataIn v_in)
return vert_out; return vert_out;
} }
/*
 * LUT1D: pixel shader that applies a 1D lookup table to each color channel
 * independently.
 *
 * For every channel (r, g, b) whose value lies inside
 * [domain_min, domain_max] for that channel, the value is mapped to a
 * horizontal texture coordinate with clut_scale/clut_offset, the matching
 * channel is read from clut_1d at (u, 0.5), and the result is blended with
 * the original value by clut_amount.  Channels outside the domain, and the
 * alpha channel, are returned unchanged.
 *
 * NOTE(review): this listing is a side-by-side diff.  Where a line below
 * carries two statements, the first one belongs to the removed LUT()
 * function; the description above covers the LUT1D statements only.
 */
float4 LUT(VertDataOut v_in) : TARGET float4 LUT1D(VertDataOut v_in) : TARGET
{ {
float4 textureColor = image.Sample(textureSampler, v_in.uv); float4 textureColor = image.Sample(textureSampler, v_in.uv);
/* Red channel: only remapped when inside the LUT domain. */
float3 clut_uvw = textureColor.rgb * clut_scale + clut_offset; if (textureColor.r >= domain_min.r && textureColor.r <= domain_max.r) {
float3 luttedColor = clut.Sample(textureSampler, clut_uvw).rgb; float u = textureColor.r * clut_scale.r + clut_offset.r;
/* v = 0.5 samples the vertical center of the LUT texture. */
float channel = clut_1d.Sample(textureSampler, float2(u, 0.5)).r;
textureColor.r = lerp(textureColor.r, channel, clut_amount);
}
/* Green channel: same mapping, using the .g components. */
float3 final_color = lerp(textureColor.rgb, luttedColor, clut_amount); if (textureColor.g >= domain_min.g && textureColor.g <= domain_max.g) {
return float4(final_color.rgb, textureColor.a); float u = textureColor.g * clut_scale.g + clut_offset.g;
float channel = clut_1d.Sample(textureSampler, float2(u, 0.5)).g;
textureColor.g = lerp(textureColor.g, channel, clut_amount);
}
/* Blue channel: same mapping, using the .b components. */
if (textureColor.b >= domain_min.b && textureColor.b <= domain_max.b) {
float u = textureColor.b * clut_scale.b + clut_offset.b;
float channel = clut_1d.Sample(textureSampler, float2(u, 0.5)).b;
textureColor.b = lerp(textureColor.b, channel, clut_amount);
}
/* Alpha is passed through untouched. */
return textureColor;
} }
/*
 * LUT3D: pixel shader that applies a 3D lookup table using tetrahedral
 * interpolation.
 *
 * The source pixel is remapped only when all three channels lie inside
 * [domain_min, domain_max]; otherwise it is returned unchanged.  Inside the
 * domain, the color is converted to a LUT position (clut_scale/clut_offset),
 * split into an integer cell (floor_pos) and a fractional part (fracRGB),
 * and the ordering of the three fractional components selects the two
 * intermediate corners (uvw1, uvw2) between the cell's low corner (uvw0) and
 * its diagonally opposite corner (uvw3).  The four corner contributions are
 * fetched with two filtered samples, and the result is blended with the
 * original color by clut_amount.  Alpha is never modified.
 *
 * NOTE(review): this listing is a side-by-side diff.  The leading
 * "technique Draw" on the signature line is text from the removed column.
 */
technique Draw float4 LUT3D(VertDataOut v_in) : TARGET
{
float4 textureColor = image.Sample(textureSampler, v_in.uv);
float r = textureColor.r;
float g = textureColor.g;
float b = textureColor.b;
/* All three channels must be inside the domain for the LUT to apply. */
if (r >= domain_min.r && r <= domain_max.r &&
g >= domain_min.g && g <= domain_max.g &&
b >= domain_min.b && b <= domain_max.b)
{
/* Position in the LUT grid; floor/fraction split it into cell + offset. */
float3 clut_pos = textureColor.rgb * clut_scale + clut_offset;
float3 floor_pos = floor(clut_pos);
float3 fracRGB = clut_pos - floor_pos;
/* +0.5 addresses the center of the low-corner texel, +1.5 the center of
 * the next texel along each axis; cube_width_i scales both to UVW. */
float3 uvw0 = (floor_pos + 0.5) * cube_width_i;
float3 uvw3 = (floor_pos + 1.5) * cube_width_i;
/* fracL/fracM/fracS: largest, middle and smallest fractional component.
 * uvw1 steps from uvw0 along the axis of the largest fraction; uvw2
 * additionally steps along the axis of the middle fraction. */
float fracL, fracM, fracS;
float3 uvw1, uvw2;
if (fracRGB.r < fracRGB.g) {
if (fracRGB.r < fracRGB.b) {
if (fracRGB.g < fracRGB.b) {
// f(R) < f(G) < f(B)
fracL = fracRGB.b;
fracM = fracRGB.g;
fracS = fracRGB.r;
uvw1 = float3(uvw0.x, uvw0.y, uvw3.z);
uvw2 = float3(uvw0.x, uvw3.y, uvw3.z);
} else {
// f(R) < f(B) <= f(G)
fracL = fracRGB.g;
fracM = fracRGB.b;
fracS = fracRGB.r;
uvw1 = float3(uvw0.x, uvw3.y, uvw0.z);
uvw2 = float3(uvw0.x, uvw3.y, uvw3.z);
}
} else {
// f(B) <= f(R) < f(G)
fracL = fracRGB.g;
fracM = fracRGB.r;
fracS = fracRGB.b;
uvw1 = float3(uvw0.x, uvw3.y, uvw0.z);
uvw2 = float3(uvw3.x, uvw3.y, uvw0.z);
}
} else if (fracRGB.r < fracRGB.b) {
// f(G) <= f(R) < f(B)
fracL = fracRGB.b;
fracM = fracRGB.r;
fracS = fracRGB.g;
uvw1 = float3(uvw0.x, uvw0.y, uvw3.z);
uvw2 = float3(uvw3.x, uvw0.y, uvw3.z);
} else if (fracRGB.g < fracRGB.b) {
// f(G) < f(B) <= f(R)
fracL = fracRGB.r;
fracM = fracRGB.b;
fracS = fracRGB.g;
uvw1 = float3(uvw3.x, uvw0.y, uvw0.z);
uvw2 = float3(uvw3.x, uvw0.y, uvw3.z);
} else {
// f(B) <= f(G) <= f(R)
fracL = fracRGB.r;
fracM = fracRGB.g;
fracS = fracRGB.b;
uvw1 = float3(uvw3.x, uvw0.y, uvw0.z);
uvw2 = float3(uvw3.x, uvw3.y, uvw0.z);
}
/* use filtering to collapse 4 taps to 2 */
/* use max to kill potential zero-divide NaN */
/* Corners 0 and 1 carry weights (1 - fracL) and (fracL - fracM), which
 * sum to coeff01; sampling between them at weight01 lets the sampler's
 * linear filter produce their normalized mix in one tap. */
float coeff01 = (1.0 - fracM);
float weight01 = max((fracL - fracM) / coeff01, 0.0);
float3 uvw01 = lerp(uvw0, uvw1, weight01);
float3 sample01 = clut_3d.Sample(textureSampler, uvw01).rgb;
/* Corners 2 and 3 carry weights (fracM - fracS) and fracS, which sum to
 * coeff23; the same single-tap trick is applied between uvw2 and uvw3. */
float coeff23 = fracM;
float weight23 = max(fracS / coeff23, 0.0);
float3 uvw23 = lerp(uvw2, uvw3, weight23);
float3 sample23 = clut_3d.Sample(textureSampler, uvw23).rgb;
/* Recombine the two taps, then blend with the source by clut_amount. */
float3 luttedColor = (coeff01 * sample01) + (coeff23 * sample23);
textureColor.rgb = lerp(textureColor.rgb, luttedColor, clut_amount);
}
return textureColor;
}
/* Technique for 1D LUTs: default vertex shader plus the LUT1D pixel shader.
 * The filter's render code requests it by the name "Draw1D" when the loaded
 * table is CLUT_1D.
 * NOTE(review): duplicated tokens on the lines below come from the removed
 * column of the side-by-side diff (the old "Draw" technique using LUT). */
technique Draw1D
{ {
pass pass
{ {
vertex_shader = VSDefault(v_in); vertex_shader = VSDefault(v_in);
pixel_shader = LUT(v_in); pixel_shader = LUT1D(v_in);
}
}
/* Technique for 3D LUTs: default vertex shader plus the LUT3D pixel shader.
 * The filter's render code requests it by the name "Draw3D", which is its
 * default when the table is not CLUT_1D.
 * NOTE(review): the doubled closing braces below come from the side-by-side
 * diff layout (removed and added columns fused on one line). */
technique Draw3D
{
pass
{
vertex_shader = VSDefault(v_in);
pixel_shader = LUT3D(v_in);
} }
} }