diff --git a/libobs/graphics/graphics-ffmpeg.c b/libobs/graphics/graphics-ffmpeg.c index 2b771b630..ca9406e7f 100644 --- a/libobs/graphics/graphics-ffmpeg.c +++ b/libobs/graphics/graphics-ffmpeg.c @@ -6,6 +6,7 @@ #include #include "../obs-ffmpeg-compat.h" +#include "srgb.h" struct ffmpeg_image { const char *file; @@ -121,58 +122,129 @@ fail: #define obs_bswap16(v) __builtin_bswap16(v) #endif +static void *ffmpeg_image_copy_data_straight(struct ffmpeg_image *info, + AVFrame *frame) +{ + const size_t linesize = (size_t)info->cx * 4; + const size_t totalsize = info->cy * linesize; + void *data = bmalloc(totalsize); + + const size_t src_linesize = frame->linesize[0]; + if (linesize != src_linesize) { + const size_t min_line = linesize < src_linesize ? linesize + : src_linesize; + + uint8_t *dst = data; + const uint8_t *src = frame->data[0]; + for (int y = 0; y < info->cy; y++) { + memcpy(dst, src, min_line); + dst += linesize; + src += src_linesize; + } + } else { + memcpy(data, frame->data[0], totalsize); + } + + return data; +} + static void *ffmpeg_image_reformat_frame(struct ffmpeg_image *info, - AVFrame *frame) + AVFrame *frame, + enum gs_image_alpha_mode alpha_mode) { struct SwsContext *sws_ctx = NULL; void *data = NULL; int ret = 0; - if (info->format == AV_PIX_FMT_RGBA || - info->format == AV_PIX_FMT_BGRA || - info->format == AV_PIX_FMT_BGR0) { - const size_t linesize = (size_t)info->cx * 4; - const size_t totalsize = info->cy * linesize; - data = bmalloc(totalsize); - - const size_t src_linesize = frame->linesize[0]; - if (linesize != src_linesize) { + if (info->format == AV_PIX_FMT_BGR0) { + data = ffmpeg_image_copy_data_straight(info, frame); + } else if (info->format == AV_PIX_FMT_RGBA || + info->format == AV_PIX_FMT_BGRA) { + if (alpha_mode == GS_IMAGE_ALPHA_STRAIGHT) { + data = ffmpeg_image_copy_data_straight(info, frame); + } else { + const size_t linesize = (size_t)info->cx * 4; + const size_t totalsize = info->cy * linesize; + data = bmalloc(totalsize); + const size_t src_linesize = frame->linesize[0]; const size_t min_line = linesize < src_linesize ? linesize : src_linesize; - uint8_t *dst = data; const uint8_t *src = frame->data[0]; - for (int y = 0; y < info->cy; y++) { - memcpy(dst, src, min_line); - dst += linesize; - src += src_linesize; + const size_t row_elements = min_line >> 2; + if (alpha_mode == GS_IMAGE_ALPHA_PREMULTIPLY_SRGB) { + for (int y = 0; y < info->cy; y++) { + gs_premultiply_xyza_srgb_loop_restrict( + dst, src, row_elements); + dst += linesize; + src += src_linesize; + } + } else if (alpha_mode == GS_IMAGE_ALPHA_PREMULTIPLY) { + for (int y = 0; y < info->cy; y++) { + gs_premultiply_xyza_loop_restrict( + dst, src, row_elements); + dst += linesize; + src += src_linesize; + } } - } else { - memcpy(data, frame->data[0], totalsize); } } else if (info->format == AV_PIX_FMT_RGBA64BE) { - const size_t linesize = (size_t)info->cx * 8; - data = bmalloc(info->cy * linesize); - + const size_t dst_linesize = (size_t)info->cx * 4; + data = bmalloc(info->cy * dst_linesize); const size_t src_linesize = frame->linesize[0]; - const size_t min_line = linesize < src_linesize ? linesize - : src_linesize; - const size_t pairs = min_line >> 1; - + const size_t src_min_line = (dst_linesize * 2) < src_linesize + ? (dst_linesize * 2) + : src_linesize; + const size_t row_elements = src_min_line >> 3; + uint8_t *dst = data; const uint8_t *src = frame->data[0]; - uint16_t *dst = data; - for (int y = 0; y < info->cy; y++) { - for (size_t x = 0; x < pairs; ++x) { - uint16_t value; - memcpy(&value, src, sizeof(value)); - *dst = obs_bswap16(value); - ++dst; - src += sizeof(value); - } + uint16_t value[4]; + float f[4]; + if (alpha_mode == GS_IMAGE_ALPHA_STRAIGHT) { + for (int y = 0; y < info->cy; y++) { + for (size_t x = 0; x < row_elements; ++x) { + memcpy(value, src, sizeof(value)); + f[0] = (float)obs_bswap16(value[0]) / + 65535.0f; + f[1] = (float)obs_bswap16(value[1]) / + 65535.0f; + f[2] = (float)obs_bswap16(value[2]) / + 65535.0f; + f[3] = (float)obs_bswap16(value[3]) / + 65535.0f; + gs_float3_srgb_linear_to_nonlinear(f); + gs_float4_to_u8x4(dst, f); + dst += sizeof(*dst) * 4; + src += sizeof(value); + } - src += src_linesize - min_line; + src += src_linesize - src_min_line; + } + } else { + for (int y = 0; y < info->cy; y++) { + for (size_t x = 0; x < row_elements; ++x) { + memcpy(value, src, sizeof(value)); + f[0] = (float)obs_bswap16(value[0]) / + 65535.0f; + f[1] = (float)obs_bswap16(value[1]) / + 65535.0f; + f[2] = (float)obs_bswap16(value[2]) / + 65535.0f; + f[3] = (float)obs_bswap16(value[3]) / + 65535.0f; + gs_premultiply_float4(f); + gs_float3_srgb_linear_to_nonlinear(f); + gs_float4_to_u8x4(dst, f); + dst += sizeof(*dst) * 4; + src += sizeof(value); + } + + src += src_linesize - src_min_line; + } } + + info->format = AV_PIX_FMT_RGBA; } else { static const enum AVPixelFormat format = AV_PIX_FMT_BGRA; @@ -222,6 +294,14 @@ static void *ffmpeg_image_reformat_frame(struct ffmpeg_image *info, av_freep(pointers); + if (alpha_mode == GS_IMAGE_ALPHA_PREMULTIPLY_SRGB) { + gs_premultiply_xyza_srgb_loop(data, (size_t)info->cx * + info->cy); + } else if (alpha_mode == GS_IMAGE_ALPHA_PREMULTIPLY) { + gs_premultiply_xyza_loop(data, + (size_t)info->cx * info->cy); + } + info->format = format; } @@ -229,7 +309,8 @@ fail: return data; } -static void *ffmpeg_image_decode(struct ffmpeg_image *info) +static void *ffmpeg_image_decode(struct ffmpeg_image *info, + enum gs_image_alpha_mode alpha_mode) { AVPacket packet = {0}; void *data = NULL; @@ -271,7 +352,7 @@ static void *ffmpeg_image_decode(struct ffmpeg_image *info) } } - data = ffmpeg_image_reformat_frame(info, frame); + data = ffmpeg_image_reformat_frame(info, frame, alpha_mode); fail: av_packet_unref(&packet); @@ -312,7 +393,29 @@ uint8_t *gs_create_texture_file_data(const char *file, uint8_t *data = NULL; if (ffmpeg_image_init(&image, file)) { - data = ffmpeg_image_decode(&image); + data = ffmpeg_image_decode(&image, GS_IMAGE_ALPHA_STRAIGHT); + if (data) { + *format = convert_format(image.format); + *cx_out = (uint32_t)image.cx; + *cy_out = (uint32_t)image.cy; + } + + ffmpeg_image_free(&image); + } + + return data; +} + +uint8_t *gs_create_texture_file_data2(const char *file, + enum gs_image_alpha_mode alpha_mode, + enum gs_color_format *format, + uint32_t *cx_out, uint32_t *cy_out) +{ + struct ffmpeg_image image; + uint8_t *data = NULL; + + if (ffmpeg_image_init(&image, file)) { + data = ffmpeg_image_decode(&image, alpha_mode); if (data) { *format = convert_format(image.format); *cx_out = (uint32_t)image.cx; diff --git a/libobs/graphics/graphics.h b/libobs/graphics/graphics.h index a8d9d59f7..b43bd7b15 100644 --- a/libobs/graphics/graphics.h +++ b/libobs/graphics/graphics.h @@ -559,10 +559,19 @@ EXPORT gs_shader_t *gs_vertexshader_create_from_file(const char *file, EXPORT gs_shader_t *gs_pixelshader_create_from_file(const char *file, char **error_string); +enum gs_image_alpha_mode { + GS_IMAGE_ALPHA_STRAIGHT, + GS_IMAGE_ALPHA_PREMULTIPLY_SRGB, + GS_IMAGE_ALPHA_PREMULTIPLY, +}; + EXPORT gs_texture_t *gs_texture_create_from_file(const char *file); EXPORT uint8_t *gs_create_texture_file_data(const char *file, enum gs_color_format *format, uint32_t *cx, uint32_t *cy); +EXPORT uint8_t *gs_create_texture_file_data2( + const char *file, enum gs_image_alpha_mode alpha_mode, + enum gs_color_format *format, uint32_t *cx, uint32_t *cy); #define GS_FLIP_U (1 << 0) #define GS_FLIP_V (1 << 1) diff --git a/libobs/graphics/image-file.c b/libobs/graphics/image-file.c index 1d6a043f4..b7c39d4dc 100644 --- a/libobs/graphics/image-file.c +++ b/libobs/graphics/image-file.c @@ -18,13 +18,14 @@ #include "image-file.h" #include "../util/base.h" #include "../util/platform.h" +#include "vec4.h" #define blog(level, format, ...) \ blog(level, "%s: " format, __FUNCTION__, __VA_ARGS__) static void *bi_def_bitmap_create(int width, int height) { - return bmalloc(width * height * 4); + return bmalloc((size_t)4 * width * height); } static void bi_def_bitmap_set_opaque(void *bitmap, bool opaque) @@ -71,7 +72,8 @@ static inline void *alloc_mem(gs_image_file_t *image, uint64_t *mem_usage, } static bool init_animated_gif(gs_image_file_t *image, const char *path, - uint64_t *mem_usage) + uint64_t *mem_usage, + enum gs_image_alpha_mode alpha_mode) { bool is_animated_gif = true; gif_result result; @@ -156,9 +158,18 @@ static bool init_animated_gif(gs_image_file_t *image, const char *path, image->format = GS_RGBA; if (mem_usage) { - *mem_usage += image->cx * image->cy * 4; + *mem_usage += (size_t)4 * image->cx * image->cy; *mem_usage += size; } + + if (alpha_mode == GS_IMAGE_ALPHA_PREMULTIPLY_SRGB) { + gs_premultiply_xyza_srgb_loop(image->gif.frame_image, + (size_t)image->cx * + image->cy); + } else if (alpha_mode == GS_IMAGE_ALPHA_PREMULTIPLY) { + gs_premultiply_xyza_loop(image->gif.frame_image, + (size_t)image->cx * image->cy); + } } else { gif_finalise(&image->gif); bfree(image->gif_data); @@ -180,7 +191,8 @@ not_animated: } static void gs_image_file_init_internal(gs_image_file_t *image, - const char *file, uint64_t *mem_usage) + const char *file, uint64_t *mem_usage, + enum gs_image_alpha_mode alpha_mode) { size_t len; @@ -195,12 +207,13 @@ static void gs_image_file_init_internal(gs_image_file_t *image, len = strlen(file); if (len > 4 && strcmp(file + len - 4, ".gif") == 0) { - if (init_animated_gif(image, file, mem_usage)) + if (init_animated_gif(image, file, mem_usage, alpha_mode)) { return; + } } - image->texture_data = gs_create_texture_file_data( - file, &image->format, &image->cx, &image->cy); + image->texture_data = gs_create_texture_file_data2( + file, alpha_mode, &image->format, &image->cx, &image->cy); if (mem_usage) { *mem_usage += image->cx * image->cy * @@ -216,7 +229,7 @@ static void gs_image_file_init_internal(gs_image_file_t *image, void gs_image_file_init(gs_image_file_t *image, const char *file) { - gs_image_file_init_internal(image, file, NULL); + gs_image_file_init_internal(image, file, NULL, GS_IMAGE_ALPHA_STRAIGHT); } void gs_image_file_free(gs_image_file_t *image) @@ -241,7 +254,16 @@ void gs_image_file_free(gs_image_file_t *image) void gs_image_file2_init(gs_image_file2_t *if2, const char *file) { - gs_image_file_init_internal(&if2->image, file, &if2->mem_usage); + gs_image_file_init_internal(&if2->image, file, &if2->mem_usage, + GS_IMAGE_ALPHA_STRAIGHT); +} + +void gs_image_file3_init(gs_image_file3_t *if3, const char *file, + enum gs_image_alpha_mode alpha_mode) +{ + gs_image_file_init_internal(&if3->image2.image, file, + &if3->image2.mem_usage, alpha_mode); + if3->alpha_mode = alpha_mode; } void gs_image_file_init_texture(gs_image_file_t *image) @@ -296,7 +318,8 @@ static inline int calculate_new_frame(gs_image_file_t *image, return new_frame; } -static void decode_new_frame(gs_image_file_t *image, int new_frame) +static void decode_new_frame(gs_image_file_t *image, int new_frame, + enum gs_image_alpha_mode alpha_mode) { if (!image->animation_frame_cache[new_frame]) { int last_frame; @@ -314,14 +337,22 @@ static void decode_new_frame(gs_image_file_t *image, int new_frame) /* decode actual desired frame */ if (gif_decode_frame(&image->gif, new_frame) == GIF_OK) { - size_t pos = new_frame * image->gif.width * - image->gif.height * 4; + const size_t area = + (size_t)image->gif.width * image->gif.height; + size_t pos = new_frame * area * 4; image->animation_frame_cache[new_frame] = image->animation_frame_data + pos; + if (alpha_mode == GS_IMAGE_ALPHA_PREMULTIPLY_SRGB) { + gs_premultiply_xyza_srgb_loop( + image->gif.frame_image, area); + } else if (alpha_mode == GS_IMAGE_ALPHA_PREMULTIPLY) { + gs_premultiply_xyza_loop(image->gif.frame_image, + area); + } + memcpy(image->animation_frame_cache[new_frame], - image->gif.frame_image, - image->gif.width * image->gif.height * 4); + image->gif.frame_image, area * 4); image->last_decoded_frame = new_frame; } @@ -330,7 +361,9 @@ static void decode_new_frame(gs_image_file_t *image, int new_frame) image->cur_frame = new_frame; } -bool gs_image_file_tick(gs_image_file_t *image, uint64_t elapsed_time_ns) +static bool gs_image_file_tick_internal(gs_image_file_t *image, + uint64_t elapsed_time_ns, + enum gs_image_alpha_mode alpha_mode) { int loops; @@ -346,7 +379,7 @@ bool gs_image_file_tick(gs_image_file_t *image, uint64_t elapsed_time_ns) calculate_new_frame(image, elapsed_time_ns, loops); if (new_frame != image->cur_frame) { - decode_new_frame(image, new_frame); + decode_new_frame(image, new_frame, alpha_mode); return true; } } @@ -354,15 +387,49 @@ bool gs_image_file_tick(gs_image_file_t *image, uint64_t elapsed_time_ns) return false; } -void gs_image_file_update_texture(gs_image_file_t *image) +bool gs_image_file_tick(gs_image_file_t *image, uint64_t elapsed_time_ns) +{ + return gs_image_file_tick_internal(image, elapsed_time_ns, false); +} + +bool gs_image_file2_tick(gs_image_file2_t *if2, uint64_t elapsed_time_ns) +{ + return gs_image_file_tick_internal(&if2->image, elapsed_time_ns, false); +} + +bool gs_image_file3_tick(gs_image_file3_t *if3, uint64_t elapsed_time_ns) +{ + return gs_image_file_tick_internal(&if3->image2.image, elapsed_time_ns, + if3->alpha_mode); +} + +static void +gs_image_file_update_texture_internal(gs_image_file_t *image, + enum gs_image_alpha_mode alpha_mode) { if (!image->is_animated_gif || !image->loaded) return; if (!image->animation_frame_cache[image->cur_frame]) - decode_new_frame(image, image->cur_frame); + decode_new_frame(image, image->cur_frame, alpha_mode); gs_texture_set_image(image->texture, image->animation_frame_cache[image->cur_frame], image->gif.width * 4, false); } + +void gs_image_file_update_texture(gs_image_file_t *image) +{ + gs_image_file_update_texture_internal(image, false); +} + +void gs_image_file2_update_texture(gs_image_file2_t *if2) +{ + gs_image_file_update_texture_internal(&if2->image, false); +} + +void gs_image_file3_update_texture(gs_image_file3_t *if3) +{ + gs_image_file_update_texture_internal(&if3->image2.image, + if3->alpha_mode); +} diff --git a/libobs/graphics/image-file.h b/libobs/graphics/image-file.h index f0db05faf..1fb62e968 100644 --- a/libobs/graphics/image-file.h +++ b/libobs/graphics/image-file.h @@ -51,8 +51,14 @@ struct gs_image_file2 { uint64_t mem_usage; }; +struct gs_image_file3 { + struct gs_image_file2 image2; + enum gs_image_alpha_mode alpha_mode; +}; + typedef struct gs_image_file gs_image_file_t; typedef struct gs_image_file2 gs_image_file2_t; +typedef struct gs_image_file3 gs_image_file3_t; EXPORT void gs_image_file_init(gs_image_file_t *image, const char *file); EXPORT void gs_image_file_free(gs_image_file_t *image); @@ -64,26 +70,36 @@ EXPORT void gs_image_file_update_texture(gs_image_file_t *image); EXPORT void gs_image_file2_init(gs_image_file2_t *if2, const char *file); +EXPORT bool gs_image_file2_tick(gs_image_file2_t *if2, + uint64_t elapsed_time_ns); +EXPORT void gs_image_file2_update_texture(gs_image_file2_t *if2); + +EXPORT void gs_image_file3_init(gs_image_file3_t *if3, const char *file, + enum gs_image_alpha_mode alpha_mode); + +EXPORT bool gs_image_file3_tick(gs_image_file3_t *if3, + uint64_t elapsed_time_ns); +EXPORT void gs_image_file3_update_texture(gs_image_file3_t *if3); + static void gs_image_file2_free(gs_image_file2_t *if2) { gs_image_file_free(&if2->image); if2->mem_usage = 0; } -static inline void gs_image_file2_init_texture(gs_image_file2_t *if2) +static void gs_image_file2_init_texture(gs_image_file2_t *if2) { gs_image_file_init_texture(&if2->image); } -static inline bool gs_image_file2_tick(gs_image_file2_t *if2, - uint64_t elapsed_time_ns) +static void gs_image_file3_free(gs_image_file3_t *if3) { - return gs_image_file_tick(&if2->image, elapsed_time_ns); + gs_image_file2_free(&if3->image2); } -static inline void gs_image_file2_update_texture(gs_image_file2_t *if2) +static void gs_image_file3_init_texture(gs_image_file3_t *if3) { - gs_image_file_update_texture(&if2->image); + gs_image_file2_init_texture(&if3->image2); } #ifdef __cplusplus diff --git a/libobs/graphics/srgb.h b/libobs/graphics/srgb.h new file mode 100644 index 000000000..e7837872d --- /dev/null +++ b/libobs/graphics/srgb.h @@ -0,0 +1,187 @@ +/****************************************************************************** + Copyright (C) 2021 by Hugh Bailey + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +******************************************************************************/ + +#pragma once + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static inline float gs_srgb_nonlinear_to_linear(float u) +{ + return (u <= 0.04045f) ? (u / 12.92f) + : powf((u + 0.055f) / 1.055f, 2.4f); +} + +static inline float gs_srgb_linear_to_nonlinear(float u) +{ + return (u <= 0.0031308f) ? (12.92f * u) + : ((1.055f * powf(u, 1.0f / 2.4f)) - 0.055f); +} + +static inline float gs_u8_to_float(uint8_t u) +{ + return (float)u / 255.0f; +} + +static inline void gs_u8x4_to_float4(float *f, const uint8_t *u) +{ + f[0] = gs_u8_to_float(u[0]); + f[1] = gs_u8_to_float(u[1]); + f[2] = gs_u8_to_float(u[2]); + f[3] = gs_u8_to_float(u[3]); +} + +static inline uint8_t gs_float_to_u8(float f) +{ + return (uint8_t)(f * 255.0f + 0.5f); +} + +static inline void gs_premultiply_float4(float *f) +{ + f[0] *= f[3]; + f[1] *= f[3]; + f[2] *= f[3]; +} + +static inline void gs_float3_to_u8x3(uint8_t *u, const float *f) +{ + u[0] = gs_float_to_u8(f[0]); + u[1] = gs_float_to_u8(f[1]); + u[2] = gs_float_to_u8(f[2]); +} + +static inline void gs_float4_to_u8x4(uint8_t *u, const float *f) +{ + u[0] = gs_float_to_u8(f[0]); + u[1] = gs_float_to_u8(f[1]); + u[2] = gs_float_to_u8(f[2]); + u[3] = gs_float_to_u8(f[3]); +} + +static inline void gs_float3_srgb_nonlinear_to_linear(float *f) +{ + f[0] = gs_srgb_nonlinear_to_linear(f[0]); + f[1] = gs_srgb_nonlinear_to_linear(f[1]); + f[2] = gs_srgb_nonlinear_to_linear(f[2]); +} + +static inline void gs_float3_srgb_linear_to_nonlinear(float *f) +{ + f[0] = gs_srgb_linear_to_nonlinear(f[0]); + f[1] = gs_srgb_linear_to_nonlinear(f[1]); + f[2] = gs_srgb_linear_to_nonlinear(f[2]); +} + +static inline void gs_premultiply_xyza(uint8_t *data) +{ + uint8_t u[4]; + float f[4]; + memcpy(&u, data, sizeof(u)); + gs_u8x4_to_float4(f, u); + gs_premultiply_float4(f); + gs_float3_to_u8x3(u, f); + memcpy(data, &u, sizeof(u)); +} + +static inline void gs_premultiply_xyza_srgb(uint8_t *data) +{ + uint8_t u[4]; + float f[4]; + memcpy(&u, data, sizeof(u)); + gs_u8x4_to_float4(f, u); + gs_float3_srgb_nonlinear_to_linear(f); + gs_premultiply_float4(f); + gs_float3_srgb_linear_to_nonlinear(f); + gs_float3_to_u8x3(u, f); + memcpy(data, &u, sizeof(u)); +} + +static inline void gs_premultiply_xyza_restrict(uint8_t *__restrict dst, + const uint8_t *__restrict src) +{ + uint8_t u[4]; + float f[4]; + memcpy(&u, src, sizeof(u)); + gs_u8x4_to_float4(f, u); + gs_premultiply_float4(f); + gs_float3_to_u8x3(u, f); + memcpy(dst, &u, sizeof(u)); +} + +static inline void +gs_premultiply_xyza_srgb_restrict(uint8_t *__restrict dst, + const uint8_t *__restrict src) +{ + uint8_t u[4]; + float f[4]; + memcpy(&u, src, sizeof(u)); + gs_u8x4_to_float4(f, u); + gs_float3_srgb_nonlinear_to_linear(f); + gs_premultiply_float4(f); + gs_float3_srgb_linear_to_nonlinear(f); + gs_float3_to_u8x3(u, f); + memcpy(dst, &u, sizeof(u)); +} + +static inline void gs_premultiply_xyza_loop(uint8_t *data, size_t texel_count) +{ + for (size_t i = 0; i < texel_count; ++i) { + gs_premultiply_xyza(data); + data += 4; + } +} + +static inline void gs_premultiply_xyza_srgb_loop(uint8_t *data, + size_t texel_count) +{ + for (size_t i = 0; i < texel_count; ++i) { + gs_premultiply_xyza_srgb(data); + data += 4; + } +} + +static inline void +gs_premultiply_xyza_loop_restrict(uint8_t *__restrict dst, + const uint8_t *__restrict src, + size_t texel_count) +{ + for (size_t i = 0; i < texel_count; ++i) { + gs_premultiply_xyza_restrict(dst, src); + dst += 4; + src += 4; + } +} + +static inline void +gs_premultiply_xyza_srgb_loop_restrict(uint8_t *__restrict dst, + const uint8_t *__restrict src, + size_t texel_count) +{ + for (size_t i = 0; i < texel_count; ++i) { + gs_premultiply_xyza_srgb_restrict(dst, src); + dst += 4; + src += 4; + } +} + +#ifdef __cplusplus +} +#endif diff --git a/libobs/graphics/vec4.h b/libobs/graphics/vec4.h index f5c1c2411..d72a96714 100644 --- a/libobs/graphics/vec4.h +++ b/libobs/graphics/vec4.h @@ -18,6 +18,7 @@ #pragma once #include "math-defs.h" +#include "srgb.h" #include "../util/sse-intrin.h" @@ -198,127 +199,50 @@ static inline void vec4_ceil(struct vec4 *dst, const struct vec4 *v) static inline uint32_t vec4_to_rgba(const struct vec4 *src) { + float f[4]; + memcpy(f, src->ptr, sizeof(f)); + uint8_t u[4]; + gs_float4_to_u8x4(u, f); uint32_t val; - val = (uint32_t)((src->x * 255.0f) + 0.5f); - val |= (uint32_t)((src->y * 255.0f) + 0.5f) << 8; - val |= (uint32_t)((src->z * 255.0f) + 0.5f) << 16; - val |= (uint32_t)((src->w * 255.0f) + 0.5f) << 24; + memcpy(&val, u, sizeof(val)); return val; } static inline uint32_t vec4_to_bgra(const struct vec4 *src) { + float f[4]; + memcpy(f, src->ptr, sizeof(f)); + uint8_t u[4]; + gs_float4_to_u8x4(u, f); + uint8_t temp = u[0]; + u[0] = u[2]; + u[2] = temp; uint32_t val; - val = (uint32_t)((src->z * 255.0f) + 0.5f); - val |= (uint32_t)((src->y * 255.0f) + 0.5f) << 8; - val |= (uint32_t)((src->x * 255.0f) + 0.5f) << 16; - val |= (uint32_t)((src->w * 255.0f) + 0.5f) << 24; + memcpy(&val, u, sizeof(val)); return val; } static inline void vec4_from_rgba(struct vec4 *dst, uint32_t rgba) { - dst->x = (float)(rgba & 0xFF) / 255.0f; - rgba >>= 8; - dst->y = (float)(rgba & 0xFF) / 255.0f; - rgba >>= 8; - dst->z = (float)(rgba & 0xFF) / 255.0f; - rgba >>= 8; - dst->w = (float)rgba / 255.0f; + uint8_t u[4]; + memcpy(u, &rgba, sizeof(u)); + gs_u8x4_to_float4(dst->ptr, u); } static inline void vec4_from_bgra(struct vec4 *dst, uint32_t bgra) { - dst->z = (float)(bgra & 0xFF) / 255.0f; - bgra >>= 8; - dst->y = (float)(bgra & 0xFF) / 255.0f; - bgra >>= 8; - dst->x = (float)(bgra & 0xFF) / 255.0f; - bgra >>= 8; - dst->w = (float)bgra / 255.0f; -} - -static inline float srgb_nonlinear_to_linear(float u) -{ - return (u <= 0.04045f) ? (u / 12.92f) - : powf((u + 0.055f) / 1.055f, 2.4f); + uint8_t u[4]; + memcpy(u, &bgra, sizeof(u)); + uint8_t temp = u[0]; + u[0] = u[2]; + u[2] = temp; + gs_u8x4_to_float4(dst->ptr, u); } static inline void vec4_from_rgba_srgb(struct vec4 *dst, uint32_t rgba) { - dst->x = srgb_nonlinear_to_linear((float)(rgba & 0xFF) / 255.0f); - rgba >>= 8; - dst->y = srgb_nonlinear_to_linear((float)(rgba & 0xFF) / 255.0f); - rgba >>= 8; - dst->z = srgb_nonlinear_to_linear((float)(rgba & 0xFF) / 255.0f); - rgba >>= 8; - dst->w = (float)rgba / 255.0f; -} - -static inline void vec4_from_bgra_srgb(struct vec4 *dst, uint32_t bgra) -{ - dst->z = srgb_nonlinear_to_linear((float)(bgra & 0xFF) / 255.0f); - bgra >>= 8; - dst->y = srgb_nonlinear_to_linear((float)(bgra & 0xFF) / 255.0f); - bgra >>= 8; - dst->x = srgb_nonlinear_to_linear((float)(bgra & 0xFF) / 255.0f); - bgra >>= 8; - dst->w = (float)bgra / 255.0f; -} - -static inline void vec4_from_rgba_srgb_premultiply(struct vec4 *dst, - uint32_t rgba) -{ - vec4_from_rgba_srgb(dst, rgba); - dst->x *= dst->w; - dst->y *= dst->w; - dst->z *= dst->w; -} - -static inline void vec4_from_bgra_srgb_premultiply(struct vec4 *dst, - uint32_t bgra) -{ - vec4_from_bgra_srgb(dst, bgra); - dst->x *= dst->w; - dst->y *= dst->w; - dst->z *= dst->w; -} - -static inline float srgb_linear_to_nonlinear(float u) -{ - return (u <= 0.0031308f) ? (12.92f * u) - : ((1.055f * powf(u, 1.0f / 2.4f)) - 0.055f); -} - -static inline uint32_t vec4_to_rgba_srgb(const struct vec4 *src) -{ - uint32_t val; - val = (uint32_t)((srgb_linear_to_nonlinear(src->x) * 255.0f) + 0.5f); - val |= (uint32_t)((srgb_linear_to_nonlinear(src->y) * 255.0f) + 0.5f) - << 8; - val |= (uint32_t)((srgb_linear_to_nonlinear(src->z) * 255.0f) + 0.5f) - << 16; - val |= (uint32_t)((src->w * 255.0f) + 0.5f) << 24; - return val; -} - -static inline uint32_t vec4_to_bgra_srgb(const struct vec4 *src) -{ - uint32_t val; - val = (uint32_t)((srgb_linear_to_nonlinear(src->z) * 255.0f) + 0.5f); - val |= (uint32_t)((srgb_linear_to_nonlinear(src->y) * 255.0f) + 0.5f) - << 8; - val |= (uint32_t)((srgb_linear_to_nonlinear(src->x) * 255.0f) + 0.5f) - << 16; - val |= (uint32_t)((src->w * 255.0f) + 0.5f) << 24; - return val; -} - -static inline void vec4_srgb_linear_to_nonlinear(struct vec4 *dst) -{ - dst->x = srgb_linear_to_nonlinear(dst->x); - dst->y = srgb_linear_to_nonlinear(dst->y); - dst->y = srgb_linear_to_nonlinear(dst->y); + vec4_from_rgba(dst, rgba); + gs_float3_srgb_nonlinear_to_linear(dst->ptr); } EXPORT void vec4_transform(struct vec4 *dst, const struct vec4 *v,