Added FastNoise2 for SIMD noise, but it won't be integrated yet.

It requires C++17, while Godot uses C++14. I was unable to override it,
and I'm not sure how compatible this will be with the runtime.
At least this would be doable in Godot 4.
master
Marc Gilleron 2021-01-09 16:35:40 +00:00
parent 1e95374ccb
commit 973cf729cd
65 changed files with 9634 additions and 7 deletions

132
SCsub
View File

@ -1,9 +1,14 @@
Import('env')
Import('env_modules')
# TODO Support is turned off for now because Godot 3 doesn't compile with C++17.
# FastNoise2 uses C++17 features and STL in its headers as well.
# SIMD noise support would have to wait for Godot 4...
FAST_NOISE_2 = False
env_voxel = env_modules.Clone()
files = [
voxel_files = [
"*.cpp",
"meshers/blocky/*.cpp",
"meshers/transvoxel/*.cpp",
@ -16,7 +21,11 @@ files = [
"generators/graph/*.cpp",
"generators/simple/*.cpp",
"util/*.cpp",
"util/noise/*.cpp",
#"util/noise/*.cpp",
"util/noise/fast_noise_lite.cpp",
"util/noise/fast_noise_lite_gradient.cpp",
"terrain/*.cpp",
"server/*.cpp",
"math/*.cpp",
@ -26,20 +35,129 @@ files = [
if env["tools"]:
# Editor-only stuff
editor_files = [
voxel_editor_files = [
"editor/*.cpp",
"editor/graph/*.cpp",
"editor/terrain/*.cpp",
"editor/fast_noise_lite/*.cpp",
]
files += editor_files
voxel_files += voxel_editor_files
for f in files:
for f in voxel_files:
env_voxel.add_source_files(env.modules_sources, f)
if FAST_NOISE_2:
if env["use_lto"]:
# TODO Auburn warned about issues with LTO and static builds of FastNoise2
# Need to either produce an error, fallback on Scalar, or turn off support entirely?
pass
env_voxel.Append(CPPPATH=["thirdparty/fast_noise_2/include"])
#env_voxel.Append(CPPDEFINES=["VOXEL_SUPPORT_FAST_NOISE_2"])
# Sources added through the common FastNoise2 environment (env_fn2), i.e.
# without any per-SIMD-level compiler flags.
# Every entry needs its own trailing comma: Python concatenates adjacent
# string literals, which would silently merge two paths into one bogus entry.
fn2_sources_common = [
    "thirdparty/fast_noise_2/src/FastNoise/FastNoiseMetadata.cpp",
    "thirdparty/fast_noise_2/src/FastSIMD/FastSIMD.cpp",
]
fn2_sources_scalar = [
"thirdparty/fast_noise_2/src/FastSIMD/FastSIMD_Level_Scalar.cpp"
]
fn2_sources_sse3 = [
"thirdparty/fast_noise_2/src/FastSIMD/FastSIMD_Level_SSE3.cpp"
]
fn2_sources_ssse3 = [
"thirdparty/fast_noise_2/src/FastSIMD/FastSIMD_Level_SSSE3.cpp"
]
fn2_sources_sse2 = [
"thirdparty/fast_noise_2/src/FastSIMD/FastSIMD_Level_SSE2.cpp"
]
fn2_sources_sse41 = [
"thirdparty/fast_noise_2/src/FastSIMD/FastSIMD_Level_SSE41.cpp"
]
fn2_sources_sse42 = [
"thirdparty/fast_noise_2/src/FastSIMD/FastSIMD_Level_SSE42.cpp"
]
fn2_sources_avx2 = [
"thirdparty/fast_noise_2/src/FastSIMD/FastSIMD_Level_AVX2.cpp"
]
fn2_sources_avx512 = [
"thirdparty/fast_noise_2/src/FastSIMD/FastSIMD_Level_AVX512.cpp"
]
fn2_sources_arm = [
"thirdparty/fast_noise_2/src/FastSIMD/FastSIMD_Level_NEON.cpp"
]
env_fn2 = env_voxel.Clone()
# In case we need common options for FastNoise2 we can add them here
env_fn2_scalar = env_fn2.Clone()
env_fn2_sse2 = env_fn2.Clone()
env_fn2_sse3 = env_fn2.Clone()
env_fn2_ssse3 = env_fn2.Clone()
env_fn2_sse41 = env_fn2.Clone()
env_fn2_sse42 = env_fn2.Clone()
env_fn2_avx2 = env_fn2.Clone()
env_fn2_avx512 = env_fn2.Clone()
env_fn2_arm = env_fn2.Clone()
if env.msvc:
if env["bits"] == "32":
# MSVC/64 warns:
# ignoring unknown option "/arch:SSE2" as 64 bit already has SSE2 built in
env_fn2_scalar.Append(CCFLAGS=["/arch:SSE"])
env_fn2_sse2.Append(CCFLAGS=["/arch:SSE2"])
env_fn2_sse3.Append(CCFLAGS=["/arch:SSE2"])
env_fn2_ssse3.Append(CCFLAGS=["/arch:SSE2"])
env_fn2_sse41.Append(CCFLAGS=["/arch:SSE2"])
env_fn2_sse42.Append(CCFLAGS=["/arch:SSE2"])
env_fn2_avx2.Append(CCFLAGS=["/arch:AVX2"])
env_fn2_avx512.Append(CCFLAGS=["/arch:AVX512"])
else: # Clang, GCC, AppleClang
# TODO The Cmake build script still has a big `if(MSVC)` in that section.
# what does it mean?
if env["bits"] == "32":
env_fn2_scalar.Append(CCFLAGS=["-msse"])
env_fn2_sse2.Append(CCFLAGS=["-msse2"])
env_fn2_sse3.Append(CCFLAGS=["-msse3"])
env_fn2_ssse3.Append(CCFLAGS=["-mssse3"])
env_fn2_sse41.Append(CCFLAGS=["-msse4.1"])
env_fn2_sse42.Append(CCFLAGS=["-msse4.2"])
env_fn2_avx2.Append(CCFLAGS=["-mavx2", "-mfma"])
env_fn2_avx512.Append(CCFLAGS=["-mavx512f", "-mavx512dq", "-mfma"])
# TODO This was in the old FastNoiseSIMD repo from Tinmanjuggernaut. Is it still needed?
# if (env["target"] == "release"):
# # gcc 9.2.1 won't compile x64 with -O3
# env_thirdparty_avx512.Append(CCFLAGS=["-mavx512f", "-O2"])
# else:
# env_thirdparty_avx512.Append(CCFLAGS=["-mavx512f"])
env_fn2.add_source_files(env.modules_sources, fn2_sources_common)
env_fn2_scalar.add_source_files(env.modules_sources, fn2_sources_scalar)
env_fn2_sse2.add_source_files(env.modules_sources, fn2_sources_sse2)
env_fn2_sse3.add_source_files(env.modules_sources, fn2_sources_sse3)
env_fn2_ssse3.add_source_files(env.modules_sources, fn2_sources_ssse3)
env_fn2_sse41.add_source_files(env.modules_sources, fn2_sources_sse41)
env_fn2_sse42.add_source_files(env.modules_sources, fn2_sources_sse42)
if env["platform"] == "android":
# Both Android and IOS have ARM chips, but only android build tools have necessary headers
env_fn2_arm.add_source_files(env.modules_sources, fn2_sources_arm)
elif env["platform"] in ["windows", "x11", "osx"]:
# AVX is supported on desktop only
env_fn2_avx2.add_source_files(env.modules_sources, fn2_sources_avx2)
env_fn2_avx512.add_source_files(env.modules_sources, fn2_sources_avx512)
# TODO Check webassembly builds (`env["platform"] == "javascript"`)
# Ignored clang warnings because Godot's codebase is old and isn't using override yet
if env['platform'] == 'osx' or env['platform'] == 'android':
env_voxel.Append(CXXFLAGS=['-Wno-inconsistent-missing-override'])
if env['platform'] in ['osx', 'android']:
env_voxel.Append(CXXFLAGS=['-Wno-inconsistent-missing-override'])
# Doesn't work, because reasons
#if env.msvc:

View File

@ -32,6 +32,7 @@
#include "terrain/voxel_terrain.h"
#include "terrain/voxel_viewer.h"
#include "util/macros.h"
//#include "util/noise/fast_noise_2.h"
#include "util/noise/fast_noise_lite.h"
#include "util/noise/fast_noise_lite_gradient.h"
#include "voxel_string_names.h"
@ -93,6 +94,7 @@ void register_voxel_types() {
ClassDB::register_class<VoxelVoxLoader>();
ClassDB::register_class<FastNoiseLite>();
ClassDB::register_class<FastNoiseLiteGradient>();
//ClassDB::register_class<FastNoise2>(); // See SCsub
// Meshers
ClassDB::register_virtual_class<VoxelMesher>();

150
thirdparty/fast_noise_2/CMakeLists.txt vendored Normal file
View File

@ -0,0 +1,150 @@
# CMakeList.txt : CMake project for FastNoise
cmake_minimum_required(VERSION 3.7.1)
project(FastNoise2 VERSION 0.5.0)
set(CMAKE_CXX_STANDARD 17)
option(FASTNOISE2_NOISETOOL "Build Noise Tool" ON)
option(FASTNOISE2_TESTS "Build Test" OFF)
if(MSVC)
#setup pdb target location
set(pdb_output_dir "${CMAKE_CURRENT_BINARY_DIR}/pdb-files")
set(CMAKE_PDB_OUTPUT_DIRECTORY "${pdb_output_dir}")
set(CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY "${pdb_output_dir}")
#need to sync pdb file writes
add_compile_options("/FS")
endif()
set(install_targets "")
add_subdirectory(src)
if(FASTNOISE2_NOISETOOL)
add_subdirectory(NoiseTool)
endif()
if(FASTNOISE2_TESTS)
add_subdirectory(tests)
endif()
#Install -----------------------------------------------------------
# Introduce variables:
# * CMAKE_INSTALL_LIBDIR
# * CMAKE_INSTALL_BINDIR
include(GNUInstallDirs)
# Layout. This works for all platforms:
# * <prefix>/lib*/cmake/<PROJECT-NAME>
# * <prefix>/lib*/
# * <prefix>/include/
set(config_install_dir "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}")
set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated")
# Configuration
set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake")
set(project_config "${generated_dir}/${PROJECT_NAME}Config.cmake")
set(targets_export_name "${PROJECT_NAME}Targets")
set(namespace "${PROJECT_NAME}::")
# Include module with function 'write_basic_package_version_file'
include(CMakePackageConfigHelpers)
# Configure '<PROJECT-NAME>ConfigVersion.cmake'
# Use:
# * PROJECT_VERSION
write_basic_package_version_file(
"${version_config}" COMPATIBILITY SameMajorVersion
)
# Configure '<PROJECT-NAME>Config.cmake'
# Use variables:
# * TARGETS_EXPORT_NAME
# * PROJECT_NAME
configure_package_config_file(
"cmake/Config.cmake.in"
"${project_config}"
INSTALL_DESTINATION "${config_install_dir}"
)
# Targets:
# * <prefix>/lib/libname.a
# * header location after install: <prefix>/include/${PROJECT_NAME}/include.hpp
# * headers can be included by C++ code `#include <${PROJECT_NAME}/include.hpp>`
install(
TARGETS ${install_targets}
EXPORT "${targets_export_name}"
LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
INCLUDES DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
)
if(FASTNOISE2_NOISETOOL)
if(WIN32)
#need sdl2 dll on windows; on linux it is expected to be installed
if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "4") #32bit
install(
FILES NoiseTool/ThirdParty/SDL2-2.0.12/lib/x86/SDL2.dll
DESTINATION "${CMAKE_INSTALL_BINDIR}"
)
else()
install(
FILES NoiseTool/ThirdParty/SDL2-2.0.12/lib/x64/SDL2.dll
DESTINATION "${CMAKE_INSTALL_BINDIR}"
)
endif()
endif()
endif()
# Headers:
install(
FILES ${install_fastsimd_headers}
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/FastSIMD"
)
install(
FILES ${install_fastnoise_headers}
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/FastNoise"
)
# Config
# * <prefix>/lib/cmake/${PROJECT_NAME}/${PROJECT_NAME}Config.cmake
# * <prefix>/lib/cmake/${PROJECT_NAME}/${PROJECT_NAME}ConfigVersion.cmake
install(
FILES "${project_config}" "${version_config}"
DESTINATION "${config_install_dir}"
)
# Config
# * <prefix>/lib/cmake/${PROJECT_NAME}/${PROJECT_NAME}Targets.cmake
install(
EXPORT "${targets_export_name}"
NAMESPACE "${namespace}"
DESTINATION "${config_install_dir}"
)
if(MSVC)
#install pdbs
get_cmake_property(is_multi GENERATOR_IS_MULTI_CONFIG)
if(is_multi)
set(config_suffix "$<CONFIG>")
else()
set(config_suffix "")
endif()
if(BUILD_SHARED_LIBS)
set(pdb_dst ${CMAKE_INSTALL_BINDIR})
else()
set(pdb_dst ${CMAKE_INSTALL_LIBDIR})
endif()
install(
DIRECTORY "${pdb_output_dir}/${config_suffix}/"
DESTINATION ${pdb_dst}
)
endif()

View File

@ -0,0 +1,240 @@
{
"configurations": [
{
"name": "x64-MSVC-Release",
"generator": "Ninja",
"configurationType": "RelWithDebInfo",
"buildRoot": "${projectDir}\\out\\build\\${name}",
"installRoot": "${projectDir}\\out\\install\\${name}",
"cmakeCommandArgs": "",
"buildCommandArgs": "-v",
"ctestCommandArgs": "",
"inheritEnvironments": [ "msvc_x64" ],
"variables": [
{
"name": "FASTNOISE2_TESTS",
"value": "True",
"type": "BOOL"
}
]
},
{
"name": "x64-MSVC-Debug",
"generator": "Ninja",
"configurationType": "Debug",
"buildRoot": "${projectDir}\\out\\build\\${name}",
"installRoot": "${projectDir}\\out\\install\\${name}",
"cmakeCommandArgs": "",
"buildCommandArgs": "-v",
"ctestCommandArgs": "",
"inheritEnvironments": [ "msvc_x64" ],
"variables": [
{
"name": "FASTNOISE2_TESTS",
"value": "True",
"type": "BOOL"
}
]
},
{
"name": "x86-MSVC-Release",
"generator": "Ninja",
"configurationType": "RelWithDebInfo",
"buildRoot": "${projectDir}\\out\\build\\${name}",
"installRoot": "${projectDir}\\out\\install\\${name}",
"cmakeCommandArgs": "",
"buildCommandArgs": "-v",
"ctestCommandArgs": "",
"inheritEnvironments": [ "msvc_x86" ],
"variables": [
{
"name": "FASTNOISE2_TESTS",
"value": "True",
"type": "BOOL"
}
]
},
{
"name": "x86-MSVC-Debug",
"generator": "Ninja",
"configurationType": "Debug",
"buildRoot": "${projectDir}\\out\\build\\${name}",
"installRoot": "${projectDir}\\out\\install\\${name}",
"cmakeCommandArgs": "",
"buildCommandArgs": "-v",
"ctestCommandArgs": "",
"inheritEnvironments": [ "msvc_x86" ],
"variables": [
{
"name": "FASTNOISE2_TESTS",
"value": "True",
"type": "BOOL"
}
]
},
{
"name": "x64-Clang-Release",
"generator": "Ninja",
"configurationType": "RelWithDebInfo",
"buildRoot": "${projectDir}\\out\\build\\${name}",
"installRoot": "${projectDir}\\out\\install\\${name}",
"cmakeCommandArgs": "",
"buildCommandArgs": "-v",
"ctestCommandArgs": "",
"inheritEnvironments": [ "clang_cl_x64" ],
"variables": [
{
"name": "FASTNOISE2_TESTS",
"value": "True",
"type": "BOOL"
}
]
},
{
"name": "x64-Clang-Debug",
"generator": "Ninja",
"configurationType": "Debug",
"buildRoot": "${projectDir}\\out\\build\\${name}",
"installRoot": "${projectDir}\\out\\install\\${name}",
"cmakeCommandArgs": "",
"buildCommandArgs": "-v",
"ctestCommandArgs": "",
"inheritEnvironments": [ "clang_cl_x64" ],
"variables": [
{
"name": "FASTNOISE2_TESTS",
"value": "True",
"type": "BOOL"
}
]
},
{
"name": "x86-Clang-Release",
"generator": "Ninja",
"configurationType": "RelWithDebInfo",
"buildRoot": "${projectDir}\\out\\build\\${name}",
"installRoot": "${projectDir}\\out\\install\\${name}",
"cmakeCommandArgs": "",
"buildCommandArgs": "-v",
"ctestCommandArgs": "",
"inheritEnvironments": [ "clang_cl_x86" ],
"variables": [
{
"name": "FASTNOISE2_TESTS",
"value": "True",
"type": "BOOL"
}
]
},
{
"name": "x86-Clang-Debug",
"generator": "Ninja",
"configurationType": "Debug",
"buildRoot": "${projectDir}\\out\\build\\${name}",
"installRoot": "${projectDir}\\out\\install\\${name}",
"cmakeCommandArgs": "",
"buildCommandArgs": "-v",
"ctestCommandArgs": "",
"inheritEnvironments": [ "clang_cl_x86" ],
"variables": [
{
"name": "FASTNOISE2_TESTS",
"value": "True",
"type": "BOOL"
}
]
},
{
"name": "x64-WSL-GCC-Debug",
"generator": "Ninja",
"configurationType": "Debug",
"buildRoot": "${projectDir}\\out\\build\\${name}",
"installRoot": "${projectDir}\\out\\install\\${name}",
"cmakeExecutable": "cmake",
"cmakeCommandArgs": "",
"buildCommandArgs": "-v",
"ctestCommandArgs": "",
"inheritEnvironments": [ "linux_x64" ],
"wslPath": "${defaultWSLPath}",
"addressSanitizerRuntimeFlags": "detect_leaks=0",
"variables": [
{
"name": "FASTNOISE2_TESTS",
"value": "True",
"type": "BOOL"
}
]
},
{
"name": "x64-WSL-GCC-Release",
"generator": "Ninja",
"configurationType": "RelWithDebInfo",
"buildRoot": "${projectDir}\\out\\build\\${name}",
"installRoot": "${projectDir}\\out\\install\\${name}",
"cmakeExecutable": "cmake",
"cmakeCommandArgs": "",
"buildCommandArgs": "-v",
"ctestCommandArgs": "",
"inheritEnvironments": [ "linux_x64" ],
"wslPath": "${defaultWSLPath}",
"addressSanitizerRuntimeFlags": "detect_leaks=0",
"variables": [
{
"name": "FASTNOISE2_TESTS",
"value": "True",
"type": "BOOL"
}
]
},
{
"name": "x86-WSL-GCC-Debug",
"generator": "Ninja",
"configurationType": "Debug",
"buildRoot": "${projectDir}\\out\\build\\${name}",
"installRoot": "${projectDir}\\out\\install\\${name}",
"cmakeExecutable": "cmake",
"cmakeCommandArgs": "",
"buildCommandArgs": "-v",
"ctestCommandArgs": "",
"inheritEnvironments": [ "linux_x86" ],
"wslPath": "${defaultWSLPath}",
"addressSanitizerRuntimeFlags": "detect_leaks=0",
"variables": [
{
"name": "CMAKE_CXX_FLAGS",
"value": "-m32"
},
{
"name": "FASTNOISE2_TESTS",
"value": "True",
"type": "BOOL"
}
]
},
{
"name": "x86-WSL-GCC-Release",
"generator": "Ninja",
"configurationType": "RelWithDebInfo",
"buildRoot": "${projectDir}\\out\\build\\${name}",
"installRoot": "${projectDir}\\out\\install\\${name}",
"cmakeExecutable": "cmake",
"cmakeCommandArgs": "",
"buildCommandArgs": "-v",
"ctestCommandArgs": "",
"inheritEnvironments": [ "linux_x86" ],
"wslPath": "${defaultWSLPath}",
"addressSanitizerRuntimeFlags": "detect_leaks=0",
"variables": [
{
"name": "CMAKE_CXX_FLAGS",
"value": "-m32"
},
{
"name": "FASTNOISE2_TESTS",
"value": "True",
"type": "BOOL"
}
]
}
]
}

21
thirdparty/fast_noise_2/LICENSE vendored Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2020 Jordan Peck
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

50
thirdparty/fast_noise_2/README.md vendored Normal file
View File

@ -0,0 +1,50 @@
[![GitHub Actions CI](https://img.shields.io/github/workflow/status/Auburn/FastNoise2/CI?style=flat-square&logo=GitHub "GitHub Actions CI")](https://github.com/Auburn/FastNoise2/actions)
[![Discord](https://img.shields.io/discord/703636892901441577?style=flat-square&logo=discord "Discord")](https://discord.gg/SHVaVfV)
# FastNoise2
[Documentation WIP](https://github.com/Auburn/FastNoise2/wiki)
WIP successor to [FastNoiseSIMD](https://github.com/Auburn/FastNoiseSIMD)
FastNoise2 is a fully featured noise generation library which aims to meet all your coherent noise needs while being extremely fast
Uses FastSIMD to compile classes with multiple SIMD types
Supports:
- 32/64 bit
- Windows
- Linux
- MacOS
- MSVC
- Clang
- GCC
Check the [releases](https://github.com/Auburn/FastNoise2/releases) for early versions of the Noise Tool
![NoiseTool](https://user-images.githubusercontent.com/1349548/90967950-4e8da600-e4de-11ea-902a-94e72cb86481.png)
# Getting Started
There are 2 ways to use FastNoise2: creating a node tree structure in code, or importing a serialised node tree created using the NoiseTool.
This is creating a Simplex Fractal FBm with 5 octaves from code:
```
auto fnSimplex = FastNoise::New<FastNoise::Simplex>();
auto fnFractal = FastNoise::New<FastNoise::FractalFBm>();
fnFractal->SetSource( fnSimplex );
fnFractal->SetOctaveCount( 5 );
fnFractal->GenUniformGrid2D( ... );
```
Here is the same Simplex Fractal FBm with 5 octaves but using serialised data from the NoiseTool:
```
FastNoise::SmartNode<> fnGenerator = FastNoise::NewFromEncodedNodeTree( "DQAFAAAAAAAAQAgAAAAAAD8=" );
fnGenerator->GenUniformGrid2D( ... );
```
This is the node graph for the above from the NoiseTool
![SimplexFractalNodes](https://user-images.githubusercontent.com/1349548/90897006-72f16180-e3bc-11ea-8cc3-a68daed7b6c1.png)

View File

@ -0,0 +1,6 @@
include(CMakeFindDependencyMacro)
@PACKAGE_INIT@
include("${CMAKE_CURRENT_LIST_DIR}/@TARGETS_EXPORT_NAME@.cmake")
check_required_components("@PROJECT_NAME@")

View File

@ -0,0 +1,33 @@
#pragma once
#include <memory>
#include "FastSIMD/FastSIMD.h"
#include "FastNoise_Config.h"
#include "Generators/BasicGenerators.h"
#include "Generators/Value.h"
#include "Generators/Perlin.h"
#include "Generators/Simplex.h"
#include "Generators/Cellular.h"
#include "Generators/Fractal.h"
#include "Generators/DomainWarp.h"
#include "Generators/DomainWarpFractal.h"
#include "Generators/Modifiers.h"
#include "Generators/Blends.h"
namespace FastNoise
{
// Creates a node of generator type T and wraps it in a SmartNode.
// maxLevel is forwarded unchanged to FastSIMD::New<T>; T must derive from
// Generator, otherwise compilation stops with the message below.
template<typename T>
inline SmartNode<T> New( FastSIMD::eLevel maxLevel = FastSIMD::Level_Null )
{
    static_assert( std::is_base_of<Generator, T>::value, "Use FastSIMD::New() to create non FastNoise classes" );

    SmartNode<T> node( FastSIMD::New<T>( maxLevel ) );
    return node;
}
// Builds a node tree from an encoded string by delegating to
// Metadata::DeserialiseSmartNode, forwarding both arguments unchanged.
inline SmartNode<> NewFromEncodedNodeTree( const char* encodedNodeTreeString, FastSIMD::eLevel maxLevel = FastSIMD::Level_Null )
{
    SmartNode<> root = Metadata::DeserialiseSmartNode( encodedNodeTreeString, maxLevel );
    return root;
}
}

View File

@ -0,0 +1,331 @@
#pragma once
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <tuple>
#include <type_traits>
#include <vector>

#include "FastNoise_Config.h"
#include "FastSIMD/FastSIMD.h"
namespace FastNoise
{
class Generator;
template<typename T>
struct PerDimensionVariable;
struct NodeData;
struct Metadata
{
// Stores the class name and registers this instance in the static metadata
// list; the index returned by AddMetadataClass becomes this node's id.
// Only the pointer is kept (no copy), so className must outlive this object.
Metadata( const char* className )
{
name = className;
id = AddMetadataClass( this );
}
// Returns the static list of every registered Metadata instance, in
// registration order (a Metadata's id is its index in this list).
static const std::vector<const Metadata*>& GetMetadataClasses()
{
return sMetadataClasses;
}
// Looks up the metadata registered under nodeId.
// Returns nullptr when nodeId is not a valid index into the registered list.
static const Metadata* GetMetadataClass( std::uint16_t nodeId )
{
    if( nodeId >= sMetadataClasses.size() )
    {
        return nullptr;
    }

    return sMetadataClasses[nodeId];
}
// Serialisation entry points; declared here, defined elsewhere.
// NOTE(review): parameter names suggest the text form is base64 - confirm against the implementation.

// Serialises the tree rooted at nodeData to a string.
// NOTE(review): the meaning of fixUp is not visible from this header.
static std::string SerialiseNodeData( NodeData* nodeData, bool fixUp = false );
// Deserialises a node tree and returns its root as a SmartNode; level is the requested SIMD level.
static SmartNode<> DeserialiseSmartNode( const char* serialisedBase64NodeData, FastSIMD::eLevel level = FastSIMD::Level_Null );
// Deserialises into NodeData objects. nodeDataOut receives unique_ptr-owned NodeData;
// NOTE(review): presumably the returned raw pointer refers to one of them - verify ownership.
static NodeData* DeserialiseNodeData( const char* serialisedBase64NodeData, std::vector<std::unique_ptr<NodeData>>& nodeDataOut );
struct MemberVariable
{
enum eType
{
EFloat,
EInt,
EEnum
};
// Storage for one setting that is either a float or a 32-bit integer.
// The union does not record which member was written.
// NOTE(review): presumably the enclosing MemberVariable::type tells readers
// which member to use - confirm at the call sites.
union ValueUnion
{
    float f;
    std::int32_t i;

    // Stores a float. Also acts as the default constructor (value 0).
    ValueUnion( float v = 0 ) : f( v ) {}

    // Stores a 32-bit integer.
    ValueUnion( std::int32_t v ) : i( v ) {}

    // Conversions are const-qualified so that a value can also be read
    // through a const object or const reference.
    operator float() const
    {
        return f;
    }

    operator std::int32_t() const
    {
        return i;
    }

    // Compares the integer member only, whichever member was last written.
    bool operator ==( const ValueUnion& rhs ) const
    {
        return i == rhs.i;
    }
};
const char* name;
eType type;
int dimensionIdx = -1;
ValueUnion valueDefault, valueMin, valueMax;
std::vector<const char*> enumNames;
std::function<void( Generator*, ValueUnion )> setFunc;
};
// Registers a non-enum member variable whose setter is a callable object.
//   name        - stored as a raw pointer (not copied)
//   defaultV    - default value
//   func        - callable invoked as func( concreteGenerator, value ); the concrete
//                 generator pointer type is taken from the first parameter of func's operator()
//   minV / maxV - stored as valueMin / valueMax (both default to 0)
// The variable is tagged EFloat when T is float, otherwise EInt.
template<typename T, typename U, typename = std::enable_if_t<!std::is_enum_v<T>>>
void AddVariable( const char* name, T defaultV, U&& func, T minV = 0, T maxV = 0 )
{
MemberVariable member;
member.name = name;
member.valueDefault = defaultV;
member.valueMin = minV;
member.valueMax = maxV;
member.type = std::is_same_v<T, float> ? MemberVariable::EFloat : MemberVariable::EInt;
// func is captured by copy; the dynamic_cast result is passed on without a null check.
member.setFunc = [func]( Generator* g, MemberVariable::ValueUnion v ) { func( dynamic_cast<GetArg<U, 0>>(g), v ); };
memberVariables.push_back( member );
}
template<typename T, typename U, typename = std::enable_if_t<!std::is_enum_v<T>>>
void AddVariable( const char* name, T defaultV, void(U::* func)(T), T minV = 0, T maxV = 0 )
{
MemberVariable member;
member.name = name;
member.valueDefault = defaultV;
member.valueMin = minV;
member.valueMax = maxV;
member.type = std::is_same_v<T, float> ? MemberVariable::EFloat : MemberVariable::EInt;
member.setFunc = [func]( Generator* g, MemberVariable::ValueUnion v ) { (dynamic_cast<U*>(g)->*func)(v); };
memberVariables.push_back( member );
}
// Registers an enum-typed member variable.
//   name     - stored as a raw pointer (not copied)
//   defaultV - default enum value, stored as its 32-bit integer value
//   func     - setter called on the generator after a dynamic_cast to U*
//   names    - display names for the enum values, stored in enumNames
// valueMin / valueMax are left at their default-constructed value.
template<typename T, typename U, typename = std::enable_if_t<std::is_enum_v<T>>, typename... NAMES>
void AddVariableEnum( const char* name, T defaultV, void(U::* func)(T), NAMES... names )
{
    MemberVariable member;
    member.name = name;
    member.type = MemberVariable::EEnum;
    // Use the std-qualified type: <cstdint> only guarantees std::int32_t,
    // and the rest of this header spells it that way.
    member.valueDefault = static_cast<std::int32_t>( defaultV );
    member.enumNames = { names... };
    // The dynamic_cast result is used without a null check.
    member.setFunc = [func]( Generator* g, MemberVariable::ValueUnion v )
    {
        (dynamic_cast<U*>(g)->*func)( static_cast<T>( v.i ) );
    };
    memberVariables.push_back( member );
}
// Registers one non-enum member variable per element of PerDimensionVariable<T>::varArray.
// All entries share name, default, min and max; they differ only in dimensionIdx.
//   func - callable taking the concrete generator pointer; its result must offer
//          .get() yielding something indexable (the visible callers return std::ref of a
//          PerDimensionVariable member).
template<typename T, typename U, typename = std::enable_if_t<!std::is_enum_v<T>>>
void AddPerDimensionVariable( const char* name, T defaultV, U&& func, T minV = 0, T maxV = 0 )
{
// Element count of varArray, computed with the sizeof(array) / sizeof(element) idiom.
for( int idx = 0; (size_t)idx < sizeof( PerDimensionVariable<T>::varArray ) / sizeof( *PerDimensionVariable<T>::varArray ); idx++ )
{
MemberVariable member;
member.name = name;
member.valueDefault = defaultV;
member.valueMin = minV;
member.valueMax = maxV;
member.type = std::is_same_v<T, float> ? MemberVariable::EFloat : MemberVariable::EInt;
member.dimensionIdx = idx;
// Writes the value into slot idx of the per-dimension storage returned by func.
member.setFunc = [func, idx]( Generator* g, MemberVariable::ValueUnion v ) { func( dynamic_cast<GetArg<U, 0>>(g) ).get()[idx] = v; };
memberVariables.push_back( member );
}
}
// Describes one generator-source input of a node.
struct MemberNode
{
// Display name; raw pointer, not copied.
const char* name;
// Dimension slot for per-dimension sources; stays -1 for entries that do not set it.
int dimensionIdx = -1;
// Assigns a source node to a generator; the registered setters return false
// when the supplied node is not of the expected type.
std::function<bool( Generator*, SmartNodeArg<> )> setFunc;
};
template<typename T, typename U>
void AddGeneratorSource( const char* name, void(U::* func)(SmartNodeArg<T>) )
{
MemberNode member;
member.name = name;
member.setFunc = [func]( Generator* g, SmartNodeArg<> s )
{
SmartNode<T> downCast = std::dynamic_pointer_cast<T>(s);
if( downCast )
{
(dynamic_cast<U*>(g)->*func)( downCast );
}
return (bool)downCast;
};
memberNodes.push_back( member );
}
// Registers one generator-source input per element of the per-dimension array
// reached through func. All entries share name and differ only in dimensionIdx.
//   func - callable taking the concrete generator pointer; its result must offer
//          .get() yielding something indexable.
template<typename U>
void AddPerDimensionGeneratorSource( const char* name, U&& func )
{
// GeneratorSourceT: element type of the per-dimension storage returned by func.
// T: the node type that element accepts.
using GeneratorSourceT = typename std::invoke_result_t<U, GetArg<U, 0>>::type::Type;
using T = typename GeneratorSourceT::Type;
// Element count of varArray, computed with the sizeof(array) / sizeof(element) idiom.
for( int idx = 0; (size_t)idx < sizeof( PerDimensionVariable<GeneratorSourceT>::varArray ) / sizeof( *PerDimensionVariable<GeneratorSourceT>::varArray ); idx++ )
{
MemberNode member;
member.name = name;
member.dimensionIdx = idx;
// Returns false, leaving the generator untouched, when s cannot be cast to T.
member.setFunc = [func, idx]( auto* g, SmartNodeArg<> s )
{
SmartNode<T> downCast = std::dynamic_pointer_cast<T>(s);
if( downCast )
{
g->SetSourceMemberVariable( func( dynamic_cast<GetArg<U, 0>>(g) ).get()[idx], downCast );
}
return (bool)downCast;
};
memberNodes.push_back( member );
}
}
// Describes one input that can be driven either by a constant float or by a source node.
struct MemberHybrid
{
// Display name; raw pointer, not copied.
const char* name;
// Constant used as the default value.
float valueDefault = 0.0f;
// Dimension slot for per-dimension hybrids; stays -1 for entries that do not set it.
int dimensionIdx = -1;
// Sets the constant value on a generator.
std::function<void( Generator*, float )> setValueFunc;
// Sets a source node on a generator; the registered setters return false
// when the supplied node is not of the expected type.
std::function<bool( Generator*, SmartNodeArg<> )> setNodeFunc;
};
template<typename T, typename U>
void AddHybridSource( const char* name, float defaultValue, void(U::* funcNode)(SmartNodeArg<T>), void(U::* funcValue)(float) )
{
MemberHybrid member;
member.name = name;
member.valueDefault = defaultValue;
member.setNodeFunc = [funcNode]( auto* g, SmartNodeArg<> s )
{
SmartNode<T> downCast = std::dynamic_pointer_cast<T>(s);
if( downCast )
{
(dynamic_cast<U*>(g)->*funcNode)( downCast );
}
return (bool)downCast;
};
member.setValueFunc = [funcValue]( Generator* g, float v )
{
(dynamic_cast<U*>(g)->*funcValue)(v);
};
memberHybrids.push_back( member );
}
// Registers one hybrid input (constant float or source node) per element of the
// per-dimension array reached through func. All entries share name and defaultV
// and differ only in dimensionIdx.
//   func - callable taking the concrete generator pointer; its result must offer
//          .get() yielding something indexable.
template<typename U>
void AddPerDimensionHybridSource( const char* name, float defaultV, U&& func )
{
// HybridSourceT: element type of the per-dimension storage returned by func.
// T: the node type that element accepts.
using HybridSourceT = typename std::invoke_result_t<U, GetArg<U, 0>>::type::Type;
using T = typename HybridSourceT::Type;
// Element count of varArray, computed with the sizeof(array) / sizeof(element) idiom.
for( int idx = 0; (size_t)idx < sizeof( PerDimensionVariable<HybridSourceT>::varArray ) / sizeof( *PerDimensionVariable<HybridSourceT>::varArray ); idx++ )
{
MemberHybrid member;
member.name = name;
member.valueDefault = defaultV;
member.dimensionIdx = idx;
// Returns false, leaving the generator untouched, when s cannot be cast to T.
member.setNodeFunc = [func, idx]( auto* g, SmartNodeArg<> s )
{
SmartNode<T> downCast = std::dynamic_pointer_cast<T>(s);
if( downCast )
{
g->SetSourceMemberVariable( func( dynamic_cast<GetArg<U, 0>>(g) ).get()[idx], downCast );
}
return (bool)downCast;
};
// Assigns the constant directly into slot idx of the per-dimension storage.
member.setValueFunc = [func, idx]( Generator* g, float v ) { func( dynamic_cast<GetArg<U, 0>>(g) ).get()[idx] = v; };
memberHybrids.push_back( member );
}
}
// Index of this metadata in the static registration list; assigned in the constructor.
std::uint16_t id;
// Class name passed to the constructor; raw pointer, not copied.
const char* name;
// Group labels filled in by derived Metadata constructors (e.g. "Basic Generators").
std::vector<const char*> groups;
// Settings registered through AddVariable / AddVariableEnum / AddPerDimensionVariable.
std::vector<MemberVariable> memberVariables;
// Inputs registered through AddGeneratorSource / AddPerDimensionGeneratorSource.
std::vector<MemberNode> memberNodes;
// Inputs registered through AddHybridSource / AddPerDimensionHybridSource.
std::vector<MemberHybrid> memberHybrids;
// Creates the generator described by this metadata; implemented by each derived Metadata.
// NOTE(review): ownership of the returned raw pointer is not visible here - confirm at call sites.
virtual Generator* NodeFactory( FastSIMD::eLevel level = FastSIMD::Level_Null ) const = 0;
private:
// Declaration-only helper (never defined; used only inside decltype) that maps a
// const-qualified member function pointer to a tuple of its parameter types.
// Only const operator() overloads match, e.g. non-mutable lambdas.
template<typename F, typename Ret, typename... Args>
static std::tuple<Args...> GetArg_Helper( Ret( F::* )(Args...) const );
// GetArg<F, I> is the type of the I-th parameter of F::operator().
template<typename F, std::size_t I>
using GetArg = std::tuple_element_t<I, decltype(GetArg_Helper( &F::operator() ))>;
// Appends newMetadata to the static registration list and returns its index,
// truncated to 16 bits.
// NOTE(review): ids would wrap around past 65536 registered classes.
static std::uint16_t AddMetadataClass( const Metadata* newMetadata )
{
    sMetadataClasses.push_back( newMetadata );
    return static_cast<std::uint16_t>( sMetadataClasses.size() - 1 );
}
static std::vector<const Metadata*> sMetadataClasses;
};
// Plain description of one node in a node tree: which Metadata it refers to
// plus one entry per registered variable, node input and hybrid input.
struct NodeData
{
    NodeData( const Metadata* metadata );

    const Metadata* metadata;
    std::vector<Metadata::MemberVariable::ValueUnion> variables;
    std::vector<NodeData*> nodes;
    std::vector<std::pair<NodeData*, float>> hybrids;

    // Member-wise equality. Child nodes (in nodes and hybrids) are compared
    // by pointer, not by content; floats are compared exactly.
    bool operator ==( const NodeData& rhs ) const
    {
        if( metadata != rhs.metadata )
        {
            return false;
        }
        if( !( variables == rhs.variables ) )
        {
            return false;
        }
        if( !( nodes == rhs.nodes ) )
        {
            return false;
        }
        return hybrids == rhs.hybrids;
    }
};
}
// FASTNOISE_METADATA( Base... ): placed inside a concrete node class.
// Switches to public access, declares the supported SIMD levels and the
// GetMetadata() override, then OPENS a nested `struct Metadata` deriving from
// Base::Metadata with a NodeFactory override declared.
// The struct is left open: the user supplies the constructor and must close it with `};`.
#define FASTNOISE_METADATA( ... ) public:\
FASTSIMD_LEVEL_SUPPORT( FastNoise::SUPPORTED_SIMD_LEVELS );\
const FastNoise::Metadata* GetMetadata() const override;\
struct Metadata : __VA_ARGS__::Metadata{\
Generator* NodeFactory( FastSIMD::eLevel ) const override;
// FASTNOISE_METADATA_ABSTRACT( Base... ): variant that only opens the nested
// `struct Metadata` (no SIMD level support, GetMetadata or NodeFactory declarations).
// The struct is likewise left open and must be closed with `};`.
#define FASTNOISE_METADATA_ABSTRACT( ... ) public:\
struct Metadata : __VA_ARGS__::Metadata{

View File

@ -0,0 +1,130 @@
#pragma once
#ifndef FASTSIMD_BUILD_CLASS
#error Do not include this file
#endif
#ifndef FASTNOISE_CLASS
#define FASTNOISE_CLASS( CLASS ) FastNoise::CLASS
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/Generator.h"
#else
#include "Generators/Generator.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/BasicGenerators.h"
#else
#include "Generators/BasicGenerators.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/Value.h"
#else
#include "Generators/Value.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/Perlin.h"
#else
#include "Generators/Perlin.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/Simplex.h"
#else
#include "Generators/Simplex.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/Cellular.h"
#else
#include "Generators/Cellular.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/Fractal.h"
#else
#include "Generators/Fractal.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/DomainWarp.h"
#else
#include "Generators/DomainWarp.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/DomainWarpFractal.h"
#else
#include "Generators/DomainWarpFractal.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/Modifiers.h"
#else
#include "Generators/Modifiers.inl"
#endif
#ifdef FASTSIMD_INCLUDE_HEADER_ONLY
#include "Generators/Blends.h"
#else
#include "Generators/Blends.inl"
#endif
// Nodes
// Order is important!
// Always add to bottom of list,
// inserting will break existing encoded node trees
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Constant ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( White ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Checkerboard ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( SineWave ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( PositionOutput ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DistanceToOrigin ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Value ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Perlin ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Simplex ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( OpenSimplex2 ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( CellularValue ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( CellularDistance ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( CellularLookup ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( FractalFBm ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( FractalBillow ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( FractalRidged ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( FractalRidgedMulti ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainWarpGradient ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainWarpFractalProgressive ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainWarpFractalIndependant ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainScale ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainOffset ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainRotate ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( SeedOffset ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Remap ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( ConvertRGBA8 ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Add ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Subtract ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Multiply ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Divide ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Min ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Max ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( MinSmooth ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( MaxSmooth ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Fade ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( Terrace ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( PowFloat ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( PowInt ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( DomainAxisScale ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( AddDimension ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( RemoveDimension ) )
FASTSIMD_BUILD_CLASS( FASTNOISE_CLASS( GeneratorCache ) )

View File

@ -0,0 +1,20 @@
#pragma once
#include "FastSIMD/FastSIMD.h"
#define FASTNOISE_CALC_MIN_MAX 1
namespace FastNoise
{
// Bit mask of FastSIMD levels: Scalar, SSE2, SSE4.1, AVX2 and AVX512.
// NOTE(review): presumably the set of levels node classes are compiled for - confirm against FastSIMD.
const FastSIMD::Level_BitFlags SUPPORTED_SIMD_LEVELS =
FastSIMD::Level_Scalar |
FastSIMD::Level_SSE2 |
FastSIMD::Level_SSE41 |
FastSIMD::Level_AVX2 |
FastSIMD::Level_AVX512 ;
// Shared-ownership handle to a node; T defaults to the Generator base class.
// NOTE(review): std::shared_ptr needs <memory>, which this header does not include
// itself - confirm that FastSIMD/FastSIMD.h provides it.
template<typename T = class Generator>
using SmartNode = std::shared_ptr<T>;
// Parameter form of SmartNode: a const reference, so passing one does not copy the handle.
template<typename T = class Generator>
using SmartNodeArg = const SmartNode<T>&;
}

View File

@ -0,0 +1,109 @@
#pragma once
#include "Generator.h"
namespace FastNoise
{
// Generator node configured by a single float, mValue (default 1.0f).
class Constant : public virtual Generator
{
public:
// Stores the given value in mValue.
void SetValue( float value ) { mValue = value; }
protected:
float mValue = 1.0f;
// FASTNOISE_METADATA is defined outside this chunk.
// NOTE(review): it presumably opens the nested Metadata type whose constructor follows, which would
// explain the two closing "};" at the end of the class — confirm against Generator.h.
FASTNOISE_METADATA( Generator )
// Adds the "Basic Generators" group and registers a "Value" variable (metadata default 1.0f) bound to SetValue.
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Basic Generators" );
this->AddVariable( "Value", 1.0f, &Constant::SetValue );
}
};
};
// Generator node with no configurable members of its own; only metadata is declared here.
class White : public virtual Generator
{
// FASTNOISE_METADATA is defined outside this chunk.
// NOTE(review): presumably opens the nested Metadata type, hence the two closing "};" — confirm.
FASTNOISE_METADATA( Generator )
// Adds the "Basic Generators" group; no variables are registered.
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Basic Generators" );
}
};
};
// Generator node configured by a single float, mSize (default 1.0f).
class Checkerboard : public virtual Generator
{
public:
// Stores the given value in mSize.
void SetSize( float value ) { mSize = value; }
protected:
float mSize = 1.0f;
// FASTNOISE_METADATA is defined outside this chunk.
// NOTE(review): presumably opens the nested Metadata type, hence the two closing "};" — confirm.
FASTNOISE_METADATA( Generator )
// Adds the "Basic Generators" group and registers a "Size" variable (metadata default 1.0f) bound to SetSize.
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Basic Generators" );
this->AddVariable( "Size", 1.0f, &Checkerboard::SetSize );
}
};
};
// Generator node configured by a single float, mScale (default 1.0f).
class SineWave : public virtual Generator
{
public:
// Stores the given value in mScale.
void SetScale( float value ) { mScale = value; }
protected:
float mScale = 1.0f;
// FASTNOISE_METADATA is defined outside this chunk.
// NOTE(review): presumably opens the nested Metadata type, hence the two closing "};" — confirm.
FASTNOISE_METADATA( Generator )
// Adds the "Basic Generators" group and registers a "Scale" variable (metadata default 1.0f) bound to SetScale.
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Basic Generators" );
this->AddVariable( "Scale", 1.0f, &SineWave::SetScale );
}
};
};
// Generator node holding a per-dimension multiplier and a per-dimension offset.
class PositionOutput : public virtual Generator
{
public:
// Sets the multiplier and offset for dimension D; D is cast to int and used as the index into both members.
// The offset is reset to 0.0f when the second argument is omitted.
template<Dim D>
void Set( float multiplier, float offset = 0.0f ) { mMultiplier[(int)D] = multiplier; mOffset[(int)D] = offset; }
protected:
// PerDimensionVariable is declared outside this chunk; its initial contents are not visible here.
PerDimensionVariable<float> mMultiplier;
PerDimensionVariable<float> mOffset;
// FASTNOISE_METADATA is defined outside this chunk.
// NOTE(review): presumably opens the nested Metadata type, hence the two closing "};" — confirm.
FASTNOISE_METADATA( Generator )
// Adds the "Basic Generators" group and registers "Multiplier" and "Offset" per-dimension variables
// (metadata default 0.0f each); the lambdas hand back references to the corresponding members.
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Basic Generators" );
this->AddPerDimensionVariable( "Multiplier", 0.0f, []( PositionOutput* p ) { return std::ref( p->mMultiplier ); } );
this->AddPerDimensionVariable( "Offset", 0.0f, []( PositionOutput* p ) { return std::ref( p->mOffset ); } );
}
};
};
// Generator node configured by a distance function selector.
class DistanceToOrigin : public virtual Generator
{
public:
// Stores the selected distance function.
void SetDistanceFunction( DistanceFunction value ) { mDistanceFunction = value; }
protected:
// NOTE(review): the member defaults to EuclideanSquared while the metadata below advertises Euclidean
// as the default — the two disagree; confirm which one is intended.
DistanceFunction mDistanceFunction = DistanceFunction::EuclideanSquared;
// FASTNOISE_METADATA is defined outside this chunk.
// NOTE(review): presumably opens the nested Metadata type, hence the two closing "};" — confirm.
FASTNOISE_METADATA( Generator )
// Adds the "Basic Generators" group and registers a "Distance Function" enum variable bound to
// SetDistanceFunction, with four display names.
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Basic Generators" );
this->AddVariableEnum( "Distance Function", DistanceFunction::Euclidean, &DistanceToOrigin::SetDistanceFunction, "Euclidean", "Euclidean Squared", "Manhattan", "Hybrid" );
}
};
};
}

View File

@ -0,0 +1,96 @@
#include <cassert>
#include "FastSIMD/InlInclude.h"
#include "BasicGenerators.h"
#include "Utils.inl"
// Implementation of the Constant node for the FastSIMD level FS.
template<typename FS>
class FS_T<FastNoise::Constant, FS> : public virtual FastNoise::Constant, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Ignores both the seed and the position; the result is a float32v constructed from mValue.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v constant( mValue );
        return constant;
    }
};
// Implementation of the White node for the FastSIMD level FS.
template<typename FS>
class FS_T<FastNoise::White, FS> : public virtual FastNoise::White, public FS_T<FastNoise::Generator, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
FASTNOISE_IMPL_GEN_T;
// Scrambles every position component and feeds the results, with the seed, to FnUtils::GetValueCoord.
template<typename... P>
FS_INLINE float32v GenT( int32v seed, P... pos ) const
{
// idx advances once per position component, so each component is multiplied by its own FnPrimes::Lookup entry.
size_t idx = 0;
// Comma fold, evaluated left to right: each component is cast to int32v, XORed with itself shifted
// right by 16, multiplied by the prime for its slot, and cast back into pos.
// NOTE(review): FS_Castf32_i32 / FS_Casti32_f32 are presumably bit reinterpretations — confirm.
((pos = FS_Casti32_f32( (FS_Castf32_i32( pos ) ^ (FS_Castf32_i32( pos ) >> 16)) * int32v( FnPrimes::Lookup[idx++] ) )), ...);
return FnUtils::GetValueCoord( seed, FS_Castf32_i32( pos )... );
}
};
// Implementation of the Checkerboard node for the FastSIMD level FS.
template<typename FS>
class FS_T<FastNoise::Checkerboard, FS> : public virtual FastNoise::Checkerboard, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Scales each position component by 1/mSize, converts it to an integer and XORs all of them
    // together. Bit 0 of that XOR, moved to bit 31, is XORed into the bits of 1.0f.
    // The seed is unused.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v invSize = FS_Reciprocal_f32( float32v( mSize ) );

        int32v cellParity = (FS_Convertf32_i32( pos * invSize ) ^ ...);

        float32v flipBits = FS_Casti32_f32( cellParity << 31 );

        return float32v( 1.0f ) ^ flipBits;
    }
};
// Implementation of the SineWave node for the FastSIMD level FS.
template<typename FS>
class FS_T<FastNoise::SineWave, FS> : public virtual FastNoise::SineWave, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Multiplies together FS_Sin_f32( component / mScale ) over every position component.
    // The seed is unused.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v invScale = FS_Reciprocal_f32( float32v( mScale ) );

        float32v sineProduct = (FS_Sin_f32( pos * invScale ) * ...);

        return sineProduct;
    }
};
// Implementation of the PositionOutput node for the FastSIMD level FS.
template<typename FS>
class FS_T<FastNoise::PositionOutput, FS> : public virtual FastNoise::PositionOutput, public FS_T<FastNoise::Generator, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
FASTNOISE_IMPL_GEN_T;
// Returns the sum over all position components of ( component + offset ) * multiplier, using the
// per-dimension offset and multiplier for that component. The seed is unused.
template<typename... P>
FS_INLINE float32v GenT( int32v seed, P... pos ) const
{
// Two separate counters; both advance once per component as the comma fold runs left to right.
size_t offsetIdx = 0;
size_t multiplierIdx = 0;
// The offset is added before the multiplier is applied, so the offset is scaled as well.
(((pos += float32v( mOffset[offsetIdx++] )) *= float32v( mMultiplier[multiplierIdx++] )), ...);
return (pos + ...);
}
};
// Implementation of the DistanceToOrigin node for the FastSIMD level FS.
template<typename FS>
class FS_T<FastNoise::DistanceToOrigin, FS> : public virtual FastNoise::DistanceToOrigin, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Passes the position components straight to FnUtils::CalcDistance with the configured
    // distance function. The seed is unused.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v distance = FnUtils::CalcDistance( mDistanceFunction, pos... );
        return distance;
    }
};

View File

@ -0,0 +1,198 @@
#pragma once
#include "Generator.h"
#include <climits>
namespace FastNoise
{
// Base for two-operand nodes whose left operand must be a generator (GeneratorSource) and whose
// right operand may be either a generator or a constant float (HybridSource).
class OperatorSourceLHS : public virtual Generator
{
public:
void SetLHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mLHS, gen ); }
// RHS has two overloads: a generator node, or a plain float assigned directly to the HybridSource.
void SetRHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mRHS, gen ); }
void SetRHS( float value ) { mRHS = value; }
protected:
GeneratorSource mLHS;
HybridSource mRHS;
// FASTNOISE_METADATA_ABSTRACT is defined outside this chunk.
// NOTE(review): presumably the variant of FASTNOISE_METADATA for classes that are not instantiated
// directly; it appears to open the nested Metadata type, hence the two closing "};" — confirm.
FASTNOISE_METADATA_ABSTRACT( Generator )
// Adds the "Blends" group and registers "LHS" (generator only) and "RHS" (hybrid, metadata default 0.0f).
// Both SetRHS overloads are passed for the hybrid source.
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Blends" );
this->AddGeneratorSource( "LHS", &OperatorSourceLHS::SetLHS );
this->AddHybridSource( "RHS", 0.0f, &OperatorSourceLHS::SetRHS, &OperatorSourceLHS::SetRHS );
}
};
};
// Base for two-operand nodes where both operands may be either a generator or a constant float (HybridSource).
class OperatorHybridLHS : public virtual Generator
{
public:
// Each operand has two overloads: a generator node, or a plain float assigned directly to the HybridSource.
void SetLHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mLHS, gen ); }
void SetLHS( float value ) { mLHS = value; }
void SetRHS( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mRHS, gen ); }
void SetRHS( float value ) { mRHS = value; }
protected:
HybridSource mLHS;
HybridSource mRHS;
// FASTNOISE_METADATA_ABSTRACT is defined outside this chunk.
// NOTE(review): presumably the variant of FASTNOISE_METADATA for classes that are not instantiated
// directly; it appears to open the nested Metadata type, hence the two closing "};" — confirm.
FASTNOISE_METADATA_ABSTRACT( Generator )
// Adds the "Blends" group and registers "LHS" and "RHS" as hybrid sources (metadata default 0.0f each).
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Blends" );
this->AddHybridSource( "LHS", 0.0f, &OperatorHybridLHS::SetLHS, &OperatorHybridLHS::SetLHS );
this->AddHybridSource( "RHS", 0.0f, &OperatorHybridLHS::SetRHS, &OperatorHybridLHS::SetRHS );
}
};
};
// Add node: takes its LHS (generator) and RHS (hybrid) operands from OperatorSourceLHS and declares no members of its own.
class Add : public virtual OperatorSourceLHS
{
// Macro defined outside this chunk; NOTE(review): presumably opens the nested Metadata type, hence the two "};" — confirm.
FASTNOISE_METADATA( OperatorSourceLHS )
// Inherits the OperatorSourceLHS metadata constructor unchanged.
using OperatorSourceLHS::Metadata::Metadata;
};
};
// Subtract node: takes its LHS and RHS operands (both hybrid) from OperatorHybridLHS and declares no members of its own.
class Subtract : public virtual OperatorHybridLHS
{
// Macro defined outside this chunk; NOTE(review): presumably opens the nested Metadata type, hence the two "};" — confirm.
FASTNOISE_METADATA( OperatorHybridLHS )
// Inherits the OperatorHybridLHS metadata constructor unchanged.
using OperatorHybridLHS::Metadata::Metadata;
};
};
// Multiply node: takes its LHS (generator) and RHS (hybrid) operands from OperatorSourceLHS and declares no members of its own.
class Multiply : public virtual OperatorSourceLHS
{
// Macro defined outside this chunk; NOTE(review): presumably opens the nested Metadata type, hence the two "};" — confirm.
FASTNOISE_METADATA( OperatorSourceLHS )
// Inherits the OperatorSourceLHS metadata constructor unchanged.
using OperatorSourceLHS::Metadata::Metadata;
};
};
// Divide node: takes its LHS and RHS operands (both hybrid) from OperatorHybridLHS and declares no members of its own.
class Divide : public virtual OperatorHybridLHS
{
// Macro defined outside this chunk; NOTE(review): presumably opens the nested Metadata type, hence the two "};" — confirm.
FASTNOISE_METADATA( OperatorHybridLHS )
// Inherits the OperatorHybridLHS metadata constructor unchanged.
using OperatorHybridLHS::Metadata::Metadata;
};
};
// Min node: takes its LHS (generator) and RHS (hybrid) operands from OperatorSourceLHS and declares no members of its own.
class Min : public virtual OperatorSourceLHS
{
// Macro defined outside this chunk; NOTE(review): presumably opens the nested Metadata type, hence the two "};" — confirm.
FASTNOISE_METADATA( OperatorSourceLHS )
// Inherits the OperatorSourceLHS metadata constructor unchanged.
using OperatorSourceLHS::Metadata::Metadata;
};
};
// Max node: takes its LHS (generator) and RHS (hybrid) operands from OperatorSourceLHS and declares no members of its own.
class Max : public virtual OperatorSourceLHS
{
// Macro defined outside this chunk; NOTE(review): presumably opens the nested Metadata type, hence the two "};" — confirm.
FASTNOISE_METADATA( OperatorSourceLHS )
// Inherits the OperatorSourceLHS metadata constructor unchanged.
using OperatorSourceLHS::Metadata::Metadata;
};
};
// Power node with a floating-point exponent. Both the base ("Value") and the exponent ("Pow")
// may be driven by a generator or set to a constant float.
class PowFloat : public virtual Generator
{
public:
    // Each input has two overloads: a generator node, or a plain float assigned to the HybridSource.
    void SetValue( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mValue, gen ); }
    void SetValue( float value ) { mValue = value; }
    void SetPow( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mPow, gen ); }
    void SetPow( float value ) { mPow = value; }

protected:
    // Initialised to the defaults advertised by the metadata below, so a node that was never
    // configured behaves the way its metadata describes instead of relying on whatever a
    // default-constructed HybridSource happens to hold.
    HybridSource mValue = 2.0f;
    HybridSource mPow = 2.0f;

    // FASTNOISE_METADATA is defined outside this chunk.
    // NOTE(review): presumably opens the nested Metadata type, hence the two closing "};" — confirm.
    FASTNOISE_METADATA( Generator )

        // Adds the "Blends" group and registers "Value" and "Pow" as hybrid sources (default 2.0f each).
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Blends" );
            this->AddHybridSource( "Value", 2.0f, &PowFloat::SetValue, &PowFloat::SetValue );
            this->AddHybridSource( "Pow", 2.0f, &PowFloat::SetPow, &PowFloat::SetPow );
        }
    };
};
// Power node with an integer exponent: "Value" is a generator input, "Pow" is an int32_t.
//
// NOTE(review): this class derives from OperatorHybridLHS yet declares Generator metadata and its
// own mValue/mPow members, so the inherited LHS/RHS operands look unused. This may be a copy-paste
// slip (the sibling PowFloat derives from Generator). The base is left as-is here so the public
// interface does not change — confirm and clean up separately.
class PowInt : public virtual OperatorHybridLHS
{
public:
    void SetValue( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mValue, gen ); }
    void SetPow( int32_t value ) { mPow = value; }

protected:
    GeneratorSource mValue;

    // Initialised to the metadata default. Previously this member had no initialiser, so a node on
    // which SetPow was never called carried an indeterminate exponent.
    int32_t mPow = 2;

    // FASTNOISE_METADATA is defined outside this chunk.
    // NOTE(review): presumably opens the nested Metadata type, hence the two closing "};" — confirm.
    FASTNOISE_METADATA( Generator )

        // Adds the "Blends" group and registers "Value" (generator only) and "Pow"
        // (default 2, with 2 and INT_MAX passed as the trailing bounds arguments).
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Blends" );
            this->AddGeneratorSource( "Value", &PowInt::SetValue );
            this->AddVariable( "Pow", 2, &PowInt::SetPow, 2, INT_MAX );
        }
    };
};
// MinSmooth node: the OperatorSourceLHS operands plus a "Smoothness" input that may be a generator or a constant.
class MinSmooth : public virtual OperatorSourceLHS
{
public:
// Two overloads: a generator node, or a plain float assigned directly to the HybridSource.
void SetSmoothness( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSmoothness, gen ); }
void SetSmoothness( float value ) { mSmoothness = value; }
protected:
// Default matches the metadata default registered below.
HybridSource mSmoothness = 0.1f;
// Macro defined outside this chunk; NOTE(review): presumably opens the nested Metadata type, hence the two "};" — confirm.
FASTNOISE_METADATA( OperatorSourceLHS )
// Runs the OperatorSourceLHS metadata constructor, then registers "Smoothness" (hybrid, default 0.1f).
Metadata( const char* className ) : OperatorSourceLHS::Metadata( className )
{
this->AddHybridSource( "Smoothness", 0.1f, &MinSmooth::SetSmoothness, &MinSmooth::SetSmoothness );
}
};
};
// MaxSmooth node: the OperatorSourceLHS operands plus a "Smoothness" input that may be a generator or a constant.
class MaxSmooth : public virtual OperatorSourceLHS
{
public:
// Two overloads: a generator node, or a plain float assigned directly to the HybridSource.
void SetSmoothness( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSmoothness, gen ); }
void SetSmoothness( float value ) { mSmoothness = value; }
protected:
// Default matches the metadata default registered below.
HybridSource mSmoothness = 0.1f;
// Macro defined outside this chunk; NOTE(review): presumably opens the nested Metadata type, hence the two "};" — confirm.
FASTNOISE_METADATA( OperatorSourceLHS )
// Runs the OperatorSourceLHS metadata constructor, then registers "Smoothness" (hybrid, default 0.1f).
Metadata( const char* className ) : OperatorSourceLHS::Metadata( className )
{
this->AddHybridSource( "Smoothness", 0.1f, &MaxSmooth::SetSmoothness, &MaxSmooth::SetSmoothness );
}
};
};
// Fade node: two generator inputs (A and B) plus a "Fade" input that may be a generator or a constant.
class Fade : public virtual Generator
{
public:
void SetA( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mA, gen ); }
void SetB( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mB, gen ); }
// Two overloads: a generator node, or a plain float assigned directly to the HybridSource.
void SetFade( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mFade, gen ); }
void SetFade( float value ) { mFade = value; }
protected:
GeneratorSource mA;
GeneratorSource mB;
// Default matches the metadata default registered below.
HybridSource mFade = 0.5f;
// Macro defined outside this chunk; NOTE(review): presumably opens the nested Metadata type, hence the two "};" — confirm.
FASTNOISE_METADATA( Generator )
// Adds the "Blends" group and registers "A" and "B" (generator only) and "Fade" (hybrid, default 0.5f).
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Blends" );
this->AddGeneratorSource( "A", &Fade::SetA );
this->AddGeneratorSource( "B", &Fade::SetB );
this->AddHybridSource( "Fade", 0.5f, &Fade::SetFade, &Fade::SetFade );
}
};
};
}

View File

@ -0,0 +1,174 @@
#include "FastSIMD/InlInclude.h"
#include "Blends.h"
// Implementation of the Add node for the FastSIMD level FS.
template<typename FS>
class FS_T<FastNoise::Add, FS> : public virtual FastNoise::Add, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Evaluates both operands at the same seed and position and returns LHS + RHS.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v lhs = this->GetSourceValue( mLHS, seed, pos... );
        float32v rhs = this->GetSourceValue( mRHS, seed, pos... );

        return lhs + rhs;
    }
};
// Implementation of the Subtract node for the FastSIMD level FS.
template<typename FS>
class FS_T<FastNoise::Subtract, FS> : public virtual FastNoise::Subtract, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Evaluates both operands at the same seed and position and returns LHS - RHS.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v lhs = this->GetSourceValue( mLHS, seed, pos... );
        float32v rhs = this->GetSourceValue( mRHS, seed, pos... );

        return lhs - rhs;
    }
};
// Implementation of the Multiply node for the FastSIMD level FS.
template<typename FS>
class FS_T<FastNoise::Multiply, FS> : public virtual FastNoise::Multiply, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Evaluates both operands at the same seed and position and returns LHS * RHS.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v lhs = this->GetSourceValue( mLHS, seed, pos... );
        float32v rhs = this->GetSourceValue( mRHS, seed, pos... );

        return lhs * rhs;
    }
};
// Implementation of the Divide node for the FastSIMD level FS.
template<typename FS>
class FS_T<FastNoise::Divide, FS> : public virtual FastNoise::Divide, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Evaluates both operands at the same seed and position and returns LHS / RHS.
    // No guard against a zero divisor is applied here.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v numerator = this->GetSourceValue( mLHS, seed, pos... );
        float32v denominator = this->GetSourceValue( mRHS, seed, pos... );

        return numerator / denominator;
    }
};
// Implementation of the PowFloat node for the FastSIMD level FS.
template<typename FS>
class FS_T<FastNoise::PowFloat, FS> : public virtual FastNoise::PowFloat, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Evaluates the "Value" and "Pow" inputs at the same seed and position and passes them,
    // in that order, to FS_Pow_f32.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v base = this->GetSourceValue( mValue, seed, pos... );
        float32v exponent = this->GetSourceValue( mPow, seed, pos... );

        return FS_Pow_f32( base, exponent );
    }
};
// Implementation of the PowInt node for the FastSIMD level FS.
template<typename FS>
class FS_T<FastNoise::PowInt, FS> : public virtual FastNoise::PowInt, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Raises the "Value" input to the integer power mPow by repeated multiplication.
    // The result starts at the square, so any mPow of 2 or less yields base * base.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v base = this->GetSourceValue( mValue, seed, pos... );
        float32v result = base * base;

        int32_t exponent = 2;
        while( exponent < mPow )
        {
            result *= base;
            exponent++;
        }

        return result;
    }
};
// Implementation of the Min node for the FastSIMD level FS.
template<typename FS>
class FS_T<FastNoise::Min, FS> : public virtual FastNoise::Min, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Evaluates both operands at the same seed and position and combines them with FS_Min_f32.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v lhs = this->GetSourceValue( mLHS, seed, pos... );
        float32v rhs = this->GetSourceValue( mRHS, seed, pos... );

        return FS_Min_f32( lhs, rhs );
    }
};
// Implementation of the Max node for the FastSIMD level FS.
template<typename FS>
class FS_T<FastNoise::Max, FS> : public virtual FastNoise::Max, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Evaluates both operands at the same seed and position and combines them with FS_Max_f32.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v lhs = this->GetSourceValue( mLHS, seed, pos... );
        float32v rhs = this->GetSourceValue( mRHS, seed, pos... );

        return FS_Max_f32( lhs, rhs );
    }
};
// Implementation of the MinSmooth node for the FastSIMD level FS.
template<typename FS>
class FS_T<FastNoise::MinSmooth, FS> : public virtual FastNoise::MinSmooth, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Computes min( lhs, rhs ) adjusted by a cubic term that is non-zero only where the two
    // operands are closer together than the smoothness value.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v lhs = this->GetSourceValue( mLHS, seed, pos... );
        float32v rhs = this->GetSourceValue( mRHS, seed, pos... );

        // |smoothness| with a tiny positive floor, so the reciprocal below never sees zero.
        float32v k = FS_Max_f32( float32v( 1.175494351e-38f ), FS_Abs_f32( this->GetSourceValue( mSmoothness, seed, pos... ) ) );

        // h = max( k - |lhs - rhs|, 0 ) / k
        float32v gap = FS_Abs_f32( lhs - rhs );
        float32v h = FS_Max_f32( k - gap, float32v( 0.0f ) );
        h *= FS_Reciprocal_f32( k );

        float32v cubic = h * h * h * k;

        // NOTE(review): FS_FNMulAdd_f32( a, b, c ) is presumably c - a * b — confirm against FastSIMD.
        return FS_FNMulAdd_f32( float32v( 1.0f / 6.0f ), cubic, FS_Min_f32( lhs, rhs ) );
    }
};
// Implementation of the MaxSmooth node for the FastSIMD level FS.
template<typename FS>
class FS_T<FastNoise::MaxSmooth, FS> : public virtual FastNoise::MaxSmooth, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Same computation as the smooth minimum, applied to the negated operands and negated again
    // on the way out.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v negLhs = -this->GetSourceValue( mLHS, seed, pos... );
        float32v negRhs = -this->GetSourceValue( mRHS, seed, pos... );

        // |smoothness| with a tiny positive floor, so the reciprocal below never sees zero.
        float32v k = FS_Max_f32( float32v( 1.175494351e-38f ), FS_Abs_f32( this->GetSourceValue( mSmoothness, seed, pos... ) ) );

        // h = max( k - |negLhs - negRhs|, 0 ) / k
        float32v gap = FS_Abs_f32( negLhs - negRhs );
        float32v h = FS_Max_f32( k - gap, float32v( 0.0f ) );
        h *= FS_Reciprocal_f32( k );

        float32v cubic = h * h * h * k;

        // NOTE(review): FS_FNMulAdd_f32( a, b, c ) is presumably c - a * b — confirm against FastSIMD.
        return -FS_FNMulAdd_f32( float32v( 1.0f / 6.0f ), cubic, FS_Min_f32( negLhs, negRhs ) );
    }
};
// Implementation of the Fade node for the FastSIMD level FS.
template<typename FS>
class FS_T<FastNoise::Fade, FS> : public virtual FastNoise::Fade, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Blends the two inputs as A * ( 1 - t ) + B * t, where t is the absolute value of the
    // "Fade" input. t is not clamped to 1.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v t = FS_Abs_f32( this->GetSourceValue( mFade, seed, pos... ) );

        float32v a = this->GetSourceValue( mA, seed, pos... );
        float32v b = this->GetSourceValue( mB, seed, pos... );

        return FS_FMulAdd_f32( a, float32v( 1 ) - t, b * t );
    }
};

View File

@ -0,0 +1,104 @@
#pragma once
#include "Generator.h"
namespace FastNoise
{
// Common base for the cellular nodes: holds the jitter modifier, the distance function and the
// per-dimension jitter constants.
class Cellular : public virtual Generator
{
public:
// Two overloads: a generator node, or a plain float assigned directly to the HybridSource.
void SetJitterModifier( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mJitterModifier, gen ); }
void SetJitterModifier( float value ) { mJitterModifier = value; }
void SetDistanceFunction( DistanceFunction value ) { mDistanceFunction = value; }
protected:
HybridSource mJitterModifier = 1.0f;
DistanceFunction mDistanceFunction = DistanceFunction::EuclideanSquared;
// Jitter constants for 2D, 3D and 4D. These are non-static const members, so every instance
// carries its own copy.
const float kJitter2D = 0.437015f;
const float kJitter3D = 0.396143f;
const float kJitter4D = 0.366025f;
// FASTNOISE_METADATA_ABSTRACT is defined outside this chunk.
// NOTE(review): presumably the variant of FASTNOISE_METADATA for classes that are not instantiated
// directly; it appears to open the nested Metadata type, hence the two closing "};" — confirm.
FASTNOISE_METADATA_ABSTRACT( Generator )
// Adds the "Coherent Noise" group and registers "Jitter Modifier" (hybrid, default 1.0f) and
// "Distance Function" (enum, default EuclideanSquared, four display names).
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Coherent Noise" );
this->AddHybridSource( "Jitter Modifier", 1.0f, &Cellular::SetJitterModifier, &Cellular::SetJitterModifier );
this->AddVariableEnum( "Distance Function", DistanceFunction::EuclideanSquared, &Cellular::SetDistanceFunction, "Euclidean", "Euclidean Squared", "Manhattan", "Hybrid" );
}
};
};
// Cellular node configured by a "Value Index" in the range [0, kMaxDistanceCount - 1].
class CellularValue : public virtual Cellular
{
public:
    // Stores the index, clamped to [0, kMaxDistanceCount - 1]. Clamping here matches the range
    // declared in the metadata below and keeps the stored index valid for any container sized
    // by kMaxDistanceCount.
    void SetValueIndex( int value )
    {
        mValueIndex = value < 0 ? 0 : ( value < kMaxDistanceCount ? value : kMaxDistanceCount - 1 );
    }

protected:
    static const int kMaxDistanceCount = 4;

    int mValueIndex = 0;

    // FASTNOISE_METADATA is defined outside this chunk.
    // NOTE(review): presumably opens the nested Metadata type, hence the two closing "};" — confirm.
    FASTNOISE_METADATA( Cellular )

        // Runs the Cellular metadata constructor, then registers "Value Index"
        // (default 0, with 0 and kMaxDistanceCount - 1 passed as the trailing bounds arguments).
        Metadata( const char* className ) : Cellular::Metadata( className )
        {
            this->AddVariable( "Value Index", 0, &CellularValue::SetValueIndex, 0, kMaxDistanceCount - 1 );
        }
    };
};
// Cellular node configured by two distance indices and a rule for combining the two selected
// distances.
class CellularDistance : public virtual Cellular
{
public:
    // Selects how the distances at index 0 and index 1 are combined.
    enum class ReturnType
    {
        Index0,
        Index0Add1,
        Index0Sub1,
        Index0Mul1,
        Index0Div1
    };

    // Both index setters clamp to [0, kMaxDistanceCount - 1]. This matches the range declared in
    // the metadata below and keeps the stored indices valid for any container sized by
    // kMaxDistanceCount.
    void SetDistanceIndex0( int value )
    {
        mDistanceIndex0 = value < 0 ? 0 : ( value < kMaxDistanceCount ? value : kMaxDistanceCount - 1 );
    }

    void SetDistanceIndex1( int value )
    {
        mDistanceIndex1 = value < 0 ? 0 : ( value < kMaxDistanceCount ? value : kMaxDistanceCount - 1 );
    }

    void SetReturnType( ReturnType value ) { mReturnType = value; }

protected:
    static const int kMaxDistanceCount = 4;

    ReturnType mReturnType = ReturnType::Index0;
    int mDistanceIndex0 = 0;
    int mDistanceIndex1 = 1;

    // FASTNOISE_METADATA is defined outside this chunk.
    // NOTE(review): presumably opens the nested Metadata type, hence the two closing "};" — confirm.
    FASTNOISE_METADATA( Cellular )

        // Runs the Cellular metadata constructor, then registers both distance indices
        // (defaults 0 and 1, bounds 0 .. kMaxDistanceCount - 1) and the "Return Type" enum.
        Metadata( const char* className ) : Cellular::Metadata( className )
        {
            this->AddVariable( "Distance Index 0", 0, &CellularDistance::SetDistanceIndex0, 0, kMaxDistanceCount - 1 );
            this->AddVariable( "Distance Index 1", 1, &CellularDistance::SetDistanceIndex1, 0, kMaxDistanceCount - 1 );
            this->AddVariableEnum( "Return Type", ReturnType::Index0, &CellularDistance::SetReturnType, "Index0", "Index0Add1", "Index0Sub1", "Index0Mul1", "Index0Div1" );
        }
    };
};
// Cellular node with a "Lookup" generator input and a lookup frequency (default 0.1f).
class CellularLookup : public virtual Cellular
{
public:
void SetLookup( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mLookup, gen ); }
void SetLookupFrequency( float freq ) { mLookupFreq = freq; }
protected:
GeneratorSource mLookup;
// Default matches the metadata default registered below.
float mLookupFreq = 0.1f;
// FASTNOISE_METADATA is defined outside this chunk.
// NOTE(review): presumably opens the nested Metadata type, hence the two closing "};" — confirm.
FASTNOISE_METADATA( Cellular )
// Runs the Cellular metadata constructor, then registers "Lookup" (generator only) and
// "Lookup Frequency" (default 0.1f).
Metadata( const char* className ) : Cellular::Metadata( className )
{
this->AddGeneratorSource( "Lookup", &CellularLookup::SetLookup );
this->AddVariable( "Lookup Frequency", 0.1f, &CellularLookup::SetLookupFrequency );
}
};
};
}

View File

@ -0,0 +1,655 @@
#include "FastSIMD/InlInclude.h"
#include <cfloat>
#include <array>
#include "Cellular.h"
#include "Utils.inl"
// FS_T specialisation for the Cellular base node. It adds no members; the specialisations for
// CellularValue, CellularDistance and CellularLookup in this file derive from it.
template<typename FS>
class FS_T<FastNoise::Cellular, FS> : public virtual FastNoise::Cellular, public FS_T<FastNoise::Generator, FS>
{
};
// Implementation of the CellularValue node for the FastSIMD level FS.
//
// Each Gen overload walks the 3^N cells around the sample position (N = 2, 3 or 4). For every
// cell it derives a jittered feature point and a cell value from a hash, then keeps the
// candidates sorted by distance and returns the cell value stored at mValueIndex.
template<typename FS>
class FS_T<FastNoise::CellularValue, FS> : public virtual FastNoise::CellularValue, public FS_T<FastNoise::Cellular, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;

    // 2D: visits a 3x3 block of cells.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
    {
        float32v jitter = float32v( kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );

        std::array<float32v, kMaxDistanceCount> value;
        std::array<float32v, kMaxDistanceCount> distance;
        value.fill( float32v( INFINITY ) );
        distance.fill( float32v( INFINITY ) );

        // Start one cell below the converted position on each axis.
        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );

        // Offset from the sample position to the current cell, per axis.
        float32v xcf = FS_Converti32_f32( xc ) - x;
        float32v ycfBase = FS_Converti32_f32( ycBase ) - y;

        // From here on the integer coordinates are only used as hash inputs.
        xc *= int32v( FnPrimes::X );
        ycBase *= int32v( FnPrimes::Y );

        for( int xi = 0; xi < 3; xi++ )
        {
            float32v ycf = ycfBase;
            int32v yc = ycBase;

            for( int yi = 0; yi < 3; yi++ )
            {
                int32v hash = FnUtils::HashPrimesHB( seed, xc, yc );

                // Two 16-bit fields of the hash, centred around zero, form the jitter direction.
                float32v xd = FS_Converti32_f32( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
                float32v yd = FS_Converti32_f32( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );

                // Rescale the direction to length `jitter`, then add the offset to the cell.
                float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, yd * yd ) );
                xd = FS_FMulAdd_f32( xd, invMag, xcf );
                yd = FS_FMulAdd_f32( yd, invMag, ycf );

                float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS_Converti32_f32( hash );
                float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd );

                InsertCell( value, distance, newCellValue, newDistance );

                ycf += float32v( 1 );
                yc += int32v( FnPrimes::Y );
            }
            xcf += float32v( 1 );
            xc += int32v( FnPrimes::X );
        }

        return value[mValueIndex];
    }

    // 3D: visits a 3x3x3 block of cells.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
    {
        float32v jitter = float32v( kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z );

        std::array<float32v, kMaxDistanceCount> value;
        std::array<float32v, kMaxDistanceCount> distance;
        value.fill( float32v( INFINITY ) );
        distance.fill( float32v( INFINITY ) );

        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );

        float32v xcf = FS_Converti32_f32( xc ) - x;
        float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
        float32v zcfBase = FS_Converti32_f32( zcBase ) - z;

        xc *= int32v( FnPrimes::X );
        ycBase *= int32v( FnPrimes::Y );
        zcBase *= int32v( FnPrimes::Z );

        for( int xi = 0; xi < 3; xi++ )
        {
            float32v ycf = ycfBase;
            int32v yc = ycBase;

            for( int yi = 0; yi < 3; yi++ )
            {
                float32v zcf = zcfBase;
                int32v zc = zcBase;

                for( int zi = 0; zi < 3; zi++ )
                {
                    int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc );

                    // Three 10-bit fields of the hash, centred around zero.
                    float32v xd = FS_Converti32_f32( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
                    float32v yd = FS_Converti32_f32( ( hash >> 10 ) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
                    float32v zd = FS_Converti32_f32( ( hash >> 20 ) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );

                    float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, zd * zd ) ) );
                    xd = FS_FMulAdd_f32( xd, invMag, xcf );
                    yd = FS_FMulAdd_f32( yd, invMag, ycf );
                    zd = FS_FMulAdd_f32( zd, invMag, zcf );

                    float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS_Converti32_f32( hash );
                    float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd );

                    InsertCell( value, distance, newCellValue, newDistance );

                    zcf += float32v( 1 );
                    zc += int32v( FnPrimes::Z );
                }
                ycf += float32v( 1 );
                yc += int32v( FnPrimes::Y );
            }
            xcf += float32v( 1 );
            xc += int32v( FnPrimes::X );
        }

        return value[mValueIndex];
    }

    // 4D: visits a 3x3x3x3 block of cells.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z , float32v w ) const final
    {
        float32v jitter = float32v( kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w );

        std::array<float32v, kMaxDistanceCount> value;
        std::array<float32v, kMaxDistanceCount> distance;
        value.fill( float32v( INFINITY ) );
        distance.fill( float32v( INFINITY ) );

        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
        int32v wcBase = FS_Convertf32_i32( w ) + int32v( -1 );

        float32v xcf = FS_Converti32_f32( xc ) - x;
        float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
        float32v zcfBase = FS_Converti32_f32( zcBase ) - z;
        float32v wcfBase = FS_Converti32_f32( wcBase ) - w;

        xc *= int32v( FnPrimes::X );
        ycBase *= int32v( FnPrimes::Y );
        zcBase *= int32v( FnPrimes::Z );
        wcBase *= int32v( FnPrimes::W );

        for( int xi = 0; xi < 3; xi++ )
        {
            float32v ycf = ycfBase;
            int32v yc = ycBase;

            for( int yi = 0; yi < 3; yi++ )
            {
                float32v zcf = zcfBase;
                int32v zc = zcBase;

                for( int zi = 0; zi < 3; zi++ )
                {
                    float32v wcf = wcfBase;
                    int32v wc = wcBase;

                    for( int wi = 0; wi < 3; wi++ )
                    {
                        int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc, wc );

                        // Four 8-bit fields of the hash, centred around zero.
                        float32v xd = FS_Converti32_f32( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                        float32v yd = FS_Converti32_f32( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                        float32v zd = FS_Converti32_f32( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                        float32v wd = FS_Converti32_f32( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );

                        float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, FS_FMulAdd_f32( zd, zd, wd * wd ) ) ) );
                        xd = FS_FMulAdd_f32( xd, invMag, xcf );
                        yd = FS_FMulAdd_f32( yd, invMag, ycf );
                        zd = FS_FMulAdd_f32( zd, invMag, zcf );
                        wd = FS_FMulAdd_f32( wd, invMag, wcf );

                        float32v newCellValue = float32v( (float)(1.0 / INT_MAX) ) * FS_Converti32_f32( hash );
                        float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd, wd );

                        InsertCell( value, distance, newCellValue, newDistance );

                        wcf += float32v( 1 );
                        wc += int32v( FnPrimes::W );
                    }
                    zcf += float32v( 1 );
                    zc += int32v( FnPrimes::Z );
                }
                ycf += float32v( 1 );
                yc += int32v( FnPrimes::Y );
            }
            xcf += float32v( 1 );
            xc += int32v( FnPrimes::X );
        }

        return value[mValueIndex];
    }

    // Inserts one candidate (cell value + distance) into the per-lane lists, keeping slots
    // 0 .. mValueIndex sorted by ascending distance.
    //
    // At each slot the closer of { candidate, stored entry } stays in the slot and the other one
    // is carried on to the next slot. Slot i only ever depends on slots below it, so the walk can
    // stop once slot mValueIndex has been updated: that is the only slot the callers read.
    //
    // The previous inline version of this loop stopped one slot later (break on i > mValueIndex),
    // which touched index kMaxDistanceCount — one past the end of both arrays — whenever
    // mValueIndex was kMaxDistanceCount - 1. The loop bound below additionally guarantees that no
    // slot outside the arrays is ever accessed here.
    FS_INLINE void InsertCell( std::array<float32v, kMaxDistanceCount>& value, std::array<float32v, kMaxDistanceCount>& distance, float32v newCellValue, float32v newDistance ) const
    {
        for( int i = 0; i < kMaxDistanceCount; i++ )
        {
            mask32v closer = newDistance < distance[i];

            float32v localDistance = distance[i];
            float32v localCellValue = value[i];

            distance[i] = FS_Select_f32( closer, newDistance, distance[i] );
            value[i] = FS_Select_f32( closer, newCellValue, value[i] );

            if( i >= mValueIndex )
            {
                break;
            }

            // Carry the displaced entry (or the unchanged candidate) to the next slot.
            newDistance = FS_Select_f32( closer, localDistance, newDistance );
            newCellValue = FS_Select_f32( closer, localCellValue, newCellValue );
        }
    }
};
// Implementation of the CellularDistance node for the FastSIMD level FS.
//
// Each Gen overload walks the 3^N cells around the sample position (N = 2, 3 or 4), derives a
// jittered feature point per cell from a hash, keeps the kMaxDistanceCount smallest distances in
// ascending order, and lets GetReturn combine the two selected ones.
template<typename FS>
class FS_T<FastNoise::CellularDistance, FS> : public virtual FastNoise::CellularDistance, public FS_T<FastNoise::Cellular, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;

    // 2D: visits a 3x3 block of cells.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
    {
        float32v jitter = float32v( kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );

        std::array<float32v, kMaxDistanceCount> distance;
        distance.fill( float32v( INFINITY ) );

        // Start one cell below the converted position on each axis.
        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );

        // Offset from the sample position to the current cell, per axis.
        float32v xcf = FS_Converti32_f32( xc ) - x;
        float32v ycfBase = FS_Converti32_f32( ycBase ) - y;

        // From here on the integer coordinates are only used as hash inputs.
        xc *= int32v( FnPrimes::X );
        ycBase *= int32v( FnPrimes::Y );

        for( int xi = 0; xi < 3; xi++ )
        {
            float32v ycf = ycfBase;
            int32v yc = ycBase;

            for ( int yi = 0; yi < 3; yi++ )
            {
                int32v hash = FnUtils::HashPrimesHB( seed, xc, yc );

                // Two 16-bit fields of the hash, centred around zero, form the jitter direction.
                float32v xd = FS_Converti32_f32( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
                float32v yd = FS_Converti32_f32( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );

                // Rescale the direction to length `jitter`, then add the offset to the cell.
                float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, yd * yd ) );
                xd = FS_FMulAdd_f32( xd, invMag, xcf );
                yd = FS_FMulAdd_f32( yd, invMag, ycf );

                float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd );

                InsertDistance( distance, newDistance );

                ycf += float32v( 1 );
                yc += int32v( FnPrimes::Y );
            }
            xcf += float32v( 1 );
            xc += int32v( FnPrimes::X );
        }

        return GetReturn( distance );
    }

    // 3D: visits a 3x3x3 block of cells.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
    {
        float32v jitter = float32v( kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z );

        std::array<float32v, kMaxDistanceCount> distance;
        distance.fill( float32v( INFINITY ) );

        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );

        float32v xcf = FS_Converti32_f32( xc ) - x;
        float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
        float32v zcfBase = FS_Converti32_f32( zcBase ) - z;

        xc *= int32v( FnPrimes::X );
        ycBase *= int32v( FnPrimes::Y );
        zcBase *= int32v( FnPrimes::Z );

        for( int xi = 0; xi < 3; xi++ )
        {
            float32v ycf = ycfBase;
            int32v yc = ycBase;

            for( int yi = 0; yi < 3; yi++ )
            {
                float32v zcf = zcfBase;
                int32v zc = zcBase;

                for( int zi = 0; zi < 3; zi++ )
                {
                    int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc );

                    // Three 10-bit fields of the hash, centred around zero.
                    float32v xd = FS_Converti32_f32( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
                    float32v yd = FS_Converti32_f32( (hash >> 10) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
                    float32v zd = FS_Converti32_f32( (hash >> 20) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );

                    float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, zd * zd ) ) );
                    xd = FS_FMulAdd_f32( xd, invMag, xcf );
                    yd = FS_FMulAdd_f32( yd, invMag, ycf );
                    zd = FS_FMulAdd_f32( zd, invMag, zcf );

                    float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd );

                    InsertDistance( distance, newDistance );

                    zcf += float32v( 1 );
                    zc += int32v( FnPrimes::Z );
                }
                ycf += float32v( 1 );
                yc += int32v( FnPrimes::Y );
            }
            xcf += float32v( 1 );
            xc += int32v( FnPrimes::X );
        }

        return GetReturn( distance );
    }

    // 4D: visits a 3x3x3x3 block of cells.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
    {
        float32v jitter = float32v( kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w );

        std::array<float32v, kMaxDistanceCount> distance;
        distance.fill( float32v( INFINITY ) );

        int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
        int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
        int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
        int32v wcBase = FS_Convertf32_i32( w ) + int32v( -1 );

        float32v xcf = FS_Converti32_f32( xc ) - x;
        float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
        float32v zcfBase = FS_Converti32_f32( zcBase ) - z;
        float32v wcfBase = FS_Converti32_f32( wcBase ) - w;

        xc *= int32v( FnPrimes::X );
        ycBase *= int32v( FnPrimes::Y );
        zcBase *= int32v( FnPrimes::Z );
        wcBase *= int32v( FnPrimes::W );

        for( int xi = 0; xi < 3; xi++ )
        {
            float32v ycf = ycfBase;
            int32v yc = ycBase;

            for( int yi = 0; yi < 3; yi++ )
            {
                float32v zcf = zcfBase;
                int32v zc = zcBase;

                for( int zi = 0; zi < 3; zi++ )
                {
                    float32v wcf = wcfBase;
                    int32v wc = wcBase;

                    for( int wi = 0; wi < 3; wi++ )
                    {
                        int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc, wc );

                        // Four 8-bit fields of the hash, centred around zero.
                        float32v xd = FS_Converti32_f32( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                        float32v yd = FS_Converti32_f32( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                        float32v zd = FS_Converti32_f32( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
                        float32v wd = FS_Converti32_f32( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );

                        float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, FS_FMulAdd_f32( zd, zd, wd * wd ) ) ) );
                        xd = FS_FMulAdd_f32( xd, invMag, xcf );
                        yd = FS_FMulAdd_f32( yd, invMag, ycf );
                        zd = FS_FMulAdd_f32( zd, invMag, zcf );
                        wd = FS_FMulAdd_f32( wd, invMag, wcf );

                        float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd, wd );

                        InsertDistance( distance, newDistance );

                        wcf += float32v( 1 );
                        wc += int32v( FnPrimes::W );
                    }
                    zcf += float32v( 1 );
                    zc += int32v( FnPrimes::Z );
                }
                ycf += float32v( 1 );
                yc += int32v( FnPrimes::Y );
            }
            xcf += float32v( 1 );
            xc += int32v( FnPrimes::X );
        }

        return GetReturn( distance );
    }

    // Merges one candidate distance into the ascending list of smallest distances.
    // Walking from the last slot down, each slot becomes
    // max( min( slot, candidate ), previous slot ); slot 0 becomes min( slot 0, candidate ).
    FS_INLINE void InsertDistance( std::array<float32v, kMaxDistanceCount>& distance, float32v newDistance ) const
    {
        for( int i = kMaxDistanceCount - 1; i > 0; i-- )
        {
            distance[i] = FS_Max_f32( FS_Min_f32( distance[i], newDistance ), distance[i - 1] );
        }

        distance[0] = FS_Min_f32( distance[0], newDistance );
    }

    // Combines the distances at mDistanceIndex0 and mDistanceIndex1 according to mReturnType.
    //
    // For the Euclidean distance function each selected distance d is first replaced by
    // d * FS_InvSqrt_f32( d ).
    // NOTE(review): this implies CalcDistance returns the squared distance for Euclidean — confirm.
    //
    // The conversion is applied to local copies, one per index. The earlier in-place version
    // modified distance[mDistanceIndex0] and distance[mDistanceIndex1] directly, so when both
    // indices named the same slot that slot was converted twice and every return type produced a
    // wrong result. The array passed in is no longer modified.
    FS_INLINE float32v GetReturn( std::array<float32v, kMaxDistanceCount>& distance ) const
    {
        float32v distance0 = distance[mDistanceIndex0];
        float32v distance1 = distance[mDistanceIndex1];

        if( mDistanceFunction == FastNoise::DistanceFunction::Euclidean )
        {
            distance0 *= FS_InvSqrt_f32( distance0 );
            distance1 *= FS_InvSqrt_f32( distance1 );
        }

        switch( mReturnType )
        {
        default:
        case ReturnType::Index0:
        {
            return distance0;
        }
        case ReturnType::Index0Add1:
        {
            return distance0 + distance1;
        }
        case ReturnType::Index0Sub1:
        {
            return distance0 - distance1;
        }
        case ReturnType::Index0Mul1:
        {
            return distance0 * distance1;
        }
        case ReturnType::Index0Div1:
        {
            return distance0 * FS_Reciprocal_f32( distance1 );
        }
        }
    }
};
// SIMD implementation of the CellularLookup node.
//
// Each Gen overload scans the 3 cells per axis around the sample position, places one
// jittered feature point in every cell, remembers the coordinates of the closest
// feature point, and finally returns the mLookup source evaluated at that point
// (scaled by mLookupFreq) with the seed offset by one.
template<typename FS>
class FS_T<FastNoise::CellularLookup, FS> : public virtual FastNoise::CellularLookup, public FS_T<FastNoise::Cellular, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
// 2D lookup: 3x3 cell neighbourhood, 16 hash bits per axis for the jitter direction.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
{
float32v jitter = float32v( kJitter2D ) * this->GetSourceValue( mJitterModifier, seed, x, y );
float32v distance( FLT_MAX );
// NOTE(review): cellX/cellY start uninitialised; they are only meaningful once some
// newDistance compares below FLT_MAX - confirm float32v's default constructor is benign.
float32v cellX, cellY;
// Start one cell below the converted sample coordinate.
int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
// Offset from the sample position to the current cell coordinate.
float32v xcf = FS_Converti32_f32( xc ) - x;
float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
// Pre-multiply by the hashing primes so stepping a cell is a single add.
xc *= int32v( FnPrimes::X );
ycBase *= int32v( FnPrimes::Y );
for( int xi = 0; xi < 3; xi++ )
{
float32v ycf = ycfBase;
int32v yc = ycBase;
for( int yi = 0; yi < 3; yi++ )
{
int32v hash = FnUtils::HashPrimesHB( seed, xc, yc );
// Low / high 16 bits become a direction centred on zero.
float32v xd = FS_Converti32_f32( hash & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
float32v yd = FS_Converti32_f32( (hash >> 16) & int32v( 0xffff ) ) - float32v( 0xffff / 2.0f );
// Normalise the direction and scale it by the jitter amount.
float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, yd * yd ) );
// xd/yd now hold the vector from the sample position to the feature point.
xd = FS_FMulAdd_f32( xd, invMag, xcf );
yd = FS_FMulAdd_f32( yd, invMag, ycf );
float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd );
mask32v closer = newDistance < distance;
distance = FS_Min_f32( newDistance, distance );
// Keep the absolute position of the closest feature point per lane.
cellX = FS_Select_f32( closer, xd + x, cellX );
cellY = FS_Select_f32( closer, yd + y, cellY );
ycf += float32v( 1 );
yc += int32v( FnPrimes::Y );
}
xcf += float32v( 1 );
xc += int32v( FnPrimes::X );
}
// seed - (-1) is seed + 1: the lookup source is sampled with a different seed.
return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ) );
}
// 3D lookup: 3x3x3 cell neighbourhood, 10 hash bits per axis for the jitter direction.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
{
float32v jitter = float32v( kJitter3D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z );
float32v distance( FLT_MAX );
float32v cellX, cellY, cellZ;
int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
float32v xcf = FS_Converti32_f32( xc ) - x;
float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
float32v zcfBase = FS_Converti32_f32( zcBase ) - z;
xc *= int32v( FnPrimes::X );
ycBase *= int32v( FnPrimes::Y );
zcBase *= int32v( FnPrimes::Z );
for( int xi = 0; xi < 3; xi++ )
{
float32v ycf = ycfBase;
int32v yc = ycBase;
for( int yi = 0; yi < 3; yi++ )
{
float32v zcf = zcfBase;
int32v zc = zcBase;
for( int zi = 0; zi < 3; zi++ )
{
int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc );
// Three 10-bit fields of the hash, each centred on zero.
float32v xd = FS_Converti32_f32( hash & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
float32v yd = FS_Converti32_f32( (hash >> 10) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
float32v zd = FS_Converti32_f32( (hash >> 20) & int32v( 0x3ff ) ) - float32v( 0x3ff / 2.0f );
float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, zd * zd ) ) );
xd = FS_FMulAdd_f32( xd, invMag, xcf );
yd = FS_FMulAdd_f32( yd, invMag, ycf );
zd = FS_FMulAdd_f32( zd, invMag, zcf );
float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd );
mask32v closer = newDistance < distance;
distance = FS_Min_f32( newDistance, distance );
cellX = FS_Select_f32( closer, xd + x, cellX );
cellY = FS_Select_f32( closer, yd + y, cellY );
cellZ = FS_Select_f32( closer, zd + z, cellZ );
zcf += float32v( 1 );
zc += int32v( FnPrimes::Z );
}
ycf += float32v( 1 );
yc += int32v( FnPrimes::Y );
}
xcf += float32v( 1 );
xc += int32v( FnPrimes::X );
}
return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ), cellZ * float32v( mLookupFreq ) );
}
// 4D lookup: 3x3x3x3 cell neighbourhood, 8 hash bits per axis for the jitter direction.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
{
float32v jitter = float32v( kJitter4D ) * this->GetSourceValue( mJitterModifier, seed, x, y, z, w );
float32v distance( FLT_MAX );
float32v cellX, cellY, cellZ, cellW;
int32v xc = FS_Convertf32_i32( x ) + int32v( -1 );
int32v ycBase = FS_Convertf32_i32( y ) + int32v( -1 );
int32v zcBase = FS_Convertf32_i32( z ) + int32v( -1 );
int32v wcBase = FS_Convertf32_i32( w ) + int32v( -1 );
float32v xcf = FS_Converti32_f32( xc ) - x;
float32v ycfBase = FS_Converti32_f32( ycBase ) - y;
float32v zcfBase = FS_Converti32_f32( zcBase ) - z;
float32v wcfBase = FS_Converti32_f32( wcBase ) - w;
xc *= int32v( FnPrimes::X );
ycBase *= int32v( FnPrimes::Y );
zcBase *= int32v( FnPrimes::Z );
wcBase *= int32v( FnPrimes::W );
for( int xi = 0; xi < 3; xi++ )
{
float32v ycf = ycfBase;
int32v yc = ycBase;
for( int yi = 0; yi < 3; yi++ )
{
float32v zcf = zcfBase;
int32v zc = zcBase;
for( int zi = 0; zi < 3; zi++ )
{
float32v wcf = wcfBase;
int32v wc = wcBase;
for( int wi = 0; wi < 3; wi++ )
{
int32v hash = FnUtils::HashPrimesHB( seed, xc, yc, zc, wc );
// Four 8-bit fields of the hash, each centred on zero.
float32v xd = FS_Converti32_f32( hash & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
float32v yd = FS_Converti32_f32( (hash >> 8) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
float32v zd = FS_Converti32_f32( (hash >> 16) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
float32v wd = FS_Converti32_f32( (hash >> 24) & int32v( 0xff ) ) - float32v( 0xff / 2.0f );
float32v invMag = jitter * FS_InvSqrt_f32( FS_FMulAdd_f32( xd, xd, FS_FMulAdd_f32( yd, yd, FS_FMulAdd_f32( zd, zd, wd * wd ) ) ) );
xd = FS_FMulAdd_f32( xd, invMag, xcf );
yd = FS_FMulAdd_f32( yd, invMag, ycf );
zd = FS_FMulAdd_f32( zd, invMag, zcf );
wd = FS_FMulAdd_f32( wd, invMag, wcf );
float32v newDistance = FnUtils::CalcDistance( mDistanceFunction, xd, yd, zd, wd );
mask32v closer = newDistance < distance;
distance = FS_Min_f32( newDistance, distance );
cellX = FS_Select_f32( closer, xd + x, cellX );
cellY = FS_Select_f32( closer, yd + y, cellY );
cellZ = FS_Select_f32( closer, zd + z, cellZ );
cellW = FS_Select_f32( closer, wd + w, cellW );
wcf += float32v( 1 );
wc += int32v( FnPrimes::W );
}
zcf += float32v( 1 );
zc += int32v( FnPrimes::Z );
}
ycf += float32v( 1 );
yc += int32v( FnPrimes::Y );
}
xcf += float32v( 1 );
xc += int32v( FnPrimes::X );
}
return this->GetSourceValue( mLookup, seed - int32v( -1 ), cellX * float32v( mLookupFreq ), cellY * float32v( mLookupFreq ), cellZ * float32v( mLookupFreq ), cellW * float32v( mLookupFreq ) );
}
};

View File

@ -0,0 +1,37 @@
#pragma once
#include "Generator.h"
namespace FastNoise
{
// Abstract base for domain-warp nodes: holds the generator to sample after warping,
// the warp amplitude (constant or generator driven) and the warp frequency.
class DomainWarp : public virtual Generator
{
public:
// Sets the generator that is evaluated at the warped position.
void SetSource( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
// Drives the warp amplitude from another generator.
void SetWarpAmplitude( SmartNodeArg<> gen ) { this->SetSourceMemberVariable( mWarpAmplitude, gen ); }
// Sets a constant warp amplitude.
void SetWarpAmplitude( float value ) { mWarpAmplitude = value; }
// Sets the factor applied to the position before it is fed to the warp function.
void SetWarpFrequency( float value ) { mWarpFrequency = value; }
protected:
GeneratorSource mSource;
HybridSource mWarpAmplitude = 1.0f;
float mWarpFrequency = 0.5f;
// NOTE(review): FASTNOISE_METADATA_ABSTRACT is defined elsewhere; it appears to open the
// nested Metadata struct that the first "};" below closes - confirm against the macro.
FASTNOISE_METADATA_ABSTRACT( Generator )
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Domain Warp" );
// Defaults registered here mirror the member initialisers above.
this->AddGeneratorSource( "Source", &DomainWarp::SetSource );
this->AddHybridSource( "Warp Amplitude", 1.0f, &DomainWarp::SetWarpAmplitude, &DomainWarp::SetWarpAmplitude );
this->AddVariable( "Warp Frequency", 0.5f, &DomainWarp::SetWarpFrequency );
}
};
};
// Concrete domain-warp node; adds no members of its own and reuses the DomainWarp
// metadata constructor unchanged.
class DomainWarpGradient : public virtual DomainWarp
{
FASTNOISE_METADATA( DomainWarp )
using DomainWarp::Metadata::Metadata;
};
};
}

View File

@ -0,0 +1,181 @@
#include "FastSIMD/InlInclude.h"
#include "DomainWarp.h"
#include "Utils.inl"
// SIMD side of the abstract DomainWarp node. Concrete warps implement the Warp
// overloads; GenT applies one warp and then samples the source.
template<typename FS>
class FS_T<FastNoise::DomainWarp, FS> : public virtual FastNoise::DomainWarp, public FS_T<FastNoise::Generator, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
FASTNOISE_IMPL_GEN_T;
// Warps the position once and returns the source value at the warped position.
template<typename... P>
FS_INLINE float32v GenT( int32v seed, P... pos ) const
{
// The frequency-scaled position is the warp input; the pos pack itself is passed as
// the in/out arguments, so pos is displaced in place.
Warp( seed, this->GetSourceValue( mWarpAmplitude, seed, pos... ), (pos * float32v( mWarpFrequency ))..., pos... );
return this->GetSourceValue( mSource, seed, pos...);
}
public:
// Read-only accessors for the configured warp parameters and the wrapped source.
float GetWarpFrequency() const { return mWarpFrequency; }
const FastNoise::HybridSource& GetWarpAmplitude() const { return mWarpAmplitude; }
const FastNoise::GeneratorSource& GetWarpSource() const { return mSource; }
// Warp functions for 2, 3 and 4 dimensions. x/y/z/w are the warp input coordinates;
// the *Out references are the coordinates to displace.
virtual void FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const = 0;
virtual void FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const = 0;
virtual void FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const = 0;
};
// SIMD implementation of the gradient domain warp.
//
// Every Warp overload hashes the corners of the lattice cell containing the input
// position, turns bit fields of each hash into one value per axis, interpolates those
// values across the cell and adds the re-centred, amplitude-scaled result to the
// output coordinates.
template<typename FS>
class FS_T<FastNoise::DomainWarpGradient, FS> : public virtual FastNoise::DomainWarpGradient, public FS_T<FastNoise::DomainWarp, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
public:
// 2D warp: 4 cell corners, 16 hash bits per axis.
void FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v& xOut, float32v& yOut ) const final
{
float32v xs = FS_Floor_f32( x );
float32v ys = FS_Floor_f32( y );
// Lower cell corner, pre-multiplied by the hashing primes; the upper corner is one prime further.
int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
int32v x1 = x0 + int32v( FnPrimes::X );
int32v y1 = y0 + int32v( FnPrimes::Y );
// xs/ys are reused as the interpolation weights derived from the fractional position.
xs = FnUtils::InterpHermite( x - xs );
ys = FnUtils::InterpHermite( y - ys );
// Declares hashAB, xAB and yAB for the corner (x_A, y_B).
#define GRADIENT_COORD( _x, _y )\
int32v hash##_x##_y = FnUtils::HashPrimesHB(seed, x##_x, y##_y );\
float32v x##_x##_y = FS_Converti32_f32( hash##_x##_y & int32v( 0xffff ) );\
float32v y##_x##_y = FS_Converti32_f32( (hash##_x##_y >> 16) & int32v( 0xffff ) );
GRADIENT_COORD( 0, 0 );
GRADIENT_COORD( 1, 0 );
GRADIENT_COORD( 0, 1 );
GRADIENT_COORD( 1, 1 );
#undef GRADIENT_COORD
// Scales the centred 16-bit value by warpAmp / (0xffff / 2).
float32v normalise = warpAmp * float32v( 1.0f / (0xffff / 2.0f) );
xOut = FS_FMulAdd_f32( FnUtils::Lerp( FnUtils::Lerp( x00, x10, xs ), FnUtils::Lerp( x01, x11, xs ), ys ) - float32v( 0xffff / 2.0f ), normalise, xOut );
yOut = FS_FMulAdd_f32( FnUtils::Lerp( FnUtils::Lerp( y00, y10, xs ), FnUtils::Lerp( y01, y11, xs ), ys ) - float32v( 0xffff / 2.0f ), normalise, yOut );
}
// 3D warp: 8 cell corners, 10 hash bits per axis.
void FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v& xOut, float32v& yOut, float32v& zOut ) const final
{
float32v xs = FS_Floor_f32( x );
float32v ys = FS_Floor_f32( y );
float32v zs = FS_Floor_f32( z );
int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
int32v x1 = x0 + int32v( FnPrimes::X );
int32v y1 = y0 + int32v( FnPrimes::Y );
int32v z1 = z0 + int32v( FnPrimes::Z );
xs = FnUtils::InterpHermite( x - xs );
ys = FnUtils::InterpHermite( y - ys );
zs = FnUtils::InterpHermite( z - zs );
// Declares hashABC, xABC, yABC and zABC for the corner (x_A, y_B, z_C).
#define GRADIENT_COORD( _x, _y, _z )\
int32v hash##_x##_y##_z = FnUtils::HashPrimesHB( seed, x##_x, y##_y, z##_z );\
float32v x##_x##_y##_z = FS_Converti32_f32( hash##_x##_y##_z & int32v( 0x3ff ) );\
float32v y##_x##_y##_z = FS_Converti32_f32( (hash##_x##_y##_z >> 10) & int32v( 0x3ff ) );\
float32v z##_x##_y##_z = FS_Converti32_f32( (hash##_x##_y##_z >> 20) & int32v( 0x3ff ) );
GRADIENT_COORD( 0, 0, 0 );
GRADIENT_COORD( 1, 0, 0 );
GRADIENT_COORD( 0, 1, 0 );
GRADIENT_COORD( 1, 1, 0 );
GRADIENT_COORD( 0, 0, 1 );
GRADIENT_COORD( 1, 0, 1 );
GRADIENT_COORD( 0, 1, 1 );
GRADIENT_COORD( 1, 1, 1 );
#undef GRADIENT_COORD
// Interpolate across x and y on the z0 face (?0z) and the z1 face (?1z).
float32v x0z = FnUtils::Lerp( FnUtils::Lerp( x000, x100, xs ), FnUtils::Lerp( x010, x110, xs ), ys );
float32v y0z = FnUtils::Lerp( FnUtils::Lerp( y000, y100, xs ), FnUtils::Lerp( y010, y110, xs ), ys );
float32v z0z = FnUtils::Lerp( FnUtils::Lerp( z000, z100, xs ), FnUtils::Lerp( z010, z110, xs ), ys );
float32v x1z = FnUtils::Lerp( FnUtils::Lerp( x001, x101, xs ), FnUtils::Lerp( x011, x111, xs ), ys );
float32v y1z = FnUtils::Lerp( FnUtils::Lerp( y001, y101, xs ), FnUtils::Lerp( y011, y111, xs ), ys );
float32v z1z = FnUtils::Lerp( FnUtils::Lerp( z001, z101, xs ), FnUtils::Lerp( z011, z111, xs ), ys );
// Scales the centred 10-bit value by warpAmp / (0x3ff / 2).
float32v normalise = warpAmp * float32v( 1.0f / (0x3ff / 2.0f) );
xOut = FS_FMulAdd_f32( FnUtils::Lerp( x0z, x1z, zs ) - float32v( 0x3ff / 2.0f ), normalise, xOut );
yOut = FS_FMulAdd_f32( FnUtils::Lerp( y0z, y1z, zs ) - float32v( 0x3ff / 2.0f ), normalise, yOut );
zOut = FS_FMulAdd_f32( FnUtils::Lerp( z0z, z1z, zs ) - float32v( 0x3ff / 2.0f ), normalise, zOut );
}
// 4D warp: 16 cell corners, 8 hash bits per axis.
void FS_VECTORCALL Warp( int32v seed, float32v warpAmp, float32v x, float32v y, float32v z, float32v w, float32v& xOut, float32v& yOut, float32v& zOut, float32v& wOut ) const final
{
float32v xs = FS_Floor_f32( x );
float32v ys = FS_Floor_f32( y );
float32v zs = FS_Floor_f32( z );
float32v ws = FS_Floor_f32( w );
int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
int32v w0 = FS_Convertf32_i32( ws ) * int32v( FnPrimes::W );
int32v x1 = x0 + int32v( FnPrimes::X );
int32v y1 = y0 + int32v( FnPrimes::Y );
int32v z1 = z0 + int32v( FnPrimes::Z );
int32v w1 = w0 + int32v( FnPrimes::W );
xs = FnUtils::InterpHermite( x - xs );
ys = FnUtils::InterpHermite( y - ys );
zs = FnUtils::InterpHermite( z - zs );
ws = FnUtils::InterpHermite( w - ws );
// Declares hashABCD, xABCD, yABCD, zABCD and wABCD for the corner (x_A, y_B, z_C, w_D).
#define GRADIENT_COORD( _x, _y, _z, _w )\
int32v hash##_x##_y##_z##_w = FnUtils::HashPrimesHB( seed, x##_x, y##_y, z##_z, w##_w );\
float32v x##_x##_y##_z##_w = FS_Converti32_f32( hash##_x##_y##_z##_w & int32v( 0xff ) );\
float32v y##_x##_y##_z##_w = FS_Converti32_f32( (hash##_x##_y##_z##_w >> 8) & int32v( 0xff ) );\
float32v z##_x##_y##_z##_w = FS_Converti32_f32( (hash##_x##_y##_z##_w >> 16) & int32v( 0xff ) );\
float32v w##_x##_y##_z##_w = FS_Converti32_f32( (hash##_x##_y##_z##_w >> 24) & int32v( 0xff ) );
GRADIENT_COORD( 0, 0, 0, 0 );
GRADIENT_COORD( 1, 0, 0, 0 );
GRADIENT_COORD( 0, 1, 0, 0 );
GRADIENT_COORD( 1, 1, 0, 0 );
GRADIENT_COORD( 0, 0, 1, 0 );
GRADIENT_COORD( 1, 0, 1, 0 );
GRADIENT_COORD( 0, 1, 1, 0 );
GRADIENT_COORD( 1, 1, 1, 0 );
GRADIENT_COORD( 0, 0, 0, 1 );
GRADIENT_COORD( 1, 0, 0, 1 );
GRADIENT_COORD( 0, 1, 0, 1 );
GRADIENT_COORD( 1, 1, 0, 1 );
GRADIENT_COORD( 0, 0, 1, 1 );
GRADIENT_COORD( 1, 0, 1, 1 );
GRADIENT_COORD( 0, 1, 1, 1 );
GRADIENT_COORD( 1, 1, 1, 1 );
#undef GRADIENT_COORD
// Interpolate across x, y and z for the w0 corners (?0w) and the w1 corners (?1w).
float32v x0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( x0000, x1000, xs ), FnUtils::Lerp( x0100, x1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( x0010, x1010, xs ), FnUtils::Lerp( x0110, x1110, xs ), ys ), zs );
float32v y0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( y0000, y1000, xs ), FnUtils::Lerp( y0100, y1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( y0010, y1010, xs ), FnUtils::Lerp( y0110, y1110, xs ), ys ), zs );
float32v z0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( z0000, z1000, xs ), FnUtils::Lerp( z0100, z1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( z0010, z1010, xs ), FnUtils::Lerp( z0110, z1110, xs ), ys ), zs );
float32v w0w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( w0000, w1000, xs ), FnUtils::Lerp( w0100, w1100, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( w0010, w1010, xs ), FnUtils::Lerp( w0110, w1110, xs ), ys ), zs );
float32v x1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( x0001, x1001, xs ), FnUtils::Lerp( x0101, x1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( x0011, x1011, xs ), FnUtils::Lerp( x0111, x1111, xs ), ys ), zs );
float32v y1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( y0001, y1001, xs ), FnUtils::Lerp( y0101, y1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( y0011, y1011, xs ), FnUtils::Lerp( y0111, y1111, xs ), ys ), zs );
float32v z1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( z0001, z1001, xs ), FnUtils::Lerp( z0101, z1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( z0011, z1011, xs ), FnUtils::Lerp( z0111, z1111, xs ), ys ), zs );
float32v w1w = FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp( w0001, w1001, xs ), FnUtils::Lerp( w0101, w1101, xs ), ys ), FnUtils::Lerp( FnUtils::Lerp( w0011, w1011, xs ), FnUtils::Lerp( w0111, w1111, xs ), ys ), zs );
// Scales the centred 8-bit value by warpAmp / (0xff / 2).
float32v normalise = warpAmp * float32v( 1.0f / (0xff / 2.0f) );
xOut = FS_FMulAdd_f32( FnUtils::Lerp( x0w, x1w, ws ) - float32v( 0xff / 2.0f ), normalise, xOut );
yOut = FS_FMulAdd_f32( FnUtils::Lerp( y0w, y1w, ws ) - float32v( 0xff / 2.0f ), normalise, yOut );
zOut = FS_FMulAdd_f32( FnUtils::Lerp( z0w, z1w, ws ) - float32v( 0xff / 2.0f ), normalise, zOut );
wOut = FS_FMulAdd_f32( FnUtils::Lerp( w0w, w1w, ws ) - float32v( 0xff / 2.0f ), normalise, wOut );
}
};

View File

@ -0,0 +1,26 @@
#pragma once
#include "Fractal.h"
#include "DomainWarp.h"
namespace FastNoise
{
// Fractal node whose source must be a DomainWarp; the source slot is registered under
// the name "Domain Warp Source" and the node is additionally listed in the
// "Domain Warp" group.
class DomainWarpFractalProgressive : public virtual Fractal<DomainWarp>
{
FASTNOISE_METADATA( Fractal<DomainWarp> )
Metadata( const char* className ) : Fractal<DomainWarp>::Metadata( className, "Domain Warp Source" )
{
groups.push_back( "Domain Warp" );
}
};
};
// Fractal node whose source must be a DomainWarp; the source slot is registered under
// the name "Domain Warp Source" and the node is additionally listed in the
// "Domain Warp" group. The class name keeps the library's existing spelling.
class DomainWarpFractalIndependant : public virtual Fractal<DomainWarp>
{
FASTNOISE_METADATA( Fractal<DomainWarp> )
Metadata( const char* className ) : Fractal<DomainWarp>::Metadata( className, "Domain Warp Source" )
{
groups.push_back( "Domain Warp" );
}
};
};
}

View File

@ -0,0 +1,71 @@
#include "FastSIMD/InlInclude.h"
#include "DomainWarpFractal.h"
// SIMD implementation of the progressive fractal domain warp: every octave warps the
// position produced by the previous octave, then the warp's own source is sampled at
// the final position.
template<typename FS>
class FS_T<FastNoise::DomainWarpFractalProgressive, FS> : public virtual FastNoise::DomainWarpFractalProgressive, public FS_T<FastNoise::Fractal<FastNoise::DomainWarp>, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        auto* warpNode = this->GetSourceSIMD( mSource );

        // Amplitude and gain are both sampled at the original, not yet warped, position.
        float32v octaveAmp = float32v( mFractalBounding ) * this->GetSourceValue( warpNode->GetWarpAmplitude(), seed, pos... );
        float32v octaveFreq = float32v( warpNode->GetWarpFrequency() );
        int32v octaveSeed = seed;

        float32v octaveGain = this->GetSourceValue( mGain, seed, pos... );
        float32v freqStep( mLacunarity );

        // First octave uses the base parameters untouched.
        warpNode->Warp( octaveSeed, octaveAmp, (pos * octaveFreq)..., pos... );

        for( int octave = 1; octave < mOctaves; ++octave )
        {
            octaveSeed -= int32v( -1 );   // i.e. seed + 1 per octave
            octaveFreq *= freqStep;
            octaveAmp *= octaveGain;

            // Input is the already warped position, output is the same pack.
            warpNode->Warp( octaveSeed, octaveAmp, (pos * octaveFreq)..., pos... );
        }

        return this->GetSourceValue( warpNode->GetWarpSource(), seed, pos... );
    }
};
// SIMD implementation of the independent fractal domain warp: every octave is driven
// by the original position, while the displacements accumulate in a separate copy that
// is finally used to sample the warp's own source.
template<typename FS>
class FS_T<FastNoise::DomainWarpFractalIndependant, FS> : public virtual FastNoise::DomainWarpFractalIndependant, public FS_T<FastNoise::Fractal<FastNoise::DomainWarp>, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        // The lambda receives the position twice: `noisePos` stays untouched and feeds
        // every octave, `warpPos` collects the displacement.
        return [this, seed] ( std::remove_reference_t<P>... noisePos, std::remove_reference_t<P>... warpPos )
        {
            auto* warpNode = this->GetSourceSIMD( mSource );

            float32v octaveAmp = float32v( mFractalBounding ) * this->GetSourceValue( warpNode->GetWarpAmplitude(), seed, noisePos... );
            float32v octaveFreq = float32v( warpNode->GetWarpFrequency() );
            int32v octaveSeed = seed;

            float32v octaveGain = this->GetSourceValue( mGain, seed, noisePos... );
            float32v freqStep( mLacunarity );

            // First octave uses the base parameters untouched.
            warpNode->Warp( octaveSeed, octaveAmp, (noisePos * octaveFreq)..., warpPos... );

            for( int octave = 1; octave < mOctaves; ++octave )
            {
                octaveSeed -= int32v( -1 );   // i.e. seed + 1 per octave
                octaveFreq *= freqStep;
                octaveAmp *= octaveGain;

                warpNode->Warp( octaveSeed, octaveAmp, (noisePos * octaveFreq)..., warpPos... );
            }

            return this->GetSourceValue( warpNode->GetWarpSource(), seed, warpPos... );

        } ( pos..., pos... );
    }
};

View File

@ -0,0 +1,103 @@
#pragma once
#include "Generator.h"
namespace FastNoise
{
// Abstract base for fractal nodes. T is the node type accepted as source (Generator by
// default). Stores gain, octave count and lacunarity, and keeps mFractalBounding in
// sync with gain and octave count.
template<typename T = Generator>
class Fractal : public virtual Generator
{
public:
// Sets the node that is sampled once per octave.
void SetSource( SmartNodeArg<T> gen ) { this->SetSourceMemberVariable( mSource, gen ); }
// Sets a constant gain and refreshes the bounding factor.
void SetGain( float value ) { mGain = value; CalculateFractalBounding(); }
// Drives the gain from a generator; the constant is reset to 1.0f first, which is the
// value CalculateFractalBounding() then reads.
void SetGain( SmartNodeArg<> gen ) { mGain = 1.0f; this->SetSourceMemberVariable( mGain, gen ); CalculateFractalBounding(); }
// Sets the number of octaves and refreshes the bounding factor.
void SetOctaveCount( int32_t value ) { mOctaves = value; CalculateFractalBounding(); }
// Sets the per-octave frequency multiplier.
void SetLacunarity( float value ) { mLacunarity = value; }
protected:
GeneratorSourceT<T> mSource;
HybridSource mGain = 0.5f;
int32_t mOctaves = 3;
float mLacunarity = 2.0f;
// 1 / (1 + 0.5 + 0.25): matches the default gain and octave count above.
float mFractalBounding = 1.0f / 1.75f;
// Recomputes mFractalBounding as 1 / (1 + g + g^2 + ... + g^(octaves-1)) with
// g = |mGain.constant|.
virtual void CalculateFractalBounding()
{
float gain = std::abs( mGain.constant );
float amp = gain;
float ampFractal = 1.0f;
for( int32_t i = 1; i < mOctaves; i++ )
{
ampFractal += amp;
amp *= gain;
}
mFractalBounding = 1.0f / ampFractal;
}
// NOTE(review): FASTNOISE_METADATA_ABSTRACT is defined elsewhere; it appears to open the
// nested Metadata struct that the first "};" below closes - confirm against the macro.
FASTNOISE_METADATA_ABSTRACT( Generator )
Metadata( const char* className, const char* sourceName = "Source" ) : Generator::Metadata( className )
{
groups.push_back( "Fractal" );
this->AddGeneratorSource( sourceName, &Fractal::SetSource );
this->AddHybridSource( "Gain", 0.5f, &Fractal::SetGain, &Fractal::SetGain );
// NOTE(review): the trailing 2 and 16 are presumably the allowed octave range - confirm
// against AddVariable.
this->AddVariable( "Octaves", 3, &Fractal::SetOctaveCount, 2, 16 );
this->AddVariable( "Lacunarity", 2.0f, &Fractal::SetLacunarity );
}
};
};
// Fractal node type with no extra settings; reuses the Fractal metadata constructor.
class FractalFBm : public virtual Fractal<>
{
FASTNOISE_METADATA( Fractal )
using Fractal::Metadata::Metadata;
};
};
// Fractal node type with no extra settings; reuses the Fractal metadata constructor.
class FractalBillow : public virtual Fractal<>
{
FASTNOISE_METADATA( Fractal )
using Fractal::Metadata::Metadata;
};
};
// Fractal node type with no extra settings; reuses the Fractal metadata constructor.
class FractalRidged : public virtual Fractal<>
{
FASTNOISE_METADATA( Fractal )
using Fractal::Metadata::Metadata;
};
};
// Fractal node with an additional per-octave weight amplitude and a matching
// normalisation factor (mWeightBounding).
class FractalRidgedMulti : public virtual Fractal<>
{
public:
// Sets the weight amplitude and refreshes both bounding factors.
void SetWeightAmplitude( float value ) { mWeightAmp = value; CalculateFractalBounding(); }
protected:
float mWeightAmp = 2.0f;
// 2 / (1 + 1/2 + 1/4): matches the default weight amplitude and octave count.
float mWeightBounding = 2.0f / 1.75f;
// Updates mFractalBounding via the base class, then recomputes mWeightBounding as
// 2 / (1 + 1/w + 1/w^2 + ... + 1/w^(octaves-1)) with w = mWeightAmp.
void CalculateFractalBounding() override
{
Fractal::CalculateFractalBounding();
float weight = 1.0f;
float totalWeight = weight;
for( int32_t i = 1; i < mOctaves; i++ )
{
weight *= mWeightAmp;
totalWeight += 1.0f / weight;
}
mWeightBounding = 2.0f / totalWeight;
}
FASTNOISE_METADATA( Fractal )
Metadata( const char* className ) : Fractal::Metadata( className )
{
this->AddVariable( "Weight Amplitude", 2.0f, &FractalRidgedMulti::SetWeightAmplitude );
}
};
};
}

View File

@ -0,0 +1,128 @@
#include "FastSIMD/InlInclude.h"
#include "Fractal.h"
// SIMD counterpart of the abstract Fractal<T> base. It adds nothing itself; it only
// joins Fractal<T> with the SIMD Generator base for the concrete fractal nodes.
template<typename FS, typename T>
class FS_T<FastNoise::Fractal<T>, FS> : public virtual FastNoise::Fractal<T>, public FS_T<FastNoise::Generator, FS>
{
};
// SIMD implementation of FractalFBm: sums the source over mOctaves octaves, scaling the
// position by the lacunarity and the amplitude by the gain each octave, and normalises
// the total with mFractalBounding.
template<typename FS>
class FS_T<FastNoise::FractalFBm, FS> : public virtual FastNoise::FractalFBm, public FS_T<FastNoise::Fractal<>, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        // Gain is sampled once, at the unscaled position.
        float32v octaveGain = this->GetSourceValue( mGain, seed, pos... );
        float32v total = this->GetSourceValue( mSource, seed, pos... );

        float32v freqStep( mLacunarity );
        float32v octaveAmp( 1 );

        for( int octave = 1; octave < mOctaves; ++octave )
        {
            seed -= int32v( -1 );   // i.e. seed + 1 per octave
            octaveAmp *= octaveGain;

            // Scale every coordinate in place before sampling this octave.
            ( ( pos *= freqStep ), ... );
            total += this->GetSourceValue( mSource, seed, pos... ) * octaveAmp;
        }

        return total * float32v( mFractalBounding );
    }
};
// SIMD implementation of FractalBillow: like FBm, but every octave contributes
// |source| * 2 - 1 instead of the raw source value.
template<typename FS>
class FS_T<FastNoise::FractalBillow, FS> : public virtual FastNoise::FractalBillow, public FS_T<FastNoise::Fractal<>, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v total = FS_Abs_f32( this->GetSourceValue( mSource, seed, pos... ) ) * float32v( 2 ) - float32v( 1 );
        // Gain is sampled once, at the unscaled position.
        float32v octaveGain = this->GetSourceValue( mGain, seed, pos... );

        float32v freqStep( mLacunarity );
        float32v octaveAmp( 1 );

        for( int octave = 1; octave < mOctaves; ++octave )
        {
            seed -= int32v( -1 );   // i.e. seed + 1 per octave
            octaveAmp *= octaveGain;

            // Scale every coordinate in place before sampling this octave.
            ( ( pos *= freqStep ), ... );
            total += (FS_Abs_f32( this->GetSourceValue( mSource, seed, pos... ) ) * float32v( 2 ) - float32v( 1 )) * octaveAmp;
        }

        return total * float32v( mFractalBounding );
    }
};
// SIMD implementation of FractalRidged: starts from 1 - |source| and subtracts the
// amplitude-weighted 1 - |source| of every further octave. The result is returned
// without applying mFractalBounding.
template<typename FS>
class FS_T<FastNoise::FractalRidged, FS> : public virtual FastNoise::FractalRidged, public FS_T<FastNoise::Fractal<>, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v total = float32v( 1 ) - FS_Abs_f32( this->GetSourceValue( mSource, seed, pos... ) );
        // Gain is sampled once, at the unscaled position.
        float32v octaveGain = this->GetSourceValue( mGain, seed, pos... );

        float32v freqStep( mLacunarity );
        float32v octaveAmp( 1 );

        for( int octave = 1; octave < mOctaves; ++octave )
        {
            seed -= int32v( -1 );   // i.e. seed + 1 per octave
            octaveAmp *= octaveGain;

            // Scale every coordinate in place before sampling this octave.
            ( ( pos *= freqStep ), ... );
            total -= (float32v( 1 ) - FS_Abs_f32( this->GetSourceValue( mSource, seed, pos... ) )) * octaveAmp;
        }

        return total;
    }
};
// SIMD implementation of FractalRidgedMulti.
//
// Each octave produces 1 - |source|, multiplied by an amplitude derived from the
// previous octave's value (scaled by gain * 6 and clamped to [0, 1]). Octave values are
// summed with weights 1 / mWeightAmp^octave, and the sum is normalised with
// mWeightBounding before the offset of 1 is subtracted.
template<typename FS>
class FS_T<FastNoise::FractalRidgedMulti, FS> : public virtual FastNoise::FractalRidgedMulti, public FS_T<FastNoise::Fractal<>, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v offset( 1 );
        float32v sum = offset - FS_Abs_f32( this->GetSourceValue( mSource, seed, pos... ) );
        float32v gain = this->GetSourceValue( mGain, seed, pos... ) * float32v( 6 );
        float32v lacunarity( mLacunarity );

        // The first octave's value seeds the amplitude feedback.
        float32v amp = sum;

        float32v weightAmp( mWeightAmp );
        float32v weight = weightAmp;

        // The running weight total the original kept here was never read; the
        // normalisation comes from the precomputed mWeightBounding instead.
        for( int i = 1; i < mOctaves; i++ )
        {
            amp *= gain;
            amp = FS_Min_f32( FS_Max_f32( amp, float32v( 0 ) ), float32v( 1 ) );

            seed -= int32v( -1 );   // i.e. seed + 1 per octave
            float32v value = offset - FS_Abs_f32( this->GetSourceValue( mSource, seed, (pos *= lacunarity)... ) );

            value *= amp;
            amp = value;

            float32v weightRecip = FS_Reciprocal_f32( weight );
            sum += value * weightRecip;
            weight *= weightAmp;
        }

        return sum * float32v( mWeightBounding ) - offset;
    }
};

View File

@ -0,0 +1,149 @@
#pragma once
#include <cassert>
#include <cmath>
#include "FastNoise/FastNoiseMetadata.h"
namespace FastNoise
{
// Coordinate axes. Count is not an axis; it is the number of axes and is used to size
// per-dimension arrays.
enum class Dim
{
X, Y, Z, W,
Count
};
// Distance metrics that can be selected for distance calculations.
enum class DistanceFunction
{
Euclidean,
EuclideanSquared,
Manhattan,
Hybrid,
};
// Running minimum / maximum of generated values. A fresh instance is "empty"
// (min = +inf, max = -inf), so merging it into another range changes nothing.
struct OutputMinMax
{
    float min = INFINITY;
    float max = -INFINITY;

    // Folds a single value into the range.
    OutputMinMax& operator <<( float v )
    {
        return *this << OutputMinMax{ v, v };
    }

    // Folds another range into this one.
    OutputMinMax& operator <<( const OutputMinMax& v )
    {
        max = fmaxf( max, v.max );
        min = fminf( min, v.min );
        return *this;
    }
};
// Common part of a node input slot: the owning reference to the attached node plus an
// untyped pointer to its SIMD implementation (filled in by Generator::SetSourceSIMDPtr).
// The constructor is protected so only the derived source types can be instantiated.
template<typename T>
struct BaseSource
{
using Type = T;
SmartNode<T> base;
void* simdGeneratorPtr = nullptr;
protected:
BaseSource() = default;
};
// Input slot that carries only an attached node and no constant value.
template<typename T>
struct GeneratorSourceT : BaseSource<T>
{ };
// Input slot that can be driven either by an attached node or, when none is attached,
// by a constant. Implicitly constructible from float so members can be written as
// `HybridSource x = 0.5f;`.
template<typename T>
struct HybridSourceT : BaseSource<T>
{
    float constant;

    HybridSourceT( float f = 0.0f ) : constant( f )
    {
    }
};
// Abstract base of every noise node: exposes the bulk generation entry points and the
// helper used by derived classes to attach source nodes.
class Generator
{
public:
using Metadata = FastNoise::Metadata;
friend Metadata;
virtual ~Generator() = default;
// SIMD level this node instance was built for.
virtual FastSIMD::eLevel GetSIMDLevel() const = 0;
virtual const Metadata* GetMetadata() const = 0;
// Fills noiseOut with xSize * ySize values for the integer grid starting at
// (xStart, yStart); grid coordinates are multiplied by frequency before sampling.
virtual OutputMinMax GenUniformGrid2D( float* noiseOut,
int32_t xStart, int32_t yStart,
int32_t xSize, int32_t ySize,
float frequency, int32_t seed ) const = 0;
// 3D variant: fills noiseOut with xSize * ySize * zSize values.
virtual OutputMinMax GenUniformGrid3D( float* noiseOut,
int32_t xStart, int32_t yStart, int32_t zStart,
int32_t xSize, int32_t ySize, int32_t zSize,
float frequency, int32_t seed ) const = 0;
// Generates one value per entry of the position arrays; the offsets are added to
// every position.
virtual OutputMinMax GenPositionArray2D( float* noiseOut, int32_t count,
const float* xPosArray, const float* yPosArray,
float xOffset, float yOffset, int32_t seed ) const = 0;
virtual OutputMinMax GenPositionArray3D( float* noiseOut, int32_t count,
const float* xPosArray, const float* yPosArray, const float* zPosArray,
float xOffset, float yOffset, float zOffset, int32_t seed ) const = 0;
virtual OutputMinMax GenTileable2D( float* noiseOut,
int32_t xSize, int32_t ySize,
float frequency, int32_t seed ) const = 0;
protected:
// Attaches `gen` to an input slot: stores the node reference and asks the SIMD
// implementation to cache its pointer in the slot. Debug builds assert that `gen` is
// non-null and was created for the same SIMD level as this node.
template<typename T>
void SetSourceMemberVariable( BaseSource<T>& memberVariable, SmartNodeArg<T> gen )
{
static_assert( std::is_base_of_v<Generator, T> );
assert( gen.get() );
assert( GetSIMDLevel() == gen->GetSIMDLevel() ); // Ensure that all SIMD levels match
memberVariable.base = gen;
SetSourceSIMDPtr( dynamic_cast<Generator*>( gen.get() ), &memberVariable.simdGeneratorPtr );
}
private:
// Implemented by the SIMD layer: writes the SIMD-typed pointer for `base` to *simdPtr.
virtual void SetSourceSIMDPtr( Generator* base, void** simdPtr ) = 0;
};
using GeneratorSource = GeneratorSourceT<Generator>;
using HybridSource = HybridSourceT<Generator>;
// Fixed-size array holding one value of type T per coordinate axis (Dim::Count entries).
template<typename T>
struct PerDimensionVariable
{
    using Type = T;

    T varArray[(int)Dim::Count];

    // Initialises every axis with the same value.
    template<typename U = T>
    PerDimensionVariable( U value = 0 )
    {
        for( int dim = 0; dim < (int)Dim::Count; ++dim )
        {
            varArray[dim] = value;
        }
    }

    // Unchecked element access.
    T& operator[]( size_t i ) { return varArray[i]; }
    const T& operator[]( size_t i ) const { return varArray[i]; }
};
}

View File

@ -0,0 +1,343 @@
#include <cassert>
#include <cstring>
#include "FastSIMD/InlInclude.h"
#include "Generator.h"
#ifdef FS_SIMD_CLASS
#pragma warning( disable:4250 )
#endif
template<typename FS>
class FS_T<FastNoise::Generator, FS> : public virtual FastNoise::Generator
{
FASTSIMD_DECLARE_FS_TYPES;
public:
// Per-vector evaluation entry points. The 2D and 3D overloads must be provided by each
// node; the 4D overload defaults to the 3D result, ignoring w.
virtual float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const = 0;
virtual float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const = 0;
virtual float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const { return Gen( seed, x, y, z ); };
// Helper for nodes that implement one variadic GenT( seed, pos... ): expands to the
// three Gen overrides, each forwarding to GenT.
#define FASTNOISE_IMPL_GEN_T\
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const override { return GenT( seed, x, y ); }\
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const override { return GenT( seed, x, y, z ); }\
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const override { return GenT( seed, x, y, z, w ); }
// Reports the SIMD level of the FS type this implementation was instantiated with.
FastSIMD::eLevel GetSIMDLevel() const final
{
return FS::SIMD_Level;
}
// Pointer type that is stored (as void*) in every source slot's simdGeneratorPtr.
using VoidPtrStorageType = FS_T<Generator, FS>*;

// Resolves the SIMD implementation of `base` for this FS level and stores it, untyped,
// in *simdPtr. Debug builds assert that `base` really is such an implementation.
void SetSourceSIMDPtr( Generator* base, void** simdPtr ) final
{
    VoidPtrStorageType simdImpl = dynamic_cast<VoidPtrStorageType>( base );

    assert( simdImpl );

    *simdPtr = static_cast<void*>( simdImpl );
}
// Evaluates a hybrid input slot: the attached generator when there is one, otherwise
// the slot's constant broadcast to every lane.
template<typename T, typename... POS>
FS_INLINE float32v FS_VECTORCALL GetSourceValue( const FastNoise::HybridSourceT<T>& memberVariable, int32v seed, POS... pos ) const
{
    void* const attached = memberVariable.simdGeneratorPtr;

    if( !attached )
    {
        return float32v( memberVariable.constant );
    }

    return reinterpret_cast<VoidPtrStorageType>( attached )->Gen( seed, pos... );
}
// Evaluates a generator-only input slot. The slot must have a generator attached;
// debug builds assert this.
template<typename T, typename... POS>
FS_INLINE float32v FS_VECTORCALL GetSourceValue( const FastNoise::GeneratorSourceT<T>& memberVariable, int32v seed, POS... pos ) const
{
    assert( memberVariable.simdGeneratorPtr );

    return reinterpret_cast<VoidPtrStorageType>( memberVariable.simdGeneratorPtr )->Gen( seed, pos... );
}
// Returns the attached generator of a slot as its concrete SIMD node type FS_T<T, FS>.
// The slot must have a generator attached; debug builds assert this.
template<typename T>
FS_INLINE const FS_T<T, FS>* GetSourceSIMD( const FastNoise::GeneratorSourceT<T>& memberVariable ) const
{
    assert( memberVariable.simdGeneratorPtr );

    // Stored as the SIMD Generator base; downcast to the node type the slot declares.
    VoidPtrStorageType simdBase = reinterpret_cast<VoidPtrStorageType>( memberVariable.simdGeneratorPtr );

    return static_cast<FS_T<T, FS>*>( simdBase );
}
// Fills noiseOut with xSize * ySize values, x varying fastest, for the integer grid that
// starts at (xStart, yStart). Grid coordinates are multiplied by `frequency` before
// sampling. xSize must be at least one SIMD vector wide (asserted in debug builds).
//
// Whole vectors are stored directly; the final, possibly partial, vector is generated
// here and handed to DoRemaining together with the running min / max (which are only
// updated in the loop when FASTNOISE_CALC_MIN_MAX is enabled).
FastNoise::OutputMinMax GenUniformGrid2D( float* noiseOut, int32_t xStart, int32_t yStart, int32_t xSize, int32_t ySize, float frequency, int32_t seed ) const final
{
    assert( xSize >= (int32_t)FS_Size_32() );

    float32v min( INFINITY );
    float32v max( -INFINITY );

    int32v xIdx( xStart );
    int32v yIdx( yStart );

    float32v freqV( frequency );

    int32v xSizeV( xSize );
    int32v xMax = xSizeV + xIdx + int32v( -1 );

    // Widen before multiplying so large grids cannot overflow 32-bit arithmetic.
    size_t totalValues = static_cast<size_t>( xSize ) * static_cast<size_t>( ySize );
    size_t index = 0;

    // Give every lane its own consecutive x coordinate.
    xIdx += int32v::FS_Incremented();

    // Written as an addition so that a grid smaller than one vector (e.g. ySize == 0)
    // skips the loop instead of wrapping the unsigned subtraction.
    while( index + FS_Size_32() < totalValues )
    {
        float32v xPos = FS_Converti32_f32( xIdx ) * freqV;
        float32v yPos = FS_Converti32_f32( yIdx ) * freqV;

        float32v gen = Gen( int32v( seed ), xPos, yPos );
        FS_Store_f32( &noiseOut[index], gen );

#if FASTNOISE_CALC_MIN_MAX
        min = FS_Min_f32( min, gen );
        max = FS_Max_f32( max, gen );
#endif

        index += FS_Size_32();
        xIdx += int32v( FS_Size_32() );

        // Lanes that ran past the end of the row wrap to the start of the next row.
        mask32v xReset = xIdx > xMax;
        yIdx = FS_MaskedIncrement_i32( yIdx, xReset );
        xIdx = FS_MaskedSub_i32( xIdx, xSizeV, xReset );
    }

    float32v xPos = FS_Converti32_f32( xIdx ) * freqV;
    float32v yPos = FS_Converti32_f32( yIdx ) * freqV;

    float32v gen = Gen( int32v( seed ), xPos, yPos );

    return DoRemaining( noiseOut, totalValues, index, min, max, gen );
}
// Fills noiseOut with xSize * ySize * zSize values, x varying fastest, then y, then z,
// for the integer grid that starts at (xStart, yStart, zStart). Grid coordinates are
// multiplied by `frequency` before sampling. xSize must be at least one SIMD vector
// wide (asserted in debug builds).
//
// Whole vectors are stored directly; the final, possibly partial, vector is generated
// here and handed to DoRemaining together with the running min / max (which are only
// updated in the loop when FASTNOISE_CALC_MIN_MAX is enabled).
FastNoise::OutputMinMax GenUniformGrid3D( float* noiseOut, int32_t xStart, int32_t yStart, int32_t zStart, int32_t xSize, int32_t ySize, int32_t zSize, float frequency, int32_t seed ) const final
{
    assert( xSize >= (int32_t)FS_Size_32() );

    float32v min( INFINITY );
    float32v max( -INFINITY );

    int32v xIdx( xStart );
    int32v yIdx( yStart );
    int32v zIdx( zStart );

    float32v freqV( frequency );

    int32v xSizeV( xSize );
    int32v xMax = xSizeV + xIdx + int32v( -1 );
    int32v ySizeV( ySize );
    int32v yMax = ySizeV + yIdx + int32v( -1 );

    // Widen before multiplying so large volumes cannot overflow 32-bit arithmetic.
    size_t totalValues = static_cast<size_t>( xSize ) * static_cast<size_t>( ySize ) * static_cast<size_t>( zSize );
    size_t index = 0;

    // Give every lane its own consecutive x coordinate.
    xIdx += int32v::FS_Incremented();

    // Written as an addition so that a volume smaller than one vector (e.g. a zero
    // ySize or zSize) skips the loop instead of wrapping the unsigned subtraction.
    while( index + FS_Size_32() < totalValues )
    {
        float32v xPos = FS_Converti32_f32( xIdx ) * freqV;
        float32v yPos = FS_Converti32_f32( yIdx ) * freqV;
        float32v zPos = FS_Converti32_f32( zIdx ) * freqV;

        float32v gen = Gen( int32v( seed ), xPos, yPos, zPos );
        FS_Store_f32( &noiseOut[index], gen );

#if FASTNOISE_CALC_MIN_MAX
        min = FS_Min_f32( min, gen );
        max = FS_Max_f32( max, gen );
#endif

        index += FS_Size_32();
        xIdx += int32v( FS_Size_32() );

        // Lanes that ran past the end of the row wrap to the start of the next row...
        mask32v xReset = xIdx > xMax;
        yIdx = FS_MaskedIncrement_i32( yIdx, xReset );
        xIdx = FS_MaskedSub_i32( xIdx, xSizeV, xReset );

        // ...and lanes that ran past the last row wrap to the next slice.
        mask32v yReset = yIdx > yMax;
        zIdx = FS_MaskedIncrement_i32( zIdx, yReset );
        yIdx = FS_MaskedSub_i32( yIdx, ySizeV, yReset );
    }

    float32v xPos = FS_Converti32_f32( xIdx ) * freqV;
    float32v yPos = FS_Converti32_f32( yIdx ) * freqV;
    float32v zPos = FS_Converti32_f32( zIdx ) * freqV;

    float32v gen = Gen( int32v( seed ), xPos, yPos, zPos );

    return DoRemaining( noiseOut, totalValues, index, min, max, gen );
}
// Evaluates 2D noise at `count` arbitrary positions.
//
// Position i is (xPosArray[i] + xOffset, yPosArray[i] + yOffset) and its value is
// written to noiseOut[i]. Positions are processed one SIMD vector at a time; the
// final (possibly partial) vector is written by DoRemaining.
//
// noiseOut              destination, at least `count` floats
// count                 number of positions
// xPosArray, yPosArray  per-position coordinates
// xOffset, yOffset      constant added to every coordinate
// seed                  seed forwarded to Gen
//
// Returns the value produced by DoRemaining, or a default-constructed
// OutputMinMax when count <= 0.
FastNoise::OutputMinMax GenPositionArray2D( float* noiseOut, int32_t count, const float* xPosArray, const float* yPosArray, float xOffset, float yOffset, int32_t seed ) const final
{
    if( count <= 0 )
    {
        // Nothing to generate; do not touch any of the arrays.
        return FastNoise::OutputMinMax();
    }

    // Signed on purpose: "totalValues - vectorSize" must be able to go negative
    // instead of wrapping around when count is smaller than one vector.
    const int64_t totalValues = count;
    const int64_t vectorSize = (int64_t)FS_Size_32();

    float32v min( INFINITY );
    float32v max( -INFINITY );

    int64_t index = 0;
    while( index < totalValues - vectorSize )
    {
        float32v xPos = float32v( xOffset ) + FS_Load_f32( &xPosArray[index] );
        float32v yPos = float32v( yOffset ) + FS_Load_f32( &yPosArray[index] );

        float32v gen = Gen( int32v( seed ), xPos, yPos );
        FS_Store_f32( &noiseOut[index], gen );

#if FASTNOISE_CALC_MIN_MAX
        min = FS_Min_f32( min, gen );
        max = FS_Max_f32( max, gen );
#endif

        index += vectorSize;
    }

    // NOTE(review): these loads presumably read a whole vector starting at `index`,
    // which extends past `count` elements when count is not a multiple of the
    // vector width - confirm that callers pad the position arrays.
    float32v xPos = float32v( xOffset ) + FS_Load_f32( &xPosArray[index] );
    float32v yPos = float32v( yOffset ) + FS_Load_f32( &yPosArray[index] );

    float32v gen = Gen( int32v( seed ), xPos, yPos );

    return DoRemaining( noiseOut, (size_t)totalValues, (size_t)index, min, max, gen );
}
// Evaluates 3D noise at `count` arbitrary positions.
//
// Position i is (xPosArray[i] + xOffset, yPosArray[i] + yOffset, zPosArray[i] + zOffset)
// and its value is written to noiseOut[i]. Positions are processed one SIMD vector
// at a time; the final (possibly partial) vector is written by DoRemaining.
//
// noiseOut                         destination, at least `count` floats
// count                            number of positions
// xPosArray, yPosArray, zPosArray  per-position coordinates
// xOffset, yOffset, zOffset        constant added to every coordinate
// seed                             seed forwarded to Gen
//
// Returns the value produced by DoRemaining, or a default-constructed
// OutputMinMax when count <= 0.
FastNoise::OutputMinMax GenPositionArray3D( float* noiseOut, int32_t count, const float* xPosArray, const float* yPosArray, const float* zPosArray, float xOffset, float yOffset, float zOffset, int32_t seed ) const final
{
    if( count <= 0 )
    {
        // Nothing to generate; do not touch any of the arrays.
        return FastNoise::OutputMinMax();
    }

    // Both operands are signed 64-bit so the subtraction below cannot be promoted
    // to unsigned (and wrap) when count is smaller than one vector.
    const int64_t totalValues = count;
    const int64_t vectorSize = (int64_t)FS_Size_32();

    float32v min( INFINITY );
    float32v max( -INFINITY );

    int64_t index = 0;
    while( index < totalValues - vectorSize )
    {
        float32v xPos = float32v( xOffset ) + FS_Load_f32( &xPosArray[index] );
        float32v yPos = float32v( yOffset ) + FS_Load_f32( &yPosArray[index] );
        float32v zPos = float32v( zOffset ) + FS_Load_f32( &zPosArray[index] );

        float32v gen = Gen( int32v( seed ), xPos, yPos, zPos );
        FS_Store_f32( &noiseOut[index], gen );

#if FASTNOISE_CALC_MIN_MAX
        min = FS_Min_f32( min, gen );
        max = FS_Max_f32( max, gen );
#endif

        index += vectorSize;
    }

    // NOTE(review): these loads presumably read a whole vector starting at `index`,
    // which extends past `count` elements when count is not a multiple of the
    // vector width - confirm that callers pad the position arrays.
    float32v xPos = float32v( xOffset ) + FS_Load_f32( &xPosArray[index] );
    float32v yPos = float32v( yOffset ) + FS_Load_f32( &yPosArray[index] );
    float32v zPos = float32v( zOffset ) + FS_Load_f32( &zPosArray[index] );

    float32v gen = Gen( int32v( seed ), xPos, yPos, zPos );

    return DoRemaining( noiseOut, (size_t)totalValues, (size_t)index, min, max, gen );
}
// Fills noiseOut with an xSize * ySize grid of 2D noise, x varying fastest.
//
// Each grid axis is mapped onto a circle (cos/sin pair) and the 4D overload of Gen
// is sampled at (cos x, cos y, sin x, sin y), each scaled by the axis frequency.
// One full period of the circle spans the grid size on each axis, which is
// presumably what makes the result wrap seamlessly at the edges.
//
// noiseOut      destination, at least xSize * ySize floats
// xSize, ySize  grid dimensions; xSize must be >= the SIMD vector width (asserted)
// frequency     noise frequency
// seed          seed forwarded to Gen
//
// Returns the value produced by DoRemaining, or a default-constructed
// OutputMinMax when the grid is empty.
FastNoise::OutputMinMax GenTileable2D( float* noiseOut, int32_t xSize, int32_t ySize, float frequency, int32_t seed ) const final
{
    assert( xSize >= (int32_t)FS_Size_32() );

    // Widen before multiplying so a large grid cannot overflow 32-bit arithmetic.
    const int64_t totalValues = int64_t( xSize ) * int64_t( ySize );
    if( totalValues <= 0 )
    {
        // Empty (or invalid) grid: nothing to write.
        return FastNoise::OutputMinMax();
    }

    // Signed on purpose: "totalValues - vectorSize" must be able to go negative
    // instead of wrapping around when the grid holds less than one vector.
    const int64_t vectorSize = (int64_t)FS_Size_32();

    float32v min( INFINITY );
    float32v max( -INFINITY );

    int32v xIdx( 0 );
    int32v yIdx( 0 );

    int32v xSizeV( xSize );
    int32v ySizeV( ySize );
    int32v xMax = xSizeV + xIdx + int32v( -1 );

    int64_t index = 0;

    // 1 / (2 * pi): converts a grid size into the radius of a circle whose
    // circumference equals that size.
    float pi2Recip( 0.15915493667f );
    float xSizePi = (float)xSize * pi2Recip;
    float ySizePi = (float)ySize * pi2Recip;
    float32v xFreq = float32v( frequency * xSizePi );
    float32v yFreq = float32v( frequency * ySizePi );
    float32v xMul = float32v( 1 / xSizePi );
    float32v yMul = float32v( 1 / ySizePi );

    // Each lane starts on a consecutive x coordinate.
    xIdx += int32v::FS_Incremented();

    while( index < totalValues - vectorSize )
    {
        // Grid index -> angle on the circle.
        float32v xF = FS_Converti32_f32( xIdx ) * xMul;
        float32v yF = FS_Converti32_f32( yIdx ) * yMul;

        float32v xPos = FS_Cos_f32( xF ) * xFreq;
        float32v yPos = FS_Cos_f32( yF ) * yFreq;
        float32v zPos = FS_Sin_f32( xF ) * xFreq;
        float32v wPos = FS_Sin_f32( yF ) * yFreq;

        float32v gen = Gen( int32v( seed ), xPos, yPos, zPos, wPos );
        FS_Store_f32( &noiseOut[index], gen );

#if FASTNOISE_CALC_MIN_MAX
        min = FS_Min_f32( min, gen );
        max = FS_Max_f32( max, gen );
#endif

        index += vectorSize;
        xIdx += int32v( FS_Size_32() );

        // Lanes that ran past the end of the row wrap to the start of the next row.
        mask32v xReset = xIdx > xMax;
        yIdx = FS_MaskedIncrement_i32( yIdx, xReset );
        xIdx = FS_MaskedSub_i32( xIdx, xSizeV, xReset );
    }

    // Last vector: may be only partially needed, so DoRemaining decides how much to store.
    float32v xF = FS_Converti32_f32( xIdx ) * xMul;
    float32v yF = FS_Converti32_f32( yIdx ) * yMul;

    float32v xPos = FS_Cos_f32( xF ) * xFreq;
    float32v yPos = FS_Cos_f32( yF ) * yFreq;
    float32v zPos = FS_Sin_f32( xF ) * xFreq;
    float32v wPos = FS_Sin_f32( yF ) * yFreq;

    float32v gen = Gen( int32v( seed ), xPos, yPos, zPos, wPos );

    return DoRemaining( noiseOut, (size_t)totalValues, (size_t)index, min, max, gen );
}
private:
// Writes the last vector of a generation run and folds the min/max results.
//
// noiseOut     output buffer being filled
// totalValues  total number of floats the caller wants in noiseOut
// index        number of floats already written; must be <= totalValues and leave
//              at most one SIMD vector of values outstanding
// min, max     per-lane running minimum / maximum of the vectors stored so far
// finalGen     generated vector for the outstanding values
//
// If exactly one vector remains it is stored whole; otherwise only the first
// `remaining` lanes are copied so the buffer is never overrun. With
// FASTNOISE_CALC_MIN_MAX enabled the returned OutputMinMax covers every value
// written; otherwise it is returned default-constructed.
static FS_INLINE FastNoise::OutputMinMax DoRemaining( float* noiseOut, size_t totalValues, size_t index, float32v min, float32v max, float32v finalGen )
{
    assert( index <= totalValues );
    assert( totalValues - index <= FS_Size_32() );

    FastNoise::OutputMinMax minMax;
    const size_t remaining = totalValues - index;

    if( remaining == FS_Size_32() )
    {
        FS_Store_f32( &noiseOut[index], finalGen );

#if FASTNOISE_CALC_MIN_MAX
        min = FS_Min_f32( min, finalGen );
        max = FS_Max_f32( max, finalGen );
#endif
    }
    else if( remaining > 0 )
    {
        // Partial vector: copy only the lanes that fit in the buffer.
        std::memcpy( &noiseOut[index], &finalGen, remaining * sizeof( float ) );

#if FASTNOISE_CALC_MIN_MAX
        // Only the copied lanes count; a plain loop (rather than do/while) never
        // reads noiseOut when there is nothing left.
        for( ; index < totalValues; ++index )
        {
            minMax << noiseOut[index];
        }
#endif
    }

#if FASTNOISE_CALC_MIN_MAX
    // Reduce the per-lane running min/max vectors into the scalar result.
    float* minP = reinterpret_cast<float*>(&min);
    float* maxP = reinterpret_cast<float*>(&max);
    for( size_t i = 0; i < FS_Size_32(); i++ )
    {
        minMax << FastNoise::OutputMinMax{ minP[i], maxP[i] };
    }
#endif

    return minMax;
}
};

View File

@ -0,0 +1,321 @@
#pragma once
#include "Generator.h"
namespace FastNoise
{
// Modifier node with one source generator and a single scale factor that is
// applied uniformly to the sampling position.
class DomainScale : public virtual Generator
{
public:
    // Connects the generator whose domain gets scaled.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Sets the scale factor (1 by default).
    void SetScale( float value )
    {
        mScale = value;
    }

protected:
    GeneratorSource mSource;
    float mScale = 1.0f;

    FASTNOISE_METADATA( Generator )

        // Registers the node in the "Modifiers" group with its source and variable.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &DomainScale::SetSource );
            this->AddVariable( "Scale", 1.0f, &DomainScale::SetScale );
        }
    }; // ends the nested Metadata definition begun by FASTNOISE_METADATA
};
// Modifier node with one source generator and one offset per dimension.
// Every offset is a hybrid value: either a constant or another generator.
class DomainOffset : public virtual Generator
{
public:
    // Connects the generator whose domain gets offset.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Sets a constant offset for dimension D.
    template<Dim D>
    void SetOffset( float value )
    {
        mOffset[(int)D] = value;
    }

    // Drives the offset for dimension D from another generator.
    template<Dim D>
    void SetOffset( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mOffset[(int)D], gen );
    }

protected:
    GeneratorSource mSource;
    PerDimensionVariable<HybridSource> mOffset;

    FASTNOISE_METADATA( Generator )

        // Registers the node in the "Modifiers" group with its source and offsets.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &DomainOffset::SetSource );
            this->AddPerDimensionHybridSource( "Offset", 0.0f,
                []( DomainOffset* node ) { return std::ref( node->mOffset ); } );
        }
    }; // ends the nested Metadata definition begun by FASTNOISE_METADATA
};
// Modifier node that stores a yaw/pitch/roll rotation for the sampling position
// of its source generator. The sine and cosine of each angle are cached, and the
// nine matrix coefficients (mXa..mZc) are rebuilt whenever an angle changes.
class DomainRotate : public virtual Generator
{
public:
    // Connects the generator whose domain gets rotated.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Each angle setter takes the angle as passed to cosf/sinf (radians).
    void SetYaw( float value )
    {
        mYawCos = cosf( value );
        mYawSin = sinf( value );
        CalculateRotation();
    }

    void SetPitch( float value )
    {
        mPitchCos = cosf( value );
        mPitchSin = sinf( value );
        CalculateRotation();
    }

    void SetRoll( float value )
    {
        mRollCos = cosf( value );
        mRollSin = sinf( value );
        CalculateRotation();
    }

protected:
    GeneratorSource mSource;

    // Cached trigonometry; the defaults correspond to all angles being zero.
    float mYawCos = 1.0f;
    float mYawSin = 0.0f;
    float mPitchCos = 1.0f;
    float mPitchSin = 0.0f;
    float mRollCos = 1.0f;
    float mRollSin = 0.0f;

    // Rotation matrix coefficients; the defaults form the identity matrix.
    float mXa = 1.0f;
    float mXb = 0.0f;
    float mXc = 0.0f;
    float mYa = 0.0f;
    float mYb = 1.0f;
    float mYc = 0.0f;
    float mZa = 0.0f;
    float mZb = 0.0f;
    float mZc = 1.0f;

    // Recomputes all nine coefficients from the cached sines and cosines.
    void CalculateRotation()
    {
        mXa = mYawCos * mPitchCos;
        mXb = mYawCos * mPitchSin * mRollSin - mYawSin * mRollCos;
        mXc = mYawCos * mPitchSin * mRollCos + mYawSin * mRollSin;

        mYa = mYawSin * mPitchCos;
        mYb = mYawSin * mPitchSin * mRollSin + mYawCos * mRollCos;
        mYc = mYawSin * mPitchSin * mRollCos - mYawCos * mRollSin;

        mZa = -mPitchSin;
        mZb = mPitchCos * mRollSin;
        mZc = mPitchCos * mRollCos;
    }

    FASTNOISE_METADATA( Generator )

        // Registers the node in the "Modifiers" group with its source and angles.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &DomainRotate::SetSource );
            this->AddVariable( "Yaw", 0.0f, &DomainRotate::SetYaw );
            this->AddVariable( "Pitch", 0.0f, &DomainRotate::SetPitch );
            this->AddVariable( "Roll", 0.0f, &DomainRotate::SetRoll );
        }
    }; // ends the nested Metadata definition begun by FASTNOISE_METADATA
};
// Modifier node with one source generator and an integer offset for the seed.
class SeedOffset : public virtual Generator
{
public:
    // Connects the generator that receives the shifted seed.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Sets the seed offset (1 by default).
    void SetOffset( int32_t value )
    {
        mOffset = value;
    }

protected:
    GeneratorSource mSource;
    int32_t mOffset = 1;

    FASTNOISE_METADATA( Generator )

        // Registers the node in the "Modifiers" group with its source and variable.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &SeedOffset::SetSource );
            this->AddVariable( "Seed Offset", 1, &SeedOffset::SetOffset );
        }
    }; // ends the nested Metadata definition begun by FASTNOISE_METADATA
};
// Modifier node holding a "from" range and a "to" range used to linearly remap
// the output of its source generator. Defaults map [-1, 1] onto [0, 1].
class Remap : public virtual Generator
{
public:
    // Connects the generator whose output gets remapped.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Sets all four range bounds at once.
    void SetRemap( float fromMin, float fromMax, float toMin, float toMax )
    {
        mFromMin = fromMin;
        mFromMax = fromMax;
        mToMin = toMin;
        mToMax = toMax;
    }

protected:
    GeneratorSource mSource;
    float mFromMin = -1.0f;
    float mFromMax = 1.0f;
    float mToMin = 0.0f;
    float mToMax = 1.0f;

    FASTNOISE_METADATA( Generator )

        // Registers the node in the "Modifiers" group. Each bound is exposed as its
        // own variable, so the setters are small lambdas writing a single member.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &Remap::SetSource );

            this->AddVariable( "From Min", -1.0f,
                []( Remap* node, float value ) { node->mFromMin = value; } );

            this->AddVariable( "From Max", 1.0f,
                []( Remap* node, float value ) { node->mFromMax = value; } );

            this->AddVariable( "To Min", 0.0f,
                []( Remap* node, float value ) { node->mToMin = value; } );

            this->AddVariable( "To Max", 1.0f,
                []( Remap* node, float value ) { node->mToMax = value; } );
        }
    }; // ends the nested Metadata definition begun by FASTNOISE_METADATA
};
// Modifier node holding the [min, max] input range used when the output of its
// source generator is converted to a packed 8-bit-per-channel colour.
class ConvertRGBA8 : public virtual Generator
{
public:
    // Connects the generator whose output gets converted.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Sets both ends of the input range at once.
    void SetMinMax( float min, float max )
    {
        mMin = min;
        mMax = max;
    }

protected:
    GeneratorSource mSource;
    float mMin = -1.0f;
    float mMax = 1.0f;

    FASTNOISE_METADATA( Generator )

        // Registers the node in the "Modifiers" group. Min and Max are exposed as
        // separate variables, so the setters are lambdas writing a single member.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &ConvertRGBA8::SetSource );

            this->AddVariable( "Min", -1.0f,
                []( ConvertRGBA8* node, float value ) { node->mMin = value; } );

            this->AddVariable( "Max", 1.0f,
                []( ConvertRGBA8* node, float value ) { node->mMax = value; } );
        }
    }; // ends the nested Metadata definition begun by FASTNOISE_METADATA
};
// Modifier node holding a step multiplier and a smoothness amount for terracing
// the output of its source generator. Derived reciprocals are cached alongside
// the raw values so they need not be recomputed per sample.
class Terrace : public virtual Generator
{
public:
    // Connects the generator whose output gets terraced.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Stores the multiplier together with its reciprocal.
    void SetMultiplier( float multiplier )
    {
        mMultiplier = multiplier;
        mMultiplierRecip = 1 / multiplier;
    }

    // Stores the smoothness; the derived factor is only refreshed for non-zero
    // values, which avoids dividing by zero.
    void SetSmoothness( float smoothness )
    {
        mSmoothness = smoothness;

        if( mSmoothness != 0.0f )
        {
            mSmoothnessRecip = 1 + 1 / smoothness;
        }
    }

protected:
    GeneratorSource mSource;
    float mMultiplier = 1.0f;
    float mMultiplierRecip = 1.0f;
    float mSmoothness = 0.0f;
    float mSmoothnessRecip = 0.0f;

    FASTNOISE_METADATA( Generator )

        // Registers the node in the "Modifiers" group with its source and variables.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &Terrace::SetSource );
            this->AddVariable( "Multiplier", 1.0f, &Terrace::SetMultiplier );
            this->AddVariable( "Smoothness", 0.0f, &Terrace::SetSmoothness );
        }
    }; // ends the nested Metadata definition begun by FASTNOISE_METADATA
};
// Modifier node with one source generator and an independent scale factor for
// each dimension of the sampling position.
class DomainAxisScale : public virtual Generator
{
public:
    // Connects the generator whose domain gets scaled.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Sets the scale factor for dimension D.
    template<Dim D>
    void SetScale( float value )
    {
        mScale[(int)D] = value;
    }

protected:
    GeneratorSource mSource;
    PerDimensionVariable<float> mScale;

    FASTNOISE_METADATA( Generator )

        // Registers the node in the "Modifiers" group with its source and scales.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &DomainAxisScale::SetSource );
            this->AddPerDimensionVariable( "Scale", 1.0f,
                []( DomainAxisScale* node ) { return std::ref( node->mScale ); } );
        }
    }; // ends the nested Metadata definition begun by FASTNOISE_METADATA
};
// Modifier node with one source generator and a hybrid value (constant or
// generator) giving the coordinate of an additional dimension.
class AddDimension : public virtual Generator
{
public:
    // Connects the generator that is sampled with the extra dimension.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Uses a constant as the new dimension's coordinate.
    void SetNewDimensionPosition( float value )
    {
        mNewDimensionPosition = value;
    }

    // Drives the new dimension's coordinate from another generator.
    void SetNewDimensionPosition( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mNewDimensionPosition, gen );
    }

protected:
    GeneratorSource mSource;
    HybridSource mNewDimensionPosition;

    FASTNOISE_METADATA( Generator )

        // Registers the node in the "Modifiers" group with its source and position.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &AddDimension::SetSource );
            this->AddHybridSource( "New Dimension Position", 0.0f,
                &AddDimension::SetNewDimensionPosition,
                &AddDimension::SetNewDimensionPosition );
        }
    }; // ends the nested Metadata definition begun by FASTNOISE_METADATA
};
// Modifier node with one source generator and the dimension that should be
// dropped from the sampling position (Y by default).
class RemoveDimension : public virtual Generator
{
public:
    // Connects the generator that is sampled with one dimension fewer.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

    // Chooses which dimension is dropped.
    void SetRemoveDimension( Dim dimension )
    {
        mRemoveDimension = dimension;
    }

protected:
    GeneratorSource mSource;
    Dim mRemoveDimension = Dim::Y;

    FASTNOISE_METADATA( Generator )

        // Registers the node in the "Modifiers" group with its source and enum choice.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &RemoveDimension::SetSource );
            this->AddVariableEnum( "Remove Dimension", Dim::Y,
                &RemoveDimension::SetRemoveDimension, "X", "Y", "Z", "W" );
        }
    }; // ends the nested Metadata definition begun by FASTNOISE_METADATA
};
// Modifier node with a single source generator and no parameters of its own.
class GeneratorCache : public virtual Generator
{
public:
    // Connects the generator this node wraps.
    void SetSource( SmartNodeArg<> gen )
    {
        this->SetSourceMemberVariable( mSource, gen );
    }

protected:
    GeneratorSource mSource;

    FASTNOISE_METADATA( Generator )

        // Registers the node in the "Modifiers" group with its source.
        Metadata( const char* className ) : Generator::Metadata( className )
        {
            groups.push_back( "Modifiers" );
            this->AddGeneratorSource( "Source", &GeneratorCache::SetSource );
        }
    }; // ends the nested Metadata definition begun by FASTNOISE_METADATA
};
}

View File

@ -0,0 +1,277 @@
#include "FastSIMD/InlInclude.h"
#include "Modifiers.h"
// SIMD implementation of DomainScale: every coordinate is multiplied by mScale
// before the source generator is sampled.
template<typename FS>
class FS_T<FastNoise::DomainScale, FS> : public virtual FastNoise::DomainScale, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Shared body for every dimension count; `pos` is the coordinate pack.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        // Broadcast the scalar factor once and reuse it for each coordinate.
        float32v scale( mScale );

        return this->GetSourceValue( mSource, seed, (pos * scale)... );
    }
};
// SIMD implementation of DomainOffset: adds a per-dimension offset to the
// position and samples the source generator at the shifted position.
template<typename FS>
class FS_T<FastNoise::DomainOffset, FS> : public virtual FastNoise::DomainOffset, public FS_T<FastNoise::Generator, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
FASTNOISE_IMPL_GEN_T;
// Shared body for every dimension count; `pos` is the coordinate pack.
template<typename... P>
FS_INLINE float32v GenT( int32v seed, P... pos ) const
{
// The coordinate pack is passed to the lambda twice (see the call at the end):
//  - sourcePos is never modified and is the position every offset source is sampled at;
//  - offset starts as a copy of the position and accumulates the offsets in place.
// This way each dimension's offset is evaluated at the ORIGINAL position, not at a
// position already shifted by the preceding dimensions.
return [this, seed]( std::remove_reference_t<P>... sourcePos, std::remove_reference_t<P>... offset )
{
// idx walks mOffset in step with the pack; the comma fold evaluates left to right.
size_t idx = 0;
((offset += this->GetSourceValue( mOffset[idx++], seed, sourcePos... )), ...);
return this->GetSourceValue( mSource, seed, offset... );
} (pos..., pos...);
}
};
// SIMD implementation of DomainRotate: rotates the sampling position with the
// coefficients cached by DomainRotate, then samples the source generator.
template<typename FS>
class FS_T<FastNoise::DomainRotate, FS> : public virtual FastNoise::DomainRotate, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;

    // 2D: a pure yaw stays in the plane and is handled directly; any pitch or
    // roll is routed through the 3D overload with z = 0.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
    {
        if( mPitchSin != 0.0f || mRollSin != 0.0f )
        {
            return Gen( seed, x, y, float32v( 0 ) );
        }

        float32v rotatedX = FS_FNMulAdd_f32( y, float32v( mYawSin ), x * float32v( mYawCos ) );
        float32v rotatedY = FS_FMulAdd_f32( x, float32v( mYawSin ), y * float32v( mYawCos ) );

        return this->GetSourceValue( mSource, seed, rotatedX, rotatedY );
    }

    // 3D: each output coordinate combines x, y and z with one row of coefficients.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
    {
        float32v rotatedX = FS_FMulAdd_f32( x, float32v( mXa ), FS_FMulAdd_f32( y, float32v( mXb ), z * float32v( mXc ) ) );
        float32v rotatedY = FS_FMulAdd_f32( x, float32v( mYa ), FS_FMulAdd_f32( y, float32v( mYb ), z * float32v( mYc ) ) );
        float32v rotatedZ = FS_FMulAdd_f32( x, float32v( mZa ), FS_FMulAdd_f32( y, float32v( mZb ), z * float32v( mZc ) ) );

        return this->GetSourceValue( mSource, seed, rotatedX, rotatedY, rotatedZ );
    }

    // 4D: the position is forwarded unchanged.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
    {
        // No rotation for 4D yet
        return this->GetSourceValue( mSource, seed, x, y, z, w );
    }
};
// SIMD implementation of SeedOffset: samples the source generator at the same
// position but with mOffset added to the seed.
template<typename FS>
class FS_T<FastNoise::SeedOffset, FS> : public virtual FastNoise::SeedOffset, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Shared body for every dimension count; `pos` is the coordinate pack.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        int32v shiftedSeed = seed + int32v( mOffset );

        return this->GetSourceValue( mSource, shiftedSeed, pos... );
    }
};
// SIMD implementation of Remap: linearly maps the source output from the range
// [mFromMin, mFromMax] onto [mToMin, mToMax]. Values outside the "from" range
// are extrapolated, not clamped.
template<typename FS>
class FS_T<FastNoise::Remap, FS> : public virtual FastNoise::Remap, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Shared body for every dimension count; `pos` is the coordinate pack.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v value = this->GetSourceValue( mSource, seed, pos... );

        // Position of the value inside the "from" range (0 at mFromMin, 1 at mFromMax).
        float32v normalised = ( value - float32v( mFromMin ) ) / float32v( mFromMax - mFromMin );

        return float32v( mToMin ) + ( normalised * float32v( mToMax - mToMin ) );
    }
};
// SIMD implementation of ConvertRGBA8: clamps the source output to [mMin, mMax],
// rescales it to 0..255 and packs that byte into the three low bytes of a 32-bit
// value whose top byte is 255. The packed integer bits are returned
// reinterpreted as a float vector.
template<typename FS>
class FS_T<FastNoise::ConvertRGBA8, FS> : public virtual FastNoise::ConvertRGBA8, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Shared body for every dimension count; `pos` is the coordinate pack.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v value = this->GetSourceValue( mSource, seed, pos... );

        // Clamp to the configured range, then shift and scale it onto 0..255.
        value = FS_Min_f32( value, float32v( mMax ));
        value = FS_Max_f32( value, float32v( mMin ));
        value -= float32v( mMin );
        value *= float32v( 255.0f / (mMax - mMin) );

        int32v channel = FS_Convertf32_i32( value );

        // Top byte fixed at 255; the same byte is replicated into the other three.
        int32v packed = int32v( 255 << 24 );
        packed |= channel;
        packed |= channel << 8;
        packed |= channel << 16;

        return FS_Casti32_f32( packed );
    }
};
// SIMD implementation of Terrace: scales the source output by mMultiplier, snaps
// it to the nearest whole step and scales it back. With a non-zero smoothness the
// snapped value is adjusted by a correction derived from its distance to the
// unsnapped value.
template<typename FS>
class FS_T<FastNoise::Terrace, FS> : public virtual FastNoise::Terrace, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Shared body for every dimension count; `pos` is the coordinate pack.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        float32v scaled = this->GetSourceValue( mSource, seed, pos... );
        scaled *= float32v( mMultiplier );

        float32v stepped = FS_Round_f32( scaled );

        if( mSmoothness != 0.0f )
        {
            float32v delta = stepped - scaled;

            // Remember the direction before working with the magnitude only.
            mask32v deltaNegative = delta < float32v( 0 );

            delta = FS_Abs_f32( delta );
            delta = float32v( 0.5f ) - delta;

            delta *= float32v( mSmoothnessRecip );
            delta = FS_Min_f32( delta, float32v( 0.5f ) );

            // Re-apply the direction of the original difference.
            delta = FS_Select_f32( deltaNegative, float32v( 0.5f ) - delta, delta - float32v( 0.5f ) );

            stepped += delta;
        }

        return stepped * float32v( mMultiplierRecip );
    }
};
// SIMD implementation of DomainAxisScale: multiplies each coordinate by the
// scale stored for its dimension, then samples the source generator.
template<typename FS>
class FS_T<FastNoise::DomainAxisScale, FS> : public virtual FastNoise::DomainAxisScale, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Shared body for every dimension count; `pos` is the coordinate pack.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        // The comma fold runs left to right, so `axis` advances in step with the pack.
        size_t axis = 0;
        ( ( pos *= float32v( mScale[axis++] ) ), ... );

        return this->GetSourceValue( mSource, seed, pos... );
    }
};
// SIMD implementation of AddDimension: samples the source generator with one
// extra coordinate appended, taken from mNewDimensionPosition. When the position
// already has the maximum number of dimensions it is forwarded unchanged.
template<typename FS>
class FS_T<FastNoise::AddDimension, FS> : public virtual FastNoise::AddDimension, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;
    FASTNOISE_IMPL_GEN_T;

    // Shared body for every dimension count; `pos` is the coordinate pack.
    template<typename... P>
    FS_INLINE float32v GenT( int32v seed, P... pos ) const
    {
        if constexpr( sizeof...(P) != (size_t)FastNoise::Dim::Count )
        {
            // The new coordinate is itself evaluated at the incoming position.
            float32v extra = this->GetSourceValue( mNewDimensionPosition, seed, pos... );

            return this->GetSourceValue( mSource, seed, pos..., extra );
        }
        else
        {
            // No room for another dimension.
            return this->GetSourceValue( mSource, seed, pos... );
        }
    }
};
// SIMD implementation of RemoveDimension: samples the source generator with the
// coordinate selected by mRemoveDimension left out. If the selected dimension is
// not present in the incoming position, the position is forwarded unchanged.
template<typename FS>
class FS_T<FastNoise::RemoveDimension, FS> : public virtual FastNoise::RemoveDimension, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;

    // 2D positions are always forwarded as they are.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
    {
        return this->GetSourceValue( mSource, seed, x, y );
    }

    // 3D -> 2D, unless the selected dimension is not one of X, Y or Z.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
    {
        if( mRemoveDimension == FastNoise::Dim::X )
        {
            return this->GetSourceValue( mSource, seed, y, z );
        }
        if( mRemoveDimension == FastNoise::Dim::Y )
        {
            return this->GetSourceValue( mSource, seed, x, z );
        }
        if( mRemoveDimension == FastNoise::Dim::Z )
        {
            return this->GetSourceValue( mSource, seed, x, y );
        }

        return this->GetSourceValue( mSource, seed, x, y, z );
    }

    // 4D -> 3D, unless the selected dimension is not one of X, Y, Z or W.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
    {
        if( mRemoveDimension == FastNoise::Dim::X )
        {
            return this->GetSourceValue( mSource, seed, y, z, w );
        }
        if( mRemoveDimension == FastNoise::Dim::Y )
        {
            return this->GetSourceValue( mSource, seed, x, z, w );
        }
        if( mRemoveDimension == FastNoise::Dim::Z )
        {
            return this->GetSourceValue( mSource, seed, x, y, w );
        }
        if( mRemoveDimension == FastNoise::Dim::W )
        {
            return this->GetSourceValue( mSource, seed, x, y, z );
        }

        return this->GetSourceValue( mSource, seed, x, y, z, w );
    }
};
// SIMD implementation of GeneratorCache: remembers the most recent position and
// result per thread and returns the stored result when the same source is asked
// for the same position again, instead of re-evaluating the source.
template<typename FS>
class FS_T<FastNoise::GeneratorCache, FS> : public virtual FastNoise::GeneratorCache, public FS_T<FastNoise::Generator, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
FASTNOISE_IMPL_GEN_T;
// Shared body for every dimension count; `pos` is the coordinate pack.
template<typename... P>
FS_INLINE float32v GenT( int32v seed, P... pos ) const
{
// One cache slot per thread and per instantiation of this function template
// (function-local statics); it is not per GeneratorCache object, which is why
// the source pointer is stored as part of the key.
thread_local static void* CachedGenerator = nullptr;
thread_local static float32v CachedValue;
thread_local static float32v CachedPos[sizeof...( P )];
// TLS is not always aligned, so use FS_Load/FS_Store to access SIMD types
float32v arrayPos[] = { pos... };
// A hit requires the same source pointer and every lane of every coordinate to match.
// NOTE(review): `seed` is not compared, so the same source and position with a
// different seed would return the stored value - confirm this is intended.
bool isSame = (CachedGenerator == mSource.simdGeneratorPtr);
for( size_t i = 0; i < sizeof...( P ); i++ )
{
isSame &= !FS_AnyMask_bool( arrayPos[i] != FS_Load_f32( &CachedPos[i] ) );
}
if( !isSame )
{
// Miss: evaluate the source and overwrite the cache slot with the new key and value.
CachedGenerator = mSource.simdGeneratorPtr;
float32v value = this->GetSourceValue( mSource, seed, pos... );
FS_Store_f32( &CachedValue, value );
for( size_t i = 0; i < sizeof...(P); i++ )
{
FS_Store_f32( &CachedPos[i], arrayPos[i] );
}
return value;
}
// Hit: reuse the stored result.
return FS_Load_f32( &CachedValue );
}
};

View File

@ -0,0 +1,16 @@
#pragma once
#include "Generator.h"
namespace FastNoise
{
// Perlin noise generator node. It has no parameters of its own; the class only
// supplies the node metadata.
class Perlin : public virtual Generator
{
FASTNOISE_METADATA( Generator )
Metadata( const char* className ) : Generator::Metadata( className )
{
// List this node under the "Coherent Noise" group.
groups.push_back( "Coherent Noise" );
}
};
};
}

View File

@ -0,0 +1,109 @@
#include "FastSIMD/InlInclude.h"
#include "Perlin.h"
#include "Utils.inl"
// SIMD implementation of Perlin noise for 2, 3 and 4 dimensions.
//
// All three overloads follow the same steps:
//   1. floor the position to find the lattice cell it lies in;
//   2. pre-multiply the integer cell coordinates by the per-axis hashing primes,
//      so the neighbouring corner is reached by adding the prime once more;
//   3. compute the offset of the position from the low corner (xf0, ...) and from
//      the high corner (xf1 = xf0 - 1, ...);
//   4. hash every corner, take the gradient dot product with the matching offsets;
//   5. blend the corner values with quintic-smoothed weights, one axis at a time.
// The leading constant in each return statement rescales the result
// (presumably to normalise the output range - TODO confirm).
template<typename FS>
class FS_T<FastNoise::Perlin, FS> : public virtual FastNoise::Perlin, public FS_T<FastNoise::Generator, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
// 2D: blends the 4 corners of the containing cell.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
{
// xs/ys are reused three times: cell floor, then fractional offset, then blend weight.
float32v xs = FS_Floor_f32( x );
float32v ys = FS_Floor_f32( y );
int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
int32v x1 = x0 + int32v( FnPrimes::X );
int32v y1 = y0 + int32v( FnPrimes::Y );
float32v xf0 = xs = x - xs;
float32v yf0 = ys = y - ys;
float32v xf1 = xf0 - float32v( 1 );
float32v yf1 = yf0 - float32v( 1 );
xs = FnUtils::InterpQuintic( xs );
ys = FnUtils::InterpQuintic( ys );
// Blend along x for each y row, then along y.
return float32v( 0.579106986522674560546875f ) * FnUtils::Lerp(
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0 ), xf0, yf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0 ), xf1, yf0 ), xs ),
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1 ), xf0, yf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1 ), xf1, yf1 ), xs ), ys );
}
// 3D: blends the 8 corners of the containing cell.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
{
// xs/ys/zs are reused three times: cell floor, then fractional offset, then blend weight.
float32v xs = FS_Floor_f32( x );
float32v ys = FS_Floor_f32( y );
float32v zs = FS_Floor_f32( z );
int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
int32v x1 = x0 + int32v( FnPrimes::X );
int32v y1 = y0 + int32v( FnPrimes::Y );
int32v z1 = z0 + int32v( FnPrimes::Z );
float32v xf0 = xs = x - xs;
float32v yf0 = ys = y - ys;
float32v zf0 = zs = z - zs;
float32v xf1 = xf0 - float32v( 1 );
float32v yf1 = yf0 - float32v( 1 );
float32v zf1 = zf0 - float32v( 1 );
xs = FnUtils::InterpQuintic( xs );
ys = FnUtils::InterpQuintic( ys );
zs = FnUtils::InterpQuintic( zs );
// Blend along x, then y, for the z0 and z1 faces, then along z.
return float32v( 0.964921414852142333984375f ) * FnUtils::Lerp( FnUtils::Lerp(
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z0 ), xf0, yf0, zf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z0 ), xf1, yf0, zf0 ), xs ),
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z0 ), xf0, yf1, zf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z0 ), xf1, yf1, zf0 ), xs ), ys ),
FnUtils::Lerp(
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z1 ), xf0, yf0, zf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z1 ), xf1, yf0, zf1 ), xs ),
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z1 ), xf0, yf1, zf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z1 ), xf1, yf1, zf1 ), xs ), ys ), zs );
}
// 4D: blends the 16 corners of the containing cell.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
{
// xs/ys/zs/ws are reused three times: cell floor, then fractional offset, then blend weight.
float32v xs = FS_Floor_f32( x );
float32v ys = FS_Floor_f32( y );
float32v zs = FS_Floor_f32( z );
float32v ws = FS_Floor_f32( w );
int32v x0 = FS_Convertf32_i32( xs ) * int32v( FnPrimes::X );
int32v y0 = FS_Convertf32_i32( ys ) * int32v( FnPrimes::Y );
int32v z0 = FS_Convertf32_i32( zs ) * int32v( FnPrimes::Z );
int32v w0 = FS_Convertf32_i32( ws ) * int32v( FnPrimes::W );
int32v x1 = x0 + int32v( FnPrimes::X );
int32v y1 = y0 + int32v( FnPrimes::Y );
int32v z1 = z0 + int32v( FnPrimes::Z );
int32v w1 = w0 + int32v( FnPrimes::W );
float32v xf0 = xs = x - xs;
float32v yf0 = ys = y - ys;
float32v zf0 = zs = z - zs;
float32v wf0 = ws = w - ws;
float32v xf1 = xf0 - float32v( 1 );
float32v yf1 = yf0 - float32v( 1 );
float32v zf1 = zf0 - float32v( 1 );
float32v wf1 = wf0 - float32v( 1 );
xs = FnUtils::InterpQuintic( xs );
ys = FnUtils::InterpQuintic( ys );
zs = FnUtils::InterpQuintic( zs );
ws = FnUtils::InterpQuintic( ws );
// Blend along x, y and z for the w0 and w1 cubes, then along w.
// NOTE(review): the scale constant is the same as in the 3D overload - confirm intended.
return float32v( 0.964921414852142333984375f ) * FnUtils::Lerp( FnUtils::Lerp( FnUtils::Lerp(
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z0, w0 ), xf0, yf0, zf0, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z0, w0 ), xf1, yf0, zf0, wf0 ), xs ),
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z0, w0 ), xf0, yf1, zf0, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z0, w0 ), xf1, yf1, zf0, wf0 ), xs ), ys ),
FnUtils::Lerp(
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z1, w0 ), xf0, yf0, zf1, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z1, w0 ), xf1, yf0, zf1, wf0 ), xs ),
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z1, w0 ), xf0, yf1, zf1, wf0 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z1, w0 ), xf1, yf1, zf1, wf0 ), xs ), ys ), zs ),
FnUtils::Lerp( FnUtils::Lerp(
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z0, w1 ), xf0, yf0, zf0, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z0, w1 ), xf1, yf0, zf0, wf1 ), xs ),
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z0, w1 ), xf0, yf1, zf0, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z0, w1 ), xf1, yf1, zf0, wf1 ), xs ), ys ),
FnUtils::Lerp(
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y0, z1, w1 ), xf0, yf0, zf1, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y0, z1, w1 ), xf1, yf0, zf1, wf1 ), xs ),
FnUtils::Lerp( FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x0, y1, z1, w1 ), xf0, yf1, zf1, wf1 ), FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, x1, y1, z1, w1 ), xf1, yf1, zf1, wf1 ), xs ), ys ), zs ), ws );
}
};

View File

@ -0,0 +1,27 @@
#pragma once
#include "Generator.h"
namespace FastNoise
{
// Simplex noise generator node. It has no parameters of its own; the class only
// supplies the node metadata.
class Simplex : public virtual Generator
{
FASTNOISE_METADATA( Generator )
Metadata( const char* className ) : Generator::Metadata( className )
{
// List this node under the "Coherent Noise" group.
groups.push_back( "Coherent Noise" );
}
};
};
// OpenSimplex2 noise generator node. It has no parameters of its own; the class
// only supplies the node metadata.
class OpenSimplex2 : public virtual Generator
{
FASTNOISE_METADATA( Generator )
Metadata( const char* className ) : Generator::Metadata( className )
{
// List this node under the "Coherent Noise" group.
groups.push_back( "Coherent Noise" );
}
};
};
}

View File

@ -0,0 +1,373 @@
#include "FastSIMD/InlInclude.h"
#include "Simplex.h"
#include "Utils.inl"
template<typename FS>
class FS_T<FastNoise::Simplex, FS> : public virtual FastNoise::Simplex, public FS_T<FastNoise::Generator, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
// 2D simplex noise: sums the contributions of the three corners of the triangle
// that contains the (skewed) input position.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
{
// F2 skews the input onto the integer lattice, G2 maps lattice points back.
const float SQRT3 = 1.7320508075688772935274463415059f;
const float F2 = 0.5f * (SQRT3 - 1.0f);
const float G2 = (3.0f - SQRT3) / 6.0f;
// Skew, then floor to find the base lattice corner.
float32v f = float32v( F2 ) * (x + y);
float32v x0 = FS_Floor_f32( x + f );
float32v y0 = FS_Floor_f32( y + f );
// Integer corner coordinates, pre-multiplied by the hashing primes.
int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
// x0/y0 become the offset from the (unskewed) base corner to the input position.
float32v g = float32v( G2 ) * (x0 + y0);
x0 = x - (x0 - g);
y0 = y - (y0 - g);
// i1 is set where x0 > y0: the middle corner is then one step along x,
// otherwise one step along y (handled by the NMasked variants below).
mask32v i1 = x0 > y0;
//mask32v j1 = ~i1; //NMasked funcs
// Offsets from the middle corner (x1, y1) and from the far corner (x2, y2).
float32v x1 = FS_MaskedSub_f32( x0, float32v( 1.f ), i1 ) + float32v( G2 );
float32v y1 = FS_NMaskedSub_f32( y0, float32v( 1.f ), i1 ) + float32v( G2 );
float32v x2 = x0 + float32v( G2 * 2 - 1 );
float32v y2 = y0 + float32v( G2 * 2 - 1 );
// Falloff per corner: 0.5 - distance^2
// (assuming FS_FNMulAdd_f32( a, b, c ) computes c - a * b - TODO confirm).
float32v t0 = FS_FNMulAdd_f32( x0, x0, FS_FNMulAdd_f32( y0, y0, float32v( 0.5f ) ) );
float32v t1 = FS_FNMulAdd_f32( x1, x1, FS_FNMulAdd_f32( y1, y1, float32v( 0.5f ) ) );
float32v t2 = FS_FNMulAdd_f32( x2, x2, FS_FNMulAdd_f32( y2, y2, float32v( 0.5f ) ) );
// Corners that are too far away contribute nothing.
t0 = FS_Max_f32( t0, float32v( 0 ) );
t1 = FS_Max_f32( t1, float32v( 0 ) );
t2 = FS_Max_f32( t2, float32v( 0 ) );
// Raise each falloff to the 4th power.
t0 *= t0; t0 *= t0;
t1 *= t1; t1 *= t1;
t2 *= t2; t2 *= t2;
// Gradient contribution of each corner; the corner hashes mirror the offsets above.
float32v n0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i, j ), x0, y0 );
float32v n1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ), FS_NMaskedAdd_i32( j, int32v( FnPrimes::Y ), i1 ) ), x1, y1 );
float32v n2 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ) ), x2, y2 );
// Weighted sum of the three corners, rescaled by a constant
// (presumably to normalise the output range - TODO confirm).
return float32v( 38.283687591552734375f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, n2 * t2 ) );
}
// 3D simplex noise: sums the contributions of the four corners of the
// tetrahedron that contains the (skewed) input position.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
{
// F3 skews the input onto the integer lattice; G3 is applied to the fractional
// part below to get back to unskewed offsets.
const float F3 = 1.0f / 3.0f;
const float G3 = 1.0f / 2.0f;
// Skew the input in place.
float32v s = float32v( F3 ) * (x + y + z);
x += s;
y += s;
z += s;
// Base lattice corner and the fractional position inside the cell.
float32v x0 = FS_Floor_f32( x );
float32v y0 = FS_Floor_f32( y );
float32v z0 = FS_Floor_f32( z );
float32v xi = x - x0;
float32v yi = y - y0;
float32v zi = z - z0;
// Integer corner coordinates, pre-multiplied by the hashing primes.
int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
int32v k = FS_Convertf32_i32( z0 ) * int32v( FnPrimes::Z );
// Pairwise ordering of the fractional coordinates; this decides which
// tetrahedron of the cell the position is in.
mask32v x_ge_y = xi >= yi;
mask32v y_ge_z = yi >= zi;
mask32v x_ge_z = xi >= zi;
// x0/y0/z0 become the offset from the base corner to the input position.
float32v g = float32v( G3 ) * (xi + yi + zi);
x0 = xi - g;
y0 = yi - g;
z0 = zi - g;
// Per-axis step masks for the second corner (i1, j1, k1) and the third corner
// (i2, j2, k2). k2 is stored inverted and consumed by the NMasked variants.
mask32v i1 = x_ge_y & x_ge_z;
mask32v j1 = FS_BitwiseAndNot_m32( y_ge_z, x_ge_y );
mask32v k1 = FS_BitwiseAndNot_m32( ~x_ge_z, y_ge_z );
mask32v i2 = x_ge_y | x_ge_z;
mask32v j2 = ~x_ge_y | y_ge_z;
mask32v k2 = x_ge_z & y_ge_z; //NMasked
// Offsets from the second, third and fourth corners.
float32v x1 = FS_MaskedSub_f32( x0, float32v( 1 ), i1 ) + float32v( G3 );
float32v y1 = FS_MaskedSub_f32( y0, float32v( 1 ), j1 ) + float32v( G3 );
float32v z1 = FS_MaskedSub_f32( z0, float32v( 1 ), k1 ) + float32v( G3 );
float32v x2 = FS_MaskedSub_f32( x0, float32v( 1 ), i2 ) + float32v( G3 * 2 );
float32v y2 = FS_MaskedSub_f32( y0, float32v( 1 ), j2 ) + float32v( G3 * 2 );
float32v z2 = FS_NMaskedSub_f32( z0, float32v( 1 ), k2 ) + float32v( G3 * 2 );
float32v x3 = x0 + float32v( G3 * 3 - 1 );
float32v y3 = y0 + float32v( G3 * 3 - 1 );
float32v z3 = z0 + float32v( G3 * 3 - 1 );
// Falloff per corner: 0.6 - distance^2
// (assuming FS_FNMulAdd_f32( a, b, c ) computes c - a * b - TODO confirm).
float32v t0 = FS_FNMulAdd_f32( x0, x0, FS_FNMulAdd_f32( y0, y0, FS_FNMulAdd_f32( z0, z0, float32v( 0.6f ) ) ) );
float32v t1 = FS_FNMulAdd_f32( x1, x1, FS_FNMulAdd_f32( y1, y1, FS_FNMulAdd_f32( z1, z1, float32v( 0.6f ) ) ) );
float32v t2 = FS_FNMulAdd_f32( x2, x2, FS_FNMulAdd_f32( y2, y2, FS_FNMulAdd_f32( z2, z2, float32v( 0.6f ) ) ) );
float32v t3 = FS_FNMulAdd_f32( x3, x3, FS_FNMulAdd_f32( y3, y3, FS_FNMulAdd_f32( z3, z3, float32v( 0.6f ) ) ) );
// Corners that are too far away contribute nothing.
t0 = FS_Max_f32( t0, float32v( 0 ) );
t1 = FS_Max_f32( t1, float32v( 0 ) );
t2 = FS_Max_f32( t2, float32v( 0 ) );
t3 = FS_Max_f32( t3, float32v( 0 ) );
// Raise each falloff to the 4th power.
t0 *= t0; t0 *= t0;
t1 *= t1; t1 *= t1;
t2 *= t2; t2 *= t2;
t3 *= t3; t3 *= t3;
// Gradient contribution of each corner; the corner hashes use the same step masks as the offsets.
float32v n0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i, j, k ), x0, y0, z0 );
float32v n1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ), FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j1 ), FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k1 ) ), x1, y1, z1 );
float32v n2 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i2 ), FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j2 ), FS_NMaskedAdd_i32( k, int32v( FnPrimes::Z ), k2 ) ), x2, y2, z2 );
float32v n3 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ), k + int32v( FnPrimes::Z ) ), x3, y3, z3 );
// Weighted sum of the four corners, rescaled by a constant
// (presumably to normalise the output range - TODO confirm).
return float32v( 32.69428253173828125f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, FS_FMulAdd_f32( n2, t2, n3 * t3 ) ) );
}
// 4D simplex noise evaluated for one SIMD vector of sample points.
// seed: hash seed; x, y, z, w: sample coordinates.
// Returns 27 * the sum of the five per-vertex (gradient dot * falloff) terms.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
{
const float SQRT5 = 2.236067977499f;
// F4 scales the coordinate sum that is added to every input (skew);
// G4 scales the sum that is subtracted again from the in-cell offsets below.
const float F4 = (SQRT5 - 1.0f) / 4.0f;
const float G4 = (5.0f - SQRT5) / 20.0f;
float32v s = float32v( F4 ) * (x + y + z + w);
x += s;
y += s;
z += s;
w += s;
// Integer cell containing the skewed point, and the fractional position inside it.
float32v x0 = FS_Floor_f32( x );
float32v y0 = FS_Floor_f32( y );
float32v z0 = FS_Floor_f32( z );
float32v w0 = FS_Floor_f32( w );
float32v xi = x - x0;
float32v yi = y - y0;
float32v zi = z - z0;
float32v wi = w - w0;
// Cell coordinates pre-multiplied by the per-axis primes, ready for HashPrimes.
int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
int32v k = FS_Convertf32_i32( z0 ) * int32v( FnPrimes::Z );
int32v l = FS_Convertf32_i32( w0 ) * int32v( FnPrimes::W );
// From here on x0..w0 are reused as the offsets from the first simplex vertex to the sample.
float32v g = float32v( G4 ) * (xi + yi + zi + wi);
x0 = xi - g;
y0 = yi - g;
z0 = zi - g;
w0 = wi - g;
// Rank the four offsets by pairwise comparison: each of the six comparisons awards one point
// to its winner, so every rank ends up in 0..3. Because of >=, ties go to the left-hand operand.
// (FS_MaskedIncrement_i32 presumably adds 1 where the mask is set - confirm against FastSIMD.)
int32v rankx( 0 );
int32v ranky( 0 );
int32v rankz( 0 );
int32v rankw( 0 );
mask32v x_ge_y = x0 >= y0;
rankx = FS_MaskedIncrement_i32( rankx, x_ge_y );
ranky = FS_MaskedIncrement_i32( ranky, ~x_ge_y );
mask32v x_ge_z = x0 >= z0;
rankx = FS_MaskedIncrement_i32( rankx, x_ge_z );
rankz = FS_MaskedIncrement_i32( rankz, ~x_ge_z );
mask32v x_ge_w = x0 >= w0;
rankx = FS_MaskedIncrement_i32( rankx, x_ge_w );
rankw = FS_MaskedIncrement_i32( rankw, ~x_ge_w );
mask32v y_ge_z = y0 >= z0;
ranky = FS_MaskedIncrement_i32( ranky, y_ge_z );
rankz = FS_MaskedIncrement_i32( rankz, ~y_ge_z );
mask32v y_ge_w = y0 >= w0;
ranky = FS_MaskedIncrement_i32( ranky, y_ge_w );
rankw = FS_MaskedIncrement_i32( rankw, ~y_ge_w );
mask32v z_ge_w = z0 >= w0;
rankz = FS_MaskedIncrement_i32( rankz, z_ge_w );
rankw = FS_MaskedIncrement_i32( rankw, ~z_ge_w );
// Per-axis step masks for the intermediate vertices:
// *1 is set only for the rank-3 axis, *2 for rank >= 2, *3 for rank >= 1.
mask32v i1 = rankx > int32v( 2 );
mask32v j1 = ranky > int32v( 2 );
mask32v k1 = rankz > int32v( 2 );
mask32v l1 = rankw > int32v( 2 );
mask32v i2 = rankx > int32v( 1 );
mask32v j2 = ranky > int32v( 1 );
mask32v k2 = rankz > int32v( 1 );
mask32v l2 = rankw > int32v( 1 );
mask32v i3 = rankx > int32v( 0 );
mask32v j3 = ranky > int32v( 0 );
mask32v k3 = rankz > int32v( 0 );
mask32v l3 = rankw > int32v( 0 );
// Offsets from vertices 1..3 to the sample: subtract 1 on the masked axes
// (presumably what FS_MaskedSub_f32 does - confirm) and add n * G4 for vertex n.
float32v x1 = FS_MaskedSub_f32( x0, float32v( 1 ), i1 ) + float32v( G4 );
float32v y1 = FS_MaskedSub_f32( y0, float32v( 1 ), j1 ) + float32v( G4 );
float32v z1 = FS_MaskedSub_f32( z0, float32v( 1 ), k1 ) + float32v( G4 );
float32v w1 = FS_MaskedSub_f32( w0, float32v( 1 ), l1 ) + float32v( G4 );
float32v x2 = FS_MaskedSub_f32( x0, float32v( 1 ), i2 ) + float32v( G4 * 2 );
float32v y2 = FS_MaskedSub_f32( y0, float32v( 1 ), j2 ) + float32v( G4 * 2 );
float32v z2 = FS_MaskedSub_f32( z0, float32v( 1 ), k2 ) + float32v( G4 * 2 );
float32v w2 = FS_MaskedSub_f32( w0, float32v( 1 ), l2 ) + float32v( G4 * 2 );
float32v x3 = FS_MaskedSub_f32( x0, float32v( 1 ), i3 ) + float32v( G4 * 3 );
float32v y3 = FS_MaskedSub_f32( y0, float32v( 1 ), j3 ) + float32v( G4 * 3 );
float32v z3 = FS_MaskedSub_f32( z0, float32v( 1 ), k3 ) + float32v( G4 * 3 );
float32v w3 = FS_MaskedSub_f32( w0, float32v( 1 ), l3 ) + float32v( G4 * 3 );
// Last vertex: stepped by 1 on every axis.
float32v x4 = x0 + float32v( G4 * 4 - 1 );
float32v y4 = y0 + float32v( G4 * 4 - 1 );
float32v z4 = z0 + float32v( G4 * 4 - 1 );
float32v w4 = w0 + float32v( G4 * 4 - 1 );
// Falloff per vertex: starts from 0.6 and removes each squared offset component
// (FS_FNMulAdd_f32( a, b, c ) presumably computes c - a * b - confirm).
float32v t0 = FS_FNMulAdd_f32( x0, x0, FS_FNMulAdd_f32( y0, y0, FS_FNMulAdd_f32( z0, z0, FS_FNMulAdd_f32( w0, w0, float32v( 0.6f ) ) ) ) );
float32v t1 = FS_FNMulAdd_f32( x1, x1, FS_FNMulAdd_f32( y1, y1, FS_FNMulAdd_f32( z1, z1, FS_FNMulAdd_f32( w1, w1, float32v( 0.6f ) ) ) ) );
float32v t2 = FS_FNMulAdd_f32( x2, x2, FS_FNMulAdd_f32( y2, y2, FS_FNMulAdd_f32( z2, z2, FS_FNMulAdd_f32( w2, w2, float32v( 0.6f ) ) ) ) );
float32v t3 = FS_FNMulAdd_f32( x3, x3, FS_FNMulAdd_f32( y3, y3, FS_FNMulAdd_f32( z3, z3, FS_FNMulAdd_f32( w3, w3, float32v( 0.6f ) ) ) ) );
float32v t4 = FS_FNMulAdd_f32( x4, x4, FS_FNMulAdd_f32( y4, y4, FS_FNMulAdd_f32( z4, z4, FS_FNMulAdd_f32( w4, w4, float32v( 0.6f ) ) ) ) );
// Vertices whose falloff went negative contribute nothing.
t0 = FS_Max_f32( t0, float32v( 0 ) );
t1 = FS_Max_f32( t1, float32v( 0 ) );
t2 = FS_Max_f32( t2, float32v( 0 ) );
t3 = FS_Max_f32( t3, float32v( 0 ) );
t4 = FS_Max_f32( t4, float32v( 0 ) );
// Raise each falloff to the 4th power (two successive squarings).
t0 *= t0; t0 *= t0;
t1 *= t1; t1 *= t1;
t2 *= t2; t2 *= t2;
t3 *= t3; t3 *= t3;
t4 *= t4; t4 *= t4;
// Gradient dot product at each vertex. The hash input is the primed cell coordinate,
// advanced by one prime on each axis that was stepped for that vertex.
float32v n0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i, j, k, l ), x0, y0, z0, w0 );
float32v n1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed,
FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ),
FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j1 ),
FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k1 ),
FS_MaskedAdd_i32( l, int32v( FnPrimes::W ), l1 ) ), x1, y1, z1, w1 );
float32v n2 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed,
FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i2 ),
FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j2 ),
FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k2 ),
FS_MaskedAdd_i32( l, int32v( FnPrimes::W ), l2 ) ), x2, y2, z2, w2 );
float32v n3 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed,
FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i3 ),
FS_MaskedAdd_i32( j, int32v( FnPrimes::Y ), j3 ),
FS_MaskedAdd_i32( k, int32v( FnPrimes::Z ), k3 ),
FS_MaskedAdd_i32( l, int32v( FnPrimes::W ), l3 ) ), x3, y3, z3, w3 );
float32v n4 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ), k + int32v( FnPrimes::Z ), l + int32v( FnPrimes::W ) ), x4, y4, z4, w4 );
// Weighted sum of the five contributions, scaled by the constant 27.
return float32v( 27.f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, FS_FMulAdd_f32( n2, t2, FS_FMulAdd_f32( n3, t3, n4 * t4 ) ) ) );
}
};
// SIMD implementation of the FastNoise::OpenSimplex2 generator for the FastSIMD level class FS.
// Provides the 2D and 3D Gen overloads.
template<typename FS>
class FS_T<FastNoise::OpenSimplex2, FS> : public virtual FastNoise::OpenSimplex2, public FS_T<FastNoise::Generator, FS>
{
FASTSIMD_DECLARE_FS_TYPES;
// 2D noise: sums three vertex contributions (gradient dot * falloff^4) and scales the result.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
{
const float SQRT3 = 1.7320508075f;
// F2 scales the coordinate sum added before flooring (skew); G2 scales the sum used to map back.
const float F2 = 0.5f * (SQRT3 - 1.0f);
const float G2 = (3.0f - SQRT3) / 6.0f;
float32v f = float32v( F2 ) * (x + y);
float32v x0 = FS_Floor_f32( x + f );
float32v y0 = FS_Floor_f32( y + f );
// Cell coordinates pre-multiplied by the per-axis primes, ready for HashPrimes.
int32v i = FS_Convertf32_i32( x0 ) * int32v( FnPrimes::X );
int32v j = FS_Convertf32_i32( y0 ) * int32v( FnPrimes::Y );
// x0 / y0 are reused as the offset from the first vertex to the sample.
float32v g = float32v( G2 ) * (x0 + y0);
x0 = x - (x0 - g);
y0 = y - (y0 - g);
// The middle vertex steps along x where x0 > y0 and along y otherwise. The y side reuses
// the same mask through the NMasked* helpers instead of materialising its complement.
mask32v i1 = x0 > y0;
//mask32v j1 = ~i1; //NMasked funcs
float32v x1 = FS_MaskedSub_f32( x0, float32v( 1.f ), i1 ) + float32v( G2 );
float32v y1 = FS_NMaskedSub_f32( y0, float32v( 1.f ), i1 ) + float32v( G2 );
// Last vertex: stepped by 1 on both axes.
float32v x2 = x0 + float32v( (G2 * 2) - 1 );
float32v y2 = y0 + float32v( (G2 * 2) - 1 );
// Falloff per vertex: 0.5 minus the squared offset length, clamped at zero, then raised to the 4th power.
float32v t0 = float32v( 0.5f ) - (x0 * x0) - (y0 * y0);
float32v t1 = float32v( 0.5f ) - (x1 * x1) - (y1 * y1);
float32v t2 = float32v( 0.5f ) - (x2 * x2) - (y2 * y2);
t0 = FS_Max_f32( t0, float32v( 0 ) );
t1 = FS_Max_f32( t1, float32v( 0 ) );
t2 = FS_Max_f32( t2, float32v( 0 ) );
t0 *= t0; t0 *= t0;
t1 *= t1; t1 *= t1;
t2 *= t2; t2 *= t2;
// Gradient dot product at each vertex, hashed from the (stepped) primed cell coordinates.
float32v n0 = FnUtils::GetGradientDotFancy( FnUtils::HashPrimes( seed, i, j ), x0, y0 );
float32v n1 = FnUtils::GetGradientDotFancy( FnUtils::HashPrimes( seed, FS_MaskedAdd_i32( i, int32v( FnPrimes::X ), i1 ), FS_NMaskedAdd_i32( j, int32v( FnPrimes::Y ), i1 ) ), x1, y1 );
float32v n2 = FnUtils::GetGradientDotFancy( FnUtils::HashPrimes( seed, i + int32v( FnPrimes::X ), j + int32v( FnPrimes::Y ) ), x2, y2 );
return float32v( 49.918426513671875f ) * FS_FMulAdd_f32( n0, t0, FS_FMulAdd_f32( n1, t1, n2 * t2 ) );
}
// 3D noise: runs the same two-vertex evaluation twice (loop index 0 and 1) and sums all four
// contributions. Between the passes every coordinate is shifted by 0.5 and the seed bits are inverted.
float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
{
// Transform the input: each coordinate becomes (2/3 * sum) minus itself.
float32v f = float32v( 2.0f / 3.0f ) * (x + y + z);
float32v xr = f - x;
float32v yr = f - y;
float32v zr = f - z;
float32v val( 0 );
for( size_t i = 0; ; i++ )
{
// Nearest lattice point (v0) and the offset from it to the sample (d0).
float32v v0xr = FS_Round_f32( xr );
float32v v0yr = FS_Round_f32( yr );
float32v v0zr = FS_Round_f32( zr );
float32v d0xr = xr - v0xr;
float32v d0yr = yr - v0yr;
float32v d0zr = zr - v0zr;
// Pick exactly one axis: the one with the largest absolute offset.
// Ties resolve to x first, then y; z is chosen when neither x nor y was.
float32v score0xr = FS_Abs_f32( d0xr );
float32v score0yr = FS_Abs_f32( d0yr );
float32v score0zr = FS_Abs_f32( d0zr );
mask32v dir0xr = FS_Max_f32( score0yr, score0zr ) <= score0xr;
mask32v dir0yr = FS_BitwiseAndNot_m32( FS_Max_f32( score0zr, score0xr ) <= score0yr, dir0xr );
mask32v dir0zr = ~(dir0xr | dir0yr);
// Second lattice point (v1): v0 moved along the chosen axis by 1.0 carrying the sign of the offset.
// Assuming | and & act on the raw float bits, 1.0f | ( -1.0f & d ) is 1.0 with d's sign bit.
float32v v1xr = FS_MaskedAdd_f32( v0xr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0xr ), dir0xr );
float32v v1yr = FS_MaskedAdd_f32( v0yr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0yr ), dir0yr );
float32v v1zr = FS_MaskedAdd_f32( v0zr, float32v( 1.0f ) | ( float32v( -1.0f ) & d0zr ), dir0zr );
float32v d1xr = xr - v1xr;
float32v d1yr = yr - v1yr;
float32v d1zr = zr - v1zr;
// Primed integer coordinates of both lattice points for hashing.
int32v hv0xr = FS_Convertf32_i32( v0xr ) * int32v( FnPrimes::X );
int32v hv0yr = FS_Convertf32_i32( v0yr ) * int32v( FnPrimes::Y );
int32v hv0zr = FS_Convertf32_i32( v0zr ) * int32v( FnPrimes::Z );
int32v hv1xr = FS_Convertf32_i32( v1xr ) * int32v( FnPrimes::X );
int32v hv1yr = FS_Convertf32_i32( v1yr ) * int32v( FnPrimes::Y );
int32v hv1zr = FS_Convertf32_i32( v1zr ) * int32v( FnPrimes::Z );
// Falloff: starts from 0.6 and removes each squared offset component
// (FS_FNMulAdd_f32( a, b, c ) presumably computes c - a * b - confirm), clamped at 0, then ^4.
float32v t0 = FS_FNMulAdd_f32( d0zr, d0zr, FS_FNMulAdd_f32( d0yr, d0yr, FS_FNMulAdd_f32( d0xr, d0xr, float32v( 0.6f ) ) ) );
float32v t1 = FS_FNMulAdd_f32( d1zr, d1zr, FS_FNMulAdd_f32( d1yr, d1yr, FS_FNMulAdd_f32( d1xr, d1xr, float32v( 0.6f ) ) ) );
t0 = FS_Max_f32( t0, float32v( 0 ) );
t1 = FS_Max_f32( t1, float32v( 0 ) );
t0 *= t0; t0 *= t0;
t1 *= t1; t1 *= t1;
float32v v0 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, hv0xr, hv0yr, hv0zr ), d0xr, d0yr, d0zr );
float32v v1 = FnUtils::GetGradientDot( FnUtils::HashPrimes( seed, hv1xr, hv1yr, hv1zr ), d1xr, d1yr, d1zr );
// Accumulate both contributions of this pass into the running total.
val = FS_FMulAdd_f32( v0, t0, FS_FMulAdd_f32( v1, t1, val ) );
// Stop after the second pass; the setup below is only needed before pass two.
if( i == 1 )
{
break;
}
xr += float32v( 0.5f );
yr += float32v( 0.5f );
zr += float32v( 0.5f );
seed = ~seed;
}
return float32v( 32.69428253173828125f ) * val;
}
};

View File

@ -0,0 +1,306 @@
#pragma once
#include "FastSIMD/InlInclude.h"
#include <climits>
namespace FastNoise
{
// Per-axis integer multipliers. Generators multiply each integer lattice coordinate by the
// constant for its axis before the coordinates are combined in the hash helpers.
// Declared `inline constexpr` so that every translation unit including this header shares a
// single definition instead of carrying its own internal-linkage copy.
namespace Primes
{
    inline constexpr int32_t X = 501125321;
    inline constexpr int32_t Y = 1136930381;
    inline constexpr int32_t Z = 1720413743;
    inline constexpr int32_t W = 1066037191;

    // The same four constants indexable by axis number (0 = X ... 3 = W).
    inline constexpr int32_t Lookup[] = { X, Y, Z, W };
}
// Stateless helper functions shared by the noise generators, parameterised on the
// FastSIMD level class FS (which supplies the vector types and FS::SIMD_Level).
// Several helpers come in multiple overloads selected at compile time by SIMD level.
template<typename FS>
struct Utils
{
    using float32v = typename FS::float32v;
    using int32v = typename FS::int32v;
    using mask32v = typename FS::mask32v;

    static constexpr float ROOT2 = 1.4142135623730950488f;
    static constexpr float ROOT3 = 1.7320508075688772935f;

    // 2D gradient dot product (12-direction style set built from 2 and ROOT3 components).
    // Generic path for SIMD levels below AVX2: the gradient is assembled from individual
    // bits of a derived index instead of a table lookup.
    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level < FastSIMD::Level_AVX2>* = nullptr>
    FS_INLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
    {
        int32v index = FS_Convertf32_i32( FS_Converti32_f32( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );

        // Bit-4 = Choose X Y ordering
        mask32v xy;

        if constexpr( FS::SIMD_Level == FastSIMD::Level_Scalar )
        {
            xy = int32_t( index & int32v( 1 << 2 ) ) != 0;
        }
        else
        {
            // Move the deciding bit into the sign position. Below SSE4.1 it is additionally
            // smeared across the whole lane - presumably because Select there needs a
            // full-width mask rather than just the sign bit (confirm against FS::Select_f32).
            xy = index << 29;

            if constexpr( FS::SIMD_Level < FastSIMD::Level_SSE41 )
            {
                xy >>= 31;
            }
        }

        float32v a = FS_Select_f32( xy, fY, fX );
        float32v b = FS_Select_f32( xy, fX, fY );

        // Bit-1 = b flip sign
        b ^= FS_Casti32_f32( index << 31 );

        // Bit-2 = Mul a by 2 or Root3
        mask32v aMul2;

        if constexpr( FS::SIMD_Level == FastSIMD::Level_Scalar )
        {
            aMul2 = int32_t( index & int32v( 1 << 1 ) ) != 0;
        }
        else
        {
            aMul2 = (index << 30) >> 31;
        }

        a *= FS_Select_f32( aMul2, float32v( 2 ), float32v( ROOT3 ) );
        // b zero value if a mul 2
        b = FS_NMask_f32( b, aMul2 );

        // Bit-8 = Flip sign of a + b
        return ( a + b ) ^ FS_Casti32_f32( (index >> 3) << 31 );
    }

    // AVX2 path: gradient components come from two 8-entry tables indexed by the derived
    // index; the overall sign is applied from bit 3 of the index.
    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX2>* = nullptr>
    FS_INLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
    {
        int32v index = FS_Convertf32_i32( FS_Converti32_f32( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );

        float32v gX = _mm256_permutevar8x32_ps( float32v( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0 ), index );
        float32v gY = _mm256_permutevar8x32_ps( float32v( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2 ), index );

        // Bit-8 = Flip sign of a + b
        return FS_FMulAdd_f32( gX, fX, fY * gY ) ^ FS_Casti32_f32( (index >> 3) << 31 );
    }

    // AVX512 path: 16-entry tables already contain the negated half, so no separate sign flip.
    template<typename SIMD = FS, std::enable_if_t<(SIMD::SIMD_Level == FastSIMD::Level_AVX512)>* = nullptr>
    FS_INLINE static float32v GetGradientDotFancy( int32v hash, float32v fX, float32v fY )
    {
        int32v index = FS_Convertf32_i32( FS_Converti32_f32( hash & int32v( 0x3FFFFF ) ) * float32v( 1.3333333333333333f ) );

        float32v gX = _mm512_permutexvar_ps( index, float32v( ROOT3, ROOT3, 2, 2, 1, -1, 0, 0, -ROOT3, -ROOT3, -2, -2, -1, 1, 0, 0 ) );
        float32v gY = _mm512_permutexvar_ps( index, float32v( 1, -1, 0, 0, ROOT3, ROOT3, 2, 2, -1, 1, 0, 0, -ROOT3, -ROOT3, -2, -2 ) );

        return FS_FMulAdd_f32( gX, fX, fY * gY );
    }

    // 2D gradient dot product using the 8 gradients listed below, selected by the low
    // three bits of the hash. Generic path for SIMD levels below AVX2.
    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level < FastSIMD::Level_AVX2>* = nullptr>
    FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
    {
        // ( 1+R2, 1 ) ( -1-R2, 1 ) ( 1+R2, -1 ) ( -1-R2, -1 )
        // ( 1, 1+R2 ) ( 1, -1-R2 ) ( -1, 1+R2 ) ( -1, -1-R2 )

        // Bits 0 and 1 become sign masks for fX and fY; bit 2 swaps which one gets the 1+R2 weight.
        int32v bit1 = (hash << 31);
        int32v bit2 = (hash >> 1) << 31;
        mask32v bit4;

        if constexpr( FS::SIMD_Level == FastSIMD::Level_Scalar )
        {
            bit4 = int32_t( hash & int32v( 1 << 2 ) ) != 0;
        }
        else
        {
            bit4 = hash << 29;

            if constexpr( FS::SIMD_Level < FastSIMD::Level_SSE41 )
            {
                bit4 >>= 31;
            }
        }

        fX ^= FS_Casti32_f32( bit1 );
        fY ^= FS_Casti32_f32( bit2 );

        float32v a = FS_Select_f32( bit4, fY, fX );
        float32v b = FS_Select_f32( bit4, fX, fY );

        return FS_FMulAdd_f32( float32v( 1.0f + ROOT2 ), a, b );
    }

    // AVX2 path: the same 8 gradients read from two tables indexed by the hash.
    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX2>* = nullptr>
    FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
    {
        float32v gX = _mm256_permutevar8x32_ps( float32v( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 ), hash );
        float32v gY = _mm256_permutevar8x32_ps( float32v( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 ), hash );

        return FS_FMulAdd_f32( gX, fX, fY * gY );
    }

    // AVX512 path: the 8 gradients are repeated twice to fill the 16-entry tables.
    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX512> * = nullptr>
    FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY )
    {
        float32v gX = _mm512_permutexvar_ps( hash, float32v( 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1, 1 + ROOT2, -1 - ROOT2, 1 + ROOT2, -1 - ROOT2, 1, -1, 1, -1 ) );
        float32v gY = _mm512_permutexvar_ps( hash, float32v( 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2, 1, 1, -1, -1, 1 + ROOT2, 1 + ROOT2, -1 - ROOT2, -1 - ROOT2 ) );

        return FS_FMulAdd_f32( gX, fX, fY * gY );
    }

    // 3D gradient dot product: picks two of the three inputs (u, v) from the hash bits,
    // applies a sign to each and adds them. Used for every level except AVX512.
    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level != FastSIMD::Level_AVX512 > * = nullptr >
    FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ )
    {
        // Bit 1 is masked out here because it is only used as v's sign further down.
        int32v hasha13 = hash & int32v( 13 );

        //if h < 8 then x, else y
        float32v u = FS_Select_f32( hasha13 < int32v( 8 ), fX, fY );

        //if h < 4 then y else if h is 12 or 14 then x else z
        float32v v = FS_Select_f32( hasha13 == int32v( 12 ), fX, fZ );
        v = FS_Select_f32( hasha13 < int32v( 2 ), fY, v );

        //if h1 then -u else u
        //if h2 then -v else v
        float32v h1 = FS_Casti32_f32( hash << 31 );
        float32v h2 = FS_Casti32_f32( (hash & int32v( 2 )) << 30 );

        //then add them
        return ( u ^ h1 ) + ( v ^ h2 );
    }

    // AVX512 path: 16 gradients read from three tables indexed by the hash.
    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX512>* = nullptr>
    FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ )
    {
        float32v gX = _mm512_permutexvar_ps( hash, float32v( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 1, 0, -1, 0 ) );
        float32v gY = _mm512_permutexvar_ps( hash, float32v( 1, 1, -1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 ) );
        float32v gZ = _mm512_permutexvar_ps( hash, float32v( 0, 0, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1, 0, 1, 0, -1 ) );

        return FS_FMulAdd_f32( gX, fX, FS_FMulAdd_f32( fY, gY, fZ * gZ ));
    }

    // 4D gradient dot product: bits 3-4 of the hash choose which three of the four inputs
    // take part (a, b, c); bits 0-2 supply their signs. Used for every level except AVX512.
    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level != FastSIMD::Level_AVX512>* = nullptr >
    FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW )
    {
        int32v p = hash & int32v( 3 << 3 );

        float32v a = FS_Select_f32( p > int32v( 0 ), fX, fY );
        float32v b;

        // The shortcut below hands Select a mask in which only the sign bit is meaningful.
        // The 2D paths in this struct only rely on that from SSE4.1 upwards (they smear the
        // bit across the lane for anything lower), so every level below SSE4.1 uses the
        // comparison-built mask here as well. Both forms test the same hash bit
        // (p > 8 is true exactly when bit 4 is set), so lane selection is unchanged.
        if constexpr( FS::SIMD_Level < FastSIMD::Level_SSE41 )
        {
            b = FS_Select_f32( p > int32v( 1 << 3 ), fY, fZ );
        }
        else
        {
            b = FS_Select_f32( hash << 27, fY, fZ );
        }

        float32v c = FS_Select_f32( p > int32v( 2 << 3 ), fZ, fW );

        float32v aSign = FS_Casti32_f32( hash << 31 );
        float32v bSign = FS_Casti32_f32( (hash << 30) & int32v( 0x80000000 ) );
        float32v cSign = FS_Casti32_f32( (hash << 29) & int32v( 0x80000000 ) );

        return ( a ^ aSign ) + ( b ^ bSign ) + ( c ^ cSign );
    }

    // AVX512 path: 32 gradients read from pairs of 16-entry tables via a two-source permute.
    template<typename SIMD = FS, std::enable_if_t<SIMD::SIMD_Level == FastSIMD::Level_AVX512>* = nullptr>
    FS_INLINE static float32v GetGradientDot( int32v hash, float32v fX, float32v fY, float32v fZ, float32v fW )
    {
        float32v gX = _mm512_permutex2var_ps( float32v( 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 ), hash, float32v( 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 ) );
        float32v gY = _mm512_permutex2var_ps( float32v( 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ), hash, float32v( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 ) );
        float32v gZ = _mm512_permutex2var_ps( float32v( 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1 ), hash, float32v( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1 ) );
        float32v gW = _mm512_permutex2var_ps( float32v( 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1 ), hash, float32v( 1, 1, 1, 1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0 ) );

        return FS_FMulAdd_f32( gX, fX, FS_FMulAdd_f32( fY, gY, FS_FMulAdd_f32( fZ, gZ, fW * gW ) ));
    }

    // Hashes a seed with any number (at least one) of pre-multiplied lattice coordinates:
    // XOR everything together, multiply by a constant, then fold the high bits down.
    template<typename SIMD = FS, typename... P>
    FS_INLINE static int32v HashPrimes( int32v seed, P... primedPos )
    {
        int32v hash = seed;
        hash ^= (primedPos ^ ...);

        hash *= int32v( 0x27d4eb2d );
        return (hash >> 15) ^ hash;
    }

    // Same as HashPrimes but without the final shift/XOR fold step.
    template<typename SIMD = FS, typename... P>
    FS_INLINE static int32v HashPrimesHB( int32v seed, P... primedPos )
    {
        int32v hash = seed;
        hash ^= (primedPos ^ ...);

        hash *= int32v( 0x27d4eb2d );
        return hash;
    }

    // Hashes the coordinates and converts the result to a float scaled by 1 / INT_MAX,
    // i.e. a value in approximately [-1, 1].
    template<typename SIMD = FS, typename... P>
    FS_INLINE static float32v GetValueCoord( int32v seed, P... primedPos )
    {
        int32v hash = seed;
        hash ^= (primedPos ^ ...);

        hash *= hash * int32v( 0x27d4eb2d );
        return FS_Converti32_f32( hash ) * float32v( 1.0f / static_cast<float>( INT_MAX ) );
    }

    // Linear interpolation: a + t * (b - a).
    template<typename SIMD = FS>
    FS_INLINE static float32v Lerp( float32v a, float32v b, float32v t )
    {
        return FS_FMulAdd_f32( t, b - a, a );
    }

    // Cubic easing curve t^2 * (3 - 2t), assuming FS_FNMulAdd_f32( a, b, c ) computes c - a * b.
    template<typename SIMD = FS>
    FS_INLINE static float32v InterpHermite( float32v t )
    {
        return t * t * FS_FNMulAdd_f32( t, float32v( 2 ), float32v( 3 ));
    }

    // Quintic easing curve t^3 * (t * (6t - 15) + 10).
    template<typename SIMD = FS>
    FS_INLINE static float32v InterpQuintic( float32v t )
    {
        return t * t * t * FS_FMulAdd_f32( t, FS_FMulAdd_f32( t, float32v( 6 ), float32v( -15 )), float32v( 10 ) );
    }

    // Distance between two points given their per-axis differences (dX plus at least one
    // more component in d...), measured with the requested distance function.
    // Unknown distFunc values fall through to Euclidean.
    template<typename SIMD = FS, typename... P>
    FS_INLINE static float32v CalcDistance( DistanceFunction distFunc, float32v dX, P... d )
    {
        switch( distFunc )
        {
            default:
            case DistanceFunction::Euclidean:
            {
                float32v distSqr = dX * dX;
                ((distSqr = FS_FMulAdd_f32( d, d, distSqr )), ...);

                // sqrt( v ) computed as invsqrt( v ) * v.
                // NOTE(review): if FS_InvSqrt_f32 yields infinity for an input of exactly 0,
                // this product is NaN at zero distance - confirm per SIMD level.
                return FS_InvSqrt_f32( distSqr ) * distSqr;
            }

            case DistanceFunction::EuclideanSquared:
            {
                float32v distSqr = dX * dX;
                ((distSqr = FS_FMulAdd_f32( d, d, distSqr )), ...);

                return distSqr;
            }

            case DistanceFunction::Manhattan:
            {
                float32v dist = FS_Abs_f32( dX );
                dist += (FS_Abs_f32( d ) + ...);

                return dist;
            }

            case DistanceFunction::Hybrid:
            {
                // Sum over all components of (component^2 + |component|).
                float32v both = FS_FMulAdd_f32( dX, dX, FS_Abs_f32( dX ) );
                ((both += FS_FMulAdd_f32( d, d, FS_Abs_f32( d ) )), ...);

                return both;
            }
        }
    }
};
}
// Shorthand for the Utils helpers instantiated with FS_SIMD_CLASS (defined outside this file).
using FnUtils = FastNoise::Utils<FS_SIMD_CLASS>;
// Shorthand for the per-axis hashing constants.
namespace FnPrimes = FastNoise::Primes;

View File

@ -0,0 +1,16 @@
#pragma once
#include "Generator.h"
namespace FastNoise
{
// Generator node type for value noise. This header only declares the type and its
// metadata; the Gen implementations live in the matching SIMD .inl specialisation.
class Value : public virtual Generator
{
// NOTE(review): FASTNOISE_METADATA is defined elsewhere; it presumably opens a nested
// Metadata struct deriving from Generator::Metadata, which the first "};" below closes - confirm.
FASTNOISE_METADATA( Generator )
// Forwards the class name to the base metadata and files this node under the "Coherent Noise" group.
Metadata( const char* className ) : Generator::Metadata( className )
{
groups.push_back( "Coherent Noise" );
}
};
};
}

View File

@ -0,0 +1,88 @@
#include "FastSIMD/InlInclude.h"
#include "Value.h"
#include "Utils.inl"
// SIMD implementation of the FastNoise::Value generator for the FastSIMD level class FS.
// Each overload hashes the corners of the lattice cell containing the sample into
// pseudo-random values and blends them with Hermite-smoothed linear interpolation,
// one axis at a time (x first, then y, z, w).
template<typename FS>
class FS_T<FastNoise::Value, FS> : public virtual FastNoise::Value, public FS_T<FastNoise::Generator, FS>
{
    FASTSIMD_DECLARE_FS_TYPES;

    // 2D: blends the 4 corners of the enclosing square.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y ) const final
    {
        float32v cellX = FS_Floor_f32( x );
        float32v cellY = FS_Floor_f32( y );

        // Primed coordinates of the low corner and of its +1 neighbour on each axis.
        int32v px0 = FS_Convertf32_i32( cellX ) * int32v( FnPrimes::X );
        int32v py0 = FS_Convertf32_i32( cellY ) * int32v( FnPrimes::Y );
        int32v px1 = px0 + int32v( FnPrimes::X );
        int32v py1 = py0 + int32v( FnPrimes::Y );

        // Smoothed blend weights from the fractional position inside the cell.
        float32v tx = FnUtils::InterpHermite( x - cellX );
        float32v ty = FnUtils::InterpHermite( y - cellY );

        float32v edgeY0 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, px0, py0 ), FnUtils::GetValueCoord( seed, px1, py0 ), tx );
        float32v edgeY1 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, px0, py1 ), FnUtils::GetValueCoord( seed, px1, py1 ), tx );

        return FnUtils::Lerp( edgeY0, edgeY1, ty );
    }

    // 3D: blends the 8 corners of the enclosing cube.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z ) const final
    {
        float32v cellX = FS_Floor_f32( x );
        float32v cellY = FS_Floor_f32( y );
        float32v cellZ = FS_Floor_f32( z );

        int32v px0 = FS_Convertf32_i32( cellX ) * int32v( FnPrimes::X );
        int32v py0 = FS_Convertf32_i32( cellY ) * int32v( FnPrimes::Y );
        int32v pz0 = FS_Convertf32_i32( cellZ ) * int32v( FnPrimes::Z );
        int32v px1 = px0 + int32v( FnPrimes::X );
        int32v py1 = py0 + int32v( FnPrimes::Y );
        int32v pz1 = pz0 + int32v( FnPrimes::Z );

        float32v tx = FnUtils::InterpHermite( x - cellX );
        float32v ty = FnUtils::InterpHermite( y - cellY );
        float32v tz = FnUtils::InterpHermite( z - cellZ );

        // Blend along x on the four edges (named by their y, z corner), then y, then z.
        float32v edge00 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, px0, py0, pz0 ), FnUtils::GetValueCoord( seed, px1, py0, pz0 ), tx );
        float32v edge10 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, px0, py1, pz0 ), FnUtils::GetValueCoord( seed, px1, py1, pz0 ), tx );
        float32v edge01 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, px0, py0, pz1 ), FnUtils::GetValueCoord( seed, px1, py0, pz1 ), tx );
        float32v edge11 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, px0, py1, pz1 ), FnUtils::GetValueCoord( seed, px1, py1, pz1 ), tx );

        float32v faceZ0 = FnUtils::Lerp( edge00, edge10, ty );
        float32v faceZ1 = FnUtils::Lerp( edge01, edge11, ty );

        return FnUtils::Lerp( faceZ0, faceZ1, tz );
    }

    // 4D: blends the 16 corners of the enclosing hypercube.
    float32v FS_VECTORCALL Gen( int32v seed, float32v x, float32v y, float32v z, float32v w ) const final
    {
        float32v cellX = FS_Floor_f32( x );
        float32v cellY = FS_Floor_f32( y );
        float32v cellZ = FS_Floor_f32( z );
        float32v cellW = FS_Floor_f32( w );

        int32v px0 = FS_Convertf32_i32( cellX ) * int32v( FnPrimes::X );
        int32v py0 = FS_Convertf32_i32( cellY ) * int32v( FnPrimes::Y );
        int32v pz0 = FS_Convertf32_i32( cellZ ) * int32v( FnPrimes::Z );
        int32v pw0 = FS_Convertf32_i32( cellW ) * int32v( FnPrimes::W );
        int32v px1 = px0 + int32v( FnPrimes::X );
        int32v py1 = py0 + int32v( FnPrimes::Y );
        int32v pz1 = pz0 + int32v( FnPrimes::Z );
        int32v pw1 = pw0 + int32v( FnPrimes::W );

        float32v tx = FnUtils::InterpHermite( x - cellX );
        float32v ty = FnUtils::InterpHermite( y - cellY );
        float32v tz = FnUtils::InterpHermite( z - cellZ );
        float32v tw = FnUtils::InterpHermite( w - cellW );

        // Blend along x on the eight edges (named by their y, z, w corner).
        float32v edge000 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, px0, py0, pz0, pw0 ), FnUtils::GetValueCoord( seed, px1, py0, pz0, pw0 ), tx );
        float32v edge100 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, px0, py1, pz0, pw0 ), FnUtils::GetValueCoord( seed, px1, py1, pz0, pw0 ), tx );
        float32v edge010 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, px0, py0, pz1, pw0 ), FnUtils::GetValueCoord( seed, px1, py0, pz1, pw0 ), tx );
        float32v edge110 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, px0, py1, pz1, pw0 ), FnUtils::GetValueCoord( seed, px1, py1, pz1, pw0 ), tx );
        float32v edge001 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, px0, py0, pz0, pw1 ), FnUtils::GetValueCoord( seed, px1, py0, pz0, pw1 ), tx );
        float32v edge101 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, px0, py1, pz0, pw1 ), FnUtils::GetValueCoord( seed, px1, py1, pz0, pw1 ), tx );
        float32v edge011 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, px0, py0, pz1, pw1 ), FnUtils::GetValueCoord( seed, px1, py0, pz1, pw1 ), tx );
        float32v edge111 = FnUtils::Lerp( FnUtils::GetValueCoord( seed, px0, py1, pz1, pw1 ), FnUtils::GetValueCoord( seed, px1, py1, pz1, pw1 ), tx );

        // Then along y (faces named by z, w), along z (cubes named by w), and finally along w.
        float32v face00 = FnUtils::Lerp( edge000, edge100, ty );
        float32v face10 = FnUtils::Lerp( edge010, edge110, ty );
        float32v face01 = FnUtils::Lerp( edge001, edge101, ty );
        float32v face11 = FnUtils::Lerp( edge011, edge111, ty );

        float32v cubeW0 = FnUtils::Lerp( face00, face10, tz );
        float32v cubeW1 = FnUtils::Lerp( face01, face11, tz );

        return FnUtils::Lerp( cubeW0, cubeW1, tw );
    }
};

View File

@ -0,0 +1,83 @@
#pragma once
#include <type_traits>
#include <tuple>
#include <stdexcept>
#include "FastSIMD/FunctionList.h"
// Fixed-size heterogeneous-access vector built by recursive inheritance:
// VecN<T, N> derives from VecN<T, N - 1> and stores one element per level.
template<typename T, size_t Size>
class VecN;

// Recursion terminator: holds no element. Its constructor and the ForEach / ForEachR
// members accept and ignore any arguments so the recursive calls of the larger sizes
// have something to bottom out in.
template<typename T>
class VecN<T, 0>
{
protected:
    template<typename... Ignored>
    constexpr VecN( Ignored... ) {}

    template<typename... Ignored>
    void ForEach( Ignored... ) const {}

    template<typename... Ignored>
    void ForEachR( Ignored... ) const {}
};
// One level of the recursive vector: stores element number S - 1 and inherits the
// elements below it from VecN<T, S - 1>.
template<typename T, size_t S>
class VecN : public VecN<T, S - 1>
{
public:
    // Number of elements held by this level and everything below it.
    static constexpr size_t Size = S;

    // Compile-time index of the element stored at this level. Declared once, publicly;
    // a second declaration of the same member name in this class would be ill-formed.
    typedef std::integral_constant<size_t, Size - 1> Index;

    // Value-initialises every element.
    constexpr VecN() : Base(), value() {}

    // Initialises the elements from the argument list in order: every level receives the
    // full list and picks out the argument at its own index.
    template<typename... A>
    constexpr VecN( A... args ) :
        Base( args... ),
        value( std::get<Index::value>( std::make_tuple( args... ) ) )
    {
    }

    // Mutable access to element I (I < Size).
    template<size_t I>
    FS_INLINE std::enable_if_t<(I < Size), T&> At()
    {
        return VecN<T, I + 1>::value;
    }

    // Read access to element I (I < Size); returns a copy.
    template<size_t I>
    FS_INLINE std::enable_if_t<(I < Size), T> At() const
    {
        return VecN<T, I + 1>::value;
    }

    // Out-of-range access: selected when I >= Size, always throws std::out_of_range.
    // This lets ForEach be instantiated against a shorter vector and fail only if reached.
    template<size_t I>
    FS_INLINE std::enable_if_t<(I >= Size), T&> At() const
    {
        throw std::out_of_range( "Index out of range" );
    }

    // Calls func( Index(), element, other.At<index>()... ) for every element,
    // lowest index first.
    template<typename F, typename... A>
    FS_INLINE void ForEach( F&& func, A&&... other )
    {
        Base::ForEach( func, other... );

        func( Index(), value, (other.template At<Index::value>())... );
    }

    // Same as ForEach but visits the highest index first.
    template<typename F, typename... A>
    FS_INLINE void ForEachR( F&& func, A&&... other )
    {
        func( Index(), value, (other.template At<Index::value>())... );

        Base::ForEachR( func, other... );
    }

protected:
    typedef VecN<T, Size - 1> Base;

    // The element stored at this level (index Size - 1).
    T value;
};

View File

@ -0,0 +1,52 @@
#pragma once
#include <cstdint>
#include "FastSIMD_Config.h"
// Core FastSIMD declarations: the SIMD feature-level flags, the set of levels compiled
// into this build, and the factory entry points.
namespace FastSIMD
{
    // Integer type able to hold any OR-combination of eLevel flags.
    using Level_BitFlags = uint32_t;

    // One bit per instruction-set level.
    enum eLevel : Level_BitFlags
    {
        // No level selected / not initialised yet.
        Level_Null   = 0,
        // Plain scalar code, 80386 instruction set (no SIMD).
        Level_Scalar = 1 <<  0,
        // SSE (XMM registers) reported by the CPU; operating-system support is not tested.
        Level_SSE    = 1 <<  1,
        // SSE2
        Level_SSE2   = 1 <<  2,
        // SSE3
        Level_SSE3   = 1 <<  3,
        // Supplementary SSE3 (SSSE3)
        Level_SSSE3  = 1 <<  4,
        // SSE4.1
        Level_SSE41  = 1 <<  5,
        // SSE4.2
        Level_SSE42  = 1 <<  6,
        // AVX, supported by both CPU and operating system.
        Level_AVX    = 1 <<  7,
        // AVX2
        Level_AVX2   = 1 <<  8,
        // AVX512 and AVX512DQ, supported by both CPU and operating system.
        Level_AVX512 = 1 <<  9,

        // ARM NEON
        Level_NEON   = 1 << 16,
    };

    // Bitmask of every level enabled through the FASTSIMD_COMPILE_* configuration macros.
    const Level_BitFlags COMPILED_SIMD_LEVELS =
        (FASTSIMD_COMPILE_SCALAR ? Level_Scalar : 0) |
        (FASTSIMD_COMPILE_SSE    ? Level_SSE    : 0) |
        (FASTSIMD_COMPILE_SSE2   ? Level_SSE2   : 0) |
        (FASTSIMD_COMPILE_SSE3   ? Level_SSE3   : 0) |
        (FASTSIMD_COMPILE_SSSE3  ? Level_SSSE3  : 0) |
        (FASTSIMD_COMPILE_SSE41  ? Level_SSE41  : 0) |
        (FASTSIMD_COMPILE_SSE42  ? Level_SSE42  : 0) |
        (FASTSIMD_COMPILE_AVX    ? Level_AVX    : 0) |
        (FASTSIMD_COMPILE_AVX2   ? Level_AVX2   : 0) |
        (FASTSIMD_COMPILE_AVX512 ? Level_AVX512 : 0) |
        (FASTSIMD_COMPILE_NEON   ? Level_NEON   : 0);

    // Declared here, defined elsewhere. Going by the name, reports the highest level the
    // running CPU supports - confirm against the definition.
    eLevel CPUMaxSIMDLevel();

    // Factory for a T; the optional argument defaults to Level_Null. Defined elsewhere.
    template<typename T>
    T* New( eLevel maxSIMDLevel = Level_Null );

    // Factory for a T tied to one specific SIMD level. Defined elsewhere.
    template<typename T, eLevel SIMD_LEVEL>
    T* ClassFactory();

// Declares the static Supported_SIMD_Levels constant of a class from the given level flags.
#define FASTSIMD_LEVEL_SUPPORT( ... ) \
    static const FastSIMD::Level_BitFlags Supported_SIMD_Levels = __VA_ARGS__

} // namespace FastSIMD

View File

@ -0,0 +1,29 @@
#pragma once
// Target architecture: ARM when the compiler defines __arm__ or __aarch64__, otherwise x86.
// Exactly one of FASTSIMD_x86 / FASTSIMD_ARM is 1.
#if defined(__arm__) || defined(__aarch64__)
#define FASTSIMD_x86 0
#define FASTSIMD_ARM 1
#else
#define FASTSIMD_x86 1
#define FASTSIMD_ARM 0
#endif
// True when pointers are 64 bits wide.
// NOTE(review): INTPTR_MAX and INT64_MAX come from <cstdint>, which this header does not
// include itself. Inside a preprocessor condition, undefined identifiers evaluate to 0, so
// without that include this would read as 0 == 0 (true) - confirm every includer pulls in
// <cstdint> first.
#define FASTSIMD_64BIT (INTPTR_MAX == INT64_MAX)
// Per-level build switches (1 = compile that level). Each is gated on the matching
// architecture flag; "& 000" keeps the entry in the table but forces it to 0.
#define FASTSIMD_COMPILE_SCALAR (!(FASTSIMD_x86 && FASTSIMD_64BIT)) // Don't compile for x86 64bit since CPU is guaranteed SSE2 support
#define FASTSIMD_COMPILE_SSE (FASTSIMD_x86 & 000) // Not supported
#define FASTSIMD_COMPILE_SSE2 (FASTSIMD_x86 & 1 )
#define FASTSIMD_COMPILE_SSE3 (FASTSIMD_x86 & 1 )
#define FASTSIMD_COMPILE_SSSE3 (FASTSIMD_x86 & 1 )
#define FASTSIMD_COMPILE_SSE41 (FASTSIMD_x86 & 1 )
#define FASTSIMD_COMPILE_SSE42 (FASTSIMD_x86 & 1 )
#define FASTSIMD_COMPILE_AVX (FASTSIMD_x86 & 000) // Not supported
#define FASTSIMD_COMPILE_AVX2 (FASTSIMD_x86 & 1 )
#define FASTSIMD_COMPILE_AVX512 (FASTSIMD_x86 & 1 )
#define FASTSIMD_COMPILE_NEON (FASTSIMD_ARM & 1 )
// Further switches consumed elsewhere in the library (their consumers are not visible here).
#define FASTSIMD_USE_FMA 1
#define FASTSIMD_CONFIG_GENERATE_CONSTANTS 0

View File

@ -0,0 +1,821 @@
#pragma once
#include <cinttypes>
#include <type_traits>
#include <memory>
#include "FastSIMD/FastSIMD.h"
// Compiler-specific spelling of the calling-convention and inlining decorations.
// MSVC: FS_VECTORCALL is __vectorcall unless _M_IX86_FP is defined with a value below 2,
// in which case it expands to nothing; FS_INLINE is __forceinline.
// Other compilers: FS_VECTORCALL expands to nothing; FS_INLINE is always_inline + inline.
#ifdef _MSC_VER
#if defined( _M_IX86_FP ) && _M_IX86_FP < 2
#define FS_VECTORCALL
#else
#define FS_VECTORCALL __vectorcall
#endif
#define FS_INLINE __forceinline
#else
#define FS_VECTORCALL
#define FS_INLINE __attribute__((always_inline)) inline
#endif
// When NDEBUG is not defined, forced inlining is dropped and FS_INLINE becomes plain inline.
#ifndef NDEBUG
#undef FS_INLINE
#define FS_INLINE inline
#endif
/// <summary>
/// Number of 32 width elements that will fit into a vector
/// </summary>
/// <remarks>
/// Compile time constant
/// </remarks>
/// <code>
/// size_t FS_Size_32()
/// </code>
#define FS_Size_32() FS::template VectorSize<32>
// Vector builders
/// <summary>
/// Vector with values incrementing from 0 based on element index {0, 1, 2, 3...}
/// </summary>
/// <code>
/// example: int32v::FS_Incremented()
/// </code>
#define FS_Incremented() Incremented()
// Load
/// <summary>
/// Copies sizeof(float32v) bytes from given memory location into float32v
/// </summary>
/// <remarks>
/// Memory does not need to be aligned
/// </remarks>
/// <code>
/// float32v FS_Load_f32( void const* ptr )
/// </code>
#define FS_Load_f32( ... ) FS::Load_f32( __VA_ARGS__ )
/// <summary>
/// Copies sizeof(int32v) bytes from given memory location into int32v
/// </summary>
/// <remarks>
/// Memory does not need to be aligned
/// </remarks>
/// <code>
/// int32v FS_Load_i32( void const* ptr )
/// </code>
#define FS_Load_i32( ... ) FS::Load_i32( __VA_ARGS__ )
// Store
/// <summary>
/// Copies all elements of float32v to given memory location
/// </summary>
/// <code>
/// void FS_Store_f32( void* ptr, float32v f )
/// </code>
#define FS_Store_f32( ... ) FS::Store_f32( __VA_ARGS__ )
/// <summary>
/// Copies all elements of int32v to given memory location
/// </summary>
/// <code>
/// void FS_Store_i32( void* ptr, int32v i )
/// </code>
#define FS_Store_i32( ... ) FS::Store_i32( __VA_ARGS__ )
// Cast
/// <summary>
/// Bitwise cast int to float
/// </summary>
/// <code>
/// float32v FS_Casti32_f32( int32v i )
/// </code>
#define FS_Casti32_f32( ... ) FS::Casti32_f32( __VA_ARGS__ )
/// <summary>
/// Bitwise cast float to int
/// </summary>
/// <code>
/// int32v FS_Castf32_i32( float32v f )
/// </code>
#define FS_Castf32_i32( ... ) FS::Castf32_i32( __VA_ARGS__ )
// Convert
/// <summary>
/// Convert int to float
/// </summary>
/// <remarks>
/// Rounding: truncate
/// </remarks>
/// <code>
/// float32v FS_Converti32_f32( int32v i )
/// </code>
#define FS_Converti32_f32( ... ) FS::Converti32_f32( __VA_ARGS__ )
/// <summary>
/// Convert float to int
/// </summary>
/// <code>
/// int32v FS_Convertf32_i32( float32v f )
/// </code>
#define FS_Convertf32_i32( ... ) FS::Convertf32_i32( __VA_ARGS__ )
// Select
/// <summary>
/// return ( m ? a : b )
/// </summary>
/// <code>
/// float32v FS_Select_f32( mask32v m, float32v a, float32v b )
/// </code>
#define FS_Select_f32( ... ) FS::Select_f32( __VA_ARGS__ )
/// <summary>
/// return ( m ? a : b )
/// </summary>
/// <code>
/// int32v FS_Select_i32( mask32v m, int32v a, int32v b )
/// </code>
#define FS_Select_i32( ... ) FS::Select_i32( __VA_ARGS__ )
// Min, Max
/// <summary>
/// return ( a < b ? a : b )
/// </summary>
/// <code>
/// float32v FS_Min_f32( float32v a, float32v b )
/// </code>
#define FS_Min_f32( ... ) FS::Min_f32( __VA_ARGS__ )
/// <summary>
/// return ( a > b ? a : b )
/// </summary>
/// <code>
/// float32v FS_Max_f32( float32v a, float32v b )
/// </code>
#define FS_Max_f32( ... ) FS::Max_f32( __VA_ARGS__ )
/// <summary>
/// return ( a < b ? a : b )
/// </summary>
/// <code>
/// int32v FS_Min_i32( int32v a, int32v b )
/// </code>
#define FS_Min_i32( ... ) FS::Min_i32( __VA_ARGS__ )
/// <summary>
/// return ( a > b ? a : b )
/// </summary>
/// <code>
/// int32v FS_Max_i32( int32v a, int32v b )
/// </code>
#define FS_Max_i32( ... ) FS::Max_i32( __VA_ARGS__ )
// Bitwise
/// <summary>
/// return ( a & ~b )
/// </summary>
/// <code>
/// float32v FS_BitwiseAndNot_f32( float32v a, float32v b )
/// </code>
#define FS_BitwiseAndNot_f32( ... ) FS::BitwiseAndNot_f32( __VA_ARGS__ )
/// <summary>
/// return ( a & ~b )
/// </summary>
/// <code>
/// int32v FS_BitwiseAndNot_i32( int32v a, int32v b )
/// </code>
#define FS_BitwiseAndNot_i32( ... ) FS::BitwiseAndNot_i32( __VA_ARGS__ )
/// <summary>
/// return ( a & ~b )
/// </summary>
/// <code>
/// mask32v FS_BitwiseAndNot_m32( mask32v a, mask32v b )
/// </code>
#define FS_BitwiseAndNot_m32( ... ) FastSIMD::BitwiseAndNot_m32<FS>( __VA_ARGS__ )
/// <summary>
/// return ZeroExtend( a >> b )
/// </summary>
/// <code>
/// float32v FS_BitwiseShiftRightZX_f32( float32v a, int32_t b )
/// </code>
#define FS_BitwiseShiftRightZX_f32( ... ) FS::BitwiseShiftRightZX_f32( __VA_ARGS__ )
/// <summary>
/// return ZeroExtend( a >> b )
/// </summary>
/// <code>
/// int32v FS_BitwiseShiftRightZX_i32( int32v a, int32_t b )
/// </code>
#define FS_BitwiseShiftRightZX_i32( ... ) FS::BitwiseShiftRightZX_i32( __VA_ARGS__ )
// Abs
/// <summary>
/// return ( a < 0 ? -a : a )
/// </summary>
/// <code>
/// float32v FS_Abs_f32( float32v a )
/// </code>
#define FS_Abs_f32( ... ) FS::Abs_f32( __VA_ARGS__ )
/// <summary>
/// return ( a < 0 ? -a : a )
/// </summary>
/// <code>
/// int32v FS_Abs_i32( int32v a )
/// </code>
#define FS_Abs_i32( ... ) FS::Abs_i32( __VA_ARGS__ )
// Float math
/// <summary>
/// return sqrt( a )
/// </summary>
/// <code>
/// float32v FS_Sqrt_f32( float32v a )
/// </code>
#define FS_Sqrt_f32( ... ) FS::Sqrt_f32( __VA_ARGS__ )
/// <summary>
/// return APPROXIMATE( 1.0 / sqrt( a ) )
/// </summary>
/// <code>
/// float32v FS_InvSqrt_f32( float32v a )
/// </code>
#define FS_InvSqrt_f32( ... ) FS::InvSqrt_f32( __VA_ARGS__ )
/// <summary>
/// return APPROXIMATE( 1.0 / a )
/// </summary>
/// <code>
/// float32v FS_Reciprocal_f32( float32v a )
/// </code>
#define FS_Reciprocal_f32( ... ) FS::Reciprocal_f32( __VA_ARGS__ )
// Floor, Ceil, Round
/// <summary>
/// return floor( a )
/// </summary>
/// <remarks>
/// Rounding: Towards negative infinity
/// </remarks>
/// <code>
/// float32v FS_Floor_f32( float32v a )
/// </code>
#define FS_Floor_f32( ... ) FS::Floor_f32( __VA_ARGS__ )
/// <summary>
/// return ceil( a )
/// </summary>
/// <remarks>
/// Rounding: Towards positive infinity
/// </remarks>
/// <code>
/// float32v FS_Ceil_f32( float32v a )
/// </code>
#define FS_Ceil_f32( ... ) FS::Ceil_f32( __VA_ARGS__ )
/// <summary>
/// return round( a )
/// </summary>
/// <remarks>
/// Rounding: Banker's rounding
/// </remarks>
/// <code>
/// float32v FS_Round_f32( float32v a )
/// </code>
#define FS_Round_f32( ... ) FS::Round_f32( __VA_ARGS__ )
// Trig
/// <summary>
/// return APPROXIMATE( cos( a ) )
/// </summary>
/// <code>
/// float32v FS_Cos_f32( float32v a )
/// </code>
#define FS_Cos_f32( ... ) FastSIMD::Cos_f32<FS>( __VA_ARGS__ )
/// <summary>
/// return APPROXIMATE( sin( a ) )
/// </summary>
/// <code>
/// float32v FS_Sin_f32( float32v a )
/// </code>
#define FS_Sin_f32( ... ) FastSIMD::Sin_f32<FS>( __VA_ARGS__ )
// Math
/// <summary>
/// return pow( v, pow )
/// </summary>
/// <code>
/// float32v FS_Pow_f32( float32v v, float32v pow )
/// </code>
#define FS_Pow_f32( ... ) FastSIMD::Pow_f32<FS>( __VA_ARGS__ )
/// <summary>
/// return log( a )
/// </summary>
/// <remarks>
/// a <= 0 returns 0
/// </remarks>
/// <code>
/// float32v FS_Log_f32( float32v a )
/// </code>
#define FS_Log_f32( ... ) FastSIMD::Log_f32<FS>( __VA_ARGS__ )
/// <summary>
/// return exp( a )
/// </summary>
/// <remarks>
/// a will be clamped to -88.376, 88.376
/// </remarks>
/// <code>
/// float32v FS_Exp_f32( float32v a )
/// </code>
#define FS_Exp_f32( ... ) FastSIMD::Exp_f32<FS>( __VA_ARGS__ )
// Mask
/// <summary>
/// return ( m ? a : 0 )
/// </summary>
/// <code>
/// int32v FS_Mask_i32( int32v a, mask32v m )
/// </code>
#define FS_Mask_i32( ... ) FS::Mask_i32( __VA_ARGS__ )
/// <summary>
/// return ( m ? a : 0 )
/// </summary>
/// <code>
/// float32v FS_Mask_f32( float32v a, mask32v m )
/// </code>
#define FS_Mask_f32( ... ) FS::Mask_f32( __VA_ARGS__ )
/// <summary>
/// return ( m ? 0 : a )
/// </summary>
/// <code>
/// int32v FS_NMask_i32( int32v a, mask32v m )
/// </code>
#define FS_NMask_i32( ... ) FS::NMask_i32( __VA_ARGS__ )
/// <summary>
/// return ( m ? 0 : a )
/// </summary>
/// <code>
/// float32v FS_NMask_f32( float32v a, mask32v m )
/// </code>
#define FS_NMask_f32( ... ) FS::NMask_f32( __VA_ARGS__ )
/// <summary>
/// return m.contains( true )
/// </summary>
/// <code>
/// bool FS_AnyMask_bool( mask32v m )
/// </code>
#define FS_AnyMask_bool( ... ) FS::AnyMask_bool( __VA_ARGS__ )
// FMA
/// <summary>
/// return ( (a * b) + c )
/// </summary>
/// <code>
/// float32v FS_FMulAdd_f32( float32v a, float32v b, float32v c )
/// </code>
#define FS_FMulAdd_f32( ... ) FastSIMD::FMulAdd_f32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( -(a * b) + c )
/// </summary>
/// <code>
/// float32v FS_FNMulAdd_f32( float32v a, float32v b, float32v c )
/// </code>
#define FS_FNMulAdd_f32( ... ) FastSIMD::FNMulAdd_f32<FS>( __VA_ARGS__ )
// Masked float
/// <summary>
/// return ( m ? (a + b) : a )
/// </summary>
/// <code>
/// float32v FS_MaskedAdd_f32( float32v a, float32v b, mask32v m )
/// </code>
#define FS_MaskedAdd_f32( ... ) FastSIMD::MaskedAdd_f32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? (a - b) : a )
/// </summary>
/// <code>
/// float32v FS_MaskedSub_f32( float32v a, float32v b, mask32v m )
/// </code>
#define FS_MaskedSub_f32( ... ) FastSIMD::MaskedSub_f32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? (a * b) : a )
/// </summary>
/// <code>
/// float32v FS_MaskedMul_f32( float32v a, float32v b, mask32v m )
/// </code>
#define FS_MaskedMul_f32( ... ) FastSIMD::MaskedMul_f32<FS>( __VA_ARGS__ )
// Masked int32
/// <summary>
/// return ( m ? (a + b) : a )
/// </summary>
/// <code>
/// int32v FS_MaskedAdd_i32( int32v a, int32v b, mask32v m )
/// </code>
#define FS_MaskedAdd_i32( ... ) FastSIMD::MaskedAdd_i32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? (a - b) : a )
/// </summary>
/// <code>
/// int32v FS_MaskedSub_i32( int32v a, int32v b, mask32v m )
/// </code>
#define FS_MaskedSub_i32( ... ) FastSIMD::MaskedSub_i32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? (a * b) : a )
/// </summary>
/// <code>
/// int32v FS_MaskedMul_i32( int32v a, int32v b, mask32v m )
/// </code>
#define FS_MaskedMul_i32( ... ) FastSIMD::MaskedMul_i32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? (a + 1) : a )
/// </summary>
/// <code>
/// int32v FS_MaskedIncrement_i32( int32v a, mask32v m )
/// </code>
#define FS_MaskedIncrement_i32( ... ) FastSIMD::MaskedIncrement_i32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? (a - 1) : a )
/// </summary>
/// <code>
/// int32v FS_MaskedDecrement_i32( int32v a, mask32v m )
/// </code>
#define FS_MaskedDecrement_i32( ... ) FastSIMD::MaskedDecrement_i32<FS>( __VA_ARGS__ )
// NMasked float
/// <summary>
/// return ( m ? a : (a + b) )
/// </summary>
/// <code>
/// float32v FS_NMaskedAdd_f32( float32v a, float32v b, mask32v m )
/// </code>
#define FS_NMaskedAdd_f32( ... ) FastSIMD::NMaskedAdd_f32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? a : (a - b) )
/// </summary>
/// <code>
/// float32v FS_NMaskedSub_f32( float32v a, float32v b, mask32v m )
/// </code>
#define FS_NMaskedSub_f32( ... ) FastSIMD::NMaskedSub_f32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? a : (a * b) )
/// </summary>
/// <code>
/// float32v FS_NMaskedMul_f32( float32v a, float32v b, mask32v m )
/// </code>
#define FS_NMaskedMul_f32( ... ) FastSIMD::NMaskedMul_f32<FS>( __VA_ARGS__ )
// NMasked int32
/// <summary>
/// return ( m ? a : (a + b) )
/// </summary>
/// <code>
/// int32v FS_NMaskedAdd_i32( int32v a, int32v b, mask32v m )
/// </code>
#define FS_NMaskedAdd_i32( ... ) FastSIMD::NMaskedAdd_i32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? a : (a - b) )
/// </summary>
/// <code>
/// int32v FS_NMaskedSub_i32( int32v a, int32v b, mask32v m )
/// </code>
#define FS_NMaskedSub_i32( ... ) FastSIMD::NMaskedSub_i32<FS>( __VA_ARGS__ )
/// <summary>
/// return ( m ? a : (a * b) )
/// </summary>
/// <code>
/// int32v FS_NMaskedMul_i32( int32v a, int32v b, mask32v m )
/// </code>
#define FS_NMaskedMul_i32( ... ) FastSIMD::NMaskedMul_i32<FS>( __VA_ARGS__ )
namespace FastSIMD
{
//FMA
// Returns ( (a * b) + c ) per lane, built from the float32v operators of FS.
// NOTE(review): presumably SIMD levels with a hardware FMA provide their own
// version of this elsewhere - confirm.
template<typename FS>
FS_INLINE typename FS::float32v FMulAdd_f32( typename FS::float32v a, typename FS::float32v b, typename FS::float32v c )
{
return (a * b) + c;
}
// Returns ( -(a * b) + c ) per lane, built from the float32v operators of FS.
template<typename FS>
FS_INLINE typename FS::float32v FNMulAdd_f32( typename FS::float32v a, typename FS::float32v b, typename FS::float32v c )
{
return -(a * b) + c;
}
// Masked float
// Returns ( m ? (a + b) : a ) per lane.
// FS::Mask_f32 replaces b with 0 in lanes where m is not set, so those lanes add zero.
template<typename FS>
FS_INLINE typename FS::float32v MaskedAdd_f32( typename FS::float32v a, typename FS::float32v b, typename FS::mask32v m )
{
return a + FS::Mask_f32( b, m );
}
// Returns ( m ? (a - b) : a ) per lane.
// FS::Mask_f32 replaces b with 0 in lanes where m is not set, so those lanes subtract zero.
template<typename FS>
FS_INLINE typename FS::float32v MaskedSub_f32( typename FS::float32v a, typename FS::float32v b, typename FS::mask32v m )
{
return a - FS::Mask_f32( b, m );
}
// Returns ( m ? (a * b) : a ) per lane.
// The product is computed for every lane and FS::Select_f32 keeps the original
// a where m is not set. Multiplying by a masked b would instead force those
// lanes to zero, because the masked-out value is 0 and not the identity 1.
template<typename FS>
FS_INLINE typename FS::float32v MaskedMul_f32( typename FS::float32v a, typename FS::float32v b, typename FS::mask32v m )
{
    return FS::Select_f32( m, a * b, a );
}
// Masked int32
// Returns ( m ? (a + b) : a ) per lane.
// FS::Mask_i32 replaces b with 0 in lanes where m is not set, so those lanes add zero.
template<typename FS>
FS_INLINE typename FS::int32v MaskedAdd_i32( typename FS::int32v a, typename FS::int32v b, typename FS::mask32v m )
{
return a + FS::Mask_i32( b, m );
}
// Returns ( m ? (a - b) : a ) per lane.
// FS::Mask_i32 replaces b with 0 in lanes where m is not set, so those lanes subtract zero.
template<typename FS>
FS_INLINE typename FS::int32v MaskedSub_i32( typename FS::int32v a, typename FS::int32v b, typename FS::mask32v m )
{
return a - FS::Mask_i32( b, m );
}
// Returns ( m ? (a * b) : a ) per lane.
// The product is computed for every lane and FS::Select_i32 keeps the original
// a where m is not set. Multiplying by a masked b would instead force those
// lanes to zero, because the masked-out value is 0 and not the identity 1.
template<typename FS>
FS_INLINE typename FS::int32v MaskedMul_i32( typename FS::int32v a, typename FS::int32v b, typename FS::mask32v m )
{
    return FS::Select_i32( m, a * b, a );
}
// NMasked float
// Returns ( m ? a : (a + b) ) per lane.
// FS::NMask_f32 replaces b with 0 in lanes where m is set, so those lanes add zero.
template<typename FS>
FS_INLINE typename FS::float32v NMaskedAdd_f32( typename FS::float32v a, typename FS::float32v b, typename FS::mask32v m )
{
return a + FS::NMask_f32( b, m );
}
// Returns ( m ? a : (a - b) ) per lane.
// FS::NMask_f32 replaces b with 0 in lanes where m is set, so those lanes subtract zero.
template<typename FS>
FS_INLINE typename FS::float32v NMaskedSub_f32( typename FS::float32v a, typename FS::float32v b, typename FS::mask32v m )
{
return a - FS::NMask_f32( b, m );
}
// Returns ( m ? a : (a * b) ) per lane.
// The product is computed for every lane and FS::Select_f32 keeps the original
// a where m is set. Multiplying by an inverse-masked b would instead force
// those lanes to zero, because the masked-out value is 0 and not the identity 1.
template<typename FS>
FS_INLINE typename FS::float32v NMaskedMul_f32( typename FS::float32v a, typename FS::float32v b, typename FS::mask32v m )
{
    return FS::Select_f32( m, a, a * b );
}
// NMasked int32
// Returns ( m ? a : (a + b) ) per lane.
// FS::NMask_i32 replaces b with 0 in lanes where m is set, so those lanes add zero.
template<typename FS>
FS_INLINE typename FS::int32v NMaskedAdd_i32( typename FS::int32v a, typename FS::int32v b, typename FS::mask32v m )
{
return a + FS::NMask_i32( b, m );
}
// Returns ( m ? a : (a - b) ) per lane.
// FS::NMask_i32 replaces b with 0 in lanes where m is set, so those lanes subtract zero.
template<typename FS>
FS_INLINE typename FS::int32v NMaskedSub_i32( typename FS::int32v a, typename FS::int32v b, typename FS::mask32v m )
{
return a - FS::NMask_i32( b, m );
}
// Returns ( m ? a : (a * b) ) per lane.
// The product is computed for every lane and FS::Select_i32 keeps the original
// a where m is set. Multiplying by an inverse-masked b would instead force
// those lanes to zero, because the masked-out value is 0 and not the identity 1.
template<typename FS>
FS_INLINE typename FS::int32v NMaskedMul_i32( typename FS::int32v a, typename FS::int32v b, typename FS::mask32v m )
{
    return FS::Select_i32( m, a, a * b );
}
// Returns ( m ? (a + 1) : a ) per lane.
// Overload selected when FS::mask32v is the same type as FS::int32v: the mask is
// subtracted directly.
// NOTE(review): this presumably relies on set mask lanes holding -1 (all bits set) - confirm.
template<typename FS, std::enable_if_t<std::is_same_v<typename FS::int32v, typename FS::mask32v>>* = nullptr>
FS_INLINE typename FS::int32v MaskedIncrement_i32( typename FS::int32v a, typename FS::mask32v m )
{
return a - m;
}
// Overload selected when the mask is a distinct type: subtracts -1 in the lanes
// selected by m, leaving the other lanes unchanged.
template<typename FS, std::enable_if_t<!std::is_same_v<typename FS::int32v, typename FS::mask32v>>* = nullptr>
FS_INLINE typename FS::int32v MaskedIncrement_i32( typename FS::int32v a, typename FS::mask32v m )
{
return MaskedSub_i32<FS>( a, typename FS::int32v( -1 ), m );
}
// Returns ( m ? (a - 1) : a ) per lane.
// Overload selected when FS::mask32v is the same type as FS::int32v: the mask is
// added directly.
// NOTE(review): this presumably relies on set mask lanes holding -1 (all bits set) - confirm.
template<typename FS, std::enable_if_t<std::is_same_v<typename FS::int32v, typename FS::mask32v>>* = nullptr>
FS_INLINE typename FS::int32v MaskedDecrement_i32( typename FS::int32v a, typename FS::mask32v m )
{
return a + m;
}
// Overload selected when the mask is a distinct type: adds -1 in the lanes
// selected by m, leaving the other lanes unchanged.
template<typename FS, std::enable_if_t<!std::is_same_v<typename FS::int32v, typename FS::mask32v>>* = nullptr>
FS_INLINE typename FS::int32v MaskedDecrement_i32( typename FS::int32v a, typename FS::mask32v m )
{
return MaskedAdd_i32<FS>( a, typename FS::int32v( -1 ), m );
}
// Bitwise
// Returns ( a & ~b ) for two masks.
// Overload selected when FS::mask32v is the same type as FS::int32v: forwards to
// the integer and-not of the SIMD level.
template<typename FS, std::enable_if_t<std::is_same_v<typename FS::int32v, typename FS::mask32v>>* = nullptr>
FS_INLINE typename FS::mask32v BitwiseAndNot_m32( typename FS::mask32v a, typename FS::mask32v b )
{
return FS::BitwiseAndNot_i32( a, b );
}
// Overload selected when the mask is a distinct type: composed from the mask
// type's own & and ~ operators.
template<typename FS, std::enable_if_t<!std::is_same_v<typename FS::int32v, typename FS::mask32v>>* = nullptr>
FS_INLINE typename FS::mask32v BitwiseAndNot_m32( typename FS::mask32v a, typename FS::mask32v b )
{
return a & (~b);
}
// Trig
// Approximates cos( value ) per lane.
//
// Steps:
//  1. Take |value| (cosine is even) and reduce it to [0, 2*pi).
//  2. Fold the angle into the first quadrant, quadrant by quadrant, using
//     masked XOR swaps: x ^ Mask( x ^ y, m ) yields y where m is set and x
//     elsewhere, without a select.
//  3. Evaluate an even polynomial in the folded angle.
//  4. Negate the result for angles that were in [pi/2, 3*pi/2).
template<typename FS>
FS_INLINE typename FS::float32v Cos_f32( typename FS::float32v value )
{
    typedef typename FS::int32v int32v;
    typedef typename FS::float32v float32v;
    typedef typename FS::mask32v mask32v;

    value = FS_Abs_f32( value );
    // value -= floor( value / (2*pi) ) * (2*pi)
    value -= FS_Floor_f32( value * float32v( 0.1591549f ) ) * float32v( 6.283185f );

    mask32v geHalfPi = value >= float32v( 1.570796f );
    mask32v geHalfPi2 = value >= float32v( 3.141593f );
    mask32v geHalfPi3 = value >= float32v( 4.7123889f );

    // >= pi/2: angle becomes ( pi - value ). The inner subtraction is
    // parenthesised explicitly; '-' binds tighter than '^', so this is the
    // same expression the compiler already parsed.
    float32v cosAngle = value ^ FS_Mask_f32( ( value ^ ( float32v( 3.141593f ) - value ) ), geHalfPi );
    // >= pi: flip the sign bit, turning ( pi - value ) into ( value - pi ).
    cosAngle = cosAngle ^ FS_Mask_f32( FS_Casti32_f32( int32v( 0x80000000 ) ), geHalfPi2 );
    // >= 3*pi/2: angle becomes ( 2*pi - value ).
    cosAngle = cosAngle ^ FS_Mask_f32( cosAngle ^ ( float32v( 6.283185f ) - value ), geHalfPi3 );

    // Polynomial in the squared folded angle.
    cosAngle *= cosAngle;
    cosAngle = FS_FMulAdd_f32( cosAngle, FS_FMulAdd_f32( cosAngle, float32v( 0.03679168f ), float32v( -0.49558072f ) ), float32v( 0.99940307f ) );

    // Cosine is negative for angles in [pi/2, 3*pi/2): flip the sign bit there.
    return cosAngle ^ FS_Mask_f32( FS_Casti32_f32( int32v( 0x80000000 ) ), FS_BitwiseAndNot_m32( geHalfPi, geHalfPi3 ) );
}
// Approximates sin( value ) per lane through the identity
// sin( x ) = cos( pi/2 - x ), reusing Cos_f32.
template<typename FS>
FS_INLINE typename FS::float32v Sin_f32( typename FS::float32v value )
{
    using float32v = typename FS::float32v;

    float32v shifted = float32v( 1.570796f ) - value;
    return Cos_f32<FS>( shifted );
}
// Approximates exp( x ) per lane.
// The input is clamped to +/-88.3762626647949, then split as x = g + n*ln(2);
// exp( g ) is evaluated with a polynomial and 2^n is built directly in the
// float exponent bits.
template<typename FS>
FS_INLINE typename FS::float32v Exp_f32( typename FS::float32v x )
{
typedef typename FS::int32v int32v;
typedef typename FS::float32v float32v;
// Clamp the input range.
x = FS_Min_f32( x, float32v( 88.3762626647949f ) );
x = FS_Max_f32( x, float32v( -88.3762626647949f ) );
/* express exp(x) as exp(g + n*log(2)) */
// fx = x * log2(e) + 0.5, floored below to get the integer n.
float32v fx = x * float32v( 1.44269504088896341f );
fx += float32v( 0.5f );
float32v flr = FS_Floor_f32( fx );
// Subtract one in lanes where the floored value came out above fx.
fx = FS_MaskedSub_f32( flr, float32v( 1 ), flr > fx );
// g = x - n*ln(2); the two constants sum to ln(2) and are applied separately.
x -= fx * float32v( 0.693359375f );
x -= fx * float32v( -2.12194440e-4f );
// Polynomial for exp( g ), evaluated by Horner's scheme.
float32v y( 1.9875691500E-4f );
y *= x;
y += float32v( 1.3981999507E-3f );
y *= x;
y += float32v( 8.3334519073E-3f );
y *= x;
y += float32v( 4.1665795894E-2f );
y *= x;
y += float32v( 1.6666665459E-1f );
y *= x;
y += float32v( 5.0000001201E-1f );
// exp( g ) ~= 1 + g + g^2 * P( g )
y *= x * x;
y += x + float32v( 1 );
/* build 2^n */
int32v i = FS_Convertf32_i32( fx );
// another two AVX2 instructions
// Add the exponent bias (0x7f) and shift into the exponent field (bit 23 upwards).
i += int32v( 0x7f );
i <<= 23;
float32v pow2n = FS_Casti32_f32( i );
return y * pow2n;
}
// Approximates the natural logarithm log( x ) per lane.
// Lanes where ( x > 0 ) is false return 0.
//
// The value is split into exponent e and mantissa m in [0.5, 1) taken from the
// float bit pattern; m is renormalised to [sqrt(0.5), sqrt(2)) and
// log( 1 + t ), with t = m - 1, is evaluated with a polynomial:
//     log( x ) = t + ( t^3 * P( t ) + e*q1 - t^2/2 ) + e*q2
// where q1 = -2.12194440e-4 and q2 = 0.693359375 together sum to ln(2).
template<typename FS>
FS_INLINE typename FS::float32v Log_f32( typename FS::float32v x )
{
    typedef typename FS::int32v int32v;
    typedef typename FS::float32v float32v;
    typedef typename FS::mask32v mask32v;

    mask32v validMask = x > float32v( 0 );

    x = FS_Max_f32( x, FS_Casti32_f32( int32v( 0x00800000 ) ) ); /* cut off denormalized stuff */

    // can be done with AVX2
    // Biased exponent: the float bits shifted down past the 23 mantissa bits.
    int32v i = FS_BitwiseShiftRightZX_i32( FS_Castf32_i32( x ), 23 );

    /* keep only the fractional part */
    // Clear the exponent bits, then OR in the bits of 0.5 so x lies in [0.5, 1).
    x &= FS_Casti32_f32( int32v( ~0x7f800000 ) );
    x |= float32v( 0.5f );

    // this is again another AVX2 instruction
    i -= int32v( 0x7f );
    float32v e = FS_Converti32_f32( i );
    e += float32v( 1 );

    // Where the mantissa is below sqrt(0.5): double it and lower e by one,
    // then shift the range so that x = mantissa - 1.
    mask32v mask = x < float32v( 0.707106781186547524f );
    x = FS_MaskedAdd_f32( x, x, mask );
    x -= float32v( 1 );
    e = FS_MaskedSub_f32( e, float32v( 1 ), mask );

    // Polynomial P( x ), evaluated by Horner's scheme.
    float32v y = float32v( 7.0376836292E-2f );
    y *= x;
    y += float32v( -1.1514610310E-1f );
    y *= x;
    y += float32v( 1.1676998740E-1f );
    y *= x;
    y += float32v( -1.2420140846E-1f );
    y *= x;
    y += float32v( 1.4249322787E-1f );
    y *= x;
    y += float32v( -1.6668057665E-1f );
    y *= x;
    y += float32v( 2.0000714765E-1f );
    y *= x;
    y += float32v( -2.4999993993E-1f );
    y *= x;
    y += float32v( 3.3333331174E-1f );
    y *= x;

    float32v xx = x * x;
    y *= xx;

    // The low-order part of e*ln(2) is an additive correction. Multiplying by
    // it would scale the polynomial to almost nothing, and to exactly zero
    // whenever e == 0.
    y += e * float32v( -2.12194440e-4f );
    y -= xx * float32v( 0.5f );

    x += y;
    // High-order part of e*ln(2).
    x += e * float32v( 0.693359375f );

    return FS_Mask_f32( x, validMask );
}
// Approximates pow( value, pow ) per lane as exp( pow * log( value ) ),
// composed from Log_f32 and Exp_f32.
template<typename FS>
FS_INLINE typename FS::float32v Pow_f32( typename FS::float32v value, typename FS::float32v pow )
{
    using float32v = typename FS::float32v;

    float32v scaledLog = pow * Log_f32<FS>( value );
    return Exp_f32<FS>( scaledLog );
}
}

View File

@ -0,0 +1,10 @@
#pragma once
#include "FunctionList.h"
// Forward declaration only; the definition is not in this header.
// NOTE(review): presumably FS_T<CLASS, FS> is the implementation of CLASS for
// one SIMD level FS - confirm against the .inl files.
template<typename CLASS, typename FS>
class FS_T;
// Declares the aliases float32v, int32v and mask32v for the vector types of FS.
// Requires a type named FS in scope. The expansion has no trailing semicolon,
// so the use site supplies it.
#define FASTSIMD_DECLARE_FS_TYPES \
using float32v = typename FS::float32v;\
using int32v = typename FS::int32v;\
using mask32v = typename FS::mask32v

View File

@ -0,0 +1,37 @@
#pragma once
#include "FastSIMD.h"
namespace FastSIMD
{
// Compile-time list of SIMD levels.
// Primary template: matches when no levels are left, so both queries
// yield Level_Null.
template<eLevel... T>
struct SIMDTypeContainer
{
static constexpr eLevel MinimumCompiled = Level_Null;
template<eLevel L>
static constexpr eLevel GetNextCompiledAfter = Level_Null;
};
// Recursive case: HEAD is the first level in the list, TAIL the rest.
template<eLevel HEAD, eLevel... TAIL>
struct SIMDTypeContainer<HEAD, TAIL...>
{
// First level, in list order, whose bit is set in COMPILED_SIMD_LEVELS.
static constexpr eLevel MinimumCompiled = (HEAD & COMPILED_SIMD_LEVELS) != 0 ? HEAD : SIMDTypeContainer<TAIL...>::MinimumCompiled;
// First compiled level that appears after L in the list; Level_Null when
// L is not in the list or nothing compiled follows it.
template<eLevel L>
static constexpr eLevel GetNextCompiledAfter = (L == HEAD) ? SIMDTypeContainer<TAIL...>::MinimumCompiled : SIMDTypeContainer<TAIL...>::template GetNextCompiledAfter<L>;
};
// The list of levels that the queries above walk, in this order.
using SIMDTypeList = SIMDTypeContainer<
Level_Scalar,
Level_SSE,
Level_SSE2,
Level_SSE3,
Level_SSSE3,
Level_SSE41,
Level_SSE42,
Level_AVX,
Level_AVX2,
Level_AVX512,
Level_NEON>;
}

View File

@ -0,0 +1,95 @@
set(CMAKE_CXX_STANDARD 17)
file(GLOB_RECURSE FastSIMD_headers "../include/FastSIMD/*.h")
file(GLOB_RECURSE FastSIMD_include_inl "../include/FastSIMD/*.inl")
file(GLOB FastSIMD_inline "FastSIMD/*.inl")
file(GLOB_RECURSE FastSIMD_internal_headers "FastSIMD/Internal/*.h")
file(GLOB_RECURSE FastSIMD_internal_inl "FastSIMD/Internal/*.inl")
list(APPEND FastSIMD_headers ${FastSIMD_inline})
list(APPEND FastSIMD_headers ${FastSIMD_include_inl})
list(APPEND FastSIMD_internal_headers ${FastSIMD_internal_inl})
set(FastSIMD_sources
FastSIMD/FastSIMD.cpp
FastSIMD/FastSIMD_Level_AVX2.cpp
FastSIMD/FastSIMD_Level_AVX512.cpp
FastSIMD/FastSIMD_Level_NEON.cpp
FastSIMD/FastSIMD_Level_Scalar.cpp
FastSIMD/FastSIMD_Level_SSE2.cpp
FastSIMD/FastSIMD_Level_SSE3.cpp
FastSIMD/FastSIMD_Level_SSE41.cpp
FastSIMD/FastSIMD_Level_SSE42.cpp
FastSIMD/FastSIMD_Level_SSSE3.cpp
)
file(GLOB FastNoise_headers "../include/FastNoise/*.h")
file(GLOB FastNoise_inl "../include/FastNoise/*.inl")
file(GLOB_RECURSE FastNoise_generators_headers "../include/FastNoise/Generators/*.h")
file(GLOB_RECURSE FastNoise_generators_inl "../include/FastNoise/Generators/*.inl")
list(APPEND FastNoise_headers ${FastNoise_inl})
list(APPEND FastNoise_generators_headers ${FastNoise_generators_inl})
set(FastNoise_source
FastNoise/FastNoiseMetadata.cpp
)
source_group("SIMD" FILES ${FastSIMD_headers})
source_group("SIMD" FILES ${FastSIMD_sources})
source_group("SIMD\\internals" FILES ${FastSIMD_internal_headers})
source_group("FastNoise" FILES ${FastNoise_headers})
source_group("FastNoise" FILES ${FastNoise_source})
source_group("FastNoise\\Generators" FILES ${FastNoise_generators_headers})
add_library(FastNoise
${FastNoise_headers}
${FastNoise_source}
${FastNoise_generators_headers}
${FastSIMD_headers}
${FastSIMD_internal_headers}
${FastSIMD_sources}
)
set(install_targets ${install_targets} FastNoise PARENT_SCOPE)
set(install_fastnoise_headers ${FastNoise_headers} PARENT_SCOPE)
set(install_fastsimd_headers ${FastSIMD_headers} PARENT_SCOPE)
# Public headers live in ../include relative to this CMakeLists.txt (the same
# location the header globs above use). Resolving the path from
# CMAKE_CURRENT_SOURCE_DIR keeps it correct when the library is pulled into
# another project with add_subdirectory(), where CMAKE_SOURCE_DIR would point
# at the outer project's root instead.
target_include_directories(FastNoise PUBLIC
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
    $<INSTALL_INTERFACE:include>
)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
target_compile_options(FastNoise PRIVATE /GL- /GS- /fp:fast)
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
set_source_files_properties(FastSIMD/FastSIMD_Level_Scalar.cpp PROPERTIES COMPILE_FLAGS "/arch:SSE")
set_source_files_properties(FastSIMD/FastSIMD_Level_SSE2.cpp PROPERTIES COMPILE_FLAGS "/arch:SSE2")
set_source_files_properties(FastSIMD/FastSIMD_Level_SSE3.cpp PROPERTIES COMPILE_FLAGS "/arch:SSE2")
set_source_files_properties(FastSIMD/FastSIMD_Level_SSSE3.cpp PROPERTIES COMPILE_FLAGS "/arch:SSE2")
set_source_files_properties(FastSIMD/FastSIMD_Level_SSE41.cpp PROPERTIES COMPILE_FLAGS "/arch:SSE2")
set_source_files_properties(FastSIMD/FastSIMD_Level_SSE42.cpp PROPERTIES COMPILE_FLAGS "/arch:SSE2")
endif()
set_source_files_properties(FastSIMD/FastSIMD_Level_AVX2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2")
set_source_files_properties(FastSIMD/FastSIMD_Level_AVX512.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX512")
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
if(MSVC)
target_compile_options(FastNoise PRIVATE /GS- /fp:fast)
else()
target_compile_options(FastNoise PRIVATE "-ffast-math")
endif()
if(CMAKE_SIZEOF_VOID_P EQUAL 4 OR "${CMAKE_CXX_FLAGS}" MATCHES "-m32")
set_source_files_properties(FastSIMD/FastSIMD_Level_Scalar.cpp PROPERTIES COMPILE_FLAGS "-msse")
set_source_files_properties(FastSIMD/FastSIMD_Level_SSE2.cpp PROPERTIES COMPILE_FLAGS "-msse2")
endif()
set_source_files_properties(FastSIMD/FastSIMD_Level_SSE3.cpp PROPERTIES COMPILE_FLAGS "-msse3")
set_source_files_properties(FastSIMD/FastSIMD_Level_SSSE3.cpp PROPERTIES COMPILE_FLAGS "-mssse3")
set_source_files_properties(FastSIMD/FastSIMD_Level_SSE41.cpp PROPERTIES COMPILE_FLAGS "-msse4.1")
set_source_files_properties(FastSIMD/FastSIMD_Level_SSE42.cpp PROPERTIES COMPILE_FLAGS "-msse4.2")
set_source_files_properties(FastSIMD/FastSIMD_Level_AVX2.cpp PROPERTIES COMPILE_FLAGS "-mavx2 -mfma")
set_source_files_properties(FastSIMD/FastSIMD_Level_AVX512.cpp PROPERTIES COMPILE_FLAGS "-mavx512f -mavx512dq -mfma")
endif()

View File

@ -0,0 +1,127 @@
#pragma once
#include <cstring>
#include <string>
#include <vector>
#include <cstdint>
namespace FastNoise
{
/** https://gist.github.com/tomykaira/f0fd86b6c73063283afe550bc5d77594
* The MIT License (MIT)
* Copyright (c) 2016 tomykaira
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
namespace Base64
{
/// Encodes a byte buffer as Base64 text using the standard alphabet
/// (A-Z, a-z, 0-9, '+', '/') with '=' padding.
///
/// @param data Bytes to encode; may be empty.
/// @return The encoded text, 4 characters per started group of 3 input bytes.
///         Empty input yields an empty string.
static std::string Encode( const std::vector<uint8_t>& data )
{
    static constexpr char sEncodingTable[] = {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
        'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
        'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
        'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
        'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
        'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
        'w', 'x', 'y', 'z', '0', '1', '2', '3',
        '4', '5', '6', '7', '8', '9', '+', '/'
    };

    const size_t in_len = data.size();
    const size_t out_len = 4 * ((in_len + 2) / 3);
    std::string ret( out_len, '\0' );
    char* p = ret.data();
    size_t i = 0;

    // Complete 3-byte groups. The bound is written as `i + 2 < in_len` because
    // `in_len - 2` wraps around for inputs shorter than two bytes (size_t is
    // unsigned), which would send the loop far past the end of both buffers.
    for( ; i + 2 < in_len; i += 3 )
    {
        *p++ = sEncodingTable[(data[i] >> 2) & 0x3F];
        *p++ = sEncodingTable[((data[i] & 0x3) << 4) | ((int)(data[i + 1] & 0xF0) >> 4)];
        *p++ = sEncodingTable[((data[i + 1] & 0xF) << 2) | ((int)(data[i + 2] & 0xC0) >> 6)];
        *p++ = sEncodingTable[data[i + 2] & 0x3F];
    }

    // One or two trailing bytes: emit the partial group and pad with '='.
    if( i < in_len )
    {
        *p++ = sEncodingTable[(data[i] >> 2) & 0x3F];
        if( i == (in_len - 1) )
        {
            *p++ = sEncodingTable[((data[i] & 0x3) << 4)];
            *p++ = '=';
        }
        else
        {
            *p++ = sEncodingTable[((data[i] & 0x3) << 4) | ((int)(data[i + 1] & 0xF0) >> 4)];
            *p++ = sEncodingTable[((data[i + 1] & 0xF) << 2)];
        }
        *p++ = '=';
    }

    return ret;
}
/// Decodes '='-padded Base64 text into bytes.
///
/// @param input Null-terminated Base64 text.
/// @return The decoded bytes. An empty vector is returned for malformed input:
///         a null pointer, an empty string, a length that is not a multiple
///         of 4, or any character outside the Base64 alphabet.
static std::vector<uint8_t> Decode( const char* input )
{
    // Table entry used for bytes that are not part of the Base64 alphabet.
    static constexpr unsigned char kInvalid = 64;
    static constexpr unsigned char kDecodingTable[] = {
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 62, 64, 64, 64, 63,
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 64, 64, 64, 64, 64, 64,
        64, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 64, 64, 64, 64, 64,
        64, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
    };

    if( input == nullptr ) return {};

    const size_t in_len = std::strlen( input );
    // The emptiness check must come before the padding checks below, which
    // index input[in_len - 1] and input[in_len - 2].
    if( in_len == 0 || in_len % 4 != 0 ) return {};

    size_t out_len = in_len / 4 * 3;
    if( input[in_len - 1] == '=' ) out_len--;
    if( input[in_len - 2] == '=' ) out_len--;

    std::vector<uint8_t> out( out_len );

    for( size_t i = 0, j = 0; i < in_len; i += 4 )
    {
        // Pack four 6-bit values into 24 bits; '=' contributes zero bits.
        uint32_t triple = 0;
        for( size_t k = 0; k < 4; k++ )
        {
            const char c = input[i + k];
            uint32_t sextet = 0;
            if( c != '=' )
            {
                // Index through unsigned char: plain char may be signed, and a
                // negative index would read in front of the table.
                sextet = kDecodingTable[static_cast<unsigned char>( c )];
                if( sextet == kInvalid ) return {};
            }
            triple = (triple << 6) | sextet;
        }

        if( j < out_len ) out[j++] = static_cast<uint8_t>( (triple >> 2 * 8) & 0xFF );
        if( j < out_len ) out[j++] = static_cast<uint8_t>( (triple >> 1 * 8) & 0xFF );
        if( j < out_len ) out[j++] = static_cast<uint8_t>( (triple >> 0 * 8) & 0xFF );
    }

    return out;
}
};
}

View File

@ -0,0 +1,390 @@
#include "FastNoise/FastNoiseMetadata.h"
#include "Base64.h"
#include <unordered_set>
#include <unordered_map>
#include <cassert>
#include <cstdint>
#include <cstring>
using namespace FastNoise;
std::vector<const Metadata*> Metadata::sMetadataClasses;
// Builds node data for the node type described by `data`.
// With non-null metadata, every member is pre-populated: variables take their
// default values, node sources start as nullptr, and hybrid sources start as
// ( nullptr, default value ). With null metadata all containers stay empty.
NodeData::NodeData( const Metadata* data )
{
    metadata = data;

    if( !metadata )
    {
        return;
    }

    for( const auto& variableInfo : metadata->memberVariables )
    {
        variables.push_back( variableInfo.valueDefault );
    }

    // One empty slot per node source; only the count matters here.
    for( size_t remaining = metadata->memberNodes.size(); remaining != 0; --remaining )
    {
        nodes.push_back( nullptr );
    }

    for( const auto& hybridInfo : metadata->memberHybrids )
    {
        hybrids.emplace_back( nullptr, hybridInfo.valueDefault );
    }
}
// Appends `value` to `dataStream` one byte at a time, least significant byte
// first, writing exactly sizeof( T ) bytes.
template<typename T>
void AddToDataStream( std::vector<uint8_t>& dataStream, T value )
{
    size_t shift = 0;

    for( size_t remaining = sizeof( T ); remaining != 0; --remaining )
    {
        dataStream.push_back( (uint8_t)(value >> shift) );
        shift += 8;
    }
}
// Recursively appends the binary encoding of `nodeData` and all of its sources
// to `dataStream`.
//
// Encoding of one node:
//   - already serialised node: uint16 0xFFFF, then its uint16 reference id
//   - otherwise: metadata->id, the bits of every variable, every node source
//     (recursively), and for every hybrid either uint8 0 + the value bits or
//     uint8 1 + the source node (recursively)
// After a node has been written it is registered in `referenceIds` under the
// next free index so later occurrences can refer back to it.
//
// nodeData      Node to serialise; nullptr makes the call fail.
// fixUp         When true, sources that would create a cycle or that the
//               target node rejects are reset to nullptr in the NodeData tree.
// dataStream    Output bytes (appended to).
// referenceIds  Nodes serialised so far and their reference ids.
// dependancies  Ancestors of this node on the current path. Passed by value
//               so that siblings do not see each other's ancestors.
//
// Returns false if the tree cannot be serialised: member counts do not match
// the metadata, a node source is missing, or a source is rejected during fix
// up. `dataStream` may hold partial output in that case.
bool SerialiseNodeDataInternal( NodeData* nodeData, bool fixUp, std::vector<uint8_t>& dataStream, std::unordered_map<const NodeData*, uint16_t>& referenceIds, std::unordered_set<const NodeData*> dependancies = {} )
{
    if( !nodeData )
    {
        return false;
    }

    const Metadata* metadata = nodeData->metadata;

    if( !metadata ||
        nodeData->variables.size() != metadata->memberVariables.size() ||
        nodeData->nodes.size() != metadata->memberNodes.size() ||
        nodeData->hybrids.size() != metadata->memberHybrids.size() )
    {
        assert( 0 ); // Member size mismatch with metadata
        return false;
    }

    if( fixUp )
    {
        // Drop any source that points back at this node or one of its ancestors.
        dependancies.insert( nodeData );

        for( auto& node : nodeData->nodes )
        {
            if( dependancies.find( node ) != dependancies.end() )
            {
                node = nullptr;
            }
        }
        for( auto& hybrid : nodeData->hybrids )
        {
            if( dependancies.find( hybrid.first ) != dependancies.end() )
            {
                hybrid.first = nullptr;
            }
        }
    }

    auto reference = referenceIds.find( nodeData );

    if( reference != referenceIds.end() )
    {
        // The marker must occupy exactly two bytes because the reader consumes
        // it as a uint16_t. UINT16_MAX on its own may have type int (the limit
        // macros take the promoted type), which would emit four bytes.
        AddToDataStream( dataStream, static_cast<uint16_t>( UINT16_MAX ) );
        AddToDataStream( dataStream, reference->second );
        return true;
    }

    AddToDataStream( dataStream, metadata->id );

    for( size_t i = 0; i < metadata->memberVariables.size(); i++ )
    {
        AddToDataStream( dataStream, nodeData->variables[i].i );
    }

    for( size_t i = 0; i < metadata->memberNodes.size(); i++ )
    {
        if( fixUp && nodeData->nodes[i] )
        {
            // Trial assignment on throwaway instances: checks that this member
            // accepts a node of the source's type.
            std::unique_ptr<Generator> gen( metadata->NodeFactory() );
            SmartNode<> node( nodeData->nodes[i]->metadata->NodeFactory() );

            if( !metadata->memberNodes[i].setFunc( gen.get(), node ) )
            {
                nodeData->nodes[i] = nullptr;
                return false;
            }
        }

        if( !nodeData->nodes[i] || !SerialiseNodeDataInternal( nodeData->nodes[i], fixUp, dataStream, referenceIds, dependancies ) )
        {
            return false;
        }
    }

    for( size_t i = 0; i < metadata->memberHybrids.size(); i++ )
    {
        if( !nodeData->hybrids[i].first )
        {
            // No source node: tag 0 followed by the constant value's bits.
            AddToDataStream( dataStream, (uint8_t)0 );

            Metadata::MemberVariable::ValueUnion v = nodeData->hybrids[i].second;

            AddToDataStream( dataStream, v.i );
        }
        else
        {
            if( fixUp )
            {
                // Same trial assignment as for plain node sources.
                std::unique_ptr<Generator> gen( metadata->NodeFactory() );
                std::shared_ptr<Generator> node( nodeData->hybrids[i].first->metadata->NodeFactory() );

                if( !metadata->memberHybrids[i].setNodeFunc( gen.get(), node ) )
                {
                    nodeData->hybrids[i].first = nullptr;
                    return false;
                }
            }

            // Source node present: tag 1 followed by the node itself.
            AddToDataStream( dataStream, (uint8_t)1 );
            if( !SerialiseNodeDataInternal( nodeData->hybrids[i].first, fixUp, dataStream, referenceIds, dependancies ) )
            {
                return false;
            }
        }
    }

    referenceIds.emplace( nodeData, (uint16_t)referenceIds.size() );

    return true;
}
// Serialises the node tree rooted at `nodeData` and returns it as Base64 text.
// Returns an empty string when the tree cannot be serialised. `fixUp` is
// forwarded to the recursive serialiser.
std::string Metadata::SerialiseNodeData( NodeData* nodeData, bool fixUp )
{
    std::unordered_map<const NodeData*, uint16_t> seenNodes;
    std::vector<uint8_t> bytes;

    const bool serialised = SerialiseNodeDataInternal( nodeData, fixUp, bytes, seenNodes );

    if( serialised )
    {
        return Base64::Encode( bytes );
    }
    return "";
}
// Reads one value of type T from `dataStream` at byte offset `idx`.
//
// dataStream  Source bytes.
// idx         Read position; advanced by sizeof( T ) on success, left
//             untouched on failure.
// value       Receives the bytes in host byte order.
//
// Returns false when fewer than sizeof( T ) bytes remain at `idx`.
template<typename T>
bool GetFromDataStream( const std::vector<uint8_t>& dataStream, size_t& idx, T& value )
{
    // Written as a subtraction so the check cannot wrap around for a large idx.
    if( idx > dataStream.size() || dataStream.size() - idx < sizeof( T ) )
    {
        return false;
    }

    // The offset is arbitrary, so the bytes are not necessarily aligned for T;
    // memcpy reads them without an unaligned or type-punned access.
    std::memcpy( &value, dataStream.data() + idx, sizeof( T ) );

    idx += sizeof( T );
    return true;
}
// Rebuilds one generator node, and recursively its sources, from the byte
// stream, starting at serialIdx and advancing it past everything consumed.
// referenceNodes collects every completed node under a running index so that
// back-references (node id UINT16_MAX followed by a reference id) can be
// resolved. `level` is passed to each node's factory.
// Returns nullptr if the stream is truncated, names an unknown node id or
// reference id, contains an invalid hybrid tag, or a source is rejected by
// its setter.
SmartNode<> DeserialiseSmartNodeInternal( const std::vector<uint8_t>& serialisedNodeData, size_t& serialIdx, std::unordered_map<uint16_t, SmartNode<>>& referenceNodes, FastSIMD::eLevel level = FastSIMD::Level_Null )
{
uint16_t nodeId;
if( !GetFromDataStream( serialisedNodeData, serialIdx, nodeId ) )
{
return nullptr;
}
// UINT16_MAX marks a back-reference to a node built earlier in this stream.
if( nodeId == UINT16_MAX )
{
uint16_t referenceId;
if( !GetFromDataStream( serialisedNodeData, serialIdx, referenceId ) )
{
return nullptr;
}
auto refNode = referenceNodes.find( referenceId );
if( refNode == referenceNodes.end() )
{
return nullptr;
}
return refNode->second;
}
const Metadata* metadata = Metadata::GetMetadataClass( nodeId );
if( !metadata )
{
return nullptr;
}
SmartNode<> generator( metadata->NodeFactory( level ) );
// Members are read in the order the metadata lists them: variables first,
// then node sources, then hybrid sources.
for( const auto& var : metadata->memberVariables )
{
Metadata::MemberVariable::ValueUnion v;
if( !GetFromDataStream( serialisedNodeData, serialIdx, v ) )
{
return nullptr;
}
var.setFunc( generator.get(), v );
}
for( const auto& node : metadata->memberNodes )
{
SmartNode<> nodeGen = DeserialiseSmartNodeInternal( serialisedNodeData, serialIdx, referenceNodes, level );
if( !nodeGen || !node.setFunc( generator.get(), nodeGen ) )
{
return nullptr;
}
}
for( const auto& hybrid : metadata->memberHybrids )
{
// One tag byte: 1 = a source node follows, 0 = a float value follows.
uint8_t isGenerator;
if( !GetFromDataStream( serialisedNodeData, serialIdx, isGenerator ) || isGenerator > 1 )
{
return nullptr;
}
if( isGenerator )
{
SmartNode<> nodeGen = DeserialiseSmartNodeInternal( serialisedNodeData, serialIdx, referenceNodes, level );
if( !nodeGen || !hybrid.setNodeFunc( generator.get(), nodeGen ) )
{
return nullptr;
}
}
else
{
float v;
if( !GetFromDataStream( serialisedNodeData, serialIdx, v ) )
{
return nullptr;
}
hybrid.setValueFunc( generator.get(), v );
}
}
// Register only after all sources are built, so the index reflects completion order.
referenceNodes.emplace( (uint16_t)referenceNodes.size(), generator );
return generator;
}
// Decodes Base64 node data and rebuilds the generator tree it describes,
// creating every node through its factory with the given SIMD level.
// Returns nullptr when the data cannot be deserialised.
SmartNode<> Metadata::DeserialiseSmartNode( const char* serialisedBase64NodeData, FastSIMD::eLevel level )
{
    const std::vector<uint8_t> decoded = Base64::Decode( serialisedBase64NodeData );

    std::unordered_map<uint16_t, SmartNode<>> builtNodes;
    size_t cursor = 0;

    return DeserialiseSmartNodeInternal( decoded, cursor, builtNodes, level );
}
// Rebuilds one NodeData, and recursively its sources, from the byte stream,
// starting at serialIdx and advancing it past everything consumed.
// Ownership of every completed NodeData is moved into nodeDataOut; the returned
// pointer and the pointers stored in referenceNodes are non-owning.
// Returns nullptr if the stream is truncated, names an unknown node id or
// reference id, or contains an invalid hybrid tag. Sources completed before
// the failure remain in nodeDataOut.
NodeData* DeserialiseNodeDataInternal( const std::vector<uint8_t>& serialisedNodeData, std::vector<std::unique_ptr<NodeData>>& nodeDataOut, size_t& serialIdx, std::unordered_map<uint16_t, NodeData*>& referenceNodes )
{
uint16_t nodeId;
if( !GetFromDataStream( serialisedNodeData, serialIdx, nodeId ) )
{
return nullptr;
}
// UINT16_MAX marks a back-reference to a node built earlier in this stream.
if( nodeId == UINT16_MAX )
{
uint16_t referenceId;
if( !GetFromDataStream( serialisedNodeData, serialIdx, referenceId ) )
{
return nullptr;
}
auto refNode = referenceNodes.find( referenceId );
if( refNode == referenceNodes.end() )
{
return nullptr;
}
return refNode->second;
}
const Metadata* metadata = Metadata::GetMetadataClass( nodeId );
if( !metadata )
{
return nullptr;
}
// The constructor sizes variables, nodes and hybrids from the metadata, so
// the loops below read exactly one entry per declared member.
std::unique_ptr<NodeData> nodeData( new NodeData( metadata ) );
for( auto& var : nodeData->variables )
{
if( !GetFromDataStream( serialisedNodeData, serialIdx, var ) )
{
return nullptr;
}
}
for( auto& node : nodeData->nodes )
{
node = DeserialiseNodeDataInternal( serialisedNodeData, nodeDataOut, serialIdx, referenceNodes );
if( !node )
{
return nullptr;
}
}
for( auto& hybrid : nodeData->hybrids )
{
// One tag byte: 1 = a source node follows, 0 = a value follows.
uint8_t isGenerator;
if( !GetFromDataStream( serialisedNodeData, serialIdx, isGenerator ) || isGenerator > 1 )
{
return nullptr;
}
if( isGenerator )
{
hybrid.first = DeserialiseNodeDataInternal( serialisedNodeData, nodeDataOut, serialIdx, referenceNodes );
if( !hybrid.first )
{
return nullptr;
}
}
else
{
if( !GetFromDataStream( serialisedNodeData, serialIdx, hybrid.second ) )
{
return nullptr;
}
}
}
// Register only after all sources are built, then hand ownership to nodeDataOut.
referenceNodes.emplace( (uint16_t)referenceNodes.size(), nodeData.get() );
return nodeDataOut.emplace_back( std::move( nodeData ) ).get();
}
// Decodes a Base64-encoded node tree into NodeData objects.
//
// serialisedBase64NodeData: Base64 text handed to Base64::Decode.
// nodeDataOut: receives ownership of the decoded nodes.
// Returns the root node as reported by DeserialiseNodeDataInternal (nullptr on failure).
NodeData* Metadata::DeserialiseNodeData( const char* serialisedBase64NodeData, std::vector<std::unique_ptr<NodeData>>& nodeDataOut )
{
    // Table of already decoded nodes, used to resolve back-references in the stream.
    std::unordered_map<uint16_t, NodeData*> decodedNodes;

    // Read cursor into the decoded byte stream.
    size_t readPos = 0;

    std::vector<uint8_t> decodedBytes = Base64::Decode( serialisedBase64NodeData );

    return DeserialiseNodeDataInternal( decodedBytes, nodeDataOut, readPos, decodedNodes );
}
// Generates the metadata boilerplate for one node class:
//  - a const global named g<CLASS>Metadata, constructed with the class name as a string,
//  - CLASS::GetMetadata(), which returns the address of that global,
//  - CLASS::Metadata::NodeFactory(), which creates the node through FastSIMD::New<CLASS>( l ).
#define FASTSIMD_BUILD_CLASS2( CLASS ) \
const CLASS::Metadata g ## CLASS ## Metadata( #CLASS );\
const FastNoise::Metadata* CLASS::GetMetadata() const\
{\
return &g ## CLASS ## Metadata;\
}\
Generator* CLASS::Metadata::NodeFactory( FastSIMD::eLevel l ) const\
{\
return FastSIMD::New<CLASS>( l );\
}
// Extra level of indirection so that the argument is macro-expanded before it reaches
// the `#` and `##` operators used in FASTSIMD_BUILD_CLASS2.
#define FASTSIMD_BUILD_CLASS( CLASS ) FASTSIMD_BUILD_CLASS2( CLASS )
// In this translation unit FASTNOISE_CLASS( X ) expands to plain X.
#define FASTNOISE_CLASS( CLASS ) CLASS
// presumably the build list below invokes FASTSIMD_BUILD_CLASS once per node class,
// with FASTSIMD_INCLUDE_HEADER_ONLY limiting what its includes pull in - confirm in the .inl
#define FASTSIMD_INCLUDE_HEADER_ONLY
#include "FastNoise/FastNoise_BuildList.inl"

View File

@ -0,0 +1,17 @@
#include "FS_Class.inl"
#ifdef FASTSIMD_INCLUDE_CHECK
#include __FILE__
#endif
#include "FS_Class.inl"
#pragma once
// Sample FastSIMD class declaration. The class head, the setup line and the member
// declarations are all produced by FastSIMD macros defined elsewhere.
FASTSIMD_CLASS_DECLARATION( Example )
{
// SIMD levels passed to FASTSIMD_CLASS_SETUP for this class: AVX2, SSE4.1, SSE2 and scalar.
FASTSIMD_CLASS_SETUP( FastSIMD::Level_AVX2 | FastSIMD::Level_SSE41 | FastSIMD::Level_SSE2 | FastSIMD::Level_Scalar );
public:
// Member functions declared through FS_EXTERNAL_FUNC.
// presumably their per-level definitions live in the matching Example .inl file - confirm
FS_EXTERNAL_FUNC( void DoStuff( int* data ) );
FS_EXTERNAL_FUNC( void DoArray( int* data0, int* data1, int size ) );
};

View File

@ -0,0 +1,125 @@
#define FASTSIMD_INTELLISENSE
#include "Example.h"
//template<typename T>// Generic function, used if no specialised function found
//FS_CLASS( Example ) < T, FS_SIMD_CLASS::SIMD_Level >::FS_CLASS( Example )()
//{
// int test = 1;
//
// test += test;
//}
// Builds an int32v from the value 1 and writes it to `data` with FS_Store_i32.
// NOTE(review): this template uses FastSIMD::ELevel, while the rest of the library
// spells the enum FastSIMD::eLevel - confirm this example still compiles.
template<typename F, FastSIMD::ELevel S> // Generic function, used if no specialised function found
void FS_CLASS( Example )<F, S>::DoStuff( int* data )
{
int32v a = int32v( 1 );
FS_Store_i32( data, a );
}
//template<typename CLASS_T, typename SIMD_T> // Different function for level SSE2 or AVX2
//void FS_CLASS( Example )::DoStuff( int* data )
//{
// int32v a = _mm_loadu_si128( reinterpret_cast<__m128i const*>(data) );
//
// a += _mm_set_epi32( 2, 3, 4, 5 );
//
// a -= _mm_castps_si128( FS_VecZero_f32( ) );
//
// FS_Store_i32( data, a );
//}
//
//
//template<typename CLASS_T, FastSIMD::Level LEVEL_T>
//void FS_CLASS( Example )::DoArray( int* data0, int* data1, int size )
//{
// for ( int i = 0; i < size; i += FS_VectorSize_i32() )
// {
// int32v a = FS_Load_i32( &data0[i] );
// int32v b = FS_Load_i32( &data1[i] );
//
// a *= b;
//
// a <<= 1;
//
// a -= FS_VecZero_i32();
//
// (~a);
//
// FS_Store_i32( &data0[i], a );
// }
//}
// Generic DoArray: walks both arrays one vector at a time and overwrites data0 with
// ( ( data0 + data1 ) << 1 ) * data1.
//
// data0: input and output array.
// data1: second input array (only read).
// size:  number of ints to process.
// NOTE(review): `i` is size_t but `size` is int, so a negative size is converted to a
// very large unsigned bound - confirm callers never pass one.
// NOTE(review): every iteration loads and stores a whole vector; presumably size must be
// a multiple of int32v::FS_Size() - confirm.
template<typename F, FastSIMD::ELevel S>
void FS_CLASS( Example )<F, S>::DoArray( int* data0, int* data1, int size )
{
for ( size_t i = 0; i < size; i += int32v::FS_Size() )
{
int32v a = FS_Load_i32( &data0[i] );
int32v b = FS_Load_i32( &data1[i] );
a += b;
a <<= 1;
a *= b;
// Subtracting the zero vector leaves `a` unchanged.
a -= int32v::FS_Zero();
// The result of ~a is discarded, so this statement does not modify `a`.
(~a);
FS_Store_i32( &data0[i], a );
}
}
// Partial specialisation of the Example class for FastSIMD::Level_AVX2. It derives from
// the Level_Null variant and supplies its own DoArray.
template<typename T_FS>
class FS_CLASS( Example )<T_FS, FastSIMD::Level_AVX2> : public FS_CLASS( Example )<T_FS, FastSIMD::Level_Null>
{
//typedef FastSIMD_AVX2 T_FS;
FASTSIMD_CLASS_SETUP( FastSIMD::COMPILED_SIMD_LEVELS );
public:
// Same pipeline as the generic DoArray, except that the shift is by 2 instead of 1:
// data0 = ( ( data0 + data1 ) << 2 ) * data1.
// NOTE(review): `i` is size_t but `size` is int - a negative size becomes a huge bound.
void DoArray( int* data0, int* data1, int size )
{
for ( size_t i = 0; i < size; i += int32v::FS_Size() )
{
int32v a = FS_Load_i32( &data0[i] );
int32v b = FS_Load_i32( &data1[i] );
//a += gfhfdghdfgh();
a += b;
a <<= 2;
a *= b;
// Subtracting the zero vector leaves `a` unchanged.
a -= int32v::FS_Zero();
// The result of ~a is discarded, so this statement does not modify `a`.
(~a);
FS_Store_i32( &data0[i], a );
}
}
};
//
//template<typename T>
//typename std::enable_if<(T::SIMD_Level <= 1)>::type FS_CLASS( Example )<T, FS_SIMD_CLASS::SIMD_Level>::DoArray( int* data0, int* data1, int size )
//{
// for ( int i = 0; i < size; i += FS_VectorSize_i32() )
// {
// int32v a = FS_Load_i32( &data0[i] );
// int32v b = FS_Load_i32( &data1[i] );
//
// a += b;
//
// a <<= 1;
//
// a -= FS_VecZero_i32();
//
// (~a);
//
// FS_Store_i32( &data0[i], a );
// }
//}

View File

@ -0,0 +1,239 @@
#include "FastSIMD/FastSIMD.h"
#include <algorithm>
#include <cstdint>
#ifdef __GNUG__
#include <x86intrin.h>
#else
#include <intrin.h>
#endif
#include "FastSIMD/TypeList.h"
// Cached result of FastSIMD::CPUMaxSIMDLevel(); Level_Null means "not detected yet".
static FastSIMD::eLevel simdLevel = FastSIMD::Level_Null;
// Build-time sanity check: the configured minimum level must be one of the compiled levels.
static_assert(FastSIMD::SIMDTypeList::MinimumCompiled & FastSIMD::COMPILED_SIMD_LEVELS, "FASTSIMD_FALLBACK_SIMD_LEVEL is not a compiled SIMD level, check FastSIMD_Config.h");
#if FASTSIMD_x86
// Define interface to cpuid instruction.
// input: eax = functionnumber, ecx = 0
// output: eax = output[0], ebx = output[1], ecx = output[2], edx = output[3]
//
// Three implementations are selected at compile time: GNU-style inline assembly,
// the __cpuidex intrinsic (MSVC / Intel compiler), or MASM-style inline assembly
// as a last resort for unknown compilers.
static void cpuid( int output[4], int functionnumber )
{
#if defined( __GNUC__ ) || defined( __clang__ ) // use inline assembly, Gnu/AT&T syntax
int a, b, c, d;
// Sub-leaf (ecx) is always 0.
__asm("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "a"(functionnumber), "c"(0) : );
output[0] = a;
output[1] = b;
output[2] = c;
output[3] = d;
#elif defined( _MSC_VER ) || defined ( __INTEL_COMPILER ) // Microsoft or Intel compiler, intrin.h included
__cpuidex( output, functionnumber, 0 ); // intrinsic function for CPUID
#else // unknown platform. try inline assembly with masm/intel syntax
__asm
{
mov eax, functionnumber
xor ecx, ecx
cpuid;
mov esi, output
mov[esi], eax
mov[esi + 4], ebx
mov[esi + 8], ecx
mov[esi + 12], edx
}
#endif
}
// Define interface to xgetbv instruction
//
// ctr: index of the extended control register to read (passed in ecx).
// Returns the 64-bit register value, assembled from edx:eax in the assembly paths.
// The caller in this file uses index 0 to check which vector state the OS has enabled.
static int64_t xgetbv( int ctr )
{
#if (defined( _MSC_FULL_VER ) && _MSC_FULL_VER >= 160040000) || (defined( __INTEL_COMPILER ) && __INTEL_COMPILER >= 1200) // Microsoft or Intel compiler supporting _xgetbv intrinsic
return _xgetbv( ctr ); // intrinsic function for XGETBV
#elif defined( __GNUC__ ) // use inline assembly, Gnu/AT&T syntax
uint32_t a, d;
__asm("xgetbv" : "=a"(a), "=d"(d) : "c"(ctr) : );
// Combine the low (eax) and high (edx) halves.
return a | (uint64_t( d ) << 32);
#else // #elif defined (_WIN32) // other compiler. try inline assembly with masm/intel/MS syntax
uint32_t a, d;
__asm {
mov ecx, ctr
_emit 0x0f
_emit 0x01
_emit 0xd0; // xgetbv
mov a, eax
mov d, edx
}
return a | (uint64_t( d ) << 32);
#endif
}
#endif
// Probes the processor (and, for AVX and above, the OS-enabled register state) and
// returns the highest SIMD level that can be used. Does not touch the cache: the level
// is accumulated in a local so that no intermediate value ever becomes visible to
// other callers.
static FastSIMD::eLevel DetectCPUMaxSIMDLevel()
{
    FastSIMD::eLevel level = FastSIMD::Level_Null;

#if FASTSIMD_x86
    int abcd[4] = { 0, 0, 0, 0 }; // cpuid results

#if !FASTSIMD_64BIT
    level = FastSIMD::Level_Scalar; // default value

    cpuid( abcd, 0 ); // call cpuid function 0
    if( abcd[0] == 0 )
        return level; // no further cpuid function supported

    cpuid( abcd, 1 ); // call cpuid function 1 for feature flags
    if( ( abcd[3] & ( 1 << 0 ) ) == 0 )
        return level; // no floating point
    if( ( abcd[3] & ( 1 << 23 ) ) == 0 )
        return level; // no MMX
    if( ( abcd[3] & ( 1 << 15 ) ) == 0 )
        return level; // no conditional move
    if( ( abcd[3] & ( 1 << 24 ) ) == 0 )
        return level; // no FXSAVE
    if( ( abcd[3] & ( 1 << 25 ) ) == 0 )
        return level; // no SSE
    level = FastSIMD::Level_SSE;
    // 1: SSE supported

    if( ( abcd[3] & ( 1 << 26 ) ) == 0 )
        return level; // no SSE2
#else
    cpuid( abcd, 1 ); // call cpuid function 1 for feature flags
#endif

    level = FastSIMD::Level_SSE2; // default value for 64bit
    // 2: SSE2 supported

    if( ( abcd[2] & ( 1 << 0 ) ) == 0 )
        return level; // no SSE3
    level = FastSIMD::Level_SSE3;
    // 3: SSE3 supported

    if( ( abcd[2] & ( 1 << 9 ) ) == 0 )
        return level; // no SSSE3
    level = FastSIMD::Level_SSSE3;
    // 4: SSSE3 supported

    if( ( abcd[2] & ( 1 << 19 ) ) == 0 )
        return level; // no SSE4.1
    level = FastSIMD::Level_SSE41;
    // 5: SSE4.1 supported

    if( ( abcd[2] & ( 1 << 23 ) ) == 0 )
        return level; // no POPCNT
    if( ( abcd[2] & ( 1 << 20 ) ) == 0 )
        return level; // no SSE4.2
    level = FastSIMD::Level_SSE42;
    // 6: SSE4.2 supported

    if( ( abcd[2] & ( 1 << 26 ) ) == 0 )
        return level; // no XSAVE
    if( ( abcd[2] & ( 1 << 27 ) ) == 0 )
        return level; // no OSXSAVE
    if( ( abcd[2] & ( 1 << 28 ) ) == 0 )
        return level; // no AVX

    // The CPU supports AVX; now check that the OS actually saves/restores the registers.
    uint64_t osbv = xgetbv( 0 );
    if( ( osbv & 6 ) != 6 )
        return level; // AVX not enabled in O.S.
    level = FastSIMD::Level_AVX;
    // 7: AVX supported

    cpuid( abcd, 7 ); // call cpuid leaf 7 for feature flags
    if( ( abcd[1] & ( 1 << 5 ) ) == 0 )
        return level; // no AVX2
    level = FastSIMD::Level_AVX2;
    // 8: AVX2 supported

    if( ( osbv & ( 0xE0 ) ) != 0xE0 )
        return level; // AVX512 not enabled in O.S.
    if( ( abcd[1] & ( 1 << 16 ) ) == 0 )
        return level; // no AVX512

    cpuid( abcd, 0xD ); // call cpuid leaf 0xD for feature flags
    if( ( abcd[0] & 0x60 ) != 0x60 )
        return level; // no AVX512
    // 9: AVX512 supported

    cpuid( abcd, 7 ); // call cpuid leaf 7 for feature flags
    // Bit 31 is tested through an unsigned value so the mask never touches the sign bit of an int.
    if( ( static_cast<unsigned>( abcd[1] ) & ( 1u << 31 ) ) == 0 )
        return level; // no AVX512VL
    // 10: AVX512VL supported

    if( ( abcd[1] & 0x40020000 ) != 0x40020000 )
        return level; // no AVX512BW, AVX512DQ
    level = FastSIMD::Level_AVX512;
    // 11: AVX512BW & AVX512DQ supported
#endif

#if FASTSIMD_ARM
    level = FastSIMD::Level_NEON;
#endif

    return level;
}

// Returns the highest SIMD level supported by the CPU this code is running on.
//
// The result is detected once and cached in `simdLevel`; later calls return the cached
// value. Only the final detected level is ever written to the cache. Previously every
// intermediate level was stored while probing, so a call overlapping with the first
// detection could observe (and return) a level lower than the real maximum.
// Note: the cache is a plain static, not an atomic; this change removes the wrong-result
// window but does not add formal synchronisation.
FastSIMD::eLevel FastSIMD::CPUMaxSIMDLevel()
{
    if( simdLevel > Level_Null )
    {
        return simdLevel;
    }

    const eLevel detected = DetectCPUMaxSIMDLevel();
    simdLevel = detected;
    return detected;
}
// Walks the compiled SIMD levels upwards from SIMD_LEVEL and instantiates CLASS_T for the
// highest level that is both supported by the class and not above maxSIMDLevel.
//
// maxSIMDLevel: upper bound on the level that may be chosen.
// Returns the object produced by FastSIMD::ClassFactory, or nullptr if no level qualifies.
template<typename CLASS_T, FastSIMD::eLevel SIMD_LEVEL>
CLASS_T* SIMDLevelSelector( FastSIMD::eLevel maxSIMDLevel )
{
    constexpr auto kNextLevel = FastSIMD::SIMDTypeList::GetNextCompiledAfter<SIMD_LEVEL>;

    if constexpr( ( CLASS_T::Supported_SIMD_Levels & SIMD_LEVEL ) == 0 )
    {
        // This level is not supported by the class: stop at the end of the list,
        // otherwise keep searching higher levels.
        if constexpr( SIMD_LEVEL == FastSIMD::Level_Null )
        {
            return nullptr;
        }
        return SIMDLevelSelector<CLASS_T, kNextLevel>( maxSIMDLevel );
    }
    else
    {
        // Prefer anything a higher level can provide.
        if( CLASS_T* fromHigherLevel = SIMDLevelSelector<CLASS_T, kNextLevel>( maxSIMDLevel ) )
        {
            return fromHigherLevel;
        }

        // Nothing higher qualified: use this level if the caller's limit allows it.
        if( SIMD_LEVEL <= maxSIMDLevel )
        {
            return FastSIMD::ClassFactory<CLASS_T, SIMD_LEVEL>();
        }
        return nullptr;
    }
}
// Creates a CLASS_T instance for the best available SIMD level.
//
// maxSIMDLevel: Level_Null means "use whatever the CPU supports"; any other value is
//               clamped to the CPU's maximum level.
// Returns the object chosen by SIMDLevelSelector, starting from the minimum compiled level.
template<typename CLASS_T>
CLASS_T* FastSIMD::New( eLevel maxSIMDLevel )
{
    static_assert(( CLASS_T::Supported_SIMD_Levels & FastSIMD::SIMDTypeList::MinimumCompiled ), "MinimumCompiled SIMD Level must be supported by this class" );

    const eLevel cpuLevel = CPUMaxSIMDLevel();
    const eLevel cappedLevel = ( maxSIMDLevel == Level_Null ) ? cpuLevel : std::min( maxSIMDLevel, cpuLevel );

    return SIMDLevelSelector<CLASS_T, SIMDTypeList::MinimumCompiled>( cappedLevel );
}
// In this translation unit FASTSIMD_BUILD_CLASS( CLASS ) expands to an explicit
// instantiation of FastSIMD::New<CLASS>, so the template defined above gets emitted
// for every class named in the build list included below.
#define FASTSIMD_BUILD_CLASS( CLASS ) \
template CLASS* FastSIMD::New( FastSIMD::eLevel );
#define FASTSIMD_INCLUDE_HEADER_ONLY
#include "FastSIMD_BuildList.inl"

View File

@ -0,0 +1,10 @@
#pragma once
#ifndef FASTSIMD_BUILD_CLASS
#error Do not include this file
#endif
//#include "Example/Example.inl"
//FASTSIMD_BUILD_CLASS( Example )
#include "FastNoise/FastNoise_BuildList.inl"

View File

@ -0,0 +1,17 @@
#include "FastSIMD/FastSIMD.h"
#if FASTSIMD_COMPILE_AVX2
// This translation unit builds the AVX2 variants of the FastSIMD classes (through
// SourceBuilder.inl with FS_SIMD_CLASS set to FastSIMD::AVX2), so it has to be compiled
// with AVX(2) code generation enabled. The guard below stops the build with a hint if
// the compiler does not report AVX support.
// To compile AVX2 support enable AVX(2) code generation compiler flags for this file
#ifndef __AVX__
#ifdef _MSC_VER
#error To compile AVX set C++ code generation to use /arch:AVX on FastSIMD_Level_AVX2.cpp, or change "#define FASTSIMD_COMPILE_AVX2" in FastSIMD_Config.h
#else
// "-march=core-avx" (the previous hint) is not a valid GCC/Clang -march value; point at the real flags.
#error To compile AVX2 add build command "-mavx2" (plus "-mfma" when FASTSIMD_USE_FMA is enabled) on FastSIMD_Level_AVX2.cpp, or change "#define FASTSIMD_COMPILE_AVX2" in FastSIMD_Config.h
#endif
#endif
#include "Internal/AVX.h"
#define FS_SIMD_CLASS FastSIMD::AVX2
#include "Internal/SourceBuilder.inl"
#endif

View File

@ -0,0 +1,17 @@
#include "FastSIMD/FastSIMD.h"
#if FASTSIMD_COMPILE_AVX512
// To compile AVX512 support enable AVX512 code generation compiler flags for this file
#ifndef __AVX512DQ__
#ifdef _MSC_VER
#error To compile AVX512 set C++ code generation to use /arch:AVX512 on FastSIMD_Level_AVX512.cpp, or change "#define FASTSIMD_COMPILE_AVX512" in FastSIMD_Config.h
#else
#error To compile AVX512 add build command "-mavx512f -mavx512dq" on FastSIMD_Level_AVX512.cpp, or change "#define FASTSIMD_COMPILE_AVX512" in FastSIMD_Config.h
#endif
#endif
#include "Internal/AVX512.h"
#define FS_SIMD_CLASS FastSIMD::AVX512
#include "Internal/SourceBuilder.inl"
#endif

View File

@ -0,0 +1,7 @@
#include "FastSIMD/FastSIMD.h"
#if FASTSIMD_COMPILE_NEON
#include "Internal/NEON.h"
#define FS_SIMD_CLASS FastSIMD::NEON
#include "Internal/SourceBuilder.inl"
#endif

View File

@ -0,0 +1,7 @@
#include "FastSIMD/FastSIMD.h"
#if FASTSIMD_COMPILE_SSE2
#include "Internal/SSE.h"
#define FS_SIMD_CLASS FastSIMD::SSE2
#include "Internal/SourceBuilder.inl"
#endif

View File

@ -0,0 +1,7 @@
#include "FastSIMD/FastSIMD.h"
#if FASTSIMD_COMPILE_SSE3
#include "Internal/SSE.h"
#define FS_SIMD_CLASS FastSIMD::SSE3
#include "Internal/SourceBuilder.inl"
#endif

View File

@ -0,0 +1,7 @@
#include "FastSIMD/FastSIMD.h"
#if FASTSIMD_COMPILE_SSE41
#include "Internal/SSE.h"
#define FS_SIMD_CLASS FastSIMD::SSE41
#include "Internal/SourceBuilder.inl"
#endif

View File

@ -0,0 +1,7 @@
#include "FastSIMD/FastSIMD.h"
#if FASTSIMD_COMPILE_SSE42
#include "Internal/SSE.h"
#define FS_SIMD_CLASS FastSIMD::SSE42
#include "Internal/SourceBuilder.inl"
#endif

View File

@ -0,0 +1,7 @@
#include "FastSIMD/FastSIMD.h"
#if FASTSIMD_COMPILE_SSSE3
#include "Internal/SSE.h"
#define FS_SIMD_CLASS FastSIMD::SSSE3
#include "Internal/SourceBuilder.inl"
#endif

View File

@ -0,0 +1,7 @@
#include "FastSIMD/FastSIMD.h"
#if FASTSIMD_COMPILE_SCALAR
#include "Internal/Scalar.h"
#define FS_SIMD_CLASS FastSIMD::Scalar
#include "Internal/SourceBuilder.inl"
#endif

View File

@ -0,0 +1,448 @@
#pragma once
#ifdef __GNUG__
#include <x86intrin.h>
#else
#include <intrin.h>
#endif
#include "VecTools.h"
namespace FastSIMD
{
// Vector of eight 32-bit floats built on the AVX type __m256.
// Storage and conversions to/from __m256 come from FASTSIMD_INTERNAL_TYPE_SET (defined elsewhere).
struct AVX_f32x8
{
FASTSIMD_INTERNAL_TYPE_SET( AVX_f32x8, __m256 );
// Returns the vector ( 0, 1, ..., 7 ); _mm256_set_ps takes its arguments highest lane first.
FS_INLINE static AVX_f32x8 Incremented()
{
return _mm256_set_ps( 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f );
}
// Broadcasts f to all eight lanes.
FS_INLINE explicit AVX_f32x8( float f )
{
*this = _mm256_set1_ps( f );
}
// Builds the vector from individual lanes, f0 being lane 0.
FS_INLINE explicit AVX_f32x8( float f0, float f1, float f2, float f3, float f4, float f5, float f6, float f7 )
{
*this = _mm256_set_ps( f7, f6, f5, f4, f3, f2, f1, f0 );
}
// Lane-wise arithmetic compound assignments.
FS_INLINE AVX_f32x8& operator+=( const AVX_f32x8& rhs )
{
*this = _mm256_add_ps( *this, rhs );
return *this;
}
FS_INLINE AVX_f32x8& operator-=( const AVX_f32x8& rhs )
{
*this = _mm256_sub_ps( *this, rhs );
return *this;
}
FS_INLINE AVX_f32x8& operator*=( const AVX_f32x8& rhs )
{
*this = _mm256_mul_ps( *this, rhs );
return *this;
}
FS_INLINE AVX_f32x8& operator/=( const AVX_f32x8& rhs )
{
*this = _mm256_div_ps( *this, rhs );
return *this;
}
// Bitwise compound assignments on the raw float bit patterns.
FS_INLINE AVX_f32x8& operator&=( const AVX_f32x8& rhs )
{
*this = _mm256_and_ps( *this, rhs );
return *this;
}
FS_INLINE AVX_f32x8& operator|=( const AVX_f32x8& rhs )
{
*this = _mm256_or_ps( *this, rhs );
return *this;
}
FS_INLINE AVX_f32x8& operator^=( const AVX_f32x8& rhs )
{
*this = _mm256_xor_ps( *this, rhs );
return *this;
}
// Bitwise NOT: XOR with an all-ones constant.
// NOTE(review): with FASTSIMD_CONFIG_GENERATE_CONSTANTS the constant is built with
// _mm256_cmpeq_epi32, an AVX2 integer intrinsic - confirm that path is never used for an AVX-only target.
FS_INLINE AVX_f32x8 operator~() const
{
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
const __m256i neg1 = _mm256_cmpeq_epi32( _mm256_setzero_si256(), _mm256_setzero_si256() );
#else
const __m256i neg1 = _mm256_set1_epi32( -1 );
#endif
return _mm256_xor_ps( *this, _mm256_castsi256_ps( neg1 ) );
}
// Negation: flips the sign bit of every lane by XOR with 0x80000000.
FS_INLINE AVX_f32x8 operator-() const
{
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
const __m256i minInt = _mm256_slli_epi32( _mm256_cmpeq_epi32( _mm256_setzero_si256(), _mm256_setzero_si256() ), 31 );
#else
const __m256i minInt = _mm256_set1_epi32( 0x80000000 );
#endif
return _mm256_xor_ps( *this, _mm256_castsi256_ps( minInt ) );
}
// Lane-wise comparisons using the ordered, signaling predicates (_CMP_*_OS).
// Each returns the compare result reinterpreted as __m256i.
// NOTE(review): unlike operator~ and operator-, these are not const-qualified.
FS_INLINE __m256i operator==( const AVX_f32x8& rhs )
{
return _mm256_castps_si256( _mm256_cmp_ps( *this, rhs, _CMP_EQ_OS ) );
}
FS_INLINE __m256i operator!=( const AVX_f32x8& rhs )
{
return _mm256_castps_si256( _mm256_cmp_ps( *this, rhs, _CMP_NEQ_OS ) );
}
FS_INLINE __m256i operator>( const AVX_f32x8& rhs )
{
return _mm256_castps_si256( _mm256_cmp_ps( *this, rhs, _CMP_GT_OS ) );
}
FS_INLINE __m256i operator<( const AVX_f32x8& rhs )
{
return _mm256_castps_si256( _mm256_cmp_ps( *this, rhs, _CMP_LT_OS ) );
}
FS_INLINE __m256i operator>=( const AVX_f32x8& rhs )
{
return _mm256_castps_si256( _mm256_cmp_ps( *this, rhs, _CMP_GE_OS ) );
}
FS_INLINE __m256i operator<=( const AVX_f32x8& rhs )
{
return _mm256_castps_si256( _mm256_cmp_ps( *this, rhs, _CMP_LE_OS ) );
}
};
// presumably generates the non-member operators for this type - defined elsewhere, confirm
FASTSIMD_INTERNAL_OPERATORS_FLOAT( AVX_f32x8 )
// Vector of eight 32-bit signed integers built on __m256i, using AVX2 integer intrinsics.
// Storage and conversions to/from __m256i come from FASTSIMD_INTERNAL_TYPE_SET (defined elsewhere).
struct AVX2_i32x8
{
FASTSIMD_INTERNAL_TYPE_SET( AVX2_i32x8, __m256i );
// Returns the vector ( 0, 1, ..., 7 ); _mm256_set_epi32 takes its arguments highest lane first.
FS_INLINE static AVX2_i32x8 Incremented()
{
return _mm256_set_epi32( 7, 6, 5, 4, 3, 2, 1, 0 );
}
// Broadcasts f to all eight lanes.
FS_INLINE explicit AVX2_i32x8( int32_t f )
{
*this = _mm256_set1_epi32( f );
}
// Builds the vector from individual lanes, i0 being lane 0.
FS_INLINE explicit AVX2_i32x8( int32_t i0, int32_t i1, int32_t i2, int32_t i3, int32_t i4, int32_t i5, int32_t i6, int32_t i7 )
{
*this = _mm256_set_epi32( i7, i6, i5, i4, i3, i2, i1, i0 );
}
// Lane-wise arithmetic compound assignments.
FS_INLINE AVX2_i32x8& operator+=( const AVX2_i32x8& rhs )
{
*this = _mm256_add_epi32( *this, rhs );
return *this;
}
FS_INLINE AVX2_i32x8& operator-=( const AVX2_i32x8& rhs )
{
*this = _mm256_sub_epi32( *this, rhs );
return *this;
}
// Multiplication keeps the low 32 bits of each product (mullo).
FS_INLINE AVX2_i32x8& operator*=( const AVX2_i32x8& rhs )
{
*this = _mm256_mullo_epi32( *this, rhs );
return *this;
}
// Bitwise compound assignments.
FS_INLINE AVX2_i32x8& operator&=( const AVX2_i32x8& rhs )
{
*this = _mm256_and_si256( *this, rhs );
return *this;
}
FS_INLINE AVX2_i32x8& operator|=( const AVX2_i32x8& rhs )
{
*this = _mm256_or_si256( *this, rhs );
return *this;
}
FS_INLINE AVX2_i32x8& operator^=( const AVX2_i32x8& rhs )
{
*this = _mm256_xor_si256( *this, rhs );
return *this;
}
// Right shift is arithmetic (srai, sign-extending); left shift is logical (slli).
FS_INLINE AVX2_i32x8& operator>>=( int32_t rhs )
{
*this = _mm256_srai_epi32( *this, rhs );
return *this;
}
FS_INLINE AVX2_i32x8& operator<<=( int32_t rhs )
{
*this = _mm256_slli_epi32( *this, rhs );
return *this;
}
// Bitwise NOT: XOR with an all-ones constant.
FS_INLINE AVX2_i32x8 operator~() const
{
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
const __m256i neg1 = _mm256_cmpeq_epi32( _mm256_setzero_si256(), _mm256_setzero_si256() );
#else
const __m256i neg1 = _mm256_set1_epi32( -1 );
#endif
return _mm256_xor_si256( *this, neg1 );
}
// Negation computed as 0 - x.
FS_INLINE AVX2_i32x8 operator-() const
{
return _mm256_sub_epi32( _mm256_setzero_si256(), *this );
}
// Lane-wise comparisons returning a mask vector of the same type.
FS_INLINE AVX2_i32x8 operator==( const AVX2_i32x8& rhs )
{
return _mm256_cmpeq_epi32( *this, rhs );
}
FS_INLINE AVX2_i32x8 operator>( const AVX2_i32x8& rhs )
{
return _mm256_cmpgt_epi32( *this, rhs );
}
// a < b is evaluated as b > a by swapping the operands of cmpgt.
FS_INLINE AVX2_i32x8 operator<( const AVX2_i32x8& rhs )
{
return _mm256_cmpgt_epi32( rhs, *this );
}
};
// presumably generates the non-member operators for this type - defined elsewhere, confirm
FASTSIMD_INTERNAL_OPERATORS_INT( AVX2_i32x8, int32_t )
// Static function set for the 256-bit AVX/AVX2 levels: the vector typedefs plus
// load/store, cast, convert, select, min/max, bitwise, abs, math, rounding and mask helpers.
// LEVEL_T must be Level_AVX or Level_AVX2 (enforced by the static_assert).
template<eLevel LEVEL_T>
class AVX_T
{
public:
static_assert( LEVEL_T >= Level_AVX && LEVEL_T <= Level_AVX2, "Cannot create template with unsupported SIMD level" );
static constexpr eLevel SIMD_Level = LEVEL_T;
// Number of elements per 256-bit vector for a given element size in bits.
template<size_t ElementSize = 8>
static constexpr size_t VectorSize = 256 / ElementSize;
typedef AVX_f32x8 float32v;
typedef AVX2_i32x8 int32v;
// Masks are full-width integer vectors at this level.
typedef AVX2_i32x8 mask32v;
// Load
// Both loads use the unaligned (loadu) intrinsics.
FS_INLINE static float32v Load_f32( void const* p )
{
return _mm256_loadu_ps( reinterpret_cast<float const*>(p) );
}
FS_INLINE static int32v Load_i32( void const* p )
{
return _mm256_loadu_si256( reinterpret_cast<__m256i const*>(p) );
}
// Store
// Both stores use the unaligned (storeu) intrinsics.
FS_INLINE static void Store_f32( void* p, float32v a )
{
_mm256_storeu_ps( reinterpret_cast<float*>(p), a );
}
FS_INLINE static void Store_i32( void* p, int32v a )
{
_mm256_storeu_si256( reinterpret_cast<__m256i*>(p), a );
}
// Cast
// Reinterprets the bits; no value conversion takes place.
FS_INLINE static float32v Casti32_f32( int32v a )
{
return _mm256_castsi256_ps( a );
}
FS_INLINE static int32v Castf32_i32( float32v a )
{
return _mm256_castps_si256( a );
}
// Convert
// Value conversions between int32 and float lanes.
FS_INLINE static float32v Converti32_f32( int32v a )
{
return _mm256_cvtepi32_ps( a );
}
FS_INLINE static int32v Convertf32_i32( float32v a )
{
return _mm256_cvtps_epi32( a );
}
// Select
// Per lane: picks a where the mask is set, otherwise b (blendv reads the top bit of each mask lane).
FS_INLINE static float32v Select_f32( mask32v m, float32v a, float32v b )
{
return _mm256_blendv_ps( b, a, _mm256_castsi256_ps( m ) );
}
FS_INLINE static int32v Select_i32( mask32v m, int32v a, int32v b )
{
return _mm256_castps_si256( _mm256_blendv_ps( _mm256_castsi256_ps( b ), _mm256_castsi256_ps( a ), _mm256_castsi256_ps( m ) ) );
}
// Min, Max
FS_INLINE static float32v Min_f32( float32v a, float32v b )
{
return _mm256_min_ps( a, b );
}
FS_INLINE static float32v Max_f32( float32v a, float32v b )
{
return _mm256_max_ps( a, b );
}
FS_INLINE static int32v Min_i32( int32v a, int32v b )
{
return _mm256_min_epi32( a, b );
}
FS_INLINE static int32v Max_i32( int32v a, int32v b )
{
return _mm256_max_epi32( a, b );
}
// Bitwise
// BitwiseAndNot( a, b ) computes a & ~b; the intrinsic negates its first operand, hence the swapped arguments.
FS_INLINE static float32v BitwiseAndNot_f32( float32v a, float32v b )
{
return _mm256_andnot_ps( b, a );
}
FS_INLINE static int32v BitwiseAndNot_i32( int32v a, int32v b )
{
return _mm256_andnot_si256( b, a );
}
// Logical (zero-filling) right shift, as opposed to the arithmetic shift used by int32v's operator>>=.
FS_INLINE static float32v BitwiseShiftRightZX_f32( float32v a, int32_t b )
{
return Casti32_f32( _mm256_srli_epi32( Castf32_i32( a ), b ) );
}
FS_INLINE static int32v BitwiseShiftRightZX_i32( int32v a, int32_t b )
{
return _mm256_srli_epi32( a, b );
}
// Abs
// Float abs clears the sign bit by ANDing with 0x7FFFFFFF.
FS_INLINE static float32v Abs_f32( float32v a )
{
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
const __m256i intMax = _mm256_srli_epi32( _mm256_cmpeq_epi32( _mm256_setzero_si256(), _mm256_setzero_si256() ), 1 );
#else
const __m256i intMax = _mm256_set1_epi32( 0x7FFFFFFF );
#endif
return _mm256_and_ps( a, _mm256_castsi256_ps( intMax ) );
}
FS_INLINE static int32v Abs_i32( int32v a )
{
return _mm256_abs_epi32( a );
}
// Float math
// InvSqrt and Reciprocal use the approximate rsqrt/rcp instructions, not exact division.
FS_INLINE static float32v Sqrt_f32( float32v a )
{
return _mm256_sqrt_ps( a );
}
FS_INLINE static float32v InvSqrt_f32( float32v a )
{
return _mm256_rsqrt_ps( a );
}
FS_INLINE static float32v Reciprocal_f32( float32v a )
{
return _mm256_rcp_ps( a );
}
// Floor, Ceil, Round
// All three use _mm256_round_ps with exceptions suppressed (_MM_FROUND_NO_EXC).
FS_INLINE static float32v Floor_f32( float32v a )
{
return _mm256_round_ps( a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC );
}
FS_INLINE static float32v Ceil_f32( float32v a )
{
return _mm256_round_ps( a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC );
}
FS_INLINE static float32v Round_f32( float32v a )
{
return _mm256_round_ps( a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC );
}
//Mask
// Mask keeps the bits of a where m is set (a & m); NMask keeps them where m is clear (a & ~m).
FS_INLINE static int32v Mask_i32( int32v a, mask32v m )
{
return a & m;
}
FS_INLINE static float32v Mask_f32( float32v a, mask32v m )
{
return _mm256_and_ps( a, _mm256_castsi256_ps( m ) );
}
FS_INLINE static int32v NMask_i32( int32v a, mask32v m )
{
return _mm256_andnot_si256( m, a );
}
FS_INLINE static float32v NMask_f32( float32v a, mask32v m )
{
return _mm256_andnot_ps( _mm256_castsi256_ps( m ), a );
}
// True if the top bit of any mask lane is set (movemask result is non-zero).
FS_INLINE static bool AnyMask_bool( mask32v m )
{
return _mm256_movemask_ps( _mm256_castsi256_ps( m ) );
}
};
// Concrete level types; each one exists only when the matching level is compiled in.
#if FASTSIMD_COMPILE_AVX
typedef AVX_T<Level_AVX> AVX;
#endif
#if FASTSIMD_COMPILE_AVX2
typedef AVX_T<Level_AVX2> AVX2;
#if FASTSIMD_USE_FMA
// AVX2 specialisations of the fused multiply-add helpers (only with FASTSIMD_USE_FMA).
// FMulAdd computes a * b + c with the fmadd instruction.
template<>
FS_INLINE AVX2::float32v FMulAdd_f32<AVX2>( AVX2::float32v a, AVX2::float32v b, AVX2::float32v c )
{
return _mm256_fmadd_ps( a, b, c );
}
// FNMulAdd computes -( a * b ) + c with the fnmadd instruction.
template<>
FS_INLINE AVX2::float32v FNMulAdd_f32<AVX2>( AVX2::float32v a, AVX2::float32v b, AVX2::float32v c )
{
return _mm256_fnmadd_ps( a, b, c );
}
#endif
#endif
}

View File

@ -0,0 +1,516 @@
#pragma once
#include <immintrin.h>
#include "VecTools.h"
namespace FastSIMD
{
// Vector of sixteen 32-bit floats built on the AVX-512 type __m512.
// Storage and conversions to/from __m512 come from FASTSIMD_INTERNAL_TYPE_SET (defined elsewhere).
struct AVX512_f32x16
{
FASTSIMD_INTERNAL_TYPE_SET( AVX512_f32x16, __m512 );
// Returns the vector ( 0, 1, ..., 15 ); _mm512_set_ps takes its arguments highest lane first.
FS_INLINE static AVX512_f32x16 Incremented()
{
return _mm512_set_ps( 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f );
}
// Broadcasts f to all sixteen lanes.
FS_INLINE explicit AVX512_f32x16( float f )
{
*this = _mm512_set1_ps( f );
}
// Builds the vector from individual lanes, f0 being lane 0.
FS_INLINE explicit AVX512_f32x16( float f0, float f1, float f2, float f3, float f4, float f5, float f6, float f7, float f8, float f9, float f10, float f11, float f12, float f13, float f14, float f15 )
{
*this = _mm512_set_ps( f15, f14, f13, f12, f11, f10, f9, f8, f7, f6, f5, f4, f3, f2, f1, f0 );
}
// Lane-wise arithmetic compound assignments.
FS_INLINE AVX512_f32x16& operator+=( const AVX512_f32x16& rhs )
{
*this = _mm512_add_ps( *this, rhs );
return *this;
}
FS_INLINE AVX512_f32x16& operator-=( const AVX512_f32x16& rhs )
{
*this = _mm512_sub_ps( *this, rhs );
return *this;
}
FS_INLINE AVX512_f32x16& operator*=( const AVX512_f32x16& rhs )
{
*this = _mm512_mul_ps( *this, rhs );
return *this;
}
FS_INLINE AVX512_f32x16& operator/=( const AVX512_f32x16& rhs )
{
*this = _mm512_div_ps( *this, rhs );
return *this;
}
// Bitwise compound assignments on the raw float bit patterns.
FS_INLINE AVX512_f32x16& operator&=( const AVX512_f32x16& rhs )
{
*this = _mm512_and_ps( *this, rhs );
return *this;
}
FS_INLINE AVX512_f32x16& operator|=( const AVX512_f32x16& rhs )
{
*this = _mm512_or_ps( *this, rhs );
return *this;
}
FS_INLINE AVX512_f32x16& operator^=( const AVX512_f32x16& rhs )
{
*this = _mm512_xor_ps( *this, rhs );
return *this;
}
// Bitwise NOT: XOR with an all-ones constant.
// NOTE(review): the FASTSIMD_CONFIG_GENERATE_CONSTANTS branch assigns the result of
// _mm512_cmpeq_epi32 to a __m512i - confirm that intrinsic exists with a vector result.
FS_INLINE AVX512_f32x16 operator~() const
{
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
const __m512i neg1 = _mm512_cmpeq_epi32( _mm512_setzero_si512(), _mm512_setzero_si512() );
#else
const __m512i neg1 = _mm512_set1_epi32( -1 );
#endif
return _mm512_xor_ps( *this, _mm512_castsi512_ps( neg1 ) );
}
// Negation: flips the sign bit of every lane by XOR with 0x80000000.
FS_INLINE AVX512_f32x16 operator-() const
{
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
const __m512i minInt = _mm512_slli_epi32( _mm512_cmpeq_epi32( _mm512_setzero_si512(), _mm512_setzero_si512() ), 31 );
#else
const __m512i minInt = _mm512_set1_epi32( 0x80000000 );
#endif
return _mm512_xor_ps( *this, _mm512_castsi512_ps( minInt ) );
}
// Lane-wise comparisons using the ordered, signaling predicates (_CMP_*_OS).
// Each returns a 16-bit mask (__mmask16), one bit per lane.
// NOTE(review): unlike operator~ and operator-, these are not const-qualified.
FS_INLINE __mmask16 operator==( const AVX512_f32x16& rhs )
{
return _mm512_cmp_ps_mask( *this, rhs, _CMP_EQ_OS );
}
FS_INLINE __mmask16 operator!=( const AVX512_f32x16& rhs )
{
return _mm512_cmp_ps_mask( *this, rhs, _CMP_NEQ_OS );
}
FS_INLINE __mmask16 operator>( const AVX512_f32x16& rhs )
{
return _mm512_cmp_ps_mask( *this, rhs, _CMP_GT_OS );
}
FS_INLINE __mmask16 operator<( const AVX512_f32x16& rhs )
{
return _mm512_cmp_ps_mask( *this, rhs, _CMP_LT_OS );
}
FS_INLINE __mmask16 operator>=( const AVX512_f32x16& rhs )
{
return _mm512_cmp_ps_mask( *this, rhs, _CMP_GE_OS );
}
FS_INLINE __mmask16 operator<=( const AVX512_f32x16& rhs )
{
return _mm512_cmp_ps_mask( *this, rhs, _CMP_LE_OS );
}
};
// presumably generates the non-member operators for this type - defined elsewhere, confirm
FASTSIMD_INTERNAL_OPERATORS_FLOAT( AVX512_f32x16 )
// Vector of sixteen 32-bit signed integers built on the AVX-512 type __m512i.
// Storage and conversions to/from __m512i come from FASTSIMD_INTERNAL_TYPE_SET (defined elsewhere).
struct AVX512_i32x16
{
FASTSIMD_INTERNAL_TYPE_SET( AVX512_i32x16, __m512i );
// Returns the vector ( 0, 1, ..., 15 ); _mm512_set_epi32 takes its arguments highest lane first.
FS_INLINE static AVX512_i32x16 Incremented()
{
return _mm512_set_epi32( 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 );
}
// Broadcasts i to all sixteen lanes.
FS_INLINE explicit AVX512_i32x16( int32_t i )
{
*this = _mm512_set1_epi32( i );
}
// Builds the vector from individual lanes, i0 being lane 0.
FS_INLINE explicit AVX512_i32x16( int32_t i0, int32_t i1, int32_t i2, int32_t i3, int32_t i4, int32_t i5, int32_t i6, int32_t i7, int32_t i8, int32_t i9, int32_t i10, int32_t i11, int32_t i12, int32_t i13, int32_t i14, int32_t i15 )
{
*this = _mm512_set_epi32( i15, i14, i13, i12, i11, i10, i9, i8, i7, i6, i5, i4, i3, i2, i1, i0 );
}
// Lane-wise arithmetic compound assignments.
FS_INLINE AVX512_i32x16& operator+=( const AVX512_i32x16& rhs )
{
*this = _mm512_add_epi32( *this, rhs );
return *this;
}
FS_INLINE AVX512_i32x16& operator-=( const AVX512_i32x16& rhs )
{
*this = _mm512_sub_epi32( *this, rhs );
return *this;
}
// Multiplication keeps the low 32 bits of each product (mullo).
FS_INLINE AVX512_i32x16& operator*=( const AVX512_i32x16& rhs )
{
*this = _mm512_mullo_epi32( *this, rhs );
return *this;
}
// Bitwise compound assignments.
FS_INLINE AVX512_i32x16& operator&=( const AVX512_i32x16& rhs )
{
*this = _mm512_and_si512( *this, rhs );
return *this;
}
FS_INLINE AVX512_i32x16& operator|=( const AVX512_i32x16& rhs )
{
*this = _mm512_or_si512( *this, rhs );
return *this;
}
FS_INLINE AVX512_i32x16& operator^=( const AVX512_i32x16& rhs )
{
*this = _mm512_xor_si512( *this, rhs );
return *this;
}
// Right shift is arithmetic (srai, sign-extending); left shift is logical (slli).
FS_INLINE AVX512_i32x16& operator>>=( int32_t rhs )
{
*this = _mm512_srai_epi32( *this, rhs );
return *this;
}
FS_INLINE AVX512_i32x16& operator<<=( int32_t rhs )
{
*this = _mm512_slli_epi32( *this, rhs );
return *this;
}
// Bitwise NOT: XOR with an all-ones constant.
// NOTE(review): the FASTSIMD_CONFIG_GENERATE_CONSTANTS branch assigns the result of
// _mm512_cmpeq_epi32 to a __m512i - confirm that intrinsic exists with a vector result.
FS_INLINE AVX512_i32x16 operator~() const
{
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
const __m512i neg1 = _mm512_cmpeq_epi32( _mm512_setzero_si512(), _mm512_setzero_si512() );
#else
const __m512i neg1 = _mm512_set1_epi32( -1 );
#endif
return _mm512_xor_si512( *this, neg1 );
}
// Negation computed as 0 - x.
FS_INLINE AVX512_i32x16 operator-() const
{
return _mm512_sub_epi32( _mm512_setzero_si512(), *this );
}
// Lane-wise comparisons returning a 16-bit mask (__mmask16), one bit per lane.
FS_INLINE __mmask16 operator==( const AVX512_i32x16& rhs )
{
return _mm512_cmpeq_epi32_mask( *this, rhs );
}
FS_INLINE __mmask16 operator>( const AVX512_i32x16& rhs )
{
return _mm512_cmpgt_epi32_mask( *this, rhs );
}
FS_INLINE __mmask16 operator<( const AVX512_i32x16& rhs )
{
return _mm512_cmplt_epi32_mask( *this, rhs );
}
};
// presumably generates the non-member operators for this type - defined elsewhere, confirm
FASTSIMD_INTERNAL_OPERATORS_INT( AVX512_i32x16, int32_t )
// Static function set for the 512-bit AVX-512 level: the vector typedefs plus
// load/store, cast, convert, select, min/max, bitwise, abs, math, rounding and mask helpers.
// LEVEL_T must be Level_AVX512 (enforced by the static_assert).
template<eLevel LEVEL_T>
class AVX512_T
{
public:
static_assert( LEVEL_T == Level_AVX512, "Cannot create template with unsupported SIMD level" );
static constexpr eLevel SIMD_Level = LEVEL_T;
// Number of elements per 512-bit vector for a given element size in bits.
template<size_t ElementSize = 8>
static constexpr size_t VectorSize = 512 / ElementSize;
typedef AVX512_f32x16 float32v;
typedef AVX512_i32x16 int32v;
// Masks are 16-bit mask values (one bit per lane), not vectors.
typedef __mmask16 mask32v;
// Load
// Both loads use the unaligned (loadu) intrinsics.
FS_INLINE static float32v Load_f32( void const* p )
{
return _mm512_loadu_ps( p );
}
FS_INLINE static int32v Load_i32( void const* p )
{
return _mm512_loadu_si512( p );
}
// Store
// Both stores use the unaligned (storeu) intrinsics.
FS_INLINE static void Store_f32( void* p, float32v a )
{
_mm512_storeu_ps( p, a );
}
FS_INLINE static void Store_i32( void* p, int32v a )
{
_mm512_storeu_si512( p, a );
}
// Cast
// Reinterprets the bits; no value conversion takes place.
FS_INLINE static float32v Casti32_f32( int32v a )
{
return _mm512_castsi512_ps( a );
}
FS_INLINE static int32v Castf32_i32( float32v a )
{
return _mm512_castps_si512( a );
}
// Convert
// Value conversions between int32 and float lanes.
FS_INLINE static float32v Converti32_f32( int32v a )
{
return _mm512_cvtepi32_ps( a );
}
FS_INLINE static int32v Convertf32_i32( float32v a )
{
return _mm512_cvtps_epi32( a );
}
// Select
// Per lane: picks a where the mask bit is set, otherwise b.
FS_INLINE static float32v Select_f32( mask32v m, float32v a, float32v b )
{
return _mm512_mask_blend_ps( m, b, a );
}
FS_INLINE static int32v Select_i32( mask32v m, int32v a, int32v b )
{
return _mm512_mask_blend_epi32( m, b, a );
}
// Min, Max
FS_INLINE static float32v Min_f32( float32v a, float32v b )
{
return _mm512_min_ps( a, b );
}
FS_INLINE static float32v Max_f32( float32v a, float32v b )
{
return _mm512_max_ps( a, b );
}
FS_INLINE static int32v Min_i32( int32v a, int32v b )
{
return _mm512_min_epi32( a, b );
}
FS_INLINE static int32v Max_i32( int32v a, int32v b )
{
return _mm512_max_epi32( a, b );
}
// Bitwise
// BitwiseAndNot( a, b ) computes a & ~b; the intrinsic negates its first operand, hence the swapped arguments.
FS_INLINE static float32v BitwiseAndNot_f32( float32v a, float32v b )
{
return _mm512_andnot_ps( b, a );
}
FS_INLINE static int32v BitwiseAndNot_i32( int32v a, int32v b )
{
return _mm512_andnot_si512( b, a );
}
// Logical (zero-filling) right shift, as opposed to the arithmetic shift used by int32v's operator>>=.
FS_INLINE static float32v BitwiseShiftRightZX_f32( float32v a, int32_t b )
{
return Casti32_f32( _mm512_srli_epi32( Castf32_i32( a ), b ) );
}
FS_INLINE static int32v BitwiseShiftRightZX_i32( int32v a, int32_t b )
{
return _mm512_srli_epi32( a, b );
}
// Abs
FS_INLINE static float32v Abs_f32( float32v a )
{
return _mm512_abs_ps( a );
}
FS_INLINE static int32v Abs_i32( int32v a )
{
return _mm512_abs_epi32( a );
}
// Float math
// InvSqrt and Reciprocal use the approximate rsqrt14/rcp14 instructions, not exact division.
FS_INLINE static float32v Sqrt_f32( float32v a )
{
return _mm512_sqrt_ps( a );
}
FS_INLINE static float32v InvSqrt_f32( float32v a )
{
return _mm512_rsqrt14_ps( a );
}
FS_INLINE static float32v Reciprocal_f32( float32v a )
{
return _mm512_rcp14_ps( a );
}
// Floor, Ceil, Round
// All three use _mm512_roundscale_ps with exceptions suppressed (_MM_FROUND_NO_EXC).
FS_INLINE static float32v Floor_f32( float32v a )
{
return _mm512_roundscale_ps( a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC );
}
FS_INLINE static float32v Ceil_f32( float32v a )
{
return _mm512_roundscale_ps( a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC );
}
FS_INLINE static float32v Round_f32( float32v a )
{
return _mm512_roundscale_ps( a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC );
}
//Mask
// Mask zeroes the lanes whose mask bit is clear; NMask zeroes the lanes whose mask bit is set.
FS_INLINE static int32v Mask_i32( int32v a, mask32v m )
{
return _mm512_maskz_mov_epi32( m, a );
}
FS_INLINE static float32v Mask_f32( float32v a, mask32v m )
{
return _mm512_maskz_mov_ps( m, a );
}
FS_INLINE static int32v NMask_i32( int32v a, mask32v m )
{
return _mm512_maskz_mov_epi32( ~m, a );
}
FS_INLINE static float32v NMask_f32( float32v a, mask32v m )
{
return _mm512_maskz_mov_ps( ~m, a );
}
// True if any mask bit is set.
FS_INLINE static bool AnyMask_bool( mask32v m )
{
return m;
}
};
#if FASTSIMD_COMPILE_AVX512
// Concrete AVX512 implementation type used by the specializations below.
using AVX512 = AVX512_T<Level_AVX512>;
#if FASTSIMD_USE_FMA
// AVX512 fused multiply-add: (a * b) + c per lane, in a single instruction.
template<>
FS_INLINE AVX512::float32v FMulAdd_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::float32v c )
{
    const __m512 fused = _mm512_fmadd_ps( a, b, c );
    return fused;
}
// AVX512 fused negated multiply-add: -(a * b) + c per lane, in a single instruction.
template<>
FS_INLINE AVX512::float32v FNMulAdd_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::float32v c )
{
    const __m512 fused = _mm512_fnmadd_ps( a, b, c );
    return fused;
}
#endif
// Masked float
// a + b in the lanes selected by m; unselected lanes pass a through unchanged.
template<>
FS_INLINE AVX512::float32v MaskedAdd_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::mask32v m )
{
    // a doubles as the pass-through source and as the left operand.
    const __m512 base = a;
    return _mm512_mask_add_ps( base, m, base, b );
}
// a - b in the lanes selected by m; unselected lanes pass a through unchanged.
template<>
FS_INLINE AVX512::float32v MaskedSub_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::mask32v m )
{
    // a doubles as the pass-through source and as the left operand.
    const __m512 base = a;
    return _mm512_mask_sub_ps( base, m, base, b );
}
// a * b in the lanes selected by m; unselected lanes pass a through unchanged.
template<>
FS_INLINE AVX512::float32v MaskedMul_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::mask32v m )
{
    // a doubles as the pass-through source and as the left operand.
    const __m512 base = a;
    return _mm512_mask_mul_ps( base, m, base, b );
}
// Masked int32
// a + b in the lanes selected by m; unselected lanes pass a through unchanged.
template<>
FS_INLINE AVX512::int32v MaskedAdd_i32<AVX512>( AVX512::int32v a, AVX512::int32v b, AVX512::mask32v m )
{
    // a doubles as the pass-through source and as the left operand.
    const __m512i base = a;
    return _mm512_mask_add_epi32( base, m, base, b );
}
// a - b in the lanes selected by m; unselected lanes pass a through unchanged.
template<>
FS_INLINE AVX512::int32v MaskedSub_i32<AVX512>( AVX512::int32v a, AVX512::int32v b, AVX512::mask32v m )
{
    // a doubles as the pass-through source and as the left operand.
    const __m512i base = a;
    return _mm512_mask_sub_epi32( base, m, base, b );
}
// Low 32 bits of a * b in the lanes selected by m; unselected lanes pass a
// through unchanged.
template<>
FS_INLINE AVX512::int32v MaskedMul_i32<AVX512>( AVX512::int32v a, AVX512::int32v b, AVX512::mask32v m )
{
    // a doubles as the pass-through source and as the left operand.
    const __m512i base = a;
    return _mm512_mask_mullo_epi32( base, m, base, b );
}
// NMasked float
// a + b in the lanes NOT selected by m; selected lanes pass a through unchanged.
template<>
FS_INLINE AVX512::float32v NMaskedAdd_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::mask32v m )
{
    const auto inverted = ~m;
    return _mm512_mask_add_ps( a, inverted, a, b );
}
// a - b in the lanes NOT selected by m; selected lanes pass a through unchanged.
template<>
FS_INLINE AVX512::float32v NMaskedSub_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::mask32v m )
{
    const auto inverted = ~m;
    return _mm512_mask_sub_ps( a, inverted, a, b );
}
// a * b in the lanes NOT selected by m; selected lanes pass a through unchanged.
template<>
FS_INLINE AVX512::float32v NMaskedMul_f32<AVX512>( AVX512::float32v a, AVX512::float32v b, AVX512::mask32v m )
{
    const auto inverted = ~m;
    return _mm512_mask_mul_ps( a, inverted, a, b );
}
// NMasked int32
// a + b in the lanes NOT selected by m; selected lanes pass a through unchanged.
template<>
FS_INLINE AVX512::int32v NMaskedAdd_i32<AVX512>( AVX512::int32v a, AVX512::int32v b, AVX512::mask32v m )
{
    const auto inverted = ~m;
    return _mm512_mask_add_epi32( a, inverted, a, b );
}
// a - b in the lanes NOT selected by m; selected lanes pass a through unchanged.
template<>
FS_INLINE AVX512::int32v NMaskedSub_i32<AVX512>( AVX512::int32v a, AVX512::int32v b, AVX512::mask32v m )
{
    const auto inverted = ~m;
    return _mm512_mask_sub_epi32( a, inverted, a, b );
}
// Low 32 bits of a * b in the lanes NOT selected by m; selected lanes pass a
// through unchanged.
//
// Uses the 32-bit "mullo" multiply, matching MaskedMul_i32. The previous
// _mm512_mask_mul_epi32 is a widening multiply: it only reads the low 32 bits
// of each 64-bit element, produces 64-bit products, and takes an 8-bit mask,
// so it returned wrong values for a per-lane 32-bit multiply.
template<>
FS_INLINE AVX512::int32v NMaskedMul_i32<AVX512>( AVX512::int32v a, AVX512::int32v b, AVX512::mask32v m )
{
    return _mm512_mask_mullo_epi32( a, ~m, a, b );
}
#endif
}

View File

@ -0,0 +1,424 @@
#pragma once
#include <arm_neon.h>
#include "VecTools.h"
// Four single-precision floats stored in one 128-bit NEON register (float32x4_t).
struct NEON_f32x4
{
    FASTSIMD_INTERNAL_TYPE_SET( NEON_f32x4, float32x4_t );

    // Number of float lanes held by the vector.
    constexpr FS_INLINE static uint8_t Size()
    {
        return 4;
    }

    // Vector with every lane set to zero.
    FS_INLINE static NEON_f32x4 Zero()
    {
        return vdupq_n_f32( 0.0f );
    }

    // Vector whose lanes hold their own index: { 0, 1, 2, 3 }.
    FS_INLINE static NEON_f32x4 Incremented()
    {
        alignas(16) const float laneIndices[4]{ 0.0f, 1.0f, 2.0f, 3.0f };
        return vld1q_f32( laneIndices );
    }

    // Broadcasts f into all four lanes.
    FS_INLINE explicit NEON_f32x4( float f )
        : vector( vdupq_n_f32( f ) )
    {
    }

    // Builds a vector from four individual lane values; f0 lands in lane 0.
    FS_INLINE explicit NEON_f32x4( float f0, float f1, float f2, float f3 )
    {
        alignas(16) const float lanes[4]{ f0, f1, f2, f3 };
        vector = vld1q_f32( lanes );
    }

    FS_INLINE NEON_f32x4& operator+=( const NEON_f32x4& rhs )
    {
        vector = vaddq_f32( vector, rhs.vector );
        return *this;
    }

    FS_INLINE NEON_f32x4& operator-=( const NEON_f32x4& rhs )
    {
        vector = vsubq_f32( vector, rhs.vector );
        return *this;
    }

    FS_INLINE NEON_f32x4& operator*=( const NEON_f32x4& rhs )
    {
        vector = vmulq_f32( vector, rhs.vector );
        return *this;
    }

    // Division is evaluated as multiplication by an approximated reciprocal.
    FS_INLINE NEON_f32x4& operator/=( const NEON_f32x4& rhs )
    {
        // Start from the hardware reciprocal estimate of rhs and sharpen it with
        // two Newton-Raphson iterations (vrecpsq_f32 yields the correction factor).
        // Fewer iterations trade accuracy for speed.
        float32x4_t inverse = vrecpeq_f32( rhs.vector );
        for( int step = 0; step < 2; ++step )
        {
            inverse = vmulq_f32( vrecpsq_f32( rhs.vector, inverse ), inverse );
        }

        // a / b == a * (1 / b)
        vector = vmulq_f32( vector, inverse );
        return *this;
    }

    // Lane-wise negation.
    FS_INLINE NEON_f32x4 operator-() const
    {
        return vnegq_f32( vector );
    }
};
FASTSIMD_INTERNAL_OPERATORS_FLOAT( NEON_f32x4 )
// Four signed 32-bit integers stored in one 128-bit NEON register (int32x4_t).
struct NEON_i32x4
{
    FASTSIMD_INTERNAL_TYPE_SET( NEON_i32x4, int32x4_t );

    // Number of integer lanes held by the vector.
    constexpr FS_INLINE static uint8_t Size()
    {
        return 4;
    }

    // Vector with every lane set to zero.
    FS_INLINE static NEON_i32x4 Zero()
    {
        return vdupq_n_s32( 0 );
    }

    // Vector whose lanes hold their own index: { 0, 1, 2, 3 }.
    FS_INLINE static NEON_i32x4 Incremented()
    {
        alignas(16) const int32_t f[4]{ 0, 1, 2, 3 };
        return vld1q_s32( f );
    }

    // Broadcasts i into all four lanes.
    FS_INLINE explicit NEON_i32x4( int32_t i )
    {
        *this = vdupq_n_s32( i );
    }

    // Builds a vector from four individual lane values; i0 lands in lane 0.
    FS_INLINE explicit NEON_i32x4( int32_t i0, int32_t i1, int32_t i2, int32_t i3 )
    {
        alignas(16) const int32_t f[4]{ i0, i1, i2, i3 };
        *this = vld1q_s32( f );
    }

    FS_INLINE NEON_i32x4& operator+=( const NEON_i32x4& rhs )
    {
        *this = vaddq_s32( *this, rhs );
        return *this;
    }

    FS_INLINE NEON_i32x4& operator-=( const NEON_i32x4& rhs )
    {
        *this = vsubq_s32( *this, rhs );
        return *this;
    }

    FS_INLINE NEON_i32x4& operator*=( const NEON_i32x4& rhs )
    {
        *this = vmulq_s32( *this, rhs );
        return *this;
    }

    FS_INLINE NEON_i32x4& operator&=( const NEON_i32x4& rhs )
    {
        *this = vandq_s32( *this, rhs );
        return *this;
    }

    FS_INLINE NEON_i32x4& operator|=( const NEON_i32x4& rhs )
    {
        *this = vorrq_s32( *this, rhs );
        return *this;
    }

    FS_INLINE NEON_i32x4& operator^=( const NEON_i32x4& rhs )
    {
        *this = veorq_s32( *this, rhs );
        return *this;
    }

    // Arithmetic (sign-extending) right shift of every lane by rhs bits.
    FS_INLINE NEON_i32x4& operator>>=( const int32_t rhs )
    {
        // The vshrq_n_* intrinsics only accept a compile-time immediate, and rhs
        // is a runtime value. vshlq_s32 takes a per-lane shift count in a
        // register, and a negative count shifts right.
        *this = vshlq_s32( *this, vdupq_n_s32( -rhs ) );
        return *this;
    }

    // Left shift of every lane by rhs bits.
    FS_INLINE NEON_i32x4& operator<<=( const int32_t rhs )
    {
        // Register-count shift for the same reason as operator>>=: the
        // vshlq_n_* form needs a compile-time immediate.
        *this = vshlq_s32( *this, vdupq_n_s32( rhs ) );
        return *this;
    }

    // Bitwise complement of every lane.
    FS_INLINE NEON_i32x4 operator~() const
    {
        return vmvnq_s32( *this );
    }

    // Lane-wise negation.
    FS_INLINE NEON_i32x4 operator-() const
    {
        return vnegq_s32( *this );
    }
};
FASTSIMD_INTERNAL_OPERATORS_INT( NEON_i32x4, int32_t )
// NEON implementation of the FastSIMD function set for 128-bit vectors
// (4 x float / 4 x int32). Masks are integer vectors whose lanes are all-ones
// (true) or all-zeros (false).
//
// NOTE(review): several intrinsics used here (vsqrtq_f32, vrnd*q_f32,
// vcvtnq_s32_f32) look like ARMv8-only instructions - confirm the minimum
// target before enabling this level.
// NOTE(review): SSE_T additionally provides Reciprocal_f32, NMask_*,
// AnyMask_bool and BitwiseShiftRightZX_*; they are not implemented here yet.
template<FastSIMD::eLevel LEVEL_T>
class FastSIMD_NEON_T
{
public:
    static const FastSIMD::eLevel SIMD_Level = LEVEL_T;
    static const size_t VectorSize = 128 / 8;

    typedef NEON_f32x4 float32v;
    typedef NEON_i32x4 int32v;
    typedef NEON_i32x4 mask32v;

    // Load

    FS_INLINE static float32v Load_f32( void const* p )
    {
        return vld1q_f32( reinterpret_cast<float const*>(p) );
    }

    FS_INLINE static int32v Load_i32( void const* p )
    {
        return vld1q_s32( reinterpret_cast<int32_t const*>(p) );
    }

    // Store

    FS_INLINE static void Store_f32( void* p, float32v a )
    {
        vst1q_f32( reinterpret_cast<float*>(p), a );
    }

    FS_INLINE static void Store_i32( void* p, int32v a )
    {
        vst1q_s32( reinterpret_cast<int32_t*>(p), a );
    }

    // Cast (bit reinterpretation, no value conversion)

    FS_INLINE static float32v Casti32_f32( int32v a )
    {
        return vreinterpretq_f32_s32( a );
    }

    FS_INLINE static int32v Castf32_i32( float32v a )
    {
        return vreinterpretq_s32_f32( a );
    }

    // Convert (value conversion)

    FS_INLINE static float32v Converti32_f32( int32v a )
    {
        return vcvtq_f32_s32( a );
    }

    FS_INLINE static int32v Convertf32_i32( float32v a )
    {
        // Round to nearest, like _mm_cvtps_epi32 in SSE_T and nearbyint in the
        // Scalar level. vcvtq_s32_f32 would truncate toward zero instead.
        return vcvtnq_s32_f32( a );
    }

    // Comparisons
    // The compare intrinsics return unsigned lanes; they are reinterpreted to
    // the signed vector type used for mask32v.

    FS_INLINE static mask32v Equal_f32( float32v a, float32v b )
    {
        return vreinterpretq_s32_u32( vceqq_f32( a, b ) );
    }

    FS_INLINE static mask32v GreaterThan_f32( float32v a, float32v b )
    {
        return vreinterpretq_s32_u32( vcgtq_f32( a, b ) );
    }

    FS_INLINE static mask32v LessThan_f32( float32v a, float32v b )
    {
        return vreinterpretq_s32_u32( vcltq_f32( a, b ) );
    }

    FS_INLINE static mask32v GreaterEqualThan_f32( float32v a, float32v b )
    {
        return vreinterpretq_s32_u32( vcgeq_f32( a, b ) );
    }

    FS_INLINE static mask32v LessEqualThan_f32( float32v a, float32v b )
    {
        return vreinterpretq_s32_u32( vcleq_f32( a, b ) );
    }

    FS_INLINE static mask32v Equal_i32( int32v a, int32v b )
    {
        return vreinterpretq_s32_u32( vceqq_s32( a, b ) );
    }

    FS_INLINE static mask32v GreaterThan_i32( int32v a, int32v b )
    {
        return vreinterpretq_s32_u32( vcgtq_s32( a, b ) );
    }

    FS_INLINE static mask32v LessThan_i32( int32v a, int32v b )
    {
        return vreinterpretq_s32_u32( vcltq_s32( a, b ) );
    }

    // Select: per lane, a where the mask is set, b elsewhere.
    // vbslq takes (mask, value-if-set, value-if-clear).

    FS_INLINE static float32v Select_f32( mask32v m, float32v a, float32v b )
    {
        return vbslq_f32( vreinterpretq_u32_s32( m ), a, b );
    }

    FS_INLINE static int32v Select_i32( mask32v m, int32v a, int32v b )
    {
        return vbslq_s32( vreinterpretq_u32_s32( m ), a, b );
    }

    // Min, Max

    FS_INLINE static float32v Min_f32( float32v a, float32v b )
    {
        return vminq_f32( a, b );
    }

    FS_INLINE static float32v Max_f32( float32v a, float32v b )
    {
        return vmaxq_f32( a, b );
    }

    FS_INLINE static int32v Min_i32( int32v a, int32v b )
    {
        return vminq_s32( a, b );
    }

    FS_INLINE static int32v Max_i32( int32v a, int32v b )
    {
        return vmaxq_s32( a, b );
    }

    // Bitwise (float variants operate on the raw bit patterns)

    FS_INLINE static float32v BitwiseAnd_f32( float32v a, float32v b )
    {
        return vreinterpretq_f32_s32( vandq_s32( vreinterpretq_s32_f32( a ), vreinterpretq_s32_f32( b ) ) );
    }

    FS_INLINE static float32v BitwiseOr_f32( float32v a, float32v b )
    {
        return vreinterpretq_f32_s32( vorrq_s32( vreinterpretq_s32_f32( a ), vreinterpretq_s32_f32( b ) ) );
    }

    FS_INLINE static float32v BitwiseXor_f32( float32v a, float32v b )
    {
        return vreinterpretq_f32_s32( veorq_s32( vreinterpretq_s32_f32( a ), vreinterpretq_s32_f32( b ) ) );
    }

    FS_INLINE static float32v BitwiseNot_f32( float32v a )
    {
        return vreinterpretq_f32_s32( vmvnq_s32( vreinterpretq_s32_f32( a ) ) );
    }

    // a & ~b
    FS_INLINE static float32v BitwiseAndNot_f32( float32v a, float32v b )
    {
        return vreinterpretq_f32_s32( vandq_s32( vreinterpretq_s32_f32( a ), vmvnq_s32( vreinterpretq_s32_f32( b ) ) ) );
    }

    // a & ~b
    FS_INLINE static int32v BitwiseAndNot_i32( int32v a, int32v b )
    {
        return vandq_s32( a, vmvnq_s32( b ) );
    }

    // Abs

    FS_INLINE static float32v Abs_f32( float32v a )
    {
        return vabsq_f32( a );
    }

    FS_INLINE static int32v Abs_i32( int32v a )
    {
        return vabsq_s32( a );
    }

    // Float math

    FS_INLINE static float32v Sqrt_f32( float32v a )
    {
        return vsqrtq_f32( a );
    }

    // Hardware estimate of 1 / sqrt(a); no refinement step is applied.
    FS_INLINE static float32v InvSqrt_f32( float32v a )
    {
        return vrsqrteq_f32( a );
    }

    // Floor, Ceil, Round: each maps to a directed-rounding instruction.

    // Round toward negative infinity.
    FS_INLINE static float32v Floor_f32( float32v a )
    {
        return vrndmq_f32( a );
    }

    // Round toward positive infinity.
    FS_INLINE static float32v Ceil_f32( float32v a )
    {
        return vrndpq_f32( a );
    }

    // Round to nearest.
    FS_INLINE static float32v Round_f32( float32v a )
    {
        return vrndnq_f32( a );
    }

    // Mask: keep the lanes where the mask is set, zero the others.

    FS_INLINE static int32v Mask_i32( int32v a, mask32v m )
    {
        return a & m;
    }

    FS_INLINE static float32v Mask_f32( float32v a, mask32v m )
    {
        return BitwiseAnd_f32( a, vreinterpretq_f32_s32( m ) );
    }
};
#if FASTSIMD_COMPILE_NEON
// Concrete NEON level. The class template defined in this header is
// FastSIMD_NEON_T; the alias previously named a FastSIMD_SSE_T that this
// header does not define.
// NOTE(review): the other headers spell level enumerators as Level_* (e.g.
// Level_SSE41, Level_Scalar) - confirm that ELevel_NEON exists in eLevel.
typedef FastSIMD_NEON_T<FastSIMD::ELevel_NEON> FastSIMD_NEON;
#endif

View File

@ -0,0 +1,541 @@
#pragma once
#ifdef __GNUG__
#include <x86intrin.h>
#else
#include <intrin.h>
#endif
#include "VecTools.h"
namespace FastSIMD
{
// Four single-precision floats stored in one __m128 register.
// Comparison operators return a raw integer mask (__m128i) whose lanes are
// all-ones where the comparison holds and all-zeros elsewhere.
struct SSE_f32x4
{
    FASTSIMD_INTERNAL_TYPE_SET( SSE_f32x4, __m128 );

    // Vector whose lanes hold their own index: { 0, 1, 2, 3 }.
    FS_INLINE static SSE_f32x4 Incremented()
    {
        // _mm_set_ps lists lanes from highest to lowest.
        return _mm_set_ps( 3.0f, 2.0f, 1.0f, 0.0f );
    }

    // Broadcasts f into all four lanes.
    FS_INLINE explicit SSE_f32x4( float f )
    {
        *this = _mm_set1_ps( f );
    }

    // Builds a vector from four individual lane values; f0 lands in lane 0.
    FS_INLINE explicit SSE_f32x4( float f0, float f1, float f2, float f3 )
    {
        *this = _mm_set_ps( f3, f2, f1, f0 );
    }

    FS_INLINE SSE_f32x4& operator+=( const SSE_f32x4& rhs )
    {
        *this = _mm_add_ps( *this, rhs );
        return *this;
    }

    FS_INLINE SSE_f32x4& operator-=( const SSE_f32x4& rhs )
    {
        *this = _mm_sub_ps( *this, rhs );
        return *this;
    }

    FS_INLINE SSE_f32x4& operator*=( const SSE_f32x4& rhs )
    {
        *this = _mm_mul_ps( *this, rhs );
        return *this;
    }

    FS_INLINE SSE_f32x4& operator/=( const SSE_f32x4& rhs )
    {
        *this = _mm_div_ps( *this, rhs );
        return *this;
    }

    // The bitwise operators act on the raw bit patterns of the floats.
    FS_INLINE SSE_f32x4& operator&=( const SSE_f32x4& rhs )
    {
        *this = _mm_and_ps( *this, rhs );
        return *this;
    }

    FS_INLINE SSE_f32x4& operator|=( const SSE_f32x4& rhs )
    {
        *this = _mm_or_ps( *this, rhs );
        return *this;
    }

    FS_INLINE SSE_f32x4& operator^=( const SSE_f32x4& rhs )
    {
        *this = _mm_xor_ps( *this, rhs );
        return *this;
    }

    // Bitwise complement: XOR with an all-ones pattern.
    FS_INLINE SSE_f32x4 operator~() const
    {
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
        // A register compared with itself yields all-ones without a memory load.
        const __m128i neg1 = _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() );
#else
        const __m128i neg1 = _mm_set1_epi32( -1 );
#endif
        return _mm_xor_ps( *this, _mm_castsi128_ps( neg1 ) );
    }

    // Negation: flips the sign bit of every lane.
    FS_INLINE SSE_f32x4 operator-() const
    {
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
        // All-ones shifted left by 31 leaves only the sign bit. Both compare
        // operands must be well defined: comparing _mm_undefined_si128() with
        // zero gives an unspecified result, not a guaranteed all-ones value.
        const __m128i minInt = _mm_slli_epi32( _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() ), 31 );
#else
        const __m128i minInt = _mm_set1_epi32( 0x80000000 );
#endif
        return _mm_xor_ps( *this, _mm_castsi128_ps( minInt ) );
    }

    FS_INLINE __m128i operator==( const SSE_f32x4& rhs )
    {
        return _mm_castps_si128( _mm_cmpeq_ps( *this, rhs ) );
    }

    FS_INLINE __m128i operator!=( const SSE_f32x4& rhs )
    {
        return _mm_castps_si128( _mm_cmpneq_ps( *this, rhs ) );
    }

    FS_INLINE __m128i operator>( const SSE_f32x4& rhs )
    {
        return _mm_castps_si128( _mm_cmpgt_ps( *this, rhs ) );
    }

    FS_INLINE __m128i operator<( const SSE_f32x4& rhs )
    {
        return _mm_castps_si128( _mm_cmplt_ps( *this, rhs ) );
    }

    FS_INLINE __m128i operator>=( const SSE_f32x4& rhs )
    {
        return _mm_castps_si128( _mm_cmpge_ps( *this, rhs ) );
    }

    FS_INLINE __m128i operator<=( const SSE_f32x4& rhs )
    {
        return _mm_castps_si128( _mm_cmple_ps( *this, rhs ) );
    }
};
FASTSIMD_INTERNAL_OPERATORS_FLOAT( SSE_f32x4 )
// Four signed 32-bit integers stored in one __m128i register. LEVEL_T selects
// between native SSE4.1 instructions and emulations built from older ones.
// Comparison operators return a vector whose lanes are all-ones where the
// comparison holds and all-zeros elsewhere.
template<eLevel LEVEL_T>
struct SSE_i32x4
{
    FASTSIMD_INTERNAL_TYPE_SET( SSE_i32x4, __m128i );

    // Vector whose lanes hold their own index: { 0, 1, 2, 3 }.
    FS_INLINE static SSE_i32x4 Incremented()
    {
        return _mm_setr_epi32( 0, 1, 2, 3 );
    }

    // Broadcasts i into all four lanes.
    FS_INLINE explicit SSE_i32x4( int32_t i )
        : vector( _mm_set1_epi32( i ) )
    {
    }

    // Builds a vector from four individual lane values; i0 lands in lane 0.
    FS_INLINE explicit SSE_i32x4( int32_t i0, int32_t i1, int32_t i2, int32_t i3 )
        : vector( _mm_setr_epi32( i0, i1, i2, i3 ) )
    {
    }

    FS_INLINE SSE_i32x4& operator+=( const SSE_i32x4& rhs )
    {
        vector = _mm_add_epi32( vector, rhs.vector );
        return *this;
    }

    FS_INLINE SSE_i32x4& operator-=( const SSE_i32x4& rhs )
    {
        vector = _mm_sub_epi32( vector, rhs.vector );
        return *this;
    }

    // 32-bit multiply emulated with two 32x32->64 multiplies, for levels
    // below SSE4.1.
    template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
    FS_INLINE SSE_i32x4& operator*=( const SSE_i32x4& rhs )
    {
        // _mm_mul_epu32 multiplies lanes 0 and 2 only; shifting both inputs
        // down by 4 bytes brings lanes 1 and 3 into those positions.
        const __m128i evenProducts = _mm_mul_epu32( vector, rhs.vector );
        const __m128i oddProducts = _mm_mul_epu32( _mm_srli_si128( vector, 4 ), _mm_srli_si128( rhs.vector, 4 ) );

        // Gather the low 32 bits of each 64-bit product, then interleave the
        // even and odd results back into lane order.
        const __m128i evenLow = _mm_shuffle_epi32( evenProducts, _MM_SHUFFLE( 0, 0, 2, 0 ) );
        const __m128i oddLow = _mm_shuffle_epi32( oddProducts, _MM_SHUFFLE( 0, 0, 2, 0 ) );
        vector = _mm_unpacklo_epi32( evenLow, oddLow );
        return *this;
    }

    // Native 32-bit multiply, for SSE4.1 and above.
    template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
    FS_INLINE SSE_i32x4& operator*=( const SSE_i32x4& rhs )
    {
        vector = _mm_mullo_epi32( vector, rhs.vector );
        return *this;
    }

    FS_INLINE SSE_i32x4& operator&=( const SSE_i32x4& rhs )
    {
        vector = _mm_and_si128( vector, rhs.vector );
        return *this;
    }

    FS_INLINE SSE_i32x4& operator|=( const SSE_i32x4& rhs )
    {
        vector = _mm_or_si128( vector, rhs.vector );
        return *this;
    }

    FS_INLINE SSE_i32x4& operator^=( const SSE_i32x4& rhs )
    {
        vector = _mm_xor_si128( vector, rhs.vector );
        return *this;
    }

    // Arithmetic (sign-extending) right shift.
    FS_INLINE SSE_i32x4& operator>>=( int32_t rhs )
    {
        vector = _mm_srai_epi32( vector, rhs );
        return *this;
    }

    FS_INLINE SSE_i32x4& operator<<=( int32_t rhs )
    {
        vector = _mm_slli_epi32( vector, rhs );
        return *this;
    }

    // Bitwise complement: XOR with an all-ones pattern.
    FS_INLINE SSE_i32x4 operator~() const
    {
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
        const __m128i allOnes = _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() );
#else
        const __m128i allOnes = _mm_set1_epi32( -1 );
#endif
        return _mm_xor_si128( vector, allOnes );
    }

    // Negation, computed as 0 - value.
    FS_INLINE SSE_i32x4 operator-() const
    {
        return _mm_sub_epi32( _mm_setzero_si128(), vector );
    }

    FS_INLINE SSE_i32x4 operator==( const SSE_i32x4& rhs )
    {
        return _mm_cmpeq_epi32( vector, rhs.vector );
    }

    FS_INLINE SSE_i32x4 operator>( const SSE_i32x4& rhs )
    {
        return _mm_cmpgt_epi32( vector, rhs.vector );
    }

    FS_INLINE SSE_i32x4 operator<( const SSE_i32x4& rhs )
    {
        return _mm_cmplt_epi32( vector, rhs.vector );
    }
};
FASTSIMD_INTERNAL_OPERATORS_INT_TEMPLATED( SSE_i32x4, int32_t )
// SSE-family implementation of the FastSIMD function set for 128-bit vectors
// (4 x float / 4 x int32). LEVEL_T picks, per function, between a native
// instruction of a newer SSE level and an emulation for older ones; the
// choice is made at compile time through enable_if overloads.
// Masks are integer vectors whose lanes are all-ones (true) or all-zeros (false).
template<eLevel LEVEL_T>
class SSE_T
{
public:
    static_assert( LEVEL_T >= Level_SSE && LEVEL_T <= Level_SSE42, "Cannot create template with unsupported SIMD level" );

    static constexpr eLevel SIMD_Level = LEVEL_T;

    // Number of elements of ElementSize bits that fit in the 128-bit vector
    // (the default of 8 gives the size in bytes).
    template<size_t ElementSize = 8>
    static constexpr size_t VectorSize = 128 / ElementSize;

    typedef SSE_f32x4 float32v;
    typedef SSE_i32x4<LEVEL_T> int32v;
    typedef SSE_i32x4<LEVEL_T> mask32v;

    // Load (unaligned)

    FS_INLINE static float32v Load_f32( void const* p )
    {
        return _mm_loadu_ps( reinterpret_cast<float const*>(p) );
    }

    FS_INLINE static int32v Load_i32( void const* p )
    {
        return _mm_loadu_si128( reinterpret_cast<__m128i const*>(p) );
    }

    // Store (unaligned)

    FS_INLINE static void Store_f32( void* p, float32v a )
    {
        _mm_storeu_ps( reinterpret_cast<float*>(p), a );
    }

    FS_INLINE static void Store_i32( void* p, int32v a )
    {
        _mm_storeu_si128( reinterpret_cast<__m128i*>(p), a );
    }

    // Cast (bit reinterpretation, no value conversion)

    FS_INLINE static float32v Casti32_f32( int32v a )
    {
        return _mm_castsi128_ps( a );
    }

    FS_INLINE static int32v Castf32_i32( float32v a )
    {
        return _mm_castps_si128( a );
    }

    // Convert (value conversion)

    FS_INLINE static float32v Converti32_f32( int32v a )
    {
        return _mm_cvtepi32_ps( a );
    }

    FS_INLINE static int32v Convertf32_i32( float32v a )
    {
        return _mm_cvtps_epi32( a );
    }

    // Select: per lane, a where the mask is set, b elsewhere.

    template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
    FS_INLINE static float32v Select_f32( mask32v m, float32v a, float32v b )
    {
        // b ^ (m & (a ^ b)) yields a where m is all-ones and b where it is zero.
        __m128 mf = _mm_castsi128_ps( m );
        return _mm_xor_ps( b, _mm_and_ps( mf, _mm_xor_ps( a, b ) ) );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
    FS_INLINE static float32v Select_f32( mask32v m, float32v a, float32v b )
    {
        return _mm_blendv_ps( b, a, _mm_castsi128_ps( m ) );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
    FS_INLINE static int32v Select_i32( mask32v m, int32v a, int32v b )
    {
        // Same XOR/AND blend as the float overload.
        return _mm_xor_si128( b, _mm_and_si128( m, _mm_xor_si128( a, b ) ) );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
    FS_INLINE static int32v Select_i32( mask32v m, int32v a, int32v b )
    {
        return _mm_castps_si128( _mm_blendv_ps( _mm_castsi128_ps( b ), _mm_castsi128_ps( a ), _mm_castsi128_ps( m ) ) );
    }

    // Min, Max

    FS_INLINE static float32v Min_f32( float32v a, float32v b )
    {
        return _mm_min_ps( a, b );
    }

    FS_INLINE static float32v Max_f32( float32v a, float32v b )
    {
        return _mm_max_ps( a, b );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
    FS_INLINE static int32v Min_i32( int32v a, int32v b )
    {
        return Select_i32( a < b, a, b );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
    FS_INLINE static int32v Min_i32( int32v a, int32v b )
    {
        return _mm_min_epi32( a, b );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
    FS_INLINE static int32v Max_i32( int32v a, int32v b )
    {
        return Select_i32( a > b, a, b );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
    FS_INLINE static int32v Max_i32( int32v a, int32v b )
    {
        return _mm_max_epi32( a, b );
    }

    // Bitwise

    // a & ~b. The intrinsic inverts its FIRST operand, hence the swapped order.
    FS_INLINE static float32v BitwiseAndNot_f32( float32v a, float32v b )
    {
        return _mm_andnot_ps( b, a );
    }

    // a & ~b. The intrinsic inverts its FIRST operand, hence the swapped order.
    FS_INLINE static int32v BitwiseAndNot_i32( int32v a, int32v b )
    {
        return _mm_andnot_si128( b, a );
    }

    // Logical (zero-filling) right shift of the float bit patterns.
    FS_INLINE static float32v BitwiseShiftRightZX_f32( float32v a, int32_t b )
    {
        return Casti32_f32( _mm_srli_epi32( Castf32_i32( a ), b ) );
    }

    // Logical (zero-filling) right shift.
    FS_INLINE static int32v BitwiseShiftRightZX_i32( int32v a, int32_t b )
    {
        return _mm_srli_epi32( a, b );
    }

    // Abs

    // Clears the sign bit of every lane.
    FS_INLINE static float32v Abs_f32( float32v a )
    {
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
        // All-ones shifted right by one leaves every bit but the sign bit.
        const __m128i intMax = _mm_srli_epi32( _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() ), 1 );
#else
        const __m128i intMax = _mm_set1_epi32( 0x7FFFFFFF );
#endif
        return _mm_and_ps( a, _mm_castsi128_ps( intMax ) );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSSE3)>* = nullptr>
    FS_INLINE static int32v Abs_i32( int32v a )
    {
        // signMask is all-ones for negative lanes; (a ^ mask) - mask negates
        // exactly those lanes and leaves the others untouched.
        __m128i signMask = _mm_srai_epi32( a, 31 );
        return _mm_sub_epi32( _mm_xor_si128( a, signMask ), signMask );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSSE3)>* = nullptr>
    FS_INLINE static int32v Abs_i32( int32v a )
    {
        return _mm_abs_epi32( a );
    }

    // Float math

    FS_INLINE static float32v Sqrt_f32( float32v a )
    {
        return _mm_sqrt_ps( a );
    }

    // Approximate 1 / sqrt(a) (hardware estimate, not correctly rounded).
    FS_INLINE static float32v InvSqrt_f32( float32v a )
    {
        return _mm_rsqrt_ps( a );
    }

    // Approximate 1 / a (hardware estimate, not correctly rounded).
    FS_INLINE static float32v Reciprocal_f32( float32v a )
    {
        return _mm_rcp_ps( a );
    }

    // Floor, Ceil, Round: http://dss.stephanierct.com/DevBlog/?p=8

    template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
    FS_INLINE static float32v Floor_f32( float32v a )
    {
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
        // Builds the bit pattern of 1.0f (0x3F800000) from an all-ones register.
        const __m128 f1 = _mm_castsi128_ps( _mm_slli_epi32( _mm_srli_epi32( _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() ), 25 ), 23 ) );
#else
        const __m128 f1 = _mm_set1_ps( 1.0f );
#endif
        // Truncate toward zero, then subtract 1 in the lanes where truncation
        // went up (negative non-integers).
        __m128 fval = _mm_cvtepi32_ps( _mm_cvttps_epi32( a ) );
        return _mm_sub_ps( fval, _mm_and_ps( _mm_cmplt_ps( a, fval ), f1 ) );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
    FS_INLINE static float32v Floor_f32( float32v a )
    {
        return _mm_round_ps( a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
    FS_INLINE static float32v Ceil_f32( float32v a )
    {
#if FASTSIMD_CONFIG_GENERATE_CONSTANTS
        // Builds the bit pattern of 1.0f (0x3F800000) from an all-ones register.
        const __m128 f1 = _mm_castsi128_ps( _mm_slli_epi32( _mm_srli_epi32( _mm_cmpeq_epi32( _mm_setzero_si128(), _mm_setzero_si128() ), 25 ), 23 ) );
#else
        const __m128 f1 = _mm_set1_ps( 1.0f );
#endif
        // Truncate toward zero, then add 1 in the lanes where truncation went
        // down (positive non-integers).
        __m128 fval = _mm_cvtepi32_ps( _mm_cvttps_epi32( a ) );
        __m128 cmp = _mm_cmplt_ps( fval, a );
        return _mm_add_ps( fval, _mm_and_ps( cmp, f1 ) );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
    FS_INLINE static float32v Ceil_f32( float32v a )
    {
        return _mm_round_ps( a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC );
    }

    // Rounding emulation for levels below SSE4.1: add 0.5 carrying the sign of
    // a, then truncate toward zero.
    // NOTE(review): exact halves round away from zero here; confirm whether
    // that matches the tie handling of the SSE4.1 overload below.
    template<eLevel L = LEVEL_T, std::enable_if_t<(L < Level_SSE41)>* = nullptr>
    FS_INLINE static float32v Round_f32( float32v a )
    {
        __m128 aSign = _mm_and_ps( a, _mm_castsi128_ps( int32v( 0x80000000 ) ) );
        return _mm_cvtepi32_ps( _mm_cvttps_epi32( a + float32v(_mm_or_ps( aSign, float32v( 0.5f ) ) ) ) );
    }

    template<eLevel L = LEVEL_T, std::enable_if_t<(L >= Level_SSE41)>* = nullptr>
    FS_INLINE static float32v Round_f32( float32v a )
    {
        return _mm_round_ps( a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC );
    }

    // Mask: keep the lanes where the mask is set, zero the others.

    FS_INLINE static int32v Mask_i32( int32v a, mask32v m )
    {
        return a & m;
    }

    FS_INLINE static float32v Mask_f32( float32v a, mask32v m )
    {
        return _mm_and_ps( a, _mm_castsi128_ps( m ) );
    }

    // NMask: zero the lanes where the mask is set, keep the others.

    FS_INLINE static int32v NMask_i32( int32v a, mask32v m )
    {
        return _mm_andnot_si128( m, a );
    }

    FS_INLINE static float32v NMask_f32( float32v a, mask32v m )
    {
        return _mm_andnot_ps( _mm_castsi128_ps( m ), a );
    }

    // True when the top bit of at least one mask lane is set.
    FS_INLINE static bool AnyMask_bool( mask32v m )
    {
        return _mm_movemask_ps( _mm_castsi128_ps( m ) );
    }
};
// One alias per SSE level, each only declared when that level is compiled in.
#if FASTSIMD_COMPILE_SSE
using SSE = SSE_T<Level_SSE>;
#endif

#if FASTSIMD_COMPILE_SSE2
using SSE2 = SSE_T<Level_SSE2>;
#endif

#if FASTSIMD_COMPILE_SSE3
using SSE3 = SSE_T<Level_SSE3>;
#endif

#if FASTSIMD_COMPILE_SSSE3
using SSSE3 = SSE_T<Level_SSSE3>;
#endif

#if FASTSIMD_COMPILE_SSE41
using SSE41 = SSE_T<Level_SSE41>;
#endif

#if FASTSIMD_COMPILE_SSE42
using SSE42 = SSE_T<Level_SSE42>;
#endif
}

View File

@ -0,0 +1,429 @@
#pragma once
#include "VecTools.h"
#include <algorithm>
#include <cmath>
#include <cstring>
namespace FastSIMD
{
// Reinterprets the bytes of `a` as a value of type OUT without converting the
// value (e.g. float <-> int32_t bit patterns).
//
// The bytes are copied with memcpy: writing one union member and reading
// another is undefined behaviour in C++, whereas memcpy between trivially
// copyable objects is well defined and compiles to the same register move.
// If the two types differ in size, only the bytes they share are copied and
// the rest of the result stays zero.
template<typename OUT, typename IN>
OUT ScalarCast( IN a )
{
    OUT out{};
    std::memcpy( &out, &a, sizeof( OUT ) < sizeof( IN ) ? sizeof( OUT ) : sizeof( IN ) );
    return out;
}
// Single-lane "vector" wrapping one float, used by the non-SIMD Scalar level.
// Bitwise operators act on the float's raw bit pattern; comparisons return a
// plain bool.
struct Scalar_Float
{
    FASTSIMD_INTERNAL_TYPE_SET( Scalar_Float, float );

    // Lane-index vector; with a single lane that is just index 0.
    FS_INLINE static Scalar_Float Incremented()
    {
        return 0.0f;
    }

    FS_INLINE Scalar_Float& operator+=( const Scalar_Float& rhs )
    {
        vector += rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Float& operator-=( const Scalar_Float& rhs )
    {
        vector -= rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Float& operator*=( const Scalar_Float& rhs )
    {
        vector *= rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Float& operator/=( const Scalar_Float& rhs )
    {
        vector /= rhs.vector;
        return *this;
    }

    // The three bitwise operators reinterpret both floats as int32_t, combine
    // the bit patterns, and reinterpret the result back as a float.
    FS_INLINE Scalar_Float& operator&=( const Scalar_Float& rhs )
    {
        const int32_t lhsBits = ScalarCast<int32_t, float>( vector );
        const int32_t rhsBits = ScalarCast<int32_t, float>( rhs.vector );
        vector = ScalarCast<float>( lhsBits & rhsBits );
        return *this;
    }

    FS_INLINE Scalar_Float& operator|=( const Scalar_Float& rhs )
    {
        const int32_t lhsBits = ScalarCast<int32_t, float>( vector );
        const int32_t rhsBits = ScalarCast<int32_t, float>( rhs.vector );
        vector = ScalarCast<float>( lhsBits | rhsBits );
        return *this;
    }

    FS_INLINE Scalar_Float& operator^=( const Scalar_Float& rhs )
    {
        const int32_t lhsBits = ScalarCast<int32_t, float>( vector );
        const int32_t rhsBits = ScalarCast<int32_t, float>( rhs.vector );
        vector = ScalarCast<float>( lhsBits ^ rhsBits );
        return *this;
    }

    // Bitwise complement of the float's bit pattern.
    FS_INLINE Scalar_Float operator~() const
    {
        const int32_t bits = ScalarCast<int32_t, float>( vector );
        return ScalarCast<float>( ~bits );
    }

    FS_INLINE Scalar_Float operator-() const
    {
        return -vector;
    }

    FS_INLINE bool operator==( const Scalar_Float& rhs )
    {
        return vector == rhs.vector;
    }

    FS_INLINE bool operator!=( const Scalar_Float& rhs )
    {
        return vector != rhs.vector;
    }

    FS_INLINE bool operator>( const Scalar_Float& rhs )
    {
        return vector > rhs.vector;
    }

    FS_INLINE bool operator<( const Scalar_Float& rhs )
    {
        return vector < rhs.vector;
    }

    FS_INLINE bool operator>=( const Scalar_Float& rhs )
    {
        return vector >= rhs.vector;
    }

    FS_INLINE bool operator<=( const Scalar_Float& rhs )
    {
        return vector <= rhs.vector;
    }
};
FASTSIMD_INTERNAL_OPERATORS_FLOAT( Scalar_Float )
// Single-lane "vector" wrapping one int32_t, used by the non-SIMD Scalar level.
// Comparisons return a plain bool.
struct Scalar_Int
{
    FASTSIMD_INTERNAL_TYPE_SET( Scalar_Int, int32_t );

    // Lane-index vector; with a single lane that is just index 0.
    FS_INLINE static Scalar_Int Incremented()
    {
        return 0;
    }

    FS_INLINE Scalar_Int& operator+=( const Scalar_Int& rhs )
    {
        vector += rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Int& operator-=( const Scalar_Int& rhs )
    {
        vector -= rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Int& operator*=( const Scalar_Int& rhs )
    {
        vector *= rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Int& operator&=( const Scalar_Int& rhs )
    {
        vector &= rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Int& operator|=( const Scalar_Int& rhs )
    {
        vector |= rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Int& operator^=( const Scalar_Int& rhs )
    {
        vector ^= rhs.vector;
        return *this;
    }

    // Built-in signed right shift of the wrapped value.
    FS_INLINE Scalar_Int& operator>>=( int32_t rhs )
    {
        vector = vector >> rhs;
        return *this;
    }

    FS_INLINE Scalar_Int& operator<<=( int32_t rhs )
    {
        vector = vector << rhs;
        return *this;
    }

    FS_INLINE Scalar_Int operator~() const
    {
        return ~vector;
    }

    FS_INLINE Scalar_Int operator-() const
    {
        return -vector;
    }

    FS_INLINE bool operator==( const Scalar_Int& rhs )
    {
        return vector == rhs.vector;
    }

    FS_INLINE bool operator>( const Scalar_Int& rhs )
    {
        return vector > rhs.vector;
    }

    FS_INLINE bool operator<( const Scalar_Int& rhs )
    {
        return vector < rhs.vector;
    }
};
FASTSIMD_INTERNAL_OPERATORS_INT( Scalar_Int, int32_t )
// Single-lane mask wrapping one bool, used by the non-SIMD Scalar level.
// The "bitwise" operators are implemented with logical operations.
struct Scalar_Mask
{
    FASTSIMD_INTERNAL_TYPE_SET( Scalar_Mask, bool );

    // Logical negation of the mask.
    FS_INLINE Scalar_Mask operator~() const
    {
        return !vector;
    }

    FS_INLINE Scalar_Mask& operator&=( const Scalar_Mask& rhs )
    {
        vector = vector && rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Mask& operator|=( const Scalar_Mask& rhs )
    {
        vector = vector || rhs.vector;
        return *this;
    }

    FS_INLINE Scalar_Mask operator&( const Scalar_Mask& rhs )
    {
        const bool both = vector && rhs.vector;
        return both;
    }

    FS_INLINE Scalar_Mask operator|( const Scalar_Mask& rhs )
    {
        const bool either = vector || rhs.vector;
        return either;
    }
};
// Non-SIMD fallback implementing the FastSIMD function set on a single lane:
// one float, one int32_t and one bool mask per "vector".
class Scalar
{
public:
    static constexpr eLevel SIMD_Level = FastSIMD::Level_Scalar;

    // Number of elements of ElementSize bits that fit in the 32-bit "vector"
    // (the default of 8 gives the size in bytes).
    template<size_t ElementSize = 8>
    static constexpr size_t VectorSize = 32 / ElementSize;

    typedef Scalar_Float float32v;
    typedef Scalar_Int int32v;
    typedef Scalar_Mask mask32v;

    // Load
    // The memory is read through its element type (float / int32_t) and then
    // wrapped. Reading it through a pointer to the wrapper struct would access
    // plain float/int storage through an unrelated class type.

    FS_INLINE static float32v Load_f32( void const* p )
    {
        return *static_cast<float const*>(p);
    }

    FS_INLINE static int32v Load_i32( void const* p )
    {
        return *static_cast<int32_t const*>(p);
    }

    // Store (written through the element type, for the same reason as Load)

    FS_INLINE static void Store_f32( void* p, float32v a )
    {
        *static_cast<float*>(p) = a;
    }

    FS_INLINE static void Store_i32( void* p, int32v a )
    {
        *static_cast<int32_t*>(p) = a;
    }

    // Cast (bit reinterpretation, no value conversion)

    FS_INLINE static float32v Casti32_f32( int32v a )
    {
        return ScalarCast<float, int32_t>( a );
    }

    FS_INLINE static int32v Castf32_i32( float32v a )
    {
        return ScalarCast<int32_t, float>( a );
    }

    // Convert (value conversion)

    FS_INLINE static float32v Converti32_f32( int32v a )
    {
        return static_cast<float>(a);
    }

    // Rounds using the current floating-point rounding mode (nearbyint).
    FS_INLINE static int32v Convertf32_i32( float32v a )
    {
        return static_cast<int32_t>(nearbyint( a ));
    }

    // Select: a when the mask is set, b otherwise.

    FS_INLINE static float32v Select_f32( mask32v m, float32v a, float32v b )
    {
        return m ? a : b;
    }

    FS_INLINE static int32v Select_i32( mask32v m, int32v a, int32v b )
    {
        return m ? a : b;
    }

    // Min, Max

    FS_INLINE static float32v Min_f32( float32v a, float32v b )
    {
        return fminf( a, b );
    }

    FS_INLINE static float32v Max_f32( float32v a, float32v b )
    {
        return fmaxf( a, b );
    }

    FS_INLINE static int32v Min_i32( int32v a, int32v b )
    {
        return std::min( a, b );
    }

    FS_INLINE static int32v Max_i32( int32v a, int32v b )
    {
        return std::max( a, b );
    }

    // Bitwise

    // a & ~b on the float bit patterns.
    FS_INLINE static float32v BitwiseAndNot_f32( float32v a, float32v b )
    {
        return Casti32_f32( Castf32_i32( a ) & ~Castf32_i32( b ) );
    }

    // a & ~b
    FS_INLINE static int32v BitwiseAndNot_i32( int32v a, int32v b )
    {
        return a & ~b;
    }

    // Logical (zero-filling) right shift of the float bit pattern; the shift
    // is done on an unsigned value so no sign bits are shifted in.
    FS_INLINE static float32v BitwiseShiftRightZX_f32( float32v a, int32_t b )
    {
        return Casti32_f32( int32_t( uint32_t( Castf32_i32( a ) ) >> b ) );
    }

    // Logical (zero-filling) right shift.
    FS_INLINE static int32v BitwiseShiftRightZX_i32( int32v a, int32_t b )
    {
        return int32_t( uint32_t( a ) >> b );
    }

    // Abs

    FS_INLINE static float32v Abs_f32( float32v a )
    {
        return fabsf( a );
    }

    FS_INLINE static int32v Abs_i32( int32v a )
    {
        return abs( a );
    }

    // Float math

    FS_INLINE static float32v Sqrt_f32( float32v a )
    {
        return sqrtf( a );
    }

    // Approximate 1 / sqrt(a): an initial guess derived from the float's bit
    // pattern (magic constant minus half the bits) refined by one
    // Newton-Raphson step.
    FS_INLINE static float32v InvSqrt_f32( float32v a )
    {
        const float xhalf = 0.5f * static_cast<float>( a );
        a = Casti32_f32( 0x5f3759df - ( static_cast<int32_t>( Castf32_i32( a ) ) >> 1 ) );
        a *= ( 1.5f - xhalf * static_cast<float>( a ) * static_cast<float>( a ) );
        return a;
    }

    // Approximate 1 / a: a bit-pattern estimate of pow( a, -0.5 ) which is
    // then squared, since pow( pow( x, -0.5 ), 2 ) = pow( x, -1 ) = 1.0 / x.
    FS_INLINE static float32v Reciprocal_f32( float32v a )
    {
        a = Casti32_f32( ( 0xbe6eb3beU - static_cast<int32_t>( Castf32_i32( a ) ) ) >> 1 );
        return a * a;
    }

    // Floor, Ceil, Round

    FS_INLINE static float32v Floor_f32( float32v a )
    {
        return floorf( a );
    }

    FS_INLINE static float32v Ceil_f32( float32v a )
    {
        return ceilf( a );
    }

    // Rounds using the current floating-point rounding mode (nearbyintf).
    FS_INLINE static float32v Round_f32( float32v a )
    {
        return nearbyintf( a );
    }

    // Mask: keep the value when the mask is set, zero otherwise.

    FS_INLINE static int32v Mask_i32( int32v a, mask32v m )
    {
        return m ? a : int32v(0);
    }

    FS_INLINE static float32v Mask_f32( float32v a, mask32v m )
    {
        return m ? a : float32v(0);
    }

    // NMask: zero the value when the mask is set, keep it otherwise.

    FS_INLINE static int32v NMask_i32( int32v a, mask32v m )
    {
        return m ? int32v(0) : a;
    }

    FS_INLINE static float32v NMask_f32( float32v a, mask32v m )
    {
        return m ? float32v(0) : a;
    }

    // With a single lane, "any lane set" is the mask itself.
    FS_INLINE static bool AnyMask_bool( mask32v m )
    {
        return m;
    }
};
}

View File

@ -0,0 +1,22 @@
#pragma once
#include "FastSIMD/FastSIMD.h"
#include "FastSIMD/TypeList.h"
template<typename CLASS, typename FS>
class FS_T;
template<typename CLASS, FastSIMD::eLevel LEVEL>
CLASS* FastSIMD::ClassFactory()
{
if constexpr( ( CLASS::Supported_SIMD_Levels & LEVEL & FastSIMD::COMPILED_SIMD_LEVELS ) != 0 )
{
static_assert( std::is_base_of_v<CLASS, FS_T<CLASS, FS_SIMD_CLASS>> );
return new FS_T<CLASS, FS_SIMD_CLASS>;
}
return nullptr;
}
// Expands to an explicit instantiation of FastSIMD::ClassFactory for CLASS at the SIMD
// level of FS_SIMD_CLASS.
// NOTE(review): the build list included below presumably invokes FASTSIMD_BUILD_CLASS
// once per class that needs a factory — confirm against FastSIMD_BuildList.inl.
#define FASTSIMD_BUILD_CLASS( CLASS ) \
template CLASS* FastSIMD::ClassFactory<CLASS, FS_SIMD_CLASS::SIMD_Level>();
#include "../FastSIMD_BuildList.inl"

View File

@ -0,0 +1,66 @@
#pragma once
#include <cinttypes>
#include "FastSIMD/FastSIMD.h"
#include "FastSIMD/FunctionList.h"
// Boilerplate for a wrapper type CLASS around the underlying TYPE, meant to be expanded
// inside the body of CLASS. It declares the data member `vector`, a default constructor,
// a converting constructor and an assignment operator taking TYPE, and an implicit
// conversion back to TYPE.
// Note that the default constructor has an empty body and does not initialise `vector`.
#define FASTSIMD_INTERNAL_TYPE_SET( CLASS, TYPE ) \
TYPE vector; \
FS_INLINE CLASS() { } \
FS_INLINE CLASS( const TYPE& v ) : vector(v) {}; \
FS_INLINE CLASS& operator = ( const TYPE& v ) { vector = v; return *this; } \
FS_INLINE operator TYPE() const { return vector; }
// Defines the binary operator OPERATOR for (TYPE, TYPE2) in terms of the matching compound
// assignment OPERATOREQUALS: the left operand is taken by value, updated in place and
// returned. TYPE must therefore already provide OPERATOREQUALS.
#define FASTSIMD_INTERNAL_OPERATOR( TYPE, TYPE2, OPERATOR, OPERATOREQUALS ) \
FS_INLINE static TYPE operator OPERATOR ( TYPE lhs, TYPE2 rhs ) \
{ \
lhs OPERATOREQUALS rhs; \
return lhs; \
}
// Same as FASTSIMD_INTERNAL_OPERATOR, but the generated operator is a function template
// over a FastSIMD::eLevel parameter named L, so TYPE and TYPE2 may refer to L.
#define FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE, TYPE2, OPERATOR, OPERATOREQUALS ) \
template<FastSIMD::eLevel L> \
FS_INLINE static TYPE operator OPERATOR ( TYPE lhs, TYPE2 rhs ) \
{ \
lhs OPERATOREQUALS rhs; \
return lhs; \
}
// Generates the binary operators of a float wrapper type: + - * / and the bitwise
// & | ^, each taking the right operand as `const TYPE&`.
#define FASTSIMD_INTERNAL_OPERATORS_FLOAT( TYPE ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, +, += ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, -, -= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, *, *= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, /, /= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, &, &= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, |, |= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, ^, ^= )
// Templated counterpart of FASTSIMD_INTERNAL_OPERATORS_FLOAT: TYPE is a template taking a
// FastSIMD::eLevel, and the operators + - * / & | ^ are generated for TYPE<L>.
#define FASTSIMD_INTERNAL_OPERATORS_FLOAT_TEMPLATED( TYPE ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, +, += ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, -, -= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, *, *= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, /, /= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, &, &= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, |, |= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, ^, ^= )
// Generates the binary operators of an integer wrapper type: + - * & | ^ with a
// `const TYPE&` right operand, plus the shifts >> and << whose right operand is TYPE2.
// No division operator is generated.
#define FASTSIMD_INTERNAL_OPERATORS_INT( TYPE, TYPE2 ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, +, += ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, -, -= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, *, *= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, &, &= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, |, |= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, const TYPE&, ^, ^= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, TYPE2, >>, >>= ) \
FASTSIMD_INTERNAL_OPERATOR( TYPE, TYPE2, <<, <<= )
// Templated counterpart of FASTSIMD_INTERNAL_OPERATORS_INT: TYPE is a template taking a
// FastSIMD::eLevel, and the operators + - * & | ^ >> << are generated for TYPE<L>
// (shifts take TYPE2 as the right operand).
#define FASTSIMD_INTERNAL_OPERATORS_INT_TEMPLATED( TYPE, TYPE2 ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, +, += ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, -, -= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, *, *= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, &, &= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, |, |= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, const TYPE<L>&, ^, ^= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, TYPE2, >>, >>= ) \
FASTSIMD_INTERNAL_OPERATOR_TEMPLATED( TYPE<L>, TYPE2, <<, <<= )

View File

@ -0,0 +1,29 @@
#include "fast_noise_2.h"
// Creates the resource with a hard-coded encoded node tree, so that a generator is
// assigned as soon as the object exists.
FastNoise2::FastNoise2() {
	// TODO Testing
	const char *test_tree = "DQAFAAAAAAAAQAgAAAAAAD8=";
	set_encoded_node_tree(test_tree);
}
// Replaces the current generator with the one FastNoise2 builds from `data`, which is
// handed to the library as UTF-8.
// NOTE(review): the result is stored unchecked; presumably the library yields an empty
// node when the string cannot be decoded — confirm against FastNoise2.
void FastNoise2::set_encoded_node_tree(String data) {
	// The temporary CharString stays alive until the end of this statement.
	_generator = FastNoise::NewFromEncodedNodeTree(data.utf8().get_data());
}
// Placeholder: the encoded form of the current node tree is not produced yet, so an
// empty string is always returned.
String FastNoise2::get_encoded_node_tree() const {
	// TODO
	return String("");
}
// Evaluates the generator for `count` 2D positions given as two parallel coordinate
// arrays and writes the results to `dst`.
//
// count: number of positions; `src_x`, `src_y` and `dst` are each indexed up to count - 1.
// src_x, src_y: coordinates forwarded to GenPositionArray2D.
// dst: output buffer forwarded to GenPositionArray2D.
//
// The two literal zeros are passed after the coordinate arrays and `_seed` is passed last.
// NOTE(review): the zeros are presumably per-axis position offsets — confirm against
// FastNoise2's GenPositionArray2D.
void FastNoise2::get_noise_2d(unsigned int count, const float *src_x, const float *src_y, float *dst) {
	if (!_generator) {
		// No usable node tree: set_encoded_node_tree() stores the decoder result unchecked,
		// so it can be empty. Output zeros instead of dereferencing a null node.
		for (unsigned int i = 0; i < count; ++i) {
			dst[i] = 0.f;
		}
		return;
	}
	_generator->GenPositionArray2D(dst, count, src_x, src_y, 0, 0, _seed);
}
// Evaluates the generator for `count` 3D positions given as three parallel coordinate
// arrays and writes the results to `dst`.
//
// count: number of positions; `src_x`, `src_y`, `src_z` and `dst` are each indexed up to
//        count - 1.
// src_x, src_y, src_z: coordinates forwarded to GenPositionArray3D.
// dst: output buffer forwarded to GenPositionArray3D.
//
// The three literal zeros are passed after the coordinate arrays and `_seed` is passed last.
// NOTE(review): the zeros are presumably per-axis position offsets — confirm against
// FastNoise2's GenPositionArray3D.
void FastNoise2::get_noise_3d(
		unsigned int count, const float *src_x, const float *src_y, const float *src_z, float *dst) {
	if (!_generator) {
		// No usable node tree: set_encoded_node_tree() stores the decoder result unchecked,
		// so it can be empty. Output zeros instead of dereferencing a null node.
		for (unsigned int i = 0; i < count; ++i) {
			dst[i] = 0.f;
		}
		return;
	}
	_generator->GenPositionArray3D(dst, count, src_x, src_y, src_z, 0, 0, 0, _seed);
}
// Binding hook declared in the class; the body is currently empty, so nothing is
// registered from here.
void FastNoise2::_bind_methods() {
// TODO
}

26
util/noise/fast_noise_2.h Normal file
View File

@ -0,0 +1,26 @@
#ifndef VOXEL_FAST_NOISE_2_H
#define VOXEL_FAST_NOISE_2_H
#include "FastNoise/FastNoise.h"
#include <core/resource.h>
// Resource holding a FastNoise2 generator node.
// It is named FastNoise2 rather than FastNoise because `FastNoise` is already the
// namespace of the library (see FastNoise::SmartNode below).
class FastNoise2 : public Resource {
GDCLASS(FastNoise2, Resource)
public:
// Sets up the resource with a hard-coded test node tree.
FastNoise2();
// Replaces the generator with the one built from the encoded node tree string `data`.
void set_encoded_node_tree(String data);
// Not implemented yet: currently always returns an empty string.
String get_encoded_node_tree() const;
// Batch evaluation over `count` positions given as parallel coordinate arrays; results
// are written to `dst`. Both forward to the generator with zero offsets and `_seed`.
void get_noise_2d(unsigned int count, const float *src_x, const float *src_y, float *dst);
void get_noise_3d(unsigned int count, const float *src_x, const float *src_y, const float *src_z, float *dst);
private:
static void _bind_methods();
// Generator node produced by set_encoded_node_tree().
FastNoise::SmartNode<> _generator;
// Seed passed to every generator call; no setter is declared in this class.
int _seed = 1337;
};
#endif // VOXEL_FAST_NOISE_2_H