From f3a8858e09d27495cf1bbad2f4d7ec4f5047ed94 Mon Sep 17 00:00:00 2001 From: hybrid Date: Mon, 3 Sep 2007 15:15:12 +0000 Subject: [PATCH] Fixed inline assembler for gcc in irrMath. Removed some code using the irrMath functions. git-svn-id: svn://svn.code.sf.net/p/irrlicht/code/trunk@894 dfc29bdd-3216-0410-991c-e03cc46cb475 --- include/irrMath.h | 156 +++++++++++--------- source/Irrlicht/CAttributeImpl.h | 2 +- source/Irrlicht/COpenGLExtensionHandler.cpp | 4 +- source/Irrlicht/CQ3LevelMesh.cpp | 23 +-- 4 files changed, 99 insertions(+), 86 deletions(-) diff --git a/include/irrMath.h b/include/irrMath.h index 58262596..9d0fdd7a 100644 --- a/include/irrMath.h +++ b/include/irrMath.h @@ -232,27 +232,30 @@ namespace core } -#ifdef IRRLICHT_FAST_MATH REALINLINE void clearFPUException () { +#ifdef feclearexcept + feclearexcept(FE_ALL_EXCEPT); +#elif defined(_MSC_VER) __asm fnclex; +#elif defined(__GNUC__) + __asm__ __volatile__ ("fclex \n\t"); +#else +# warn clearFPUException not supported. +#endif } -// comes from Nvidia -#if 1 REALINLINE f32 reciprocal_squareroot(const f32 x) { +#ifdef IRRLICHT_FAST_MATH + // comes from Nvidia +#if 1 u32 tmp = (u32(IEEE_1_0 << 1) + IEEE_1_0 - *(u32*)&x) >> 1; f32 y = *(f32*)&tmp; return y * (1.47f - 0.47f * x * y * y); - } -#endif - -// an sse2 version -#if 0 - REALINLINE f32 reciprocal_squareroot(const f32 x) - { +#else + // an sse2 version __asm { movss xmm0, x @@ -260,117 +263,137 @@ namespace core movss x, xmm0 } return x; - } #endif +#else // no fast math + return 1.f / sqrtf ( x ); +#endif + } + - //! i do not divide through 0.. (fpu expection) - // instead set f to a high value to get a return value near zero.. - // -1000000000000.f.. is use minus to stay negative.. - // must test's here (plane.normal dot anything ) checks on <= 0.f REALINLINE f32 reciprocal ( const f32 f ) { +#ifdef IRRLICHT_FAST_MATH + //! i do not divide through 0.. (fpu expection) + // instead set f to a high value to get a return value near zero.. + // -1000000000000.f.. is use minus to stay negative.. + // must test's here (plane.normal dot anything ) checks on <= 0.f return 1.f / f; //u32 x = (-(AIR(f) != 0 ) >> 31 ) & ( IR(f) ^ 0xd368d4a5 ) ^ 0xd368d4a5; //return 1.f / FR ( x ); +#else // no fast math + return 1.f / f; +#endif } + REALINLINE f32 reciprocal_approxim ( const f32 p ) { +#ifdef IRRLICHT_FAST_MATH register u32 x = 0x7F000000 - IR ( p ); const f32 r = FR ( x ); return r * (2.0f - p * r); +#else // no fast math + return 1.f / p; +#endif } REALINLINE s32 floor32(f32 x) { +#ifdef IRRLICHT_FAST_MATH const f32 h = 0.5f; s32 t; +#if defined(_MSC_VER) __asm { - fld x + fld x fsub h - fistp t + fistp t } - +#elif defined(__GNUC__) + __asm__ __volatile__ ( + "fsub %2 \n\t" + "fistpl %0" + : "=m" (t) + : "t" (x), "f" (h) + : "st" + ); +#else +# warn IRRLICHT_FAST_MATH not supported. + return (s32) floorf ( x ); +#endif return t; +#else // no fast math + return (s32) floorf ( x ); +#endif } + REALINLINE s32 ceil32 ( f32 x ) { +#ifdef IRRLICHT_FAST_MATH const f32 h = 0.5f; s32 t; +#if defined(_MSC_VER) __asm { - fld x + fld x fadd h - fistp t + fistp t } - +#elif defined(__GNUC__) + __asm__ __volatile__ ( + "fadd %2 \n\t" + "fistpl %0 \n\t" + : "=m"(t) + : "t"(x), "f"(h) + : "st" + ); +#else +# warn IRRLICHT_FAST_MATH not supported. + return (s32) ceilf ( x ); +#endif return t; - +#else // not fast math + return (s32) ceilf ( x ); +#endif } + REALINLINE s32 round32(f32 x) { +#if defined(IRRLICHT_FAST_MATH) s32 t; +#if defined(_MSC_VER) __asm { fld x fistp t } - - return t; - } - - +#elif defined(__GNUC__) + __asm__ __volatile__ ( + "fistpl %0 \n\t" + : "=m"(t) + : "t"(x) + : "st" + ); #else - - REALINLINE void clearFPUException () - { +# warn IRRLICHT_FAST_MATH not supported. + return (s32) round(x); +#endif + return t; +#else // no fast math + return (s32) round(x); +#endif } - - inline f32 reciprocal_squareroot(const f32 x) - { - return 1.f / sqrtf ( x ); - } - - - inline f32 reciprocal ( const f32 x ) - { - return 1.f / x; - } - - inline f32 reciprocal_approxim ( const f32 x ) - { - return 1.f / x; - } - - - inline s32 floor32 ( f32 x ) - { - return (s32) floorf ( x ); - } - - inline s32 ceil32 ( f32 x ) - { - return (s32) ceilf ( x ); - } - - inline s32 round32 ( f32 x ) - { - return (s32) ( x + 0.5f ); - } - - inline f32 f32_max3(const f32 a, const f32 b, const f32 c) { return a > b ? (a > c ? a : c) : (b > c ? b : c); @@ -381,8 +404,6 @@ namespace core return a < b ? (a < c ? a : c) : (b < c ? b : c); } -#endif - inline f32 fract ( f32 x ) { return x - floorf ( x ); @@ -390,10 +411,9 @@ namespace core inline f32 round ( f32 x ) { - return floorf ( x + 0.5f ); + return ::round(x); } - } // end namespace core } // end namespace irr diff --git a/source/Irrlicht/CAttributeImpl.h b/source/Irrlicht/CAttributeImpl.h index 08931984..b7f6a884 100644 --- a/source/Irrlicht/CAttributeImpl.h +++ b/source/Irrlicht/CAttributeImpl.h @@ -1957,7 +1957,7 @@ public: virtual void setString(const char* text) { - sscanf(text, "0x%x", (int*)(&Value)); + sscanf(text, "0x%x", (unsigned int*)(&Value)); } virtual E_ATTRIBUTE_TYPE getType() const diff --git a/source/Irrlicht/COpenGLExtensionHandler.cpp b/source/Irrlicht/COpenGLExtensionHandler.cpp index 3caa76ba..b2420365 100644 --- a/source/Irrlicht/COpenGLExtensionHandler.cpp +++ b/source/Irrlicht/COpenGLExtensionHandler.cpp @@ -61,7 +61,7 @@ void COpenGLExtensionHandler::dump() const void COpenGLExtensionHandler::initExtensions(bool stencilBuffer) { const f32 ver = core::fast_atof(reinterpret_cast(glGetString(GL_VERSION))); - Version = core::floor32(ver)*100+core::ceil32((ver-floor(ver))*10.0f); + Version = core::floor32(ver)*100+core::ceil32(core::fract(ver)*10.0f); if ( Version >= 102) os::Printer::log("OpenGL driver version is 1.2 or better.", ELL_INFORMATION); else @@ -375,7 +375,7 @@ void COpenGLExtensionHandler::initExtensions(bool stencilBuffer) else { const f32 ver = core::fast_atof(reinterpret_cast(shaderVersion)); - ShaderLanguageVersion = core::floor32(ver)*100+core::ceil32((ver-floor(ver))*10.0f); + ShaderLanguageVersion = core::floor32(ver)*100+core::ceil32(core::fract(ver)*10.0f); } } #endif diff --git a/source/Irrlicht/CQ3LevelMesh.cpp b/source/Irrlicht/CQ3LevelMesh.cpp index 5a4d65c7..af2407fa 100644 --- a/source/Irrlicht/CQ3LevelMesh.cpp +++ b/source/Irrlicht/CQ3LevelMesh.cpp @@ -989,13 +989,6 @@ inline f32 CQ3LevelMesh::Blend( const f64 s[3], const f64 t[3], const tBSPVertex return (f32) res; } -//!helper function -inline s32 s32_min ( s32 a, s32 b) -{ - s32 mask = (a - b) >> 31; - return (a & mask) | (b & ~mask); -} - void CQ3LevelMesh::S3DVertex2TCoords_64::copyto ( video::S3DVertex2TCoords &dest ) const { dest.Pos.X = core::round ( (f32) Pos.X ); @@ -1041,10 +1034,10 @@ void CQ3LevelMesh::copy ( S3DVertex2TCoords_64 * dest, const tBSPVertex * source if ( vertexcolor ) { - u32 a = s32_min ( source->color[3] * quake3::defaultModulate, 255 ); - u32 r = s32_min ( source->color[0] * quake3::defaultModulate, 255 ); - u32 g = s32_min ( source->color[1] * quake3::defaultModulate, 255 ); - u32 b = s32_min ( source->color[2] * quake3::defaultModulate, 255 ); + u32 a = core::s32_min ( source->color[3] * quake3::defaultModulate, 255 ); + u32 r = core::s32_min ( source->color[0] * quake3::defaultModulate, 255 ); + u32 g = core::s32_min ( source->color[1] * quake3::defaultModulate, 255 ); + u32 b = core::s32_min ( source->color[2] * quake3::defaultModulate, 255 ); dest->Color.set ( a * 1.f/255.f, r * 1.f/255.f, @@ -1082,10 +1075,10 @@ inline void CQ3LevelMesh::copy ( video::S3DVertex2TCoords * dest, const tBSPVert if ( vertexcolor ) { - u32 a = s32_min ( source->color[3] * quake3::defaultModulate, 255 ); - u32 r = s32_min ( source->color[0] * quake3::defaultModulate, 255 ); - u32 g = s32_min ( source->color[1] * quake3::defaultModulate, 255 ); - u32 b = s32_min ( source->color[2] * quake3::defaultModulate, 255 ); + u32 a = core::s32_min ( source->color[3] * quake3::defaultModulate, 255 ); + u32 r = core::s32_min ( source->color[0] * quake3::defaultModulate, 255 ); + u32 g = core::s32_min ( source->color[1] * quake3::defaultModulate, 255 ); + u32 b = core::s32_min ( source->color[2] * quake3::defaultModulate, 255 ); dest->Color.color = a << 24 | r << 16 | g << 8 | b; }