Fixed the inline assembler for GCC in irrMath. Replaced some duplicated code with calls to the irrMath functions.

git-svn-id: svn://svn.code.sf.net/p/irrlicht/code/trunk@894 dfc29bdd-3216-0410-991c-e03cc46cb475
2007-09-03 15:15:12 +00:00 · 2007-09-03 15:15:12 +00:00 · f3a8858e09
parent 8b0f65e138
commit f3a8858e09
4 changed files with 99 additions and 86 deletions
--- a/include/irrMath.h
+++ b/include/irrMath.h
@ -232,27 +232,30 @@ namespace core
 	}


-#ifdef IRRLICHT_FAST_MATH

 	REALINLINE void clearFPUException ()
 	{
+#ifdef feclearexcept
+		feclearexcept(FE_ALL_EXCEPT);
+#elif defined(_MSC_VER)
 		__asm fnclex;
+#elif defined(__GNUC__)
+		__asm__ __volatile__ ("fclex \n\t");
+#else
+#  warn clearFPUException not supported.
+#endif
 	}
 		
-// comes from Nvidia
-#if 1
 	REALINLINE f32 reciprocal_squareroot(const f32 x)
 	{
+#ifdef IRRLICHT_FAST_MATH
+		// comes from Nvidia
+#if 1
 		u32 tmp = (u32(IEEE_1_0 << 1) + IEEE_1_0 - *(u32*)&x) >> 1;   
 		f32 y = *(f32*)&tmp;                                             
 		return y * (1.47f - 0.47f * x * y * y);
-	}
-#endif
-
-// an sse2 version
-#if 0
-	REALINLINE f32 reciprocal_squareroot(const f32 x)
-	{
+#else
+		// an sse2 version
 		__asm
 		{
 			movss	xmm0, x
@ -260,117 +263,137 @@ namespace core
 			movss	x, xmm0
 		}
 		return x;
-	}
 #endif
+#else // no fast math
+		return 1.f / sqrtf ( x );
+#endif
+	}
+


-	//! i do not divide through 0.. (fpu expection)
-	// instead set f to a high value to get a return value near zero..
-	// -1000000000000.f.. is use minus to stay negative..
-	// must test's here (plane.normal dot anything ) checks on <= 0.f
 	REALINLINE f32 reciprocal ( const f32 f )
 	{
+#ifdef IRRLICHT_FAST_MATH
+		//! The zero-safe variant (commented out below) is disabled, so this branch divides by f directly.
+		// The disabled code was meant to avoid an FPU exception on f == 0 by substituting
+		// a large negative value (-1000000000000.f) so that the result stays near zero and negative,
+		// because callers test the result (plane.normal dot anything) with <= 0.f.
 		return 1.f / f;
 		//u32 x = (-(AIR(f) != 0 ) >> 31 ) & ( IR(f) ^ 0xd368d4a5 ) ^ 0xd368d4a5;
 		//return 1.f / FR ( x );
+#else // no fast math
+		return 1.f / f;
+#endif
 	}

+
 	REALINLINE f32 reciprocal_approxim ( const f32 p )
 	{
+#ifdef IRRLICHT_FAST_MATH // approximate 1/p: first guess r from 0x7F000000 - IR(p), refined once as r * (2 - p * r); IR/FR presumably reinterpret float bits - confirm
 		register u32 x = 0x7F000000 - IR ( p );
 		const f32 r = FR ( x );
 		return r * (2.0f - p * r);
+#else // no fast math: plain division
+		return 1.f / p;
+#endif
 	}


 	REALINLINE s32 floor32(f32 x)
 	{
+#ifdef IRRLICHT_FAST_MATH
 		const f32 h = 0.5f;

 		s32 t;

+#if defined(_MSC_VER)
 		__asm
 		{
-			fld   x
+			fld	x
 			fsub	h
-			fistp t
+			fistp	t
 		}
-
+#elif defined(__GNUC__)
+		__asm__ __volatile__ (
+			"fsub %2 \n\t"
+			"fistpl %0"
+			: "=m" (t)
+			: "t" (x), "f" (h)
+			: "st"
+			);
+#else
+#  warn IRRLICHT_FAST_MATH not supported.
+		return (s32) floorf ( x );
+#endif
 		return t;
+#else // no fast math
+		return (s32) floorf ( x );
+#endif
 	}

+
 	REALINLINE s32 ceil32 ( f32 x )
 	{
+#ifdef IRRLICHT_FAST_MATH
 		const f32 h = 0.5f;

 		s32 t;

+#if defined(_MSC_VER)
 		__asm
 		{
-			fld   x
+			fld	x
 			fadd	h
-			fistp t
+			fistp	t
 		}
-
+#elif defined(__GNUC__)
+		__asm__ __volatile__ (
+			"fadd %2 \n\t"
+			"fistpl %0 \n\t"
+			: "=m"(t)
+			: "t"(x), "f"(h)
+			: "st"
+			);
+#else
+#  warn IRRLICHT_FAST_MATH not supported.
+		return (s32) ceilf ( x );
+#endif
 		return t;
-
+#else // not fast math
+		return (s32) ceilf ( x );
+#endif
 	}


+
 	REALINLINE s32 round32(f32 x)
 	{
+#if defined(IRRLICHT_FAST_MATH)
 		s32 t;

+#if defined(_MSC_VER)
 		__asm
 		{
 			fld   x
 			fistp t
 		}
-
-		return t;
-	}
-
-
+#elif defined(__GNUC__)
+		__asm__ __volatile__ (
+			"fistpl %0 \n\t"
+			: "=m"(t)
+			: "t"(x)
+			: "st"
+			);
 #else
-
-	REALINLINE void clearFPUException ()
-	{
+#  warn IRRLICHT_FAST_MATH not supported.
+		return (s32) round(x);
+#endif
+		return t;
+#else // no fast math
+		return (s32) round(x);
+#endif
 	}

-
-	inline f32 reciprocal_squareroot(const f32 x)
-	{
-		return 1.f / sqrtf ( x );
-	}
-
-
-	inline f32 reciprocal ( const f32 x )
-	{
-		return 1.f / x;
-	}
-
-	inline f32 reciprocal_approxim ( const f32 x )
-	{
-		return 1.f / x;
-	}
-
-
-	inline s32 floor32 ( f32 x )
-	{
-		return (s32) floorf ( x );
-	}
-
-	inline s32 ceil32 ( f32 x )
-	{
-		return (s32) ceilf ( x );
-	}
-
-	inline s32 round32 ( f32 x )
-	{
-		return (s32) ( x + 0.5f );
-	}
-
-
 	inline f32 f32_max3(const f32 a, const f32 b, const f32 c)
 	{
 		return a > b ? (a > c ? a : c) : (b > c ? b : c);
@ -381,8 +404,6 @@ namespace core
 		return a < b ? (a < c ? a : c) : (b < c ? b : c);
 	}

-#endif
-
 	inline f32 fract ( f32 x )
 	{
 		return x - floorf ( x );
@ -390,10 +411,9 @@ namespace core

 	inline f32 round ( f32 x )
 	{
-		return floorf ( x + 0.5f );
+		return ::round(x); // calls the global-namespace round() (presumably the C library one, which rounds halfway cases away from zero), not this function
 	}

-
 } // end namespace core
 } // end namespace irr

--- a/source/Irrlicht/CAttributeImpl.h
+++ b/source/Irrlicht/CAttributeImpl.h
@ -1957,7 +1957,7 @@ public:

 	virtual void setString(const char* text) 
 	{
-		sscanf(text, "0x%x", (int*)(&Value));
+		sscanf(text, "0x%x", (unsigned int*)(&Value)); // %x stores through an unsigned int*; the sscanf return value is not checked here
 	}

 	virtual E_ATTRIBUTE_TYPE getType() const
--- a/source/Irrlicht/COpenGLExtensionHandler.cpp
+++ b/source/Irrlicht/COpenGLExtensionHandler.cpp
@ -61,7 +61,7 @@ void COpenGLExtensionHandler::dump() const
 void COpenGLExtensionHandler::initExtensions(bool stencilBuffer)
 {
 	const f32 ver = core::fast_atof(reinterpret_cast<const c8*>(glGetString(GL_VERSION)));
-	Version = core::floor32(ver)*100+core::ceil32((ver-floor(ver))*10.0f);
+	Version = core::floor32(ver)*100+core::ceil32(core::fract(ver)*10.0f);
 	if ( Version >= 102)
 		os::Printer::log("OpenGL driver version is 1.2 or better.", ELL_INFORMATION);
 	else
@ -375,7 +375,7 @@ void COpenGLExtensionHandler::initExtensions(bool stencilBuffer)
 		else
 		{
 			const f32 ver = core::fast_atof(reinterpret_cast<const c8*>(shaderVersion));
-			ShaderLanguageVersion = core::floor32(ver)*100+core::ceil32((ver-floor(ver))*10.0f);
+			ShaderLanguageVersion = core::floor32(ver)*100+core::ceil32(core::fract(ver)*10.0f);
 		}
 	}
 #endif
--- a/source/Irrlicht/CQ3LevelMesh.cpp
+++ b/source/Irrlicht/CQ3LevelMesh.cpp
@ -989,13 +989,6 @@ inline f32 CQ3LevelMesh::Blend( const f64 s[3], const f64 t[3], const tBSPVertex
 	return (f32) res;
 }

-//!helper function 
-inline s32 s32_min ( s32 a, s32 b)
-{
-	s32 mask = (a - b) >> 31;
-	return (a & mask) | (b & ~mask);
-}
-
 void CQ3LevelMesh::S3DVertex2TCoords_64::copyto ( video::S3DVertex2TCoords &dest ) const
 {
 	dest.Pos.X = core::round ( (f32) Pos.X );
@ -1041,10 +1034,10 @@ void CQ3LevelMesh::copy ( S3DVertex2TCoords_64 * dest, const tBSPVertex * source

 	if ( vertexcolor )
 	{
-		u32 a = s32_min ( source->color[3] * quake3::defaultModulate, 255 );
-		u32 r = s32_min ( source->color[0] * quake3::defaultModulate, 255 );
-		u32 g = s32_min ( source->color[1] * quake3::defaultModulate, 255 );
-		u32 b = s32_min ( source->color[2] * quake3::defaultModulate, 255 );
+		u32 a = core::s32_min ( source->color[3] * quake3::defaultModulate, 255 );
+		u32 r = core::s32_min ( source->color[0] * quake3::defaultModulate, 255 );
+		u32 g = core::s32_min ( source->color[1] * quake3::defaultModulate, 255 );
+		u32 b = core::s32_min ( source->color[2] * quake3::defaultModulate, 255 );

 		dest->Color.set (	a * 1.f/255.f,
 							r * 1.f/255.f,
@ -1082,10 +1075,10 @@ inline void CQ3LevelMesh::copy ( video::S3DVertex2TCoords * dest, const tBSPVert

 	if ( vertexcolor )
 	{
-		u32 a = s32_min ( source->color[3] * quake3::defaultModulate, 255 );
-		u32 r = s32_min ( source->color[0] * quake3::defaultModulate, 255 );
-		u32 g = s32_min ( source->color[1] * quake3::defaultModulate, 255 );
-		u32 b = s32_min ( source->color[2] * quake3::defaultModulate, 255 );
+		u32 a = core::s32_min ( source->color[3] * quake3::defaultModulate, 255 );
+		u32 r = core::s32_min ( source->color[0] * quake3::defaultModulate, 255 );
+		u32 g = core::s32_min ( source->color[1] * quake3::defaultModulate, 255 );
+		u32 b = core::s32_min ( source->color[2] * quake3::defaultModulate, 255 );

 		dest->Color.color = a << 24 | r << 16 | g << 8 | b;
 	}