From d022964a25572218c81364d24dfb2aa5a2dedfab Mon Sep 17 00:00:00 2001
From: Rye Mutt <rye@alchemyviewer.org>
Date: Mon, 24 Aug 2020 01:12:52 -0400
Subject: [PATCH] Use _mm_setzero_ps and _mm_set1_ps instead of static global
 variables for the SIMD epsilon and zero vectors, as this generates better code
 under modern compilers

---
 indra/llappearance/llpolymorph.cpp |  7 ++++---
 indra/llmath/llsimdtypes.h         |  5 ++---
 indra/llmath/llvector4a.cpp        | 15 ++-------------
 indra/llmath/llvector4a.h          | 10 ++++------
 4 files changed, 12 insertions(+), 25 deletions(-)

diff --git a/indra/llappearance/llpolymorph.cpp b/indra/llappearance/llpolymorph.cpp
index 3f0423fc498..072efeeee34 100644
--- a/indra/llappearance/llpolymorph.cpp
+++ b/indra/llappearance/llpolymorph.cpp
@@ -445,7 +445,7 @@ LLVector4a LLPolyMorphTarget::getVertexDistortion(S32 requested_index, LLPolyMes
 //-----------------------------------------------------------------------------
 const LLVector4a *LLPolyMorphTarget::getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh)
 {
-	if (!mMorphData) return &LLVector4a::getZero();
+	if (!mMorphData) return nullptr;
 
 	LLVector4a* resultVec;
 	mMorphData->mCurrentIndex = 0;
@@ -471,7 +471,7 @@ const LLVector4a *LLPolyMorphTarget::getFirstDistortion(U32 *index, LLPolyMesh *
 //-----------------------------------------------------------------------------
 const LLVector4a *LLPolyMorphTarget::getNextDistortion(U32 *index, LLPolyMesh **poly_mesh)
 {
-	if (!mMorphData) return &LLVector4a::getZero();
+	if (!mMorphData) return nullptr;
 
 	LLVector4a* resultVec;
 	mMorphData->mCurrentIndex++;
@@ -517,7 +517,8 @@ const LLVector4a& LLPolyMorphTarget::getAvgDistortion()
 	}
 	else 
 	{
-		return LLVector4a::getZero();
+        static LLVector4a zero = LLVector4a::getZero();
+		return zero;
 	}
 }
 
diff --git a/indra/llmath/llsimdtypes.h b/indra/llmath/llsimdtypes.h
index 5290630ce58..805da7ce121 100644
--- a/indra/llmath/llsimdtypes.h
+++ b/indra/llmath/llsimdtypes.h
@@ -64,10 +64,9 @@ class LLSimdScalar
 		mQ = _mm_set_ss(f); 
 	}
 
-	static inline const LLSimdScalar& getZero()
+	static inline LLSimdScalar getZero()
 	{
-		extern const LLQuad F_ZERO_4A;
-		return reinterpret_cast<const LLSimdScalar&>(F_ZERO_4A);
+		return _mm_setzero_ps();
 	}
 
 	inline F32 getF32() const;
diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp
index 570fa41a439..25df427633d 100644
--- a/indra/llmath/llvector4a.cpp
+++ b/indra/llmath/llvector4a.cpp
@@ -28,17 +28,6 @@
 #include "llmath.h"
 #include "llquantize.h"
 
-extern const LLQuad F_ZERO_4A		= { 0, 0, 0, 0 };
-extern const LLQuad F_APPROXIMATELY_ZERO_4A = { 
-	F_APPROXIMATELY_ZERO,
-	F_APPROXIMATELY_ZERO,
-	F_APPROXIMATELY_ZERO,
-	F_APPROXIMATELY_ZERO
-};
-
-extern const LLVector4a LL_V4A_ZERO = reinterpret_cast<const LLVector4a&> ( F_ZERO_4A );
-extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F_APPROXIMATELY_ZERO_4A );
-
 /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes)
 {
         ll_memcpy_nonaliased_aligned_16((char*)dst, (char*)src, bytes);
@@ -128,7 +117,7 @@ void LLVector4a::quantize8( const LLVector4a& low, const LLVector4a& high )
 	{
 		LLVector4a maxError; maxError.setMul(delta, *reinterpret_cast<const LLVector4a*>(F_OOU8MAX_4A));
 		LLVector4a absVal; absVal.setAbs( val );
-		setSelectWithMask( absVal.lessThan( maxError ), F_ZERO_4A, val );
+		setSelectWithMask( absVal.lessThan( maxError ), getZero(), val );
 	}	
 }
 
@@ -175,6 +164,6 @@ void LLVector4a::quantize16( const LLVector4a& low, const LLVector4a& high )
 	{
 		LLVector4a maxError; maxError.setMul(delta, *reinterpret_cast<const LLVector4a*>(F_OOU16MAX_4A));
 		LLVector4a absVal; absVal.setAbs( val );
-		setSelectWithMask( absVal.lessThan( maxError ), F_ZERO_4A, val );
+		setSelectWithMask( absVal.lessThan( maxError ), getZero(), val );
 	}	
 }
diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h
index 043a59a750a..19f29d59b3a 100644
--- a/indra/llmath/llvector4a.h
+++ b/indra/llmath/llvector4a.h
@@ -70,17 +70,15 @@ class LLVector4a
 	}
 
 	// Return a vector of all zeros
-	static inline const LLVector4a& getZero()
+	static inline LLVector4a getZero()
 	{
-		extern const LLVector4a LL_V4A_ZERO;
-		return LL_V4A_ZERO;
+		return _mm_setzero_ps();
 	}
 	
 	// Return a vector of all epsilon, where epsilon is a small float suitable for approximate equality checks
-	static inline const LLVector4a& getEpsilon()
+	static inline LLVector4a getEpsilon()
 	{
-		extern const LLVector4a LL_V4A_EPSILON;
-		return LL_V4A_EPSILON;
+		return _mm_set1_ps(F_APPROXIMATELY_ZERO);
 	}
 
 	// Copy 16 bytes from src to dst. Source and destination must be 16-byte aligned
-- 
GitLab