From d022964a25572218c81364d24dfb2aa5a2dedfab Mon Sep 17 00:00:00 2001 From: Rye Mutt <rye@alchemyviewer.org> Date: Mon, 24 Aug 2020 01:12:52 -0400 Subject: [PATCH] Use _mm_setzero and _mm_set1 instead of static global variables for the SIMD epsilon and zero vectors as this generates better code under modern compilers --- indra/llappearance/llpolymorph.cpp | 7 ++++--- indra/llmath/llsimdtypes.h | 5 ++--- indra/llmath/llvector4a.cpp | 15 ++------------- indra/llmath/llvector4a.h | 10 ++++------ 4 files changed, 12 insertions(+), 25 deletions(-) diff --git a/indra/llappearance/llpolymorph.cpp b/indra/llappearance/llpolymorph.cpp index 3f0423fc498..072efeeee34 100644 --- a/indra/llappearance/llpolymorph.cpp +++ b/indra/llappearance/llpolymorph.cpp @@ -445,7 +445,7 @@ LLVector4a LLPolyMorphTarget::getVertexDistortion(S32 requested_index, LLPolyMes //----------------------------------------------------------------------------- const LLVector4a *LLPolyMorphTarget::getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh) { - if (!mMorphData) return &LLVector4a::getZero(); + if (!mMorphData) return nullptr; LLVector4a* resultVec; mMorphData->mCurrentIndex = 0; @@ -471,7 +471,7 @@ const LLVector4a *LLPolyMorphTarget::getFirstDistortion(U32 *index, LLPolyMesh * //----------------------------------------------------------------------------- const LLVector4a *LLPolyMorphTarget::getNextDistortion(U32 *index, LLPolyMesh **poly_mesh) { - if (!mMorphData) return &LLVector4a::getZero(); + if (!mMorphData) return nullptr; LLVector4a* resultVec; mMorphData->mCurrentIndex++; @@ -517,7 +517,8 @@ const LLVector4a& LLPolyMorphTarget::getAvgDistortion() } else { - return LLVector4a::getZero(); + static LLVector4a zero = LLVector4a::getZero(); + return zero; } } diff --git a/indra/llmath/llsimdtypes.h b/indra/llmath/llsimdtypes.h index 5290630ce58..805da7ce121 100644 --- a/indra/llmath/llsimdtypes.h +++ b/indra/llmath/llsimdtypes.h @@ -64,10 +64,9 @@ class LLSimdScalar mQ = _mm_set_ss(f); } - static inline const LLSimdScalar& getZero() + static inline LLSimdScalar getZero() { - extern const LLQuad F_ZERO_4A; - return reinterpret_cast<const LLSimdScalar&>(F_ZERO_4A); + return _mm_setzero_ps(); } inline F32 getF32() const; diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index 570fa41a439..25df427633d 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -28,17 +28,6 @@ #include "llmath.h" #include "llquantize.h" -extern const LLQuad F_ZERO_4A = { 0, 0, 0, 0 }; -extern const LLQuad F_APPROXIMATELY_ZERO_4A = { - F_APPROXIMATELY_ZERO, - F_APPROXIMATELY_ZERO, - F_APPROXIMATELY_ZERO, - F_APPROXIMATELY_ZERO -}; - -extern const LLVector4a LL_V4A_ZERO = reinterpret_cast<const LLVector4a&> ( F_ZERO_4A ); -extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F_APPROXIMATELY_ZERO_4A ); - /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes) { ll_memcpy_nonaliased_aligned_16((char*)dst, (char*)src, bytes); @@ -128,7 +117,7 @@ void LLVector4a::quantize8( const LLVector4a& low, const LLVector4a& high ) { LLVector4a maxError; maxError.setMul(delta, *reinterpret_cast<const LLVector4a*>(F_OOU8MAX_4A)); LLVector4a absVal; absVal.setAbs( val ); - setSelectWithMask( absVal.lessThan( maxError ), F_ZERO_4A, val ); + setSelectWithMask( absVal.lessThan( maxError ), getZero(), val ); } } @@ -175,6 +164,6 @@ void LLVector4a::quantize16( const LLVector4a& low, const LLVector4a& high ) { LLVector4a maxError; maxError.setMul(delta, *reinterpret_cast<const LLVector4a*>(F_OOU16MAX_4A)); LLVector4a absVal; absVal.setAbs( val ); - setSelectWithMask( absVal.lessThan( maxError ), F_ZERO_4A, val ); + setSelectWithMask( absVal.lessThan( maxError ), getZero(), val ); } } diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 043a59a750a..19f29d59b3a 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -70,17 +70,15 @@ class LLVector4a } // Return a vector of all zeros - static inline const LLVector4a& getZero() + static inline LLVector4a getZero() { - extern const LLVector4a LL_V4A_ZERO; - return LL_V4A_ZERO; + return _mm_setzero_ps(); } // Return a vector of all epsilon, where epsilon is a small float suitable for approximate equality checks - static inline const LLVector4a& getEpsilon() + static inline LLVector4a getEpsilon() { - extern const LLVector4a LL_V4A_EPSILON; - return LL_V4A_EPSILON; + return _mm_set1_ps(F_APPROXIMATELY_ZERO); } // Copy 16 bytes from src to dst. Source and destination must be 16-byte aligned -- GitLab