Partial revert

549c4f49 · Rye Mutt · bcf61b3e · 549c4f49 · 549c4f49 · 549c4f49
Commit 549c4f49 authored 2 years ago by Rye Mutt
--- a/indra/llmath/llquaternion2.inl
+++ b/indra/llmath/llquaternion2.inl
@@ -49,6 +49,8 @@ inline LLVector4a& LLQuaternion2::getVector4aRw()

 inline void LLQuaternion2::mul(const LLQuaternion2& b)
 {
+	static LL_ALIGN_16(const unsigned int signMask[4]) = { 0x0, 0x0, 0x0, 0x80000000 };
+
 	LLVector4a sum1, sum2, prod1, prod2, prod3, prod4;
 	const LLVector4a& va = mQ;
 	const LLVector4a& vb = b.getVector4a();
@@ -73,7 +75,7 @@ inline void LLQuaternion2::mul(const LLQuaternion2& b)
 	prod4.setMul(Bzxyz,Ayzxz);

 	sum1.setAdd(prod2,prod3);
-	sum1 = _mm_xor_ps(sum1, _mm_castsi128_ps(_mm_set_epi32(0x0, 0x0, 0x0, 0x80000000)));	
+	sum1 = _mm_xor_ps(sum1, _mm_load_ps((const float*)signMask));	
 	sum2.setSub(prod1,prod4);
 	mQ.setAdd(sum1,sum2);
 }
@@ -85,7 +87,8 @@ inline void LLQuaternion2::mul(const LLQuaternion2& b)
 // Set this quaternion to the conjugate of src
 inline void LLQuaternion2::setConjugate(const LLQuaternion2& src)
 {
-	mQ = _mm_xor_ps(src.mQ, _mm_castsi128_ps(_mm_set_epi32(0x80000000, 0x80000000, 0x80000000, 0x00000000)));	
+	static LL_ALIGN_16( const U32 F_QUAT_INV_MASK_4A[4] ) = { 0x80000000, 0x80000000, 0x80000000, 0x00000000 }; // bit 31 set in array elements 0..2, element 3 is all zero
+	mQ = _mm_xor_ps(src.mQ, *reinterpret_cast<const LLQuad*>(&F_QUAT_INV_MASK_4A));	// XOR flips bit 31 where the mask has it set; element 3 of src passes through unchanged
 }

 // Renormalizes the quaternion. Assumes it has nonzero length.

--- a/indra/llmath/llsimdtypes.inl
+++ b/indra/llmath/llsimdtypes.inl
@@ -61,7 +61,7 @@ inline LLSimdScalar operator/(const LLSimdScalar& a, const LLSimdScalar& b)

 inline LLSimdScalar operator-(const LLSimdScalar& a)
 {
-	LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+	static LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 }; // now static: a single function-local constant rather than an automatic array per call
 	ll_assert_aligned(signMask,16);
 	return _mm_xor_ps(*reinterpret_cast<const LLQuad*>(signMask), a);
 }
@@ -146,7 +146,9 @@ inline LLSimdScalar& LLSimdScalar::operator/=(const LLSimdScalar& rhs)

 inline LLSimdScalar LLSimdScalar::getAbs() const
 {
-	return _mm_and_ps( mQ, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)));
+	static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }; // every element has all bits set except bit 31
+	ll_assert_aligned(F_ABS_MASK_4A,16); // the mask is read below through an LLQuad pointer, so check its 16-byte alignment
+	return _mm_and_ps( mQ, *reinterpret_cast<const LLQuad*>(F_ABS_MASK_4A)); // AND clears bit 31 (the float sign bit) of each element of mQ
 }

 inline F32 LLSimdScalar::getF32() const

--- a/indra/llmath/llvector4a.inl
+++ b/indra/llmath/llvector4a.inl
@@ -244,7 +244,8 @@ inline void LLVector4a::setDiv(const LLVector4a& a, const LLVector4a& b)
 // Set this to the element-wise absolute value of src
 inline void LLVector4a::setAbs(const LLVector4a& src)
 {
-	mQ = _mm_and_ps(src.mQ, _mm_castsi128_ps(_mm_set_epi32(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF)));
+	static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }; // every element has all bits set except bit 31
+	mQ = _mm_and_ps(src.mQ, *reinterpret_cast<const LLQuad*>(F_ABS_MASK_4A)); // AND clears bit 31 (the float sign bit) of each element of src
 }

 // Add to each component in this vector the corresponding component in rhs
@@ -528,7 +529,9 @@ inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F3

 inline LLBool32 LLVector4a::isFinite3() const
 {
-	const __m128i nanOrInfMaskV = _mm_set_epi32(0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000);
+	static LL_ALIGN_16(const U32 nanOrInfMask[4]) = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
+	ll_assert_aligned(nanOrInfMask,16);
+	const __m128i nanOrInfMaskV = *reinterpret_cast<const __m128i*> (nanOrInfMask);
 	const __m128i maskResult = _mm_and_si128( _mm_castps_si128(mQ), nanOrInfMaskV );
 	const LLVector4Logical equalityCheck = _mm_castsi128_ps(_mm_cmpeq_epi32( maskResult, nanOrInfMaskV ));
 	return !equalityCheck.areAnySet( LLVector4Logical::MASK_XYZ );
@@ -536,7 +539,8 @@ inline LLBool32 LLVector4a::isFinite3() const
 	
 inline LLBool32 LLVector4a::isFinite4() const
 {
-	const __m128i nanOrInfMaskV = _mm_set_epi32(0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000);
+	static LL_ALIGN_16(const U32 nanOrInfMask[4]) = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
+	const __m128i nanOrInfMaskV = *reinterpret_cast<const __m128i*> (nanOrInfMask);
 	const __m128i maskResult = _mm_and_si128( _mm_castps_si128(mQ), nanOrInfMaskV );
 	const LLVector4Logical equalityCheck = _mm_castsi128_ps(_mm_cmpeq_epi32( maskResult, nanOrInfMaskV ));
 	return !equalityCheck.areAnySet( LLVector4Logical::MASK_XYZW );
@@ -565,8 +569,8 @@ inline void LLVector4a::clamp( const LLVector4a& low, const LLVector4a& high )

 inline void LLVector4a::negate()
 {
-	const __m128i signMask = _mm_set_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000);
-	mQ = _mm_xor_ps(_mm_castsi128_ps(signMask), mQ);
+	static LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 }; // only bit 31 set in each of the four elements
+	mQ = _mm_xor_ps(*reinterpret_cast<const LLQuad*>(signMask), mQ); // XOR flips bit 31 (the float sign bit) of every element of mQ in place
 }

 inline void LLVector4a::setMoveHighLow(const LLVector4a& rhs)

--- a/indra/llmath/llvector4logical.h
+++ b/indra/llmath/llvector4logical.h
@@ -77,7 +77,9 @@ class LLVector4Logical
 	// Invert this mask
 	inline LLVector4Logical& invert()
 	{
-		mQ = _mm_andnot_ps( mQ, _mm_castsi128_ps(_mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)));
+		static const LL_ALIGN_16(U32 allOnes[4]) = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; // all 128 bits set
+		ll_assert_aligned(allOnes,16); // the array is read below through an LLQuad pointer, so check its 16-byte alignment
+		mQ = _mm_andnot_ps( mQ, *reinterpret_cast<const LLQuad*>(allOnes) ); // (~mQ) & allOnes == ~mQ; named cast keeps the const qualifier of allOnes
 		return *this;
 	}
 	

--- a/indra/newview/VIEWER_PROJECT_CODENAME.txt
+++ b/indra/newview/VIEWER_PROJECT_CODENAME.txt
-Lightbox
\ No newline at end of file
+AgileAkita
\ No newline at end of file