diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h old mode 100644 new mode 100755 index bbbdaa6497fc951d41cb3b2596f0258d5ad8a9df..baf91bb96d440df59e823d53b4ed0df45674593f --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -512,4 +512,13 @@ void LLPrivateMemoryPoolTester::operator delete[](void* addr) // LLSingleton moved to llsingleton.h +#define CHECK_ALIGNMENT + +#ifdef CHECK_ALIGNMENT +#define ll_assert_aligned(ptr,alignment) llassert((reinterpret_cast<uintptr_t>(ptr))%(alignment) == 0) +#else +#define ll_assert_aligned(ptr,alignment) +#endif + + #endif diff --git a/indra/llmath/llmatrix3a.cpp b/indra/llmath/llmatrix3a.cpp old mode 100644 new mode 100755 diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h old mode 100644 new mode 100755 index c7cdf7b32cb5d8d729fb42f7ef383424e9923268..01458521ec357cf6ef2cebbef0a13ab3a3d65774 --- a/indra/llmath/llsimdmath.h +++ b/indra/llmath/llsimdmath.h @@ -67,11 +67,10 @@ template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) #define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) - - #include <xmmintrin.h> #include <emmintrin.h> +#include "llmemory.h" #include "llsimdtypes.h" #include "llsimdtypes.inl" diff --git a/indra/llmath/llsimdtypes.inl b/indra/llmath/llsimdtypes.inl index 712239e4258b6bb4ed2ecb41ac45db5e2908a7ab..e905c84954b2be420c29d0c792b0dbcc814e1413 100644 --- a/indra/llmath/llsimdtypes.inl +++ b/indra/llmath/llsimdtypes.inl @@ -62,6 +62,7 @@ inline LLSimdScalar operator/(const LLSimdScalar& a, const LLSimdScalar& b) inline LLSimdScalar operator-(const LLSimdScalar& a) { static LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 }; + ll_assert_aligned(signMask,16); return _mm_xor_ps(*reinterpret_cast<const LLQuad*>(signMask), a); } @@ -146,6 +147,7 @@ inline LLSimdScalar& LLSimdScalar::operator/=(const LLSimdScalar& rhs) inline LLSimdScalar LLSimdScalar::getAbs() const { static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }; + ll_assert_aligned(F_ABS_MASK_4A,16); return _mm_and_ps( mQ, *reinterpret_cast<const LLQuad*>(F_ABS_MASK_4A)); } diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index b66b7a70764f414480d4ab5c5c6cf7ce2ba848ca..49b8754cd0e7f479052a9081b6ff16a2633a8312 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -24,6 +24,7 @@ * $/LicenseInfo$ */ +#include "llmemory.h" #include "llmath.h" #include "llquantize.h" @@ -189,6 +190,8 @@ void LLVector4a::quantize16( const LLVector4a& low, const LLVector4a& high ) LLVector4a oneOverDelta; { static LL_ALIGN_16( const F32 F_TWO_4A[4] ) = { 2.f, 2.f, 2.f, 2.f }; + ll_assert_aligned(F_TWO_4A,16); + LLVector4a two; two.load4a( F_TWO_4A ); // Here we use _mm_rcp_ps plus one round of newton-raphson diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 596082509df104fd09651ac3366975885f6f3d2e..eada6ed1b7636024ac2a07261d1629eba8e58524 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -32,6 +32,7 @@ class LLRotation; #include <assert.h> #include "llpreprocessor.h" +#include "llmemory.h" /////////////////////////////////// // FIRST TIME USERS PLEASE READ @@ -90,6 +91,7 @@ class LLVector4a LLVector4a() { //DO NOT INITIALIZE -- The overhead is completely unnecessary + ll_assert_aligned(this,16); } LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 7ad22a563156dcbd1845816d2d97aff9ead85471..7c52ffef2171c6caecd8c16af7793def0d99f74b 100644 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -475,6 +475,7 @@ inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F3 inline LLBool32 LLVector4a::isFinite3() const { static LL_ALIGN_16(const U32 nanOrInfMask[4]) = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; + ll_assert_aligned(nanOrInfMask,16); const __m128i nanOrInfMaskV = *reinterpret_cast<const __m128i*> (nanOrInfMask); const __m128i maskResult = _mm_and_si128( _mm_castps_si128(mQ), nanOrInfMaskV ); const LLVector4Logical equalityCheck = _mm_castsi128_ps(_mm_cmpeq_epi32( maskResult, nanOrInfMaskV )); diff --git a/indra/llmath/llvector4logical.h b/indra/llmath/llvector4logical.h index dd66b09d4391608eda6e4affa0674b4825cc288b..c5698f7cea62b1d56084906cf18a0801792070b7 100644 --- a/indra/llmath/llvector4logical.h +++ b/indra/llmath/llvector4logical.h @@ -27,6 +27,7 @@ #ifndef LL_VECTOR4LOGICAL_H #define LL_VECTOR4LOGICAL_H +#include "llmemory.h" //////////////////////////// // LLVector4Logical @@ -77,6 +78,7 @@ class LLVector4Logical inline LLVector4Logical& invert() { static const LL_ALIGN_16(U32 allOnes[4]) = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; + ll_assert_aligned(allOnes,16); mQ = _mm_andnot_ps( mQ, *(LLQuad*)(allOnes) ); return *this; } diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp old mode 100644 new mode 100755 index da0fa329633ebc4b3a1bf2825ec7c2b1902ab05a..113d4835bb76af06931b28c0a17f44aeaeb2762e --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -95,17 +95,6 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1; extern BOOL gDebugGL; -void assert_aligned(void* ptr, uintptr_t alignment) -{ -#if 0 - uintptr_t t = (uintptr_t) ptr; - if (t%alignment != 0) - { - llerrs << "Alignment check failed." << llendl; - } -#endif -} - BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { LLVector3 test = (pt2-pt1)%(pt3-pt2); @@ -6894,14 +6883,14 @@ void LLVolumeFace::resizeVertices(S32 num_verts) if (num_verts) { mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - assert_aligned(mPositions, 16); + ll_assert_aligned(mPositions, 16); mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - assert_aligned(mNormals, 16); + ll_assert_aligned(mNormals, 16); //pad texture coordinate block end to allow for QWORD reads S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; mTexCoords = (LLVector2*) ll_aligned_malloc_16(size); - assert_aligned(mTexCoords, 16); + ll_assert_aligned(mTexCoords, 16); } else { @@ -7027,11 +7016,11 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat //allocate new buffer space mPositions = (LLVector4a*) realloc(mPositions, new_count*sizeof(LLVector4a)); - assert_aligned(mPositions, 16); + ll_assert_aligned(mPositions, 16); mNormals = (LLVector4a*) realloc(mNormals, new_count*sizeof(LLVector4a)); - assert_aligned(mNormals, 16); + ll_assert_aligned(mNormals, 16); mTexCoords = (LLVector2*) realloc(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF); - assert_aligned(mTexCoords, 16); + ll_assert_aligned(mTexCoords, 16); mNumVertices = new_count; diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp old mode 100644 new mode 100755 index 3e16ccf3dacbf5a9f67c4e31b586af7896e0f4a7..a8b5ee47d1a0194cf02ecd334dc17b03339cc6ef --- a/indra/newview/llspatialpartition.cpp +++ b/indra/newview/llspatialpartition.cpp @@ -546,6 +546,7 @@ void LLSpatialGroup::setVisible() void LLSpatialGroup::validate() { + ll_assert_aligned(this,64); #if LL_OCTREE_PARANOIA_CHECK sg_assert(!isState(DIRTY)); @@ -1195,6 +1196,8 @@ LLSpatialGroup::LLSpatialGroup(OctreeNode* node, LLSpatialPartition* part) : mCurUpdatingSlotp(NULL), mCurUpdatingTexture (NULL) { + ll_assert_aligned(this,64); + sNodeCount++; LLMemType mt(LLMemType::MTYPE_SPACE_PARTITION);