From 1435a8b9e6203911d2ebe9e3ba217f8eb20e3140 Mon Sep 17 00:00:00 2001
From: "Brad Payne (Vir Linden)" <vir@lindenlab.com>
Date: Wed, 4 Jan 2012 15:21:23 -0500
Subject: [PATCH] SH-2789 WIP - stricter calling of memcpyNonAliased16

---
 indra/llcommon/llmemory.cpp             |  3 +++
 indra/llcommon/llmemory.h               | 12 ++++++------
 indra/llmath/llvector4a.cpp             |  3 ---
 indra/llmath/llvector4a.h               |  1 +
 indra/llmath/tests/alignment_test.cpp   |  6 ++++--
 indra/newview/llfloatermodelpreview.cpp |  3 ++-
 indra/newview/llviewerjointmesh.cpp     |  6 ++++--
 7 files changed, 20 insertions(+), 14 deletions(-)
 mode change 100644 => 100755 indra/llcommon/llmemory.cpp
 mode change 100644 => 100755 indra/newview/llviewerjointmesh.cpp

diff --git a/indra/llcommon/llmemory.cpp b/indra/llcommon/llmemory.cpp
old mode 100644
new mode 100755
index 22204e756ab..afaf3666687
--- a/indra/llcommon/llmemory.cpp
+++ b/indra/llcommon/llmemory.cpp
@@ -63,11 +63,14 @@ LLPrivateMemoryPoolManager::mem_allocation_info_t LLPrivateMemoryPoolManager::sM
 
 void ll_assert_aligned_func(uintptr_t ptr,U32 alignment)
 {
+#ifdef SHOW_ASSERT
+	// Redundant with the llassert below; the explicit branch gives a place to set breakpoints.
 	if (ptr%alignment!=0)
 	{
 		llwarns << "alignment check failed" << llendl;
 	}
 	llassert(ptr%alignment==0);
+#endif
 }
 
 //static
diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h
index 3eaf700bf18..1b54b561072 100755
--- a/indra/llcommon/llmemory.h
+++ b/indra/llcommon/llmemory.h
@@ -83,7 +83,7 @@ inline void ll_aligned_free_16(void *p)
 inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed with ll_aligned_free_32().
 {
 #if defined(LL_WINDOWS)
-	return _mm_malloc(size, 32);
+	return _aligned_malloc(size, 32);
 #elif defined(LL_DARWIN)
 	return ll_aligned_malloc( size, 32 );
 #else
@@ -98,7 +98,7 @@ inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed wi
 inline void ll_aligned_free_32(void *p)
 {
 #if defined(LL_WINDOWS)
-	_mm_free(p);
+	_aligned_free(p);
 #elif defined(LL_DARWIN)
 	ll_aligned_free( p );
 #else
@@ -107,10 +107,12 @@ inline void ll_aligned_free_32(void *p)
 }
 
 #else // USE_TCMALLOC
-// ll_aligned_foo are noops now that we use tcmalloc everywhere (tcmalloc aligns automatically at appropriate intervals)
+// ll_aligned_foo are noops now that we use tcmalloc everywhere
+// (tcmalloc aligns automatically at appropriate intervals)
 #define ll_aligned_malloc( size, align ) malloc(size)
 #define ll_aligned_free( ptr ) free(ptr)
 #define ll_aligned_malloc_16 malloc
+#define ll_aligned_realloc_16 realloc
 #define ll_aligned_free_16 free
 #define ll_aligned_malloc_32 malloc
 #define ll_aligned_free_32 free
@@ -524,11 +526,9 @@ void  LLPrivateMemoryPoolTester::operator delete[](void* addr)
 
 // LLSingleton moved to llsingleton.h
 
-#define CHECK_ALIGNMENT
-
 LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment);
 
-#ifdef CHECK_ALIGNMENT
+#ifdef SHOW_ASSERT
 #define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast<uintptr_t>(ptr),((U32)alignment))
 #else
 #define ll_assert_aligned(ptr,alignment)
diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp
index 480ccf4ed99..6edeb0fefe5 100755
--- a/indra/llmath/llvector4a.cpp
+++ b/indra/llmath/llvector4a.cpp
@@ -41,8 +41,6 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
 
 /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes)
 {
-//	memcpy((void*)dst,(const void*)src,bytes);
-#if 1
 	assert(src != NULL);
 	assert(dst != NULL);
 	assert(bytes > 0);
@@ -92,7 +90,6 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
 		dst += 4;
 		src += 4;
 	}
-#endif
 }
 
 void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec )
diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h
index 9de0e667742..0526793d3a6 100755
--- a/indra/llmath/llvector4a.h
+++ b/indra/llmath/llvector4a.h
@@ -84,6 +84,7 @@ class LLVector4a
 	}
 
 	// Copy words 16-byte blocks from src to dst. Source and destination must not overlap. 
+	// Source and dest must be 16-byte aligned and bytes must be a multiple of 16.
 	static void memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes);
 
 	////////////////////////////////////
diff --git a/indra/llmath/tests/alignment_test.cpp b/indra/llmath/tests/alignment_test.cpp
index 51a7051e694..dc9b41957d5 100755
--- a/indra/llmath/tests/alignment_test.cpp
+++ b/indra/llmath/tests/alignment_test.cpp
@@ -72,6 +72,10 @@ void alignment_test_object_t::test<1>()
 	{
 		align_ptr = ll_aligned_malloc_16(sizeof(MyVector4a));
 		ensure("ll_aligned_malloc_16 failed", is_aligned(align_ptr,16));
+
+		align_ptr = ll_aligned_realloc_16(align_ptr, 2*sizeof(MyVector4a)); // realloc-style call: existing block first, then the new size
+		ensure("ll_aligned_realloc_16 failed", is_aligned(align_ptr,16));
+
 		ll_aligned_free_16(align_ptr);
 
 		align_ptr = ll_aligned_malloc_32(sizeof(MyVector4a));
@@ -84,8 +88,6 @@ void alignment_test_object_t::test<1>()
 template<> template<>
 void alignment_test_object_t::test<2>()
 {
-	ensure("LLAlignment reality is broken: ", (1==1));
-
 	MyVector4a vec1;
 	ensure("LLAlignment vec1 unaligned", is_aligned(&vec1,16));
 	
diff --git a/indra/newview/llfloatermodelpreview.cpp b/indra/newview/llfloatermodelpreview.cpp
index 64bdcccd9f0..8aa3b955786 100755
--- a/indra/newview/llfloatermodelpreview.cpp
+++ b/indra/newview/llfloatermodelpreview.cpp
@@ -4770,7 +4770,8 @@ void LLModelPreview::genBuffers(S32 lod, bool include_skin_weights)
 			if (vf.mTexCoords)
 			{
 				vb->getTexCoord0Strider(tc_strider);
-				LLVector4a::memcpyNonAliased16((F32*) tc_strider.get(), (F32*) vf.mTexCoords, num_vertices*2*sizeof(F32));
+				S32 tex_size = (num_vertices*2*sizeof(F32)+0xF) & ~0xF;
+				LLVector4a::memcpyNonAliased16((F32*) tc_strider.get(), (F32*) vf.mTexCoords, tex_size);
 			}
 			
 			if (vf.mNormals)
diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp
old mode 100644
new mode 100755
index 76f4e18c27c..d604687678d
--- a/indra/newview/llviewerjointmesh.cpp
+++ b/indra/newview/llviewerjointmesh.cpp
@@ -731,8 +731,10 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
 				F32* vw = (F32*) vertex_weightsp.get();
 				F32* cw = (F32*) clothing_weightsp.get();	
 
-				LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), num_verts*2*sizeof(F32));
-				LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), num_verts*sizeof(F32));	
+				S32 tc_size = (num_verts*2*sizeof(F32)+0xF) & ~0xF;
+				LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), tc_size);
+				S32 vw_size = (num_verts*sizeof(F32)+0xF) & ~0xF;	
+				LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), vw_size);	
 				LLVector4a::memcpyNonAliased16(cw, (F32*) mMesh->getClothingWeights(), num_verts*4*sizeof(F32));	
 			}
 
-- 
GitLab