diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h
index c61d06e924c0a9ba4c3219093fd2cc9975292abc..3eaf700bf18d8c26e7991a6051d0f709bd3887b3 100755
--- a/indra/llcommon/llmemory.h
+++ b/indra/llcommon/llmemory.h
@@ -46,7 +46,7 @@ inline void ll_aligned_free( void* ptr )
 inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16().
 {
 #if defined(LL_WINDOWS)
-	return _mm_malloc(size, 16);
+	return _aligned_malloc(size, 16);
 #elif defined(LL_DARWIN)
 	return malloc(size); // default osx malloc is 16 byte aligned.
 #else
@@ -58,10 +58,21 @@ inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed wi
 #endif
 }
 
+inline void* ll_aligned_realloc_16(void* ptr, size_t size) // returned hunk MUST be freed with ll_aligned_free_16().
+{
+#if defined(LL_WINDOWS)
+	return _aligned_realloc(ptr, size, 16);
+#elif defined(LL_DARWIN)
+	return realloc(ptr,size); // default osx malloc is 16 byte aligned.
+#else
+	return realloc(ptr,size); // FIXME not guaranteed to be aligned.
+#endif
+}
+
 inline void ll_aligned_free_16(void *p)
 {
 #if defined(LL_WINDOWS)
-	_mm_free(p);
+	_aligned_free(p);
 #elif defined(LL_DARWIN)
 	return free(p);
 #else
diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp
index 7602ef0cb28c095938144b73c7bbdacb69a4e427..480ccf4ed995e2c0fc63cf0dcb22f11f4a129fe9 100755
--- a/indra/llmath/llvector4a.cpp
+++ b/indra/llmath/llvector4a.cpp
@@ -41,14 +41,15 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
 
 /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes)
 {
-	memcpy((void*)dst,(const void*)src,bytes);
-#if 0
+//	memcpy((void*)dst,(const void*)src,bytes);
+#if 1
 	assert(src != NULL);
 	assert(dst != NULL);
 	assert(bytes > 0);
 	assert((bytes % sizeof(F32))== 0); 
 	ll_assert_aligned(src,16);
 	ll_assert_aligned(dst,16);
+	assert(bytes%16==0);
 
 	F32* end = dst + (bytes / sizeof(F32) );
 
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index 113d4835bb76af06931b28c0a17f44aeaeb2762e..9499ca29ace4a7f19c04d2dab0eb6fddae84ee64 100755
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -6914,14 +6914,17 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con
 //	S32 old_size = mNumVertices*16;
 
 	//positions
-	mPositions = (LLVector4a*) realloc(mPositions, new_size);
+	mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size);
+	ll_assert_aligned(mPositions,16);
 	
 	//normals
-	mNormals = (LLVector4a*) realloc(mNormals, new_size);
-	
+	mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size);
+	ll_assert_aligned(mNormals,16);
+
 	//tex coords
 	new_size = ((new_verts*8)+0xF) & ~0xF;
-	mTexCoords = (LLVector2*) realloc(mTexCoords, new_size);
+	mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size);
+	ll_assert_aligned(mTexCoords,16);
 	
 
 	//just clear binormals
@@ -6974,7 +6977,8 @@ void LLVolumeFace::pushIndex(const U16& idx)
 	S32 old_size = ((mNumIndices*2)+0xF) & ~0xF;
 	if (new_size != old_size)
 	{
-		mIndices = (U16*) realloc(mIndices, new_size);
+		mIndices = (U16*) ll_aligned_realloc_16(mIndices, new_size);
+		ll_assert_aligned(mIndices,16);
 	}
 	
 	mIndices[mNumIndices++] = idx;
@@ -7015,11 +7019,11 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
 	}
 
 	//allocate new buffer space
-	mPositions = (LLVector4a*) realloc(mPositions, new_count*sizeof(LLVector4a));
+	mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a));
 	ll_assert_aligned(mPositions, 16);
-	mNormals = (LLVector4a*) realloc(mNormals, new_count*sizeof(LLVector4a));
+	mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a));
 	ll_assert_aligned(mNormals, 16);
-	mTexCoords = (LLVector2*) realloc(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF);
+	mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF);
 	ll_assert_aligned(mTexCoords, 16);
 	
 	mNumVertices = new_count;
@@ -7066,7 +7070,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
 	new_count = mNumIndices + face.mNumIndices;
 
 	//allocate new index buffer
-	mIndices = (U16*) realloc(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF);
+	mIndices = (U16*) ll_aligned_realloc_16(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF);
 	
 	//get destination address into new index buffer
 	U16* dst_idx = mIndices+mNumIndices;
diff --git a/indra/newview/llpolymesh.cpp b/indra/newview/llpolymesh.cpp
old mode 100644
new mode 100755
index 450f9b2be7238a61f594dee75d7a1a3416f3a46e..0860506086153bc8482c345c5946d666781bde09
--- a/indra/newview/llpolymesh.cpp
+++ b/indra/newview/llpolymesh.cpp
@@ -129,22 +129,22 @@ void LLPolyMeshSharedData::freeMeshData()
         {
                 mNumVertices = 0;
 
-                delete [] mBaseCoords;
+                ll_aligned_free_16(mBaseCoords);
                 mBaseCoords = NULL;
 
-                delete [] mBaseNormals;
+                ll_aligned_free_16(mBaseNormals);
                 mBaseNormals = NULL;
 
-                delete [] mBaseBinormals;
+                ll_aligned_free_16(mBaseBinormals);
                 mBaseBinormals = NULL;
 
-                delete [] mTexCoords;
+                ll_aligned_free_16(mTexCoords);
                 mTexCoords = NULL;
 
-                delete [] mDetailTexCoords;
+                ll_aligned_free_16(mDetailTexCoords);
                 mDetailTexCoords = NULL;
 
-                delete [] mWeights;
+                ll_aligned_free_16(mWeights);
                 mWeights = NULL;
         }
 
@@ -229,12 +229,12 @@ U32 LLPolyMeshSharedData::getNumKB()
 BOOL LLPolyMeshSharedData::allocateVertexData( U32 numVertices )
 {
         U32 i;
-        mBaseCoords = new LLVector3[ numVertices ];
-        mBaseNormals = new LLVector3[ numVertices ];
-        mBaseBinormals = new LLVector3[ numVertices ];
-        mTexCoords = new LLVector2[ numVertices ];
-        mDetailTexCoords = new LLVector2[ numVertices ];
-        mWeights = new F32[ numVertices ];
+        mBaseCoords = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3));
+        mBaseNormals = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3));
+        mBaseBinormals = (LLVector3*) ll_aligned_malloc_16(numVertices*sizeof(LLVector3));
+        mTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2));
+        mDetailTexCoords = (LLVector2*) ll_aligned_malloc_16(numVertices*sizeof(LLVector2));
+        mWeights = (F32*) ll_aligned_malloc_16(numVertices*sizeof(F32));
         for (i = 0; i < numVertices; i++)
         {
                 mWeights[i] = 0.f;