diff --git a/indra/cmake/GooglePerfTools.cmake b/indra/cmake/GooglePerfTools.cmake
index 09501e0406e637ceea1e1c92d10291504e9fe2da..73b3642ae646c3e5ff493157e92d32b9fcb7c9fb 100644
--- a/indra/cmake/GooglePerfTools.cmake
+++ b/indra/cmake/GooglePerfTools.cmake
@@ -3,7 +3,7 @@ include(Prebuilt)
 
 # If you want to enable or disable TCMALLOC in viewer builds, this is the place.
 # set ON or OFF as desired.
-set (USE_TCMALLOC ON)
+set (USE_TCMALLOC OFF)
 
 if (STANDALONE)
   include(FindGooglePerfTools)
diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h
index 9dd776ff57922271068e38ebd2ac82cafd537f23..f37e8459ffb92168240459c228a49f60e61f4edd 100644
--- a/indra/llcommon/llmemory.h
+++ b/indra/llcommon/llmemory.h
@@ -58,27 +58,35 @@ inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed wi
 #endif
 }
 
-inline void* ll_aligned_realloc_16(void* ptr, size_t size) // returned hunk MUST be freed with ll_aligned_free_16().
+inline void ll_aligned_free_16(void *p)
 {
 #if defined(LL_WINDOWS)
-	return _aligned_realloc(ptr, size, 16);
+	_aligned_free(p);
 #elif defined(LL_DARWIN)
-	return realloc(ptr,size); // default osx malloc is 16 byte aligned.
+	return free(p);
 #else
-	return realloc(ptr,size); // FIXME not guaranteed to be aligned.
+	free(p); // posix_memalign() is compatible with heap deallocator
 #endif
 }
 
-inline void ll_aligned_free_16(void *p)
+inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // returned hunk MUST be freed with ll_aligned_free_16().
 {
 #if defined(LL_WINDOWS)
-	_aligned_free(p);
+	return _aligned_realloc(ptr, size, 16);
 #elif defined(LL_DARWIN)
-	return free(p);
+	return realloc(ptr,size); // default osx malloc is 16 byte aligned.
 #else
-	free(p); // posix_memalign() is compatible with heap deallocator
+	//FIXME: memcpy is SLOW
+	void* ret = ll_aligned_malloc_16(size);
+	if (ptr)
+	{
+		memcpy(ret, ptr, old_size);
+		ll_aligned_free_16(ptr);
+	}
+	return ret;
 #endif
 }
+
 #else // USE_TCMALLOC
 // ll_aligned_foo_16 are not needed with tcmalloc
 #define ll_aligned_malloc_16 malloc
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index 11fa7080ced9dd02ed4c0165bfdb9d3b54a17a36..ea09a3afe7379c93b8ec4a622793d1bc87ac4e29 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -6979,19 +6979,20 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con
 {
 	S32 new_verts = mNumVertices+1;
 	S32 new_size = new_verts*16;
-//	S32 old_size = mNumVertices*16;
+	S32 old_size = mNumVertices*16;
 
 	//positions
-	mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size);
+	mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size, old_size);
 	ll_assert_aligned(mPositions,16);
 	
 	//normals
-	mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size);
+	mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size, old_size);
 	ll_assert_aligned(mNormals,16);
 
 	//tex coords
 	new_size = ((new_verts*8)+0xF) & ~0xF;
-	mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size);
+	old_size = ((mNumVertices*8)+0xF) & ~0xF;
+	mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size, old_size);
 	ll_assert_aligned(mTexCoords,16);
 	
 
@@ -7045,7 +7046,7 @@ void LLVolumeFace::pushIndex(const U16& idx)
 	S32 old_size = ((mNumIndices*2)+0xF) & ~0xF;
 	if (new_size != old_size)
 	{
-		mIndices = (U16*) ll_aligned_realloc_16(mIndices, new_size);
+		mIndices = (U16*) ll_aligned_realloc_16(mIndices, new_size, old_size);
 		ll_assert_aligned(mIndices,16);
 	}
 	
@@ -7087,11 +7088,11 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
 	}
 
 	//allocate new buffer space
-	mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a));
+	mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a));
 	ll_assert_aligned(mPositions, 16);
-	mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a));
+	mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a));
 	ll_assert_aligned(mNormals, 16);
-	mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF);
+	mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF, (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF);
 	ll_assert_aligned(mTexCoords, 16);
 	
 	mNumVertices = new_count;
@@ -7138,7 +7139,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
 	new_count = mNumIndices + face.mNumIndices;
 
 	//allocate new index buffer
-	mIndices = (U16*) ll_aligned_realloc_16(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF);
+	mIndices = (U16*) ll_aligned_realloc_16(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF, (mNumIndices*sizeof(U16)+0xF) & ~0xF);
 	
 	//get destination address into new index buffer
 	U16* dst_idx = mIndices+mNumIndices;
diff --git a/indra/llmath/tests/alignment_test.cpp b/indra/llmath/tests/alignment_test.cpp
index ac0c45ae6f828d535dfdc791e9560274665bab6e..5c426c57c35c26c9c7457c9693701778b72a9c78 100644
--- a/indra/llmath/tests/alignment_test.cpp
+++ b/indra/llmath/tests/alignment_test.cpp
@@ -78,7 +78,7 @@ void alignment_test_object_t::test<1>()
 		align_ptr = ll_aligned_malloc_16(sizeof(MyVector4a));
 		ensure("ll_aligned_malloc_16 failed", is_aligned(align_ptr,16));
 
-		align_ptr = ll_aligned_realloc_16(align_ptr,2*sizeof(MyVector4a));
+		align_ptr = ll_aligned_realloc_16(align_ptr,2*sizeof(MyVector4a), sizeof(MyVector4a));
 		ensure("ll_aligned_realloc_16 failed", is_aligned(align_ptr,16));
 
 		ll_aligned_free_16(align_ptr);