diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index 24528a8ce9e3b18bfa7c243cf95c8ff1b65971ce..c73f0e2755ff18ca86c5123f8c4a22bdf39ff9be 100755
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -96,6 +96,15 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1;
 
 extern BOOL gDebugGL;
 
+void assert_aligned(void* ptr, U32 alignment) // raises llerrs when ptr is not a multiple of alignment bytes
+{
+	size_t t = (size_t) ptr; // pointer-width integer: a U32 cast would truncate 64-bit addresses
+	if (t%alignment != 0)
+	{
+		llerrs << "Pointer " << ptr << " is not aligned to " << alignment << " bytes" << llendl;
+	}
+}
+
 BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm)
 {    
 	LLVector3 test = (pt2-pt1)%(pt3-pt2);
@@ -1990,7 +1999,7 @@ void LLVolumeFace::VertexData::init()
 {
 	if (!mData)
 	{
-		mData = new LLVector4a[2];
+		mData = (LLVector4a*) malloc(sizeof(LLVector4a)*2);
 	}
 }
 
@@ -2011,7 +2020,7 @@ const LLVolumeFace::VertexData& LLVolumeFace::VertexData::operator=(const LLVolu
 	if (this != &rhs)
 	{
 		init();
-		LLVector4a::memcpyNonAliased16((F32*) mData, (F32*) rhs.mData, 8*sizeof(F32));
+		LLVector4a::memcpyNonAliased16((F32*) mData, (F32*) rhs.mData, 2*sizeof(LLVector4a));
 		mTexCoord = rhs.mTexCoord;
 	}
 	return *this;
@@ -2019,7 +2028,8 @@ const LLVolumeFace::VertexData& LLVolumeFace::VertexData::operator=(const LLVolu
 
 LLVolumeFace::VertexData::~VertexData()
 {
-	delete [] mData;
+	free(mData); // init() now obtains mData from malloc(), so it is released with free() instead of delete []
+	mData = NULL; // clear the pointer once the buffer has been released
 }
 
 LLVector4a& LLVolumeFace::VertexData::getPosition()
@@ -5230,7 +5240,7 @@ LLVolumeFace::LLVolumeFace() :
 	mWeights(NULL),
 	mOctree(NULL)
 {
-	mExtents = new LLVector4a[3];
+	mExtents = (LLVector4a*) malloc(sizeof(LLVector4a)*3);
 	mCenter = mExtents+2;
 }
 
@@ -5251,7 +5261,7 @@ LLVolumeFace::LLVolumeFace(const LLVolumeFace& src)
 	mWeights(NULL),
 	mOctree(NULL)
 { 
-	mExtents = new LLVector4a[3];
+	mExtents = (LLVector4a*) malloc(sizeof(LLVector4a)*3);
 	mCenter = mExtents+2;
 	*this = src;
 }
@@ -5279,7 +5289,7 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)
 
 	freeData();
 	
-	LLVector4a::memcpyNonAliased16((F32*) mExtents, (F32*) src.mExtents, 12*sizeof(F32));
+	LLVector4a::memcpyNonAliased16((F32*) mExtents, (F32*) src.mExtents, 3*sizeof(LLVector4a));
 
 	resizeVertices(src.mNumVertices);
 	resizeIndices(src.mNumIndices);
@@ -5287,7 +5297,7 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)
 	if (mNumVertices)
 	{
 		S32 vert_size = mNumVertices*sizeof(LLVector4a);
-		S32 tc_size = (mNumVertices*8+0xF) & ~0xF;
+		S32 tc_size = (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF;
 			
 		LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) src.mPositions, vert_size);
 		LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) src.mNormals, vert_size);
@@ -5301,7 +5311,7 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)
 		}
 		else
 		{
-			delete [] mBinormals;
+			free(mBinormals);
 			mBinormals = NULL;
 		}
 
@@ -5312,14 +5322,14 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)
 		}
 		else
 		{
-			delete [] mWeights;
+			free(mWeights);
 			mWeights = NULL;
 		}
 	}
 
 	if (mNumIndices)
 	{
-		S32 idx_size = (mNumIndices*2+0xF) & ~0xF;
+		S32 idx_size = (mNumIndices*sizeof(U16)+0xF) & ~0xF;
 		
 		LLVector4a::memcpyNonAliased16((F32*) mIndices, (F32*) src.mIndices, idx_size);
 	}
@@ -5330,7 +5340,7 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)
 
 LLVolumeFace::~LLVolumeFace()
 {
-	delete [] mExtents;
+	free(mExtents);
 	mExtents = NULL;
 
 	freeData();
@@ -5338,17 +5348,17 @@ LLVolumeFace::~LLVolumeFace()
 
 void LLVolumeFace::freeData()
 {
-	delete [] mPositions;
+	free(mPositions);
 	mPositions = NULL;
-	delete []  mNormals;
+	free( mNormals);
 	mNormals = NULL;
-	delete [] mTexCoords;
+	free(mTexCoords);
 	mTexCoords = NULL;
-	delete [] mIndices;
+	free(mIndices);
 	mIndices = NULL;
-	delete [] mBinormals;
+	free(mBinormals);
 	mBinormals = NULL;
-	delete [] mWeights;
+	free(mWeights);
 	mWeights = NULL;
 
 	delete mOctree;
@@ -5402,13 +5412,14 @@ bool LLVolumeFace::VertexMapData::ComparePosition::operator()(const LLVector3& a
 		return a.mV[1] < b.mV[1];
 	}
 	
-	return a.mV[2] < b.mV[2];			
+	return a.mV[2] < b.mV[2];
 }
 
 void LLVolumeFace::optimize(F32 angle_cutoff)
 {
 	LLVolumeFace new_face;
 
+	//map of points to vector of vertices at that point
 	VertexMapData::PointMap point_map;
 
 	//remove redundant vertices
@@ -6161,21 +6172,24 @@ void LLVolumeFace::createBinormals()
 
 void LLVolumeFace::resizeVertices(S32 num_verts)
 {
-	delete [] mPositions;
-	delete [] mNormals;
-	delete [] mBinormals;
-	delete [] mTexCoords;
+	free(mPositions);
+	free(mNormals);
+	free(mBinormals);
+	free(mTexCoords);
 
 	mBinormals = NULL;
 
 	if (num_verts)
 	{
-		mPositions = new LLVector4a[num_verts]; 
-		mNormals = new LLVector4a[num_verts]; 
+		mPositions = (LLVector4a*) malloc(sizeof(LLVector4a)*num_verts);
+		assert_aligned(mPositions, 16);
+		mNormals = (LLVector4a*) malloc(sizeof(LLVector4a)*num_verts);
+		assert_aligned(mNormals, 16);
 
 		//pad texture coordinate block end to allow for QWORD reads
-		S32 size = ((num_verts*8) + 0xF) & ~0xF;
-		mTexCoords = new LLVector2[size/8];
+		S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF;
+		mTexCoords = (LLVector2*) malloc(size);
+		assert_aligned(mTexCoords, 16);
 	}
 	else
 	{
@@ -6199,39 +6213,18 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con
 	S32 old_size = mNumVertices*16;
 
 	//positions
-	LLVector4a* dst = new LLVector4a[new_verts];
-	if (mPositions)
-	{
-		LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mPositions, old_size);
-		delete [] mPositions;
-	}
-	mPositions = dst;
-
+	mPositions = (LLVector4a*) realloc(mPositions, new_size);
+	
 	//normals
-	dst = new LLVector4a[new_verts]; 
-	if (mNormals)
-	{
-		LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mNormals, old_size);
-		delete [] mNormals;
-	}
-	mNormals = dst;
-
+	mNormals = (LLVector4a*) realloc(mNormals, new_size);
+	
 	//tex coords
 	new_size = ((new_verts*8)+0xF) & ~0xF;
-	old_size = ((mNumVertices*8)+0xF) & ~0xF;
-
-	{
-		LLVector2* dst = new LLVector2[new_size/8]; 
-		if (mTexCoords)
-		{
-			LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mTexCoords, old_size);
-			delete [] mTexCoords;
-		}
-		mTexCoords = dst;
-	}
+	mTexCoords = (LLVector2*) realloc(mTexCoords, new_size);
+	
 
 	//just clear binormals
-	delete [] mBinormals;
+	free(mBinormals);
 	mBinormals = NULL;
 
 	mPositions[mNumVertices] = pos;
@@ -6243,26 +6236,26 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con
 
 void LLVolumeFace::allocateBinormals(S32 num_verts)
 {
-	delete [] mBinormals;
-	mBinormals = new LLVector4a[num_verts]; 
+	free(mBinormals); // release any previous buffer; its contents are not carried over
+	mBinormals = (LLVector4a*) malloc(sizeof(LLVector4a)*num_verts); // NOTE(review): result is not NULL-checked or passed to assert_aligned() as resizeVertices() does - confirm this is intended
 }
 
 void LLVolumeFace::allocateWeights(S32 num_verts)
 {
-	delete [] mWeights; 
-	mWeights = new LLVector4a[num_verts]; 
+	free(mWeights); // release any previous buffer; its contents are not carried over
+	mWeights = (LLVector4a*) malloc(sizeof(LLVector4a)*num_verts); // NOTE(review): result is not NULL-checked or passed to assert_aligned() as resizeVertices() does - confirm this is intended
 }
 
 void LLVolumeFace::resizeIndices(S32 num_indices)
 {
-	delete [] mIndices;
+	free(mIndices);
 	
 	if (num_indices)
 	{
 		//pad index block end to allow for QWORD reads
-		S32 size = ((num_indices*2) + 0xF) & ~0xF;
+		S32 size = ((num_indices*sizeof(U16)) + 0xF) & ~0xF;
 		
-		mIndices = new U16[size/2];
+		mIndices = (U16*) malloc(size);
 	}
 	else
 	{
@@ -6280,13 +6273,7 @@ void LLVolumeFace::pushIndex(const U16& idx)
 	S32 old_size = ((mNumIndices*2)+0xF) & ~0xF;
 	if (new_size != old_size)
 	{
-		U16* dst = new U16[new_size/2];
-		if (mIndices)
-		{
-			LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mIndices, old_size);
-			delete [] mIndices;
-		}
-		mIndices = dst;
+		mIndices = (U16*) realloc(mIndices, new_size);
 	}
 	
 	mIndices[mNumIndices++] = idx;
@@ -6327,28 +6314,13 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
 	}
 
 	//allocate new buffer space
-	LLVector4a* new_pos = new LLVector4a[new_count];
-	LLVector4a* new_norm = new LLVector4a[new_count];
-	LLVector2* new_tc = new LLVector2[((new_count*8+0xF) & ~0xF)/8];
+	mPositions = (LLVector4a*) realloc(mPositions, new_count*sizeof(LLVector4a));
+	assert_aligned(mPositions, 16);
+	mNormals = (LLVector4a*) realloc(mNormals, new_count*sizeof(LLVector4a));
+	assert_aligned(mNormals, 16);
+	mTexCoords = (LLVector2*) realloc(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF);
+	assert_aligned(mTexCoords, 16);
 	
-
-	if (mNumVertices > 0)
-	{ //copy old buffers
-		LLVector4a::memcpyNonAliased16((F32*) new_pos, (F32*) mPositions, mNumVertices*4*sizeof(F32));
-		LLVector4a::memcpyNonAliased16((F32*) new_norm, (F32*) mNormals, mNumVertices*4*sizeof(F32));
-		LLVector4a::memcpyNonAliased16((F32*) new_tc, (F32*) mTexCoords, mNumVertices*2*sizeof(F32));
-	}
-
-	//free old buffer space
-	delete [] mPositions;
-	delete [] mNormals;
-	delete [] mTexCoords;
-	
-	//point to new buffers
-	mPositions = new_pos;
-	mNormals = new_norm;
-	mTexCoords = new_tc;
-
 	mNumVertices = new_count;
 
 	//get destination address of appended face
@@ -6393,19 +6365,8 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
 	new_count = mNumIndices + face.mNumIndices;
 
 	//allocate new index buffer
-	U16* new_indices = new U16[((new_count*2+0xF) & ~0xF)/2];
-	if (mNumIndices > 0)
-	{ //copy old index buffer
-		S32 old_size = (mNumIndices*2+0xF) & ~0xF;
-		LLVector4a::memcpyNonAliased16((F32*) new_indices, (F32*) mIndices, old_size);
-	}
-
-	//free old index buffer
-	delete [] mIndices;
+	mIndices = (U16*) realloc(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF);
 	
-	//point to new index buffer
-	mIndices = new_indices;
-
 	//get destination address into new index buffer
 	U16* dst_idx = mIndices+mNumIndices;
 	mNumIndices = new_count;
diff --git a/indra/llprimitive/llmodel.cpp b/indra/llprimitive/llmodel.cpp
index e4b7cd80ce594c71ab44adb1d84401fa09827358..ae1799695641b784d13e80bb1157edffbe9633f0 100755
--- a/indra/llprimitive/llmodel.cpp
+++ b/indra/llprimitive/llmodel.cpp
@@ -120,7 +120,7 @@ void load_face_from_dom_inputs(LLVolumeFace& face, const domInputLocalOffset_Arr
 			{
 				LLVector4a* norm = (LLVector4a*) face.mNormals + (j-min_idx);
 				norm->set(n[j*3+0], n[j*3+1], n[j*3+2]);
-				norm->normalize3fast();
+				norm->normalize3();
 			}
 		}
 		else if (strcmp(COMMON_PROFILE_INPUT_TEXCOORD, inputs[j]->getSemantic()) == 0)
@@ -1041,6 +1041,12 @@ void LLModel::smoothNormals(F32 angle_cutoff)
 	{
 		LLVolumeFace& vol_face = mVolumeFaces[j];
 
+		if (vol_face.mNumIndices > 65535)
+		{
+			llwarns << "Too many vertices for normal generation to work." << llendl;
+			continue;
+		}
+
 		//create faceted copy of current face with no texture coordinates (step 1)
 		LLVolumeFace faceted;
 
@@ -1048,16 +1054,16 @@ void LLModel::smoothNormals(F32 angle_cutoff)
 		//LLVector4a* src_norm = (LLVector4a*) vol_face.mNormals;
 
 
-		//bake out triangles into temporary face, clearing normals and texture coordinates
+		faceted.resizeVertices(vol_face.mNumIndices);
+		faceted.resizeIndices(vol_face.mNumIndices);
+		//bake out triangles into temporary face, clearing texture coordinates
 		for (U32 i = 0; i < vol_face.mNumIndices; ++i)
 		{
 			U32 idx = vol_face.mIndices[i];
-			LLVolumeFace::VertexData v;
-			v.setPosition(src_pos[idx]); 
-			v.getNormal().clear();
-			v.mTexCoord.clear();
-			faceted.pushVertex(v);
-			faceted.pushIndex(i);
+		
+			faceted.mPositions[i] = src_pos[idx];
+			faceted.mTexCoords[i] = LLVector2(0,0);
+			faceted.mIndices[i] = i;
 		}
 
 		//generate normals for temporary face
@@ -1080,7 +1086,7 @@ void LLModel::smoothNormals(F32 angle_cutoff)
 			rhs.setSub(p2, p0);
 
 			n0.setCross3(lhs, rhs);
-			n0.normalize3fast();
+			n0.normalize3();
 			n1 = n0;
 			n2 = n0;
 		}
@@ -1126,7 +1132,7 @@ void LLModel::smoothNormals(F32 angle_cutoff)
 
 		for (U32 i = 0; i < faceted.mNumVertices; ++i)
 		{
-			faceted.mNormals[i].normalize3fast();
+			faceted.mNormals[i].normalize3();
 
 			LLVolumeFace::VertexMapData v;
 			v.setPosition(faceted.mPositions[i]);
@@ -1139,16 +1145,17 @@ void LLModel::smoothNormals(F32 angle_cutoff)
 		LLVolumeFace new_face;
 
 		//bake out triangles into new face
+		new_face.resizeIndices(vol_face.mNumIndices);
+		new_face.resizeVertices(vol_face.mNumIndices);
+		
 		for (U32 i = 0; i < vol_face.mNumIndices; ++i)
 		{
 			U32 idx = vol_face.mIndices[i];
 			LLVolumeFace::VertexData v;
-			v.setPosition(vol_face.mPositions[idx]);
-			v.setNormal(vol_face.mNormals[idx]);
-			v.mTexCoord = vol_face.mTexCoords[idx];
-
-			new_face.pushVertex(v);
-			new_face.pushIndex(i);
+			new_face.mPositions[i] = vol_face.mPositions[idx];
+			new_face.mNormals[i].clear();
+			new_face.mTexCoords[i] = vol_face.mTexCoords[idx];
+			new_face.mIndices[i] = i;
 		}
 
 		//generate normals for new face
@@ -1171,7 +1178,7 @@ void LLModel::smoothNormals(F32 angle_cutoff)
 			rhs.setSub(p2, p0);
 
 			n0.setCross3(lhs, rhs);
-			n0.normalize3fast();
+			n0.normalize3();
 			n1 = n0;
 			n2 = n0;
 		}
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index 71620feed374f3bbb9d17fc9a0011d646b9fbb35..66beb5f003a0f454192bfa56d877303e4ae1dbec 100755
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -603,7 +603,7 @@ void LLVertexBuffer::createGLBuffer()
 	{
 		static int gl_buffer_idx = 0;
 		mGLBuffer = ++gl_buffer_idx;
-		mMappedData = (U8*) ll_aligned_malloc_16(size);
+		mMappedData = (U8*) malloc(size);
 	}
 }
 
@@ -637,7 +637,7 @@ void LLVertexBuffer::createGLIndices()
 	}
 	else
 	{
-		mMappedIndexData = (U8*) ll_aligned_malloc_16(size);
+		mMappedIndexData = (U8*) malloc(size);
 		static int gl_buffer_idx = 0;
 		mGLIndices = ++gl_buffer_idx;
 	}
@@ -658,7 +658,7 @@ void LLVertexBuffer::destroyGLBuffer()
 		}
 		else
 		{
-			ll_aligned_free_16(mMappedData);
+			free(mMappedData);
 			mMappedData = NULL;
 			mEmpty = TRUE;
 		}
@@ -685,7 +685,7 @@ void LLVertexBuffer::destroyGLIndices()
 		}
 		else
 		{
-			ll_aligned_free_16(mMappedIndexData);
+			free(mMappedIndexData);
 			mMappedIndexData = NULL;
 			mEmpty = TRUE;
 		}
@@ -818,8 +818,8 @@ void LLVertexBuffer::resizeBuffer(S32 newnverts, S32 newnindices)
 			{
 				if (!useVBOs())
 				{
-					ll_aligned_free_16(mMappedData);
-					mMappedData = (U8*) ll_aligned_malloc_16(newsize);
+					free(mMappedData);
+					mMappedData = (U8*) malloc(newsize);
 				}
 				mResized = TRUE;
 			}
@@ -839,8 +839,8 @@ void LLVertexBuffer::resizeBuffer(S32 newnverts, S32 newnindices)
 			{
 				if (!useVBOs())
 				{
-					ll_aligned_free_16(mMappedIndexData);
-					mMappedIndexData = (U8*) ll_aligned_malloc_16(new_index_size);
+					free(mMappedIndexData);
+					mMappedIndexData = (U8*) malloc(new_index_size);
 				}
 				mResized = TRUE;
 			}