diff --git a/indra/llappearance/llavatarjoint.cpp b/indra/llappearance/llavatarjoint.cpp
index fb34a6c32933e2a4ed58f061663d43a17abc3b68..4820c1cd50a78420a2cc313344a78e872dae7752 100644
--- a/indra/llappearance/llavatarjoint.cpp
+++ b/indra/llappearance/llavatarjoint.cpp
@@ -249,7 +249,7 @@ void LLAvatarJointCollisionVolume::renderCollision()
 	updateWorldMatrix();
 	
 	gGL.pushMatrix();
-	gGL.multMatrix( &mXform.getWorldMatrix().mMatrix[0][0] );
+	gGL.multMatrix(mXform.getWorldMatrix());
 
 	gGL.diffuseColor3f( 0.f, 0.f, 1.f );
 	
diff --git a/indra/llappearance/llpolymesh.h b/indra/llappearance/llpolymesh.h
index 048536e21ea2d309d44e6f942ede92cba98378ef..8f91d1585cacb49826bcddbf6655c2f20ef8a98c 100644
--- a/indra/llappearance/llpolymesh.h
+++ b/indra/llappearance/llpolymesh.h
@@ -145,10 +145,10 @@ class LLPolyMeshSharedData
 class LLJointRenderData
 {
 public:
-	LLJointRenderData(const LLMatrix4* world_matrix, LLSkinJoint* skin_joint) : mWorldMatrix(world_matrix), mSkinJoint(skin_joint) {}
+	LLJointRenderData(const LLMatrix4a* world_matrix, LLSkinJoint* skin_joint) : mWorldMatrix(world_matrix), mSkinJoint(skin_joint) {}
 	~LLJointRenderData() = default;
 
-	const LLMatrix4*		mWorldMatrix;
+	const LLMatrix4a*		mWorldMatrix;
 	LLSkinJoint*			mSkinJoint;
 };
 
diff --git a/indra/llcharacter/lljoint.cpp b/indra/llcharacter/lljoint.cpp
index bdfcb056db8ad921927a2663d34ec2251c960308..6d9ef01acf3b92ba95a0dc249b3bb589f0228bd9 100644
--- a/indra/llcharacter/lljoint.cpp
+++ b/indra/llcharacter/lljoint.cpp
@@ -812,19 +812,15 @@ void LLJoint::setWorldPosition( const LLVector3& pos )
 		return;
 	}
 
-	LLMatrix4 temp_matrix = getWorldMatrix();
-	temp_matrix.mMatrix[VW][VX] = pos.mV[VX];
-	temp_matrix.mMatrix[VW][VY] = pos.mV[VY];
-	temp_matrix.mMatrix[VW][VZ] = pos.mV[VZ];
+	LLMatrix4a temp_matrix = getWorldMatrix();
+	temp_matrix.setTranslate_affine(pos);
 
-	LLMatrix4 parentWorldMatrix = mParent->getWorldMatrix();
-	LLMatrix4 invParentWorldMatrix = parentWorldMatrix.invert();
+	LLMatrix4a invParentWorldMatrix = mParent->getWorldMatrix();
+	invParentWorldMatrix.invert();
 
-	temp_matrix *= invParentWorldMatrix;
+	invParentWorldMatrix.mul(temp_matrix);
 
-	LLVector3 localPos(	temp_matrix.mMatrix[VW][VX],
-						temp_matrix.mMatrix[VW][VY],
-						temp_matrix.mMatrix[VW][VZ] );
+	LLVector3 localPos(	invParentWorldMatrix.getRow<LLMatrix4a::ROW_TRANS>().getF32ptr() );
 
 	setPosition( localPos );
 }
@@ -883,19 +879,19 @@ void LLJoint::setWorldRotation( const LLQuaternion& rot )
 		this->setRotation( rot );
 		return;
 	}
+	
+	// Turn the requested rotation into a matrix; the parent's world matrix is fetched once, below, for inversion.
+	LLQuaternion2 rota(rot);
+	LLMatrix4a temp_mat(rota);
 
-	LLMatrix4 temp_mat(rot);
-
-	LLMatrix4 parentWorldMatrix = mParent->getWorldMatrix();
-	parentWorldMatrix.mMatrix[VW][VX] = 0;
-	parentWorldMatrix.mMatrix[VW][VY] = 0;
-	parentWorldMatrix.mMatrix[VW][VZ] = 0;
+	LLMatrix4a invParentWorldMatrix = mParent->getWorldMatrix();
+	invParentWorldMatrix.setTranslate_affine(LLVector3(0.f));
 
-	LLMatrix4 invParentWorldMatrix = parentWorldMatrix.invert();
+	invParentWorldMatrix.invert();
 
-	temp_mat *= invParentWorldMatrix;
+	invParentWorldMatrix.mul(temp_mat);
 
-	setRotation(LLQuaternion(temp_mat));
+	setRotation(LLQuaternion(LLMatrix4(invParentWorldMatrix.getF32ptr())));
 }
 
 
@@ -948,7 +944,7 @@ void LLJoint::setScale( const LLVector3& requested_scale, bool apply_attachment_
 //--------------------------------------------------------------------
 // getWorldMatrix()
 //--------------------------------------------------------------------
-const LLMatrix4 &LLJoint::getWorldMatrix()
+const LLMatrix4a &LLJoint::getWorldMatrix()
 {
 	updateWorldMatrixParent();
 
diff --git a/indra/llcharacter/lljoint.h b/indra/llcharacter/lljoint.h
index 9755d796784804e2bef89dd34ae401926e86e297..5497b13cfcc296c34e4b50a1c410ab6b3856a86c 100644
--- a/indra/llcharacter/lljoint.h
+++ b/indra/llcharacter/lljoint.h
@@ -36,6 +36,7 @@
 #include "v3math.h"
 #include "v4math.h"
 #include "m4math.h"
+#include "llmatrix4a.h"
 #include "llquaternion.h"
 #include "xform.h"
 
@@ -262,7 +263,7 @@ class LLJoint
 	void setScale( const LLVector3& scale, bool apply_attachment_overrides = false );
 
 	// get/set world matrix
-	const LLMatrix4 &getWorldMatrix();
+	const LLMatrix4a& getWorldMatrix();
 	void setWorldMatrix( const LLMatrix4& mat );
 
 	void updateWorldMatrixChildren();
diff --git a/indra/llcharacter/lljointsolverrp3.cpp b/indra/llcharacter/lljointsolverrp3.cpp
index 0da3fcbabe454e9d86d7950cb3c675ed3a8e1f2f..083839c3ce3d2b4b61abf0c85aa89ffa32088c56 100644
--- a/indra/llcharacter/lljointsolverrp3.cpp
+++ b/indra/llcharacter/lljointsolverrp3.cpp
@@ -157,12 +157,14 @@ void LLJointSolverRP3::solve()
 	//-------------------------------------------------------------------------
 	// get the poleVector in world space
 	//-------------------------------------------------------------------------
-	LLMatrix4 worldJointAParentMat;
+	LLVector3 poleVec = mPoleVector;
 	if ( mJointA->getParent() )
 	{
-		worldJointAParentMat = mJointA->getParent()->getWorldMatrix();
+		LLVector4a pole_veca;
+		pole_veca.load3(mPoleVector.mV);
+		mJointA->getParent()->getWorldMatrix().rotate(pole_veca,pole_veca);
+		poleVec.set(pole_veca.getF32ptr());
 	}
-	LLVector3 poleVec = rotate_vector( mPoleVector, worldJointAParentMat );
 
 	//-------------------------------------------------------------------------
 	// compute the following:
diff --git a/indra/llcharacter/llkeyframestandmotion.cpp b/indra/llcharacter/llkeyframestandmotion.cpp
index ddccdf98124adc50404ad2ed78c4274649406ed7..7f400972b07a829b2b822d2f938d8ea4fa6187fe 100644
--- a/indra/llcharacter/llkeyframestandmotion.cpp
+++ b/indra/llcharacter/llkeyframestandmotion.cpp
@@ -275,40 +275,38 @@ BOOL LLKeyframeStandMotion::onUpdate(F32 time, U8* joint_mask)
 	//-------------------------------------------------------------------------
 	if ( mTrackAnkles )
 	{
-		LLVector4 dirLeft4 = mAnkleLeftJoint.getWorldMatrix().getFwdRow4();
-		LLVector4 dirRight4 = mAnkleRightJoint.getWorldMatrix().getFwdRow4();
-		LLVector3 dirLeft = vec4to3( dirLeft4 );
-		LLVector3 dirRight = vec4to3( dirRight4 );
+		const LLVector4a& dirLeft4 = mAnkleLeftJoint.getWorldMatrix().getRow<LLMatrix4a::ROW_FWD>();
+		const LLVector4a& dirRight4 = mAnkleRightJoint.getWorldMatrix().getRow<LLMatrix4a::ROW_FWD>();
 
-		LLVector3 up;
-		LLVector3 dir;
-		LLVector3 left;
+		LLVector4a up;
+		LLVector4a dir;
+		LLVector4a left;
 
-		up = mNormalLeft;
-		up.normVec();
+		up.load3(mNormalLeft.mV);
+		up.normalize3fast();
 		if (mFlipFeet)
 		{
-			up *= -1.0f;
+			up.negate();
 		}
-		dir = dirLeft;
-		dir.normVec();
-		left = up % dir;
-		left.normVec();
-		dir = left % up;
-		mRotationLeft = LLQuaternion( dir, left, up );
-
-		up = mNormalRight;
-		up.normVec();
+		dir = dirLeft4;
+		dir.normalize3fast();
+		left.setCross3(up,dir);
+		left.normalize3fast();
+		dir.setCross3(left,up);
+		mRotationLeft = LLQuaternion( LLVector3(dir.getF32ptr()), LLVector3(left.getF32ptr()), LLVector3(up.getF32ptr()));
+
+		up.load3(mNormalRight.mV);
+		up.normalize3fast();
 		if (mFlipFeet)
 		{
-			up *= -1.0f;
+			up.negate();
 		}
-		dir = dirRight;
-		dir.normVec();
-		left = up % dir;
-		left.normVec();
-		dir = left % up;
-		mRotationRight = LLQuaternion( dir, left, up );
+		dir = dirRight4;
+		dir.normalize3fast();
+		left.setCross3(up,dir);
+		left.normalize3fast();
+		dir.setCross3(left,up);
+		mRotationRight = LLQuaternion( LLVector3(dir.getF32ptr()), LLVector3(left.getF32ptr()), LLVector3(up.getF32ptr()));
 	}
 	mAnkleLeftJoint.setWorldRotation( mRotationLeft );
 	mAnkleRightJoint.setWorldRotation( mRotationRight );
diff --git a/indra/llmath/xform.cpp b/indra/llmath/xform.cpp
index 1ab177abb31e7742065377338093917ad50c7318..f96c88794a2245be5c677957b2abe661144b5add 100644
--- a/indra/llmath/xform.cpp
+++ b/indra/llmath/xform.cpp
@@ -82,30 +82,29 @@ void LLXformMatrix::updateMatrix(BOOL update_bounds)
 {
 	update();
 
-	mWorldMatrix.initAll(mScale, mWorldRotation, mWorldPosition);
+	LLMatrix4 world_matrix;
+	world_matrix.initAll(mScale, mWorldRotation, mWorldPosition);
+	mWorldMatrix.loadu(world_matrix);
 
 	if (update_bounds && (mChanged & MOVED))
 	{
-		mMin.mV[0] = mMax.mV[0] = mWorldMatrix.mMatrix[3][0];
-		mMin.mV[1] = mMax.mV[1] = mWorldMatrix.mMatrix[3][1];
-		mMin.mV[2] = mMax.mV[2] = mWorldMatrix.mMatrix[3][2];
+		mMax = mMin = mWorldMatrix.getRow<3>();
 
-		F32 f0 = (fabs(mWorldMatrix.mMatrix[0][0])+fabs(mWorldMatrix.mMatrix[1][0])+fabs(mWorldMatrix.mMatrix[2][0])) * 0.5f;
-		F32 f1 = (fabs(mWorldMatrix.mMatrix[0][1])+fabs(mWorldMatrix.mMatrix[1][1])+fabs(mWorldMatrix.mMatrix[2][1])) * 0.5f;
-		F32 f2 = (fabs(mWorldMatrix.mMatrix[0][2])+fabs(mWorldMatrix.mMatrix[1][2])+fabs(mWorldMatrix.mMatrix[2][2])) * 0.5f;
+		LLVector4a total_sum,sum1,sum2;
+		total_sum.setAbs(mWorldMatrix.getRow<0>());
+		sum1.setAbs(mWorldMatrix.getRow<1>());
+		sum2.setAbs(mWorldMatrix.getRow<2>());
+		sum1.add(sum2);
+		total_sum.add(sum1);
+		total_sum.mul(.5f);
 
-		mMin.mV[0] -= f0; 
-		mMin.mV[1] -= f1; 
-		mMin.mV[2] -= f2; 
-
-		mMax.mV[0] += f0; 
-		mMax.mV[1] += f1; 
-		mMax.mV[2] += f2; 
+		mMax.add(total_sum);
+		mMin.sub(total_sum);
 	}
 }
 
 void LLXformMatrix::getMinMax(LLVector3& min, LLVector3& max) const
 {
-	min = mMin;
-	max = mMax;
+	min.set(mMin.getF32ptr());
+	max.set(mMax.getF32ptr());
 }
diff --git a/indra/llmath/xform.h b/indra/llmath/xform.h
index 50d1d37a9aa08eed717b23d195f47f1335a4e2c0..03ff645cda2763b7b0a180e76e9048c33dad84c1 100644
--- a/indra/llmath/xform.h
+++ b/indra/llmath/xform.h
@@ -28,6 +28,7 @@
 
 #include "v3math.h"
 #include "m4math.h"
+#include "llmatrix4a.h"
 #include "llquaternion.h"
 
 const F32 MAX_OBJECT_Z 		= 4096.f; // should match REGION_HEIGHT_METERS, Pre-havok4: 768.f
@@ -130,20 +131,21 @@ class LLXform
 	const LLVector3&	getWorldPosition() const	{ return mWorldPosition; }
 };
 
+LL_ALIGN_PREFIX(16)
 class LLXformMatrix : public LLXform
 {
 public:
 	LLXformMatrix() : LLXform() {};
 	virtual ~LLXformMatrix() = default;
 
-	const LLMatrix4&    getWorldMatrix() const      { return mWorldMatrix; }
-	void setWorldMatrix (const LLMatrix4& mat)   { mWorldMatrix = mat; }
+	const LLMatrix4a&    getWorldMatrix() const      { return mWorldMatrix; }
+	void setWorldMatrix (const LLMatrix4a& mat)   { mWorldMatrix = mat; }
 
 	void init()
 	{
 		mWorldMatrix.setIdentity();
-		mMin.clearVec();
-		mMax.clearVec();
+		mMin.clear();
+		mMax.clear();
 
 		LLXform::init();
 	}
@@ -153,11 +155,11 @@ class LLXformMatrix : public LLXform
 	void getMinMax(LLVector3& min,LLVector3& max) const;
 
 protected:
-	LLMatrix4	mWorldMatrix;
-	LLVector3	mMin;
-	LLVector3	mMax;
+	LL_ALIGN_16(LLMatrix4a	mWorldMatrix);
+	LL_ALIGN_16(LLVector4a	mMin);
+	LL_ALIGN_16(LLVector4a	mMax);
 
-};
+} LL_ALIGN_POSTFIX(16);
 
 BOOL LLXform::setParent(LLXform* parent)
 {
diff --git a/indra/newview/llagentcamera.cpp b/indra/newview/llagentcamera.cpp
index bf56c29724d140972b63b7eebefaa93caa6ad73f..8a383452351cc77c0f3a40f54919e07a4e2dda7d 100644
--- a/indra/newview/llagentcamera.cpp
+++ b/indra/newview/llagentcamera.cpp
@@ -420,7 +420,7 @@ void LLAgentCamera::slamLookAt(const LLVector3 &look_at)
 //-----------------------------------------------------------------------------
 LLVector3 LLAgentCamera::calcFocusOffset(LLViewerObject *object, LLVector3 original_focus_point, S32 x, S32 y)
 {
-	const LLMatrix4& obj_matrix = object->getRenderMatrix();
+	const LLMatrix4a& obj_matrix = object->getRenderMatrix();
 	const LLQuaternion obj_rot = object->getRenderRotation();
 	const LLVector3 obj_pos = object->getRenderPosition();
 
@@ -455,24 +455,24 @@ LLVector3 LLAgentCamera::calcFocusOffset(LLViewerObject *object, LLVector3 origi
 
 	// find the largest ratio stored in obj_to_cam_ray_proportions
 	// this corresponds to the object's local axial plane (XY, YZ, XZ) that is *most* facing the camera
-	LLVector3 longest_object_axis;
+	LLVector4a focus_plane_normal;
 	// is x-axis longest?
 	if (obj_to_cam_ray_proportions.mV[VX] > obj_to_cam_ray_proportions.mV[VY] 
 		&& obj_to_cam_ray_proportions.mV[VX] > obj_to_cam_ray_proportions.mV[VZ])
 	{
 		// then grab it
-		longest_object_axis.setVec(obj_matrix.getFwdRow4());
+		focus_plane_normal = obj_matrix.getRow<LLMatrix4a::ROW_FWD>();
 	}
 	// is y-axis longest?
 	else if (obj_to_cam_ray_proportions.mV[VY] > obj_to_cam_ray_proportions.mV[VZ])
 	{
 		// then grab it
-		longest_object_axis.setVec(obj_matrix.getLeftRow4());
+		focus_plane_normal = obj_matrix.getRow<LLMatrix4a::ROW_LEFT>();
 	}
 	// otherwise, use z axis
 	else
 	{
-		longest_object_axis.setVec(obj_matrix.getUpRow4());
+		focus_plane_normal = obj_matrix.getRow<LLMatrix4a::ROW_UP>();
 	}
 
 	// Use this axis as the normal to project mouse click on to plane with that normal, at the object center.
@@ -481,11 +481,10 @@ LLVector3 LLAgentCamera::calcFocusOffset(LLViewerObject *object, LLVector3 origi
 	// We do this to allow the camera rotation tool to "tumble" the object by rotating the camera.
 	// If the focus point were the object surface under the mouse, camera rotation would introduce an undesirable
 	// eccentricity to the object orientation
-	LLVector3 focus_plane_normal(longest_object_axis);
-	focus_plane_normal.normalize();
+	focus_plane_normal.normalize3fast();
 
 	LLVector3d focus_pt_global;
-	gViewerWindow->mousePointOnPlaneGlobal(focus_pt_global, x, y, gAgent.getPosGlobalFromAgent(obj_pos), focus_plane_normal);
+	gViewerWindow->mousePointOnPlaneGlobal(focus_pt_global, x, y, gAgent.getPosGlobalFromAgent(obj_pos), LLVector3(focus_plane_normal.getF32ptr()));
 	LLVector3 focus_pt = gAgent.getPosAgentFromGlobal(focus_pt_global);
 
 	// find vector from camera to focus point in object space
@@ -1865,7 +1864,7 @@ LLVector3d LLAgentCamera::calcCameraPositionTargetGlobal(BOOL *hit_limit)
 			head_offset.mdV[VX] = gAgentAvatarp->mHeadOffset.mV[VX];
 			head_offset.mdV[VY] = gAgentAvatarp->mHeadOffset.mV[VY];
 			head_offset.mdV[VZ] = gAgentAvatarp->mHeadOffset.mV[VZ] + 0.1f;
-			const LLMatrix4& mat = ((LLViewerObject*) gAgentAvatarp->getParent())->getRenderMatrix();
+			const LLMatrix4 mat(((LLViewerObject*) gAgentAvatarp->getParent())->getRenderMatrix().getF32ptr());
 			camera_position_global = gAgent.getPosGlobalFromAgent
 								((gAgentAvatarp->getPosition()+
 								 LLVector3(head_offset)*gAgentAvatarp->getRotation()) * mat);
diff --git a/indra/newview/lldrawable.cpp b/indra/newview/lldrawable.cpp
index 94b349bee99196c79e58f6aa247f2ca581084678..36473601ce39634471eb535d92b2754b06403bfc 100644
--- a/indra/newview/lldrawable.cpp
+++ b/indra/newview/lldrawable.cpp
@@ -243,7 +243,7 @@ LLVOVolume* LLDrawable::getVOVolume() const
 	}
 }
 
-const LLMatrix4& LLDrawable::getRenderMatrix() const
+const LLMatrix4a& LLDrawable::getRenderMatrix() const
 { 
 	return isRoot() ? getWorldMatrix() : getParent()->getWorldMatrix();
 }
@@ -1304,9 +1304,7 @@ void LLSpatialBridge::updateSpatialExtents()
 	LLVector4a offset;
 	LLVector4a size = root_bounds[1];
 		
-	//VECTORIZE THIS
-	LLMatrix4a mat;
-	mat.loadu(mDrawable->getXform()->getWorldMatrix());
+	const LLMatrix4a& mat = mDrawable->getXform()->getWorldMatrix();
 
 	LLVector4a t;
 	t.splat(0.f);
@@ -1372,27 +1370,35 @@ LLCamera LLSpatialBridge::transformCamera(LLCamera& camera)
 {
 	LLCamera ret = camera;
 	LLXformMatrix* mat = mDrawable->getXform();
-	LLVector3 center = LLVector3(0,0,0) * mat->getWorldMatrix();
+	const LLVector4a& center = mat->getWorldMatrix().getRow<LLMatrix4a::ROW_TRANS>(); // translation row == origin transformed by the world matrix
 
-	LLVector3 delta = ret.getOrigin() - center;
-	LLQuaternion rot = ~mat->getRotation();
+	LLQuaternion2 invRot;
+	invRot.setConjugate( LLQuaternion2(mat->getRotation()) );
 
-	delta *= rot;
-	LLVector3 lookAt = ret.getAtAxis();
-	LLVector3 up_axis = ret.getUpAxis();
-	LLVector3 left_axis = ret.getLeftAxis();
+	LLVector4a delta;
+	delta.load3(ret.getOrigin().mV);
+	delta.sub(center);
 
-	lookAt *= rot;
-	up_axis *= rot;
-	left_axis *= rot;
+	LLVector4a lookAt;
+	lookAt.load3(ret.getAtAxis().mV);
+	LLVector4a up_axis;
 
-	if (!delta.isFinite())
+	up_axis.load3(ret.getUpAxis().mV);
+	LLVector4a left_axis;
+	left_axis.load3(ret.getLeftAxis().mV);
+
+	delta.setRotated(invRot, delta);
+	lookAt.setRotated(invRot, lookAt);
+	up_axis.setRotated(invRot, up_axis);
+	left_axis.setRotated(invRot, left_axis);
+
+	if (!delta.isFinite3())
 	{
-		delta.clearVec();
+		delta.clear();
 	}
 
-	ret.setOrigin(delta);
-	ret.setAxes(lookAt, left_axis, up_axis);
+	ret.setOrigin(LLVector3(delta.getF32ptr()));
+	ret.setAxes(LLVector3(lookAt.getF32ptr()), LLVector3(left_axis.getF32ptr()), LLVector3(up_axis.getF32ptr()));
 		
 	return ret;
 }
@@ -1667,12 +1673,17 @@ const LLVector3	LLDrawable::getPositionAgent() const
 	{
 		if (isActive())
 		{
-			LLVector3 pos(0,0,0);
 			if (!isRoot())
 			{
-				pos = mVObjp->getPosition();
+				LLVector4a pos;
+				pos.load3(mVObjp->getPosition().mV);
+				getRenderMatrix().affineTransform(pos,pos);
+				return LLVector3(pos.getF32ptr());
+			}
+			else
+			{
+				return LLVector3(getRenderMatrix().getRow<LLMatrix4a::ROW_TRANS>().getF32ptr()); // root: position is the render matrix translation row
 			}
-			return pos * getRenderMatrix();
 		}
 		else
 		{
diff --git a/indra/newview/lldrawable.h b/indra/newview/lldrawable.h
index 5eaf9d4d97e66cb136216e2131d3c881167f81c7..289a80b9ec1f94d1d191eb3eb7e6a9386dd69cfc 100644
--- a/indra/newview/lldrawable.h
+++ b/indra/newview/lldrawable.h
@@ -87,8 +87,8 @@ class LLDrawable
 	const LLViewerObject *getVObj()	const						  { return mVObjp; }
 	LLVOVolume*	getVOVolume() const; // cast mVObjp tp LLVOVolume if OK
 
-	const LLMatrix4&      getWorldMatrix() const		{ return mXform.getWorldMatrix(); }
-	const LLMatrix4&	  getRenderMatrix() const;
+	const LLMatrix4a&     getWorldMatrix() const		{ return mXform.getWorldMatrix(); }
+	const LLMatrix4a&	  getRenderMatrix() const;
 	void				  setPosition(LLVector3 v) const { }
 	const LLVector3&	  getPosition() const			{ return mXform.getPosition(); }
 	const LLVector3&      getWorldPosition() const		{ return mXform.getPositionW(); }
diff --git a/indra/newview/lldrawpool.cpp b/indra/newview/lldrawpool.cpp
index 731c8a3c736b4763bb0a5071875e35d20edad28e..17ca9432f59cdc686b2c5c5864597cdf6e161b22 100644
--- a/indra/newview/lldrawpool.cpp
+++ b/indra/newview/lldrawpool.cpp
@@ -443,7 +443,7 @@ void LLRenderPass::applyModelMatrix(const LLDrawInfo& params)
 		gGL.loadMatrix(gGLModelView);
 		if (params.mModelMatrix)
 		{
-			gGL.multMatrix((GLfloat*) params.mModelMatrix->mMatrix);
+			gGL.multMatrix(*params.mModelMatrix);
 		}
 		gPipeline.mMatrixOpCount++;
 	}
diff --git a/indra/newview/lldrawpoolavatar.cpp b/indra/newview/lldrawpoolavatar.cpp
index d708ce52f07dbd494555480154806d612c1fbe6c..16b43019474b7c9441cff650f66d6220a18f4043 100644
--- a/indra/newview/lldrawpoolavatar.cpp
+++ b/indra/newview/lldrawpoolavatar.cpp
@@ -166,11 +166,11 @@ void LLDrawPoolAvatar::prerender()
 	}
 }
 
-LLMatrix4& LLDrawPoolAvatar::getModelView()
+LLMatrix4a& LLDrawPoolAvatar::getModelView()
 {
-	static LLMatrix4 ret;
+	static LLMatrix4a ret;
 
-	ret = LLMatrix4(gGLModelView.getF32ptr());
+	ret = gGLModelView;
 
 	return ret;
 }
diff --git a/indra/newview/lldrawpoolavatar.h b/indra/newview/lldrawpoolavatar.h
index 32f239721db087dd0fbef2c72d451f9fbe470f28..cf7861c485a2e8f1898985df292c49f997d65385 100644
--- a/indra/newview/lldrawpoolavatar.h
+++ b/indra/newview/lldrawpoolavatar.h
@@ -180,7 +180,7 @@ typedef enum
 
 	LLDrawPoolAvatar(U32 type);
 
-	static LLMatrix4& getModelView();
+	static LLMatrix4a& getModelView();
 
 	/*virtual*/ S32  getNumPasses();
 	/*virtual*/ void beginRenderPass(S32 pass);
diff --git a/indra/newview/lldrawpoolterrain.cpp b/indra/newview/lldrawpoolterrain.cpp
index 5b039120a2e6ca3502fd690644677a94c253710b..b522587c658a1148dd108d623bc714e167624d3c 100644
--- a/indra/newview/lldrawpoolterrain.cpp
+++ b/indra/newview/lldrawpoolterrain.cpp
@@ -271,8 +271,11 @@ void LLDrawPoolTerrain::drawLoop()
 			if (!facep || !facep->getDrawable() || !facep->getDrawable()->getRegion())
 				continue;
 
-			LLMatrix4* model_matrix = &(facep->getDrawable()->getRegion()->mRenderMatrix);
-
+			LLMatrix4a* model_matrix = &(facep->getDrawable()->getRegion()->mRenderMatrix);
+			if(model_matrix && model_matrix->isIdentity())
+			{
+				model_matrix = NULL;
+			}
 			if (model_matrix != gGLLastMatrix)
 			{
 				llassert(gGL.getMatrixMode() == LLRender::MM_MODELVIEW);
@@ -280,7 +283,7 @@ void LLDrawPoolTerrain::drawLoop()
 				gGL.loadMatrix(gGLModelView);
 				if (model_matrix)
 				{
-					gGL.multMatrix((GLfloat*) model_matrix->mMatrix);
+					gGL.multMatrix(*model_matrix);
 				}
 				gPipeline.mMatrixOpCount++;
 			}
diff --git a/indra/newview/lldrawpooltree.cpp b/indra/newview/lldrawpooltree.cpp
index 648af83a07fb0586a02dc1fb022ad1439d83db19..efd7247c29eaf6f84a8b1543994bf95b0883c9cc 100644
--- a/indra/newview/lldrawpooltree.cpp
+++ b/indra/newview/lldrawpooltree.cpp
@@ -108,8 +108,11 @@ void LLDrawPoolTree::render(S32 pass)
 
 		if(buff)
 		{
-			LLMatrix4* model_matrix = &(face->getDrawable()->getRegion()->mRenderMatrix);
-
+			LLMatrix4a* model_matrix = &(face->getDrawable()->getRegion()->mRenderMatrix);
+			if(model_matrix && model_matrix->isIdentity())
+			{
+				model_matrix = NULL;
+			}
 			if (model_matrix != gGLLastMatrix)
 			{
 				gGLLastMatrix = model_matrix;
@@ -117,7 +120,7 @@ void LLDrawPoolTree::render(S32 pass)
 				if (model_matrix)
 				{
 					llassert(gGL.getMatrixMode() == LLRender::MM_MODELVIEW);
-					gGL.multMatrix((GLfloat*) model_matrix->mMatrix);
+					gGL.multMatrix(*model_matrix);
 				}
 				gPipeline.mMatrixOpCount++;
 			}
diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp
index e1f1789edfb4d25f1e97de7360f32e1147d958df..98bd4ed9c3852dd28b1f8a28b64b30ddae9b8342 100644
--- a/indra/newview/llface.cpp
+++ b/indra/newview/llface.cpp
@@ -546,7 +546,11 @@ void LLFace::updateCenterAgent()
 {
 	if (mDrawablep->isActive())
 	{
-		mCenterAgent = mCenterLocal * getRenderMatrix();
+		LLVector4a local_pos;
+		local_pos.load3(mCenterLocal.mV);
+
+		getRenderMatrix().affineTransform(local_pos,local_pos);
+		mCenterAgent.set(local_pos.getF32ptr());
 	}
 	else
 	{
@@ -574,17 +578,21 @@ void LLFace::renderSelected(LLViewerTexture *imagep, const LLColor4& color)
 		gGL.getTexUnit(0)->bind(imagep);
 	
 		gGL.pushMatrix();
+
+		const LLMatrix4a* model_matrix = NULL;
 		if (mDrawablep->isActive())
 		{
-			gGL.multMatrix((GLfloat*)mDrawablep->getRenderMatrix().mMatrix);
+			model_matrix = &(mDrawablep->getRenderMatrix());
 		}
 		else
 		{
-			gGL.multMatrix((GLfloat*)mDrawablep->getRegion()->mRenderMatrix.mMatrix);
+			model_matrix = &mDrawablep->getRegion()->mRenderMatrix;
+		}
+		if(model_matrix && !model_matrix->isIdentity())
+		{
+			gGL.multMatrix(*model_matrix);
 		}
 
-		gGL.diffuseColor4fv(color.mV);
-	
 		if (mDrawablep->isState(LLDrawable::RIGGED))
 		{
 			LLVOVolume* volume = mDrawablep->getVOVolume();
@@ -1040,7 +1048,7 @@ LLVector2 LLFace::surfaceToTexture(LLVector2 surface_coord, const LLVector4a& po
 // by planarProjection(). This is needed to match planar texgen parameters.
 void LLFace::getPlanarProjectedParams(LLQuaternion* face_rot, LLVector3* face_pos, F32* scale) const
 {
-	const LLMatrix4& vol_mat = getWorldMatrix();
+	const LLMatrix4a& vol_mat = getWorldMatrix();
 	const LLVolumeFace& vf = getViewerObject()->getVolume()->getVolumeFace(mTEOffset);
     if (!vf.mNormals)
     {
@@ -1053,15 +1061,15 @@ void LLFace::getPlanarProjectedParams(LLQuaternion* face_rot, LLVector3* face_po
         LL_WARNS( ) << "Volume face without tangents (object id: " << getViewerObject()->getID().asString() << ")" << LL_ENDL;
         return;
     }
-	const LLVector4a& normal4a = vf.mNormals[0];
+	const LLVector4a& normal = vf.mNormals[0];
 	const LLVector4a& tangent = vf.mTangents[0];
 
-	LLVector4a binormal4a;
-	binormal4a.setCross3(normal4a, tangent);
-	binormal4a.mul(tangent.getF32ptr()[3]);
+	LLVector4a binormal;
+	binormal.setCross3(normal, tangent);
+	binormal.mul(tangent.getF32ptr()[3]);
 
 	LLVector2 projected_binormal;
-	planarProjection(projected_binormal, normal4a, *vf.mCenter, binormal4a);
+	planarProjection(projected_binormal, normal, *vf.mCenter, binormal);
 	projected_binormal -= LLVector2(0.5f, 0.5f); // this normally happens in xform()
 	*scale = projected_binormal.length();
 	// rotate binormal to match what planarProjection() thinks it is,
@@ -1070,13 +1078,16 @@ void LLFace::getPlanarProjectedParams(LLQuaternion* face_rot, LLVector3* face_po
 	F32 ang = acos(projected_binormal.mV[VY]);
 	ang = (projected_binormal.mV[VX] < 0.f) ? -ang : ang;
 
+	ALGLMath::genRot(RAD_TO_DEG * ang, normal).rotate(binormal, binormal);
+
+	LLVector4a x_axis;
+	x_axis.setCross3(binormal, normal);
+
 	//VECTORIZE THIS
-	LLVector3 binormal(binormal4a.getF32ptr());
-	LLVector3 normal(normal4a.getF32ptr());
-	binormal.rotVec(ang, normal);
-	LLQuaternion local_rot( binormal % normal, binormal, normal );
-	*face_rot = local_rot * vol_mat.quaternion();
-	*face_pos = vol_mat.getTranslation();
+	LLQuaternion local_rot(LLVector3(x_axis.getF32ptr()), LLVector3(binormal.getF32ptr()), LLVector3(normal.getF32ptr()));
+	*face_rot = local_rot * LLMatrix4(vol_mat.getF32ptr()).quaternion();
+	// Face position is the translation row of the volume's world matrix.
+	face_pos->set(vol_mat.getRow<LLMatrix4a::ROW_TRANS>().getF32ptr());
 }
 
 // Returns the necessary texture transform to align this face's TE to align_to's TE
@@ -1478,7 +1489,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
 			{
 				if (mDrawablep->isActive())
 				{
-					bump_quat = LLQuaternion(mDrawablep->getRenderMatrix());
+					bump_quat = LLQuaternion(LLMatrix4(mDrawablep->getRenderMatrix().getF32ptr()));
 				}
 
 				if (bump_code)
@@ -1976,10 +1987,6 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
 			
 			mVObjp->getVolume()->genTangents(f);
 			
-			LLVector4Logical mask;
-			mask.clear();
-			mask.setElement<3>();
-
 			LLVector4a* src = vf.mTangents;
 			LLVector4a* end = vf.mTangents+num_vertices;
 			LLVector4a* src2 = vf.mNormals;
@@ -1997,7 +2004,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
 				}
 				mat_vert.rotate(tangent_out, tangent_out);
 				tangent_out.normalize3fast();
-				tangent_out.setSelectWithMask(mask, *src, tangent_out);
+				tangent_out.copyComponent<3>(*src);
 				tangent_out.store4a(tangents);
 				
 				src++;
@@ -2458,7 +2465,7 @@ S32 LLFace::pushVertices(const U16* index_array) const
 	return mIndicesCount;
 }
 
-const LLMatrix4& LLFace::getRenderMatrix() const
+const LLMatrix4a& LLFace::getRenderMatrix() const
 {
 	return mDrawablep->getRenderMatrix();
 }
@@ -2474,7 +2481,7 @@ S32 LLFace::renderElements(const U16 *index_array) const
 	else
 	{
 		gGL.pushMatrix();
-		gGL.multMatrix((float*)getRenderMatrix().mMatrix);
+		gGL.multMatrix(getRenderMatrix());
 		ret = pushVertices(index_array);
 		gGL.popMatrix();
 	}
@@ -2534,7 +2541,10 @@ LLVector3 LLFace::getPositionAgent() const
 	}
 	else
 	{
-		return mCenterLocal * getRenderMatrix();
+		LLVector4a center_local;
+		center_local.load3(mCenterLocal.mV);
+		getRenderMatrix().affineTransform(center_local,center_local);
+		return LLVector3(center_local.getF32ptr());
 	}
 }
 
diff --git a/indra/newview/llface.h b/indra/newview/llface.h
index b6288918f9988bd8777afe51f4137416e66cdc11..cfe1b2a233dfd85117387746293d5309437bfff7 100644
--- a/indra/newview/llface.h
+++ b/indra/newview/llface.h
@@ -80,8 +80,8 @@ class LLFace : public LLTrace::MemTrackableNonVirtual<LLFace, 16>
 	}
 	~LLFace()  { destroy(); }
 
-	const LLMatrix4& getWorldMatrix()	const	{ return mVObjp->getWorldMatrix(mXform); }
-	const LLMatrix4& getRenderMatrix() const;
+	const LLMatrix4a& getWorldMatrix()	const	{ return mVObjp->getWorldMatrix(mXform); }
+	const LLMatrix4a& getRenderMatrix() const;
 	U32				getIndicesCount()	const	{ return mIndicesCount; };
 	S32				getIndicesStart()	const	{ return mIndicesIndex; };
 	U16				getGeomCount()		const	{ return mGeomCount; }		// vertex count for this face
diff --git a/indra/newview/llflexibleobject.cpp b/indra/newview/llflexibleobject.cpp
index b04442aa21a21a92d10ac3a9cef8700fdc17a2af..72d4910f2dc69d53ce5183c4b94718b85301a7f4 100644
--- a/indra/newview/llflexibleobject.cpp
+++ b/indra/newview/llflexibleobject.cpp
@@ -939,7 +939,7 @@ void LLVolumeImplFlexible::updateRelativeXform(bool force_identity)
 	vo->mRelativeXformInvTrans.transpose();
 }
 
-const LLMatrix4& LLVolumeImplFlexible::getWorldMatrix(LLXformMatrix* xform) const
+const LLMatrix4a& LLVolumeImplFlexible::getWorldMatrix(LLXformMatrix* xform) const
 {
 	return xform->getWorldMatrix();
 }
diff --git a/indra/newview/llflexibleobject.h b/indra/newview/llflexibleobject.h
index 787c65ea48a0dd16fe709c6443aacb645d174bee..dd076f516ce62413e660afe981ba8ed54b57b848 100644
--- a/indra/newview/llflexibleobject.h
+++ b/indra/newview/llflexibleobject.h
@@ -96,7 +96,7 @@ class LLVolumeImplFlexible final : public LLVolumeInterface
 		bool isVolumeUnique() const { return true; }
 		bool isVolumeGlobal() const { return true; }
 		bool isActive() const { return true; }
-		const LLMatrix4& getWorldMatrix(LLXformMatrix* xform) const;
+		const LLMatrix4a& getWorldMatrix(LLXformMatrix* xform) const;
 		void updateRelativeXform(bool force_identity);
 		void doFlexibleUpdate(); // Called to update the simulation
 		void doFlexibleRebuild(bool rebuild_volume); // Called to rebuild the geometry
diff --git a/indra/newview/llselectmgr.cpp b/indra/newview/llselectmgr.cpp
index 961f15fdf1d6437d5482a23c477ce1aa8783c733..5c5b07f0464adef0f69ee4e3099a5e61d4dc996a 100644
--- a/indra/newview/llselectmgr.cpp
+++ b/indra/newview/llselectmgr.cpp
@@ -1341,12 +1341,12 @@ void LLSelectMgr::getGrid(LLVector3& origin, LLQuaternion &rotation, LLVector3 &
 			size.setSub(max_extents, min_extents);
 			size.mul(0.5f);
 
-			mGridOrigin.set(center.getF32ptr());
 			LLDrawable* drawable = first_grid_object->mDrawable;
 			if (drawable && drawable->isActive())
 			{
-				mGridOrigin = mGridOrigin * first_grid_object->getRenderMatrix();
+				first_grid_object->getRenderMatrix().affineTransform(center,center);
 			}
+			mGridOrigin.set(center.getF32ptr());
 			mGridScale.set(size.getF32ptr());
 		}
 	}
@@ -5946,7 +5946,7 @@ void LLSelectMgr::renderSilhouettes(BOOL for_hud)
 
 		if (objectp->mDrawable->isActive())
 		{
-			gGL.multMatrix((F32*)objectp->getRenderMatrix().mMatrix);
+			gGL.multMatrix(objectp->getRenderMatrix());
 		}
 		else if (!is_hud_object)
 		{
@@ -6496,7 +6496,7 @@ void LLSelectNode::renderOneSilhouette(const LLColor4 &color)
 	
 	if (drawable->isActive())
 	{
-		gGL.multMatrix((F32*) objectp->getRenderMatrix().mMatrix);
+		gGL.multMatrix(objectp->getRenderMatrix());
 	}
 
 	LLVolume *volume = objectp->getVolume();
diff --git a/indra/newview/llskinningutil.cpp b/indra/newview/llskinningutil.cpp
index 0a07a6349254456bdc5faeb1b93f9ebf85360f03..9f541866b8bd47a93ff0d1aef84953e1653488b5 100644
--- a/indra/newview/llskinningutil.cpp
+++ b/indra/newview/llskinningutil.cpp
@@ -138,9 +138,7 @@ void LLSkinningUtil::initSkinningMatrixPalette(
         llassert(joint);
         if (joint)
         {
-            LLMatrix4a world;
-            world.loadu(joint->getWorldMatrix());
-            mat[j].setMul(world, skin->mInvBindMatrix[j]);
+            mat[j].setMul(joint->getWorldMatrix(), skin->mInvBindMatrix[j]);
         }
         else
         {
diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp
index 8d80dc3f3f75c6f56cf40c2b17e0f80d5184bf8e..2d55fa91732c53597b98b03680d97934945010ea 100644
--- a/indra/newview/llspatialpartition.cpp
+++ b/indra/newview/llspatialpartition.cpp
@@ -2748,7 +2748,7 @@ void renderPhysicsShapes(LLSpatialGroup* group)
 			if (bridge)
 			{
 				gGL.pushMatrix();
-				gGL.multMatrix((F32*)bridge->mDrawable->getRenderMatrix().mMatrix);
+				gGL.multMatrix(bridge->mDrawable->getRenderMatrix());
 				bridge->renderPhysicsShapes();
 				gGL.popMatrix();
 			}
@@ -2777,7 +2777,7 @@ void renderPhysicsShapes(LLSpatialGroup* group)
 				if (object && object->getPCode() == LLViewerObject::LL_VO_SURFACE_PATCH)
 				{
 					gGL.pushMatrix();
-					gGL.multMatrix((F32*) object->getRegion()->mRenderMatrix.mMatrix);
+					gGL.multMatrix(object->getRegion()->mRenderMatrix);
 					//push face vertices for terrain
 					for (S32 i = 0; i < drawable->getNumFaces(); ++i)
 					{
@@ -3924,11 +3924,8 @@ class LLOctreeIntersect : public LLOctreeTraveler<LLViewerOctreeEntry>
 
 			if (group->getSpatialPartition()->isBridge())
 			{
-				LLMatrix4 local_matrix = group->getSpatialPartition()->asBridge()->mDrawable->getRenderMatrix();
-				local_matrix.invert();
-				
-				LLMatrix4a local_matrix4a;
-				local_matrix4a.loadu(local_matrix);
+				LLMatrix4a local_matrix4a = group->getSpatialPartition()->asBridge()->mDrawable->getRenderMatrix();
+				local_matrix4a.invert();
 
 				local_matrix4a.affineTransform(mStart, local_start);
 				local_matrix4a.affineTransform(mEnd, local_end);
diff --git a/indra/newview/llspatialpartition.h b/indra/newview/llspatialpartition.h
index 79ee97fa211f9daee37c4d2e34b0f5dfb0ab6b42..4b52506fb7abc9e645d71af35ba91bf65c7c0492 100644
--- a/indra/newview/llspatialpartition.h
+++ b/indra/newview/llspatialpartition.h
@@ -80,7 +80,7 @@ class LLDrawInfo : public LLRefCount, public LLTrace::MemTrackableNonVirtual<LLD
 
 	U32 mDebugColor;
 	const LLMatrix4a* mTextureMatrix;
-	const LLMatrix4* mModelMatrix;
+	const LLMatrix4a* mModelMatrix;
 	U16 mStart;
 	U16 mEnd;
 	U32 mCount;
diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp
index 1c7c838b746cf64b80359286d073cf73b849e9d4..7d89c97e263bee37bcda062462f15ab50ef818c9 100644
--- a/indra/newview/llviewerjointmesh.cpp
+++ b/indra/newview/llviewerjointmesh.cpp
@@ -111,14 +111,14 @@ void LLViewerJointMesh::uploadJointMatrices()
 	//calculate joint matrices
 	for (joint_num = 0; joint_num < reference_mesh->mJointRenderData.size(); joint_num++)
 	{
-		LLMatrix4 joint_mat = *reference_mesh->mJointRenderData[joint_num]->mWorldMatrix;
+		LLMatrix4a joint_mat = *reference_mesh->mJointRenderData[joint_num]->mWorldMatrix;
 
 		if (hardware_skinning)
 		{
-			joint_mat *= LLDrawPoolAvatar::getModelView();
+			joint_mat.setMul(LLDrawPoolAvatar::getModelView(),joint_mat);
 		}
-		gJointMatUnaligned[joint_num] = joint_mat;
-		gJointRotUnaligned[joint_num] = joint_mat.getMat3();
+		gJointMatUnaligned[joint_num] = LLMatrix4(joint_mat.getF32ptr());
+		gJointRotUnaligned[joint_num] = gJointMatUnaligned[joint_num].getMat3();
 	}
 
 	BOOL last_pivot_uploaded = FALSE;
@@ -312,8 +312,7 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy)
 	else
 	{
 		gGL.pushMatrix();
-		LLMatrix4 jointToWorld = getWorldMatrix();
-		gGL.multMatrix((GLfloat*)jointToWorld.mMatrix);
+		gGL.multMatrix(getWorldMatrix());
 		buff->setBuffer(mask);
 		buff->drawRange(LLRender::TRIANGLES, start, end, count, offset);
 		gGL.popMatrix();
diff --git a/indra/newview/llviewerobject.cpp b/indra/newview/llviewerobject.cpp
index 325f7c153438f149b9be06b2d4c989329d13684a..d346932514786d9f6ed2e6fd7c9d0e7f78db18de 100644
--- a/indra/newview/llviewerobject.cpp
+++ b/indra/newview/llviewerobject.cpp
@@ -4377,18 +4377,18 @@ const LLQuaternion LLViewerObject::getRenderRotation() const
 	{
 		if (!mDrawable->isRoot())
 		{
-			ret = getRotation() * LLQuaternion(mDrawable->getParent()->getWorldMatrix());
+			ret = getRotation() * LLQuaternion(LLMatrix4(mDrawable->getParent()->getWorldMatrix().getF32ptr()));
 		}
 		else
 		{
-			ret = LLQuaternion(mDrawable->getWorldMatrix());
+			ret = LLQuaternion(LLMatrix4(mDrawable->getWorldMatrix().getF32ptr()));
 		}
 	}
 	
 	return ret;
 }
 
-const LLMatrix4& LLViewerObject::getRenderMatrix() const
+const LLMatrix4a& LLViewerObject::getRenderMatrix() const
 {
 	return mDrawable->getWorldMatrix();
 }
diff --git a/indra/newview/llviewerobject.h b/indra/newview/llviewerobject.h
index f081e379210a3635671b3727b6765a367789581c..cf53732ac537edf7693afa4ea6c8eba8b92b5e4d 100644
--- a/indra/newview/llviewerobject.h
+++ b/indra/newview/llviewerobject.h
@@ -319,7 +319,7 @@ class LLViewerObject
 	const LLQuaternion getRotationRegion() const;
 	const LLQuaternion getRotationEdit() const;
 	const LLQuaternion getRenderRotation() const;
-	virtual	const LLMatrix4& getRenderMatrix() const;
+	virtual	const LLMatrix4a& getRenderMatrix() const;
 
 	void setPosition(const LLVector3 &pos, BOOL damped = FALSE);
 	void setPositionGlobal(const LLVector3d &position, BOOL damped = FALSE);
@@ -329,7 +329,7 @@ class LLViewerObject
 	void setPositionParent(const LLVector3 &pos_parent, BOOL damped = FALSE);
 	void setPositionAbsoluteGlobal( const LLVector3d &pos_global, BOOL damped = FALSE );
 
-	virtual const LLMatrix4& getWorldMatrix(LLXformMatrix* xform) const		{ return xform->getWorldMatrix(); }
+	virtual const LLMatrix4a& getWorldMatrix(LLXformMatrix* xform) const		{ return xform->getWorldMatrix(); }
 
 	inline void setRotation(const F32 x, const F32 y, const F32 z, BOOL damped = FALSE);
 	inline void setRotation(const LLQuaternion& quat, BOOL damped = FALSE);
diff --git a/indra/newview/llviewerregion.cpp b/indra/newview/llviewerregion.cpp
index c61ee9aadf2478624eded96b20b30a4cba86bf04..120b4421d56aaa88f928fbe5f482c6535e102731 100644
--- a/indra/newview/llviewerregion.cpp
+++ b/indra/newview/llviewerregion.cpp
@@ -595,6 +595,9 @@ LLViewerRegion::LLViewerRegion(const U64 &handle,
 	mRegionCacheMissCount(0)
 {
 	mWidth = region_width_meters;
+
+	mRenderMatrix.setIdentity();
+
 	mImpl->mOriginGlobal = from_region_handle(handle); 
 	updateRenderMatrix();
 
@@ -813,7 +816,7 @@ void LLViewerRegion::setOriginGlobal(const LLVector3d &origin_global)
 
 void LLViewerRegion::updateRenderMatrix()
 {
-	mRenderMatrix.setTranslation(getOriginAgent());
+	mRenderMatrix.setTranslate_affine(getOriginAgent());
 }
 
 void LLViewerRegion::setTimeDilation(F32 time_dilation)
diff --git a/indra/newview/llviewerregion.h b/indra/newview/llviewerregion.h
index b82c6bb0cc34ca62335f013fb47d1325bc145fdd..6e1169acca5d71532eba8c376e7ed0d43ddcc275 100644
--- a/indra/newview/llviewerregion.h
+++ b/indra/newview/llviewerregion.h
@@ -446,7 +446,7 @@ class LLViewerRegion final : public LLCapabilityProvider // implements this inte
 	F32Bits	mBitsReceived;
 	F32		mPacketsReceived;
 
-	LLMatrix4 mRenderMatrix;
+	LL_ALIGN_16(LLMatrix4a mRenderMatrix);
 
 	// These arrays are maintained in parallel. Ideally they'd be combined into a
 	// single array of an aggrigate data type but for compatibility with the old
diff --git a/indra/newview/llvoavatar.cpp b/indra/newview/llvoavatar.cpp
index 00665d7743a35e3cb5e97dea845303a884604e35..ef085d7dd44975a6a8e6b1c2d46c7ca3dc277687 100644
--- a/indra/newview/llvoavatar.cpp
+++ b/indra/newview/llvoavatar.cpp
@@ -1261,7 +1261,10 @@ const LLVector3 LLVOAvatar::getRenderPosition() const
 	}
 	else
 	{
-		return getPosition() * mDrawable->getParent()->getRenderMatrix();
+		LLVector4a pos;
+		pos.load3(getPosition().mV);
+		mDrawable->getParent()->getRenderMatrix().affineTransform(pos,pos);
+		return LLVector3(pos.getF32ptr());
 	}
 }
 
@@ -1373,9 +1376,7 @@ void LLVOAvatar::calculateSpatialExtents(LLVector4a& newMin, LLVector4a& newMax)
             LLPolyMesh *mesh = polymesh_pair.second;
             for (S32 joint_num = 0; joint_num < mesh->mJointRenderData.size(); joint_num++)
             {
-                LLVector4a trans;
-                trans.load3( mesh->mJointRenderData[joint_num]->mWorldMatrix->getTranslation().mV);
-                update_min_max(newMin, newMax, trans);
+                update_min_max(newMin, newMax, mesh->mJointRenderData[joint_num]->mWorldMatrix->getRow<LLMatrix4a::ROW_TRANS>());
             }
         }
     }
@@ -1483,9 +1484,9 @@ void LLVOAvatar::calculateSpatialExtents(LLVector4a& newMin, LLVector4a& newMax)
                     // Ignore bounding box of HUD joints
                     continue;
                 }
-                LLMatrix4a mat;
+
                 LLVector4a new_extents[2];
-                mat.loadu(joint->getWorldMatrix());
+				const LLMatrix4a& mat = joint->getWorldMatrix();
 				mat.mulBoundBox(rig_info->getRiggedExtents(), new_extents);
                 update_min_max(newMin, newMax, new_extents[0]);
                 update_min_max(newMin, newMax, new_extents[1]);
@@ -1562,7 +1563,7 @@ void LLVOAvatar::renderCollisionVolumes()
 		collision_volume.updateWorldMatrix();
 
 		gGL.pushMatrix();
-		gGL.multMatrix( &collision_volume.getXform()->getWorldMatrix().mMatrix[0][0] );
+		gGL.multMatrix(collision_volume.getXform()->getWorldMatrix());
 
         LLVector3 begin_pos(0,0,0);
         LLVector3 end_pos(collision_volume.getEnd());
@@ -1671,7 +1672,7 @@ void LLVOAvatar::renderBones(const std::string &selected_joint)
 
         
 		gGL.pushMatrix();
-		gGL.multMatrix( &jointp->getXform()->getWorldMatrix().mMatrix[0][0] );
+		gGL.multMatrix(jointp->getXform()->getWorldMatrix());
 
         render_sphere_and_line(begin_pos, end_pos, sphere_scale, occ_color, visible_color);
         
@@ -1699,7 +1700,7 @@ void LLVOAvatar::renderJoints()
 		jointp->updateWorldMatrix();
 	
 		gGL.pushMatrix();
-		gGL.multMatrix( &jointp->getXform()->getWorldMatrix().mMatrix[0][0] );
+		gGL.multMatrix(jointp->getXform()->getWorldMatrix());
 
 		gGL.diffuseColor3f( 1.f, 0.f, 1.f );
 	
@@ -1793,8 +1794,7 @@ BOOL LLVOAvatar::lineSegmentIntersect(const LLVector4a& start, const LLVector4a&
 		{
 			mCollisionVolumes[i].updateWorldMatrix();
             
-			LLMatrix4a mat;
-			mat.loadu(mCollisionVolumes[i].getXform()->getWorldMatrix());
+			LLMatrix4a mat = mCollisionVolumes[i].getXform()->getWorldMatrix();
 			LLMatrix4a inverse = mat;
 			inverse.invert();
 			LLMatrix4a norm_mat = inverse;
@@ -4284,7 +4284,7 @@ void LLVOAvatar::updateOrientation(LLAgent& agent, F32 speed, F32 delta_time)
 				}
 			}
 
-			LLQuaternion root_rotation = mRoot->getWorldMatrix().quaternion();
+			LLQuaternion root_rotation = LLMatrix4(mRoot->getWorldMatrix().getF32ptr()).quaternion();
 			F32 root_roll, root_pitch, root_yaw;
 			root_rotation.getEulerAngles(&root_roll, &root_pitch, &root_yaw);
 
@@ -4293,7 +4293,7 @@ void LLVOAvatar::updateOrientation(LLAgent& agent, F32 speed, F32 delta_time)
 			// and head turn.  Once in motion, it must conform however.
 			BOOL self_in_mouselook = isSelf() && gAgentCamera.cameraMouselook();
 
-			LLVector3 pelvisDir( mRoot->getWorldMatrix().getFwdRow4().mV );
+			LLVector3 pelvisDir( mRoot->getWorldMatrix().getRow<LLMatrix4a::ROW_FWD>().getF32ptr() );
 
 			static LLCachedControl<F32> s_pelvis_rot_threshold_slow(gSavedSettings, "AvatarRotateThresholdSlow", 60.0);
 			static LLCachedControl<F32> s_pelvis_rot_threshold_fast(gSavedSettings, "AvatarRotateThresholdFast", 2.0);
@@ -10132,9 +10132,8 @@ void LLVOAvatar::dumpArchetypeXML(const std::string& prefix, bool group_by_weara
                 LLJointRiggingInfo& rig_info = mJointRiggingInfoTab[joint_num];
                 if (rig_info.isRiggedTo())
                 {
-                    LLMatrix4a mat;
                     LLVector4a new_extents[2];
-                    mat.loadu(joint->getWorldMatrix());
+					const LLMatrix4a& mat = joint->getWorldMatrix();
                     mat.mulBoundBox(rig_info.getRiggedExtents(), new_extents);
                     LLVector4a rrp[2];
                     rrp[0].setSub(new_extents[0],rpv);
diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp
index 75e947de14c8363b1f240e27eff1dd4c2473cb81..df7b578ba9cd83fcff1efd7dbccc359d91c997ab 100644
--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@@ -1819,7 +1819,7 @@ void LLVOVolume::updateRelativeXform(bool force_identity)
 	if (drawable->isState(LLDrawable::RIGGED) && mRiggedVolume.notNull())
 	{ //rigged volume (which is in agent space) is used for generating bounding boxes etc
 	  //inverse of render matrix should go to partition space
-		mRelativeXform.loadu((F32*)&getRenderMatrix().mMatrix[0][0]);
+		mRelativeXform = getRenderMatrix();
 		mRelativeXformInvTrans = mRelativeXform;
 		mRelativeXform.invert();
 		mRelativeXformInvTrans.transpose();
@@ -3914,7 +3914,7 @@ BOOL LLVOVolume::isHUDAttachment() const
 }
 
 
-const LLMatrix4& LLVOVolume::getRenderMatrix() const
+const LLMatrix4a& LLVOVolume::getRenderMatrix() const
 {
 	if (mDrawable->isActive() && !mDrawable->isRoot())
 	{
@@ -4533,7 +4533,7 @@ void LLVOVolume::onShift(const LLVector4a &shift_vector)
 	updateRelativeXform();
 }
 
-const LLMatrix4& LLVOVolume::getWorldMatrix(LLXformMatrix* xform) const
+const LLMatrix4a& LLVOVolume::getWorldMatrix(LLXformMatrix* xform) const
 {
 	if (mVolumeImpl)
 	{
@@ -5205,7 +5205,7 @@ void LLVolumeGeometryManager::registerFace(LLSpatialGroup* group, LLFace* facep,
 		tex_mat = facep->mTextureMatrix;	
 	}
 
-	const LLMatrix4* model_mat = NULL;
+	const LLMatrix4a* model_mat = NULL;
 
 	LLDrawable* drawable = facep->getDrawable();
 	
diff --git a/indra/newview/llvovolume.h b/indra/newview/llvovolume.h
index d8402b628aa527bd9be341e182b0928cd92406e9..4044a466dbd36ec7f053fb97d031a8ef193a4f77 100644
--- a/indra/newview/llvovolume.h
+++ b/indra/newview/llvovolume.h
@@ -88,7 +88,7 @@ class LLVolumeInterface
 	virtual bool isVolumeUnique() const = 0; // Do we need a unique LLVolume instance?
 	virtual bool isVolumeGlobal() const = 0; // Are we in global space?
 	virtual bool isActive() const = 0; // Is this object currently active?
-	virtual const LLMatrix4& getWorldMatrix(LLXformMatrix* xform) const = 0;
+	virtual const LLMatrix4a& getWorldMatrix(LLXformMatrix* xform) const = 0;
 	virtual void updateRelativeXform(bool force_identity = false) = 0;
 	virtual U32 getID() const = 0;
 	virtual void preRebuild() = 0;
@@ -140,7 +140,7 @@ class LLVOVolume final : public LLViewerObject
 	const LLVector3		getPivotPositionAgent() const;
 	const LLMatrix4a&	getRelativeXform() const				{ return mRelativeXform; }
 	const LLMatrix4a&	getRelativeXformInvTrans() const		{ return mRelativeXformInvTrans; }
-	/*virtual*/	const LLMatrix4&	getRenderMatrix() const;
+	/*virtual*/	const LLMatrix4a&	getRenderMatrix() const;
 				typedef std::map<LLUUID, S32> texture_cost_t;
 				U32 	getRenderCost(texture_cost_t &textures) const;
     /*virtual*/	F32		getEstTrianglesMax() const;
@@ -173,7 +173,7 @@ class LLVOVolume final : public LLViewerObject
 				BOOL	getVolumeChanged() const				{ return mVolumeChanged; }
 				
 	/*virtual*/ F32  	getRadius() const						{ return mVObjRadius; };
-				const LLMatrix4& getWorldMatrix(LLXformMatrix* xform) const;
+				const LLMatrix4a& getWorldMatrix(LLXformMatrix* xform) const;
 
 				void	markForUpdate(BOOL priority);
 				void	markForUnload()							{ LLViewerObject::markForUnload(TRUE); mVolumeChanged = TRUE; }
diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp
index 94ba24a1f2be3179656376f702a7c8cd32399a18..6878fe0b103b6d31c5d04b82836105dcca6b4ba3 100644
--- a/indra/newview/pipeline.cpp
+++ b/indra/newview/pipeline.cpp
@@ -247,7 +247,7 @@ bool	gAvatarBacklight = false;
 
 bool	gDebugPipeline = false;
 LLPipeline gPipeline;
-const LLMatrix4* gGLLastMatrix = NULL;
+const LLMatrix4a* gGLLastMatrix = NULL;
 
 LLTrace::BlockTimerStatHandle FTM_RENDER_GEOMETRY("Render Geometry");
 LLTrace::BlockTimerStatHandle FTM_RENDER_GRASS("Grass");
@@ -5435,7 +5435,7 @@ void LLPipeline::renderDebug()
 		if (!bridge->isDead() && hasRenderType(bridge->mDrawableType))
 		{
 			gGL.pushMatrix();
-			gGL.multMatrix((F32*)bridge->mDrawable->getRenderMatrix().mMatrix);
+			gGL.multMatrix(bridge->mDrawable->getRenderMatrix());
 			bridge->renderDebug();
 			gGL.popMatrix();
 		}
@@ -5709,7 +5709,7 @@ void LLPipeline::renderDebug()
 			if (bridge)
 			{
 				gGL.pushMatrix();
-				gGL.multMatrix((F32*)bridge->mDrawable->getRenderMatrix().mMatrix);
+				gGL.multMatrix(bridge->mDrawable->getRenderMatrix());
 			}
 
 			F32 alpha = llclamp((F32) (size-count)/size, 0.f, 1.f);
diff --git a/indra/newview/pipeline.h b/indra/newview/pipeline.h
index b22ccd7896d29dc9239a6496e2d199b402616439..b05583558fb4ba49a3dbea05f6ce1ff4fbeae45e 100644
--- a/indra/newview/pipeline.h
+++ b/indra/newview/pipeline.h
@@ -981,6 +981,6 @@ void render_hud_elements();
 
 extern LLPipeline gPipeline;
 extern bool gDebugPipeline;
-extern const LLMatrix4* gGLLastMatrix;
+extern const LLMatrix4a* gGLLastMatrix;
 
 #endif