diff --git a/indra/llcommon/llerror.cpp b/indra/llcommon/llerror.cpp
index 5ed348e13cf178b1a335abe20dd6af1d014163cb..16ae1f06041f98635afac6a3ee9f2fc429323dc6 100644
--- a/indra/llcommon/llerror.cpp
+++ b/indra/llcommon/llerror.cpp
@@ -1493,3 +1493,20 @@ namespace LLError
    }
 }
 
+// Report whether debug-level logging is currently enabled for `tag`, without
+// generating any output. A fresh LLError::CallSite is constructed on every
+// call and discarded when the function returns.
+bool debugLoggingEnabled(const std::string& tag)
+{
+    // tags[] borrows tag's character buffer; it and the CallSite are both local to this call.
+    const char* tags[] = {tag.c_str()};
+    const ::size_t tag_count = sizeof(tags) / sizeof(tags[0]);
+    LLError::CallSite site(LLError::LEVEL_DEBUG, __FILE__, __LINE__,
+                           typeid(_LL_CLASS_TO_LOG), __FUNCTION__, false, tags, tag_count);
+
+    // shouldLog() is already the answer; the if/else that mapped it to
+    // true/false (and its LL_UNLIKELY branch hint) added nothing.
+    return site.shouldLog();
+}
+
+
diff --git a/indra/llcommon/llerror.h b/indra/llcommon/llerror.h
index 3beef657239edcefae14c79128dbe0dcd8ae1c47..555a189b00a15859c0b4aeec1ee6e400e9dcda9a 100644
--- a/indra/llcommon/llerror.h
+++ b/indra/llcommon/llerror.h
@@ -363,4 +363,7 @@ typedef LLError::NoClassInfo _LL_CLASS_TO_LOG;
 #define LL_INFOS_ONCE(...)	lllog(LLError::LEVEL_INFO, true, ##__VA_ARGS__)
 #define LL_WARNS_ONCE(...)	lllog(LLError::LEVEL_WARN, true, ##__VA_ARGS__)
 
+// Check at run-time whether debug-level logging is enabled for the given tag, without generating output
+bool debugLoggingEnabled(const std::string& tag);
+
 #endif // LL_LLERROR_H
diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h
index e11fa1bf7242652d4a56bae1cac29942573b8512..216334752a170c5364c1f675ba7e63ab9b9fb956 100644
--- a/indra/llmath/llmatrix4a.h
+++ b/indra/llmath/llmatrix4a.h
@@ -153,4 +153,27 @@ class LLMatrix4a
     }
 };
 
+inline LLVector4a rowMul(const LLVector4a &row, const LLMatrix4a &mat)
+{
+    LLVector4a sum; // row * mat = sum over i in 0..3 of (lane i of row, broadcast) * mat.mMatrix[i]
+    sum = _mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(0, 0, 0, 0)), mat.mMatrix[0]),
+                     _mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(1, 1, 1, 1)), mat.mMatrix[1]));
+    sum = _mm_add_ps(sum, _mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(2, 2, 2, 2)), mat.mMatrix[2]));
+    sum = _mm_add_ps(sum, _mm_mul_ps(_mm_shuffle_ps(row, row, _MM_SHUFFLE(3, 3, 3, 3)), mat.mMatrix[3]));
+    return sum;
+}
+
+inline void matMul(const LLMatrix4a &a, const LLMatrix4a &b, LLMatrix4a &res)
+{
+    LLVector4a rows[4]; // every row of a * b is built here before res is touched, so res may alias a or b
+    for (int i = 0; i < 4; ++i)
+    {
+        rows[i] = rowMul(a.mMatrix[i], b);
+    }
+    for (int i = 0; i < 4; ++i)
+    {
+        res.mMatrix[i] = rows[i];
+    }
+}
+
 #endif
diff --git a/indra/newview/llskinningutil.cpp b/indra/newview/llskinningutil.cpp
index e1333b8352d321089907adba814916f7aed2389d..3ad30dac6921b71c136ebc1ec67c5d7c7efa62b8 100644
--- a/indra/newview/llskinningutil.cpp
+++ b/indra/newview/llskinningutil.cpp
@@ -243,8 +243,6 @@ void LLSkinningUtil::initSkinningMatrixPalette(
     const LLMeshSkinInfo* skin,
     LLVOAvatar *avatar)
 {
-    // BENTO - switching to use Matrix4a and SSE might speed this up.
-    // Note that we are mostly passing Matrix4a's to this routine anyway, just dubiously casted.
     for (U32 j = 0; j < count; ++j)
     {
         LLJoint *joint = NULL;
@@ -260,13 +258,23 @@ void LLSkinningUtil::initSkinningMatrixPalette(
 		{
 			joint = avatar->getJoint(skin->mJointNums[j]);
 		}
-        mat[j] = skin->mInvBindMatrix[j];
         if (joint)
         {
+#define MAT_USE_SSE
+#ifdef MAT_USE_SSE
+            LLMatrix4a bind, world, res;
+            bind.loadu(skin->mInvBindMatrix[j]);
+            world.loadu(joint->getWorldMatrix());
+            matMul(bind,world,res);
+            memcpy(mat[j].mMatrix,res.mMatrix,16*sizeof(float));
+#else
+            mat[j] = skin->mInvBindMatrix[j];
             mat[j] *= joint->getWorldMatrix();
+#endif
         }
         else
         {
+            mat[j] = skin->mInvBindMatrix[j];
             // This  shouldn't  happen   -  in  mesh  upload,  skinned
             // rendering  should  be disabled  unless  all joints  are
             // valid.  In other  cases of  skinned  rendering, invalid
diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp
index fd77bc298591ffa2f9a1973d53d6c99c638668f6..f476a3641e282d2d3b07e40676b54cd5b3aca48a 100644
--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@@ -4779,13 +4779,11 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group)
             if (rigged && pAvatarVO)
             {
                 pAvatarVO->addAttachmentOverridesForObject(vobj);
-#if 0
-				if (pAvatarVO->isSelf())
-				{
+                if (debugLoggingEnabled("Avatar") && pAvatarVO->isSelf())
+                {
                     bool verbose = true;
 					pAvatarVO->showAttachmentOverrides(verbose);
 				}
-#endif
             }
 
 			//for each face