diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index fca9471f1410c8d556cee45008e35691813309dc..563a325f03bb2f224a1668232fb6867aca24c52b 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -2431,11 +2431,10 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
 
 			LLSD::Binary pos = mdl[i]["Position"];
 			LLSD::Binary norm = mdl[i]["Normal"];
+            LLSD::Binary tangent = mdl[i]["Tangent"];
 			LLSD::Binary tc = mdl[i]["TexCoord0"];
 			LLSD::Binary idx = mdl[i]["TriangleList"];
 
-			
-
 			//copy out indices
             S32 num_indices = idx.size() / 2;
             face.resizeIndices(num_indices);
@@ -2534,6 +2533,33 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
 				}
 			}
 
+            {
+                if (!tangent.empty())
+                {
+                    face.allocateTangents(face.mNumVertices, true);
+                    U16* t = (U16*)&(tangent[0]);
+
+                    // store incoming tangents in mMikktSpaceTangents
+                    // NOTE: tangents coming from the asset may not be mikkt space, but they should always be used by the GLTF shaders to
+                    // maintain compliance with the GLTF spec
+                    LLVector4a* t_out = face.mMikktSpaceTangents; 
+
+                    for (U32 j = 0; j < num_verts; ++j)
+                    {
+                        t_out->set((F32)t[0], (F32)t[1], (F32)t[2], (F32) t[3]);
+                        t_out->div(65535.f);
+                        t_out->mul(2.f);
+                        t_out->sub(1.f);
+
+                        F32* tp = t_out->getF32ptr();
+                        tp[3] = tp[3] < 0.f ? -1.f : 1.f;
+
+                        t_out++;
+                        t += 4;
+                    }
+                }
+            }
+
 			{
 				if (!tc.empty())
 				{
@@ -5429,124 +5455,135 @@ bool LLVolumeFace::cacheOptimize()
 	llassert(!mOptimized);
 	mOptimized = TRUE;
 
-    allocateTangents(mNumVertices, true);
+    if (!mNormals || !mTexCoords)
+    { // can't perform this operation without normals and texture coordinates
+        return false;
+    }
 
-    SMikkTSpaceInterface ms;
+    if (mMikktSpaceTangents == nullptr)
+    { // make sure to generate mikkt space tangents for cache optimizing since the index buffer may change
+        allocateTangents(mNumVertices, true);
 
-    ms.m_getNumFaces = [](const SMikkTSpaceContext* pContext)
-    {
-        MikktData* data = (MikktData*)pContext->m_pUserData;
-        LLVolumeFace* face = data->face;
-        return face->mNumIndices / 3;
-    };
+        SMikkTSpaceInterface ms;
 
-    ms.m_getNumVerticesOfFace = [](const SMikkTSpaceContext* pContext, const int iFace)
-    {
-        return 3;
-    };
+        ms.m_getNumFaces = [](const SMikkTSpaceContext* pContext)
+        {
+            MikktData* data = (MikktData*)pContext->m_pUserData;
+            LLVolumeFace* face = data->face;
+            return face->mNumIndices / 3;
+        };
 
-    ms.m_getPosition = [](const SMikkTSpaceContext* pContext, float fvPosOut[], const int iFace, const int iVert)
-    {
-        MikktData* data = (MikktData*)pContext->m_pUserData;
-        LLVolumeFace* face = data->face;
-        S32 idx = face->mIndices[iFace * 3 + iVert];
-        auto& vert = face->mPositions[idx];
-        F32* v = vert.getF32ptr();
-        fvPosOut[0] = v[0];
-        fvPosOut[1] = v[1];
-        fvPosOut[2] = v[2];
-    };
-
-    ms.m_getNormal = [](const SMikkTSpaceContext* pContext, float fvNormOut[], const int iFace, const int iVert)
-    {
-        MikktData* data = (MikktData*)pContext->m_pUserData;
-        LLVolumeFace* face = data->face;
-        S32 idx = face->mIndices[iFace * 3 + iVert];
-        auto& norm = face->mNormals[idx];
-        F32* n = norm.getF32ptr();
-        fvNormOut[0] = n[0];
-        fvNormOut[1] = n[1];
-        fvNormOut[2] = n[2];
-    };
-
-    ms.m_getTexCoord = [](const SMikkTSpaceContext* pContext, float fvTexcOut[], const int iFace, const int iVert)
-    {
-        MikktData* data = (MikktData*)pContext->m_pUserData;
-        LLVolumeFace* face = data->face;
-        S32 idx = face->mIndices[iFace * 3 + iVert];
-        auto& tc = face->mTexCoords[idx];
-        fvTexcOut[0] = tc.mV[0];
-        fvTexcOut[1] = tc.mV[1];
-    };
-
-    ms.m_setTSpaceBasic = [](const SMikkTSpaceContext* pContext, const float fvTangent[], const float fSign, const int iFace, const int iVert)
-    {
-        MikktData* data = (MikktData*)pContext->m_pUserData;
-        LLVolumeFace* face = data->face;
-        S32 i = iFace * 3 + iVert;
-        S32 idx = face->mIndices[i];
+        ms.m_getNumVerticesOfFace = [](const SMikkTSpaceContext* pContext, const int iFace)
+        {
+            return 3;
+        };
 
-        LLVector3 p(face->mPositions[idx].getF32ptr());
-        LLVector3 n(face->mNormals[idx].getF32ptr());
-        LLVector3 t(fvTangent);
+        ms.m_getPosition = [](const SMikkTSpaceContext* pContext, float fvPosOut[], const int iFace, const int iVert)
+        {
+            MikktData* data = (MikktData*)pContext->m_pUserData;
+            LLVolumeFace* face = data->face;
+            S32 idx = face->mIndices[iFace * 3 + iVert];
+            auto& vert = face->mPositions[idx];
+            F32* v = vert.getF32ptr();
+            fvPosOut[0] = v[0];
+            fvPosOut[1] = v[1];
+            fvPosOut[2] = v[2];
+        };
+
+        ms.m_getNormal = [](const SMikkTSpaceContext* pContext, float fvNormOut[], const int iFace, const int iVert)
+        {
+            MikktData* data = (MikktData*)pContext->m_pUserData;
+            LLVolumeFace* face = data->face;
+            S32 idx = face->mIndices[iFace * 3 + iVert];
+            auto& norm = face->mNormals[idx];
+            F32* n = norm.getF32ptr();
+            fvNormOut[0] = n[0];
+            fvNormOut[1] = n[1];
+            fvNormOut[2] = n[2];
+        };
+
+        ms.m_getTexCoord = [](const SMikkTSpaceContext* pContext, float fvTexcOut[], const int iFace, const int iVert)
+        {
+            MikktData* data = (MikktData*)pContext->m_pUserData;
+            LLVolumeFace* face = data->face;
+            S32 idx = face->mIndices[iFace * 3 + iVert];
+            auto& tc = face->mTexCoords[idx];
+            fvTexcOut[0] = tc.mV[0];
+            fvTexcOut[1] = tc.mV[1];
+        };
+
+        ms.m_setTSpaceBasic = [](const SMikkTSpaceContext* pContext, const float fvTangent[], const float fSign, const int iFace, const int iVert)
+        {
+            MikktData* data = (MikktData*)pContext->m_pUserData;
+            LLVolumeFace* face = data->face;
+            S32 i = iFace * 3 + iVert;
+            S32 idx = face->mIndices[i];
 
-        data->t[i].set(fvTangent);
-        data->t[i].mV[3] = fSign;
-    };
+            LLVector3 p(face->mPositions[idx].getF32ptr());
+            LLVector3 n(face->mNormals[idx].getF32ptr());
+            LLVector3 t(fvTangent);
 
-    ms.m_setTSpace = nullptr;
+            // assert that this tangent hasn't already been set
+            llassert(data->t[i].magVec() < 0.1f);
 
-    MikktData data(this);
+            data->t[i].set(fvTangent);
+            data->t[i].mV[3] = fSign;
+        };
 
-    SMikkTSpaceContext ctx = { &ms, &data };
+        ms.m_setTSpace = nullptr;
 
-    genTangSpaceDefault(&ctx);
+        MikktData data(this);
 
-    //re-weld
-    meshopt_Stream mos[] = 
-    {
-        { &data.p[0], sizeof(LLVector3), sizeof(LLVector3) },
-        { &data.n[0], sizeof(LLVector3), sizeof(LLVector3) },
-        { &data.t[0], sizeof(LLVector4), sizeof(LLVector4) },
-        { &data.tc[0], sizeof(LLVector2), sizeof(LLVector2) },
-        { data.w.empty() ? nullptr : &data.w[0], sizeof(LLVector4), sizeof(LLVector4) }
-    };
+        SMikkTSpaceContext ctx = { &ms, &data };
 
-    std::vector<U32> remap;
-    remap.resize(data.p.size());
+        genTangSpaceDefault(&ctx);
 
-    U32 stream_count = data.w.empty() ? 4 : 5;
+        //re-weld
+        meshopt_Stream mos[] =
+        {
+            { &data.p[0], sizeof(LLVector3), sizeof(LLVector3) },
+            { &data.n[0], sizeof(LLVector3), sizeof(LLVector3) },
+            { &data.t[0], sizeof(LLVector4), sizeof(LLVector4) },
+            { &data.tc[0], sizeof(LLVector2), sizeof(LLVector2) },
+            { data.w.empty() ? nullptr : &data.w[0], sizeof(LLVector4), sizeof(LLVector4) }
+        };
 
-    U32 vert_count = meshopt_generateVertexRemapMulti(&remap[0], nullptr, data.p.size(), data.p.size(), mos, stream_count);
+        std::vector<U32> remap;
+        remap.resize(data.p.size());
 
-    std::vector<U32> indices;
-    indices.resize(mNumIndices);
+        U32 stream_count = data.w.empty() ? 4 : 5;
 
-    //copy results back into volume
-    resizeVertices(vert_count);
+        U32 vert_count = meshopt_generateVertexRemapMulti(&remap[0], nullptr, data.p.size(), data.p.size(), mos, stream_count);
 
-    if (!data.w.empty())
-    {
-        allocateWeights(vert_count);
-    }
+        std::vector<U32> indices;
+        indices.resize(mNumIndices);
 
-    allocateTangents(mNumVertices, true);
+        //copy results back into volume
+        resizeVertices(vert_count);
 
-    for (int i = 0; i < mNumIndices; ++i)
-    {
-        U32 src_idx = i;
-        U32 dst_idx = remap[i];
-        mIndices[i] = dst_idx;
+        if (!data.w.empty())
+        {
+            allocateWeights(vert_count);
+        }
 
-        mPositions[dst_idx].load3(data.p[src_idx].mV);
-        mNormals[dst_idx].load3(data.n[src_idx].mV);
-        mTexCoords[dst_idx] = data.tc[src_idx];
-       
-        mMikktSpaceTangents[dst_idx].loadua(data.t[src_idx].mV);
+        allocateTangents(mNumVertices, true);
 
-        if (mWeights)
+        for (int i = 0; i < mNumIndices; ++i)
         {
-            mWeights[dst_idx].loadua(data.w[src_idx].mV);
+            U32 src_idx = i;
+            U32 dst_idx = remap[i];
+            mIndices[i] = dst_idx;
+
+            mPositions[dst_idx].load3(data.p[src_idx].mV);
+            mNormals[dst_idx].load3(data.n[src_idx].mV);
+            mTexCoords[dst_idx] = data.tc[src_idx];
+
+            mMikktSpaceTangents[dst_idx].loadua(data.t[src_idx].mV);
+
+            if (mWeights)
+            {
+                mWeights[dst_idx].loadua(data.w[src_idx].mV);
+            }
         }
     }
 
diff --git a/indra/llprimitive/lldaeloader.cpp b/indra/llprimitive/lldaeloader.cpp
index 50f4a4306e3fc99489cccc764ee07351569b97d7..9470146ce4a7510c5caadcac4bd577fbe33be102 100644
--- a/indra/llprimitive/lldaeloader.cpp
+++ b/indra/llprimitive/lldaeloader.cpp
@@ -2551,6 +2551,9 @@ bool LLDAELoader::loadModelsFromDomMesh(domMesh* mesh, std::vector<LLModel*>& mo
 	LLVolume::face_list_t remainder;
 	do 
 	{
+        // generate tangents and cache optimize before normalizing
+        ret->preprocessVolumeFaces();
+
 		// Insure we do this once with the whole gang and not per-model
 		//
 		if (!normalized && !mNoNormalize)
@@ -2561,10 +2564,11 @@ bool LLDAELoader::loadModelsFromDomMesh(domMesh* mesh, std::vector<LLModel*>& mo
 
 		ret->trimVolumeFacesToSize(LL_SCULPT_MESH_MAX_FACES, &remainder);
 
-		if (!mNoOptimize)
-		{
-			ret->remapVolumeFaces();
-		}
+        // NOTE: removal of unused/redundant vertices after normalizing (remapVolumeFaces) is currently disabled
+		//if (!mNoOptimize)
+		//{
+		//	ret->remapVolumeFaces();
+		//}
 
 		volume_faces = remainder.size();
 
diff --git a/indra/llprimitive/llmodel.cpp b/indra/llprimitive/llmodel.cpp
index 285c5f656b89003c2678c86b928e5a670960bafd..1ce287d7737aab79ab12f1cf1586bcf8bf58aa5d 100644
--- a/indra/llprimitive/llmodel.cpp
+++ b/indra/llprimitive/llmodel.cpp
@@ -187,6 +187,15 @@ void LLModel::trimVolumeFacesToSize(U32 new_count, LLVolume::face_list_t* remain
 	}
 }
 
+// Run cacheOptimize() on every volume face of this model (generates mikkt space tangents where a face has none).
+void LLModel::preprocessVolumeFaces()
+{
+    for (auto& face : mVolumeFaces)
+    {
+        face.cacheOptimize(); // NOTE(review): return value is ignored; cacheOptimize() returns false when normals or texcoords are missing
+    }
+}
+
 // Shrink the model to fit
 // on a 1x1x1 cube centered at the origin.
 // The positions and extents
@@ -296,6 +305,7 @@ void LLModel::normalizeVolumeFaces()
 			// the positions to fit within the unit cube.
 			LLVector4a* pos = (LLVector4a*) face.mPositions;
 			LLVector4a* norm = (LLVector4a*) face.mNormals;
+            LLVector4a* t = (LLVector4a*)face.mMikktSpaceTangents;
 
 			for (U32 j = 0; j < face.mNumVertices; ++j)
 			{
@@ -306,6 +316,14 @@ void LLModel::normalizeVolumeFaces()
 					norm[j].mul(inv_scale);
 					norm[j].normalize3();
 				}
+
+                if (t)
+                {
+                    F32 w = t[j].getF32ptr()[3];
+                    t[j].mul(inv_scale);
+                    t[j].normalize3();
+                    t[j].getF32ptr()[3] = w;
+                }
 			}
 		}
 
@@ -726,10 +744,12 @@ LLSD LLModel::writeModel(
 				LLSD::Binary verts(face.mNumVertices*3*2);
 				LLSD::Binary tc(face.mNumVertices*2*2);
 				LLSD::Binary normals(face.mNumVertices*3*2);
+                LLSD::Binary tangents(face.mNumVertices * 4 * 2);
 				LLSD::Binary indices(face.mNumIndices*2);
 
 				U32 vert_idx = 0;
 				U32 norm_idx = 0;
+                U32 tan_idx = 0;
 				U32 tc_idx = 0;
 			
 				LLVector2* ftc = (LLVector2*) face.mTexCoords;
@@ -782,6 +802,22 @@ LLSD LLModel::writeModel(
 							normals[norm_idx++] = buff[1];
 						}
 					}
+
+                    if (face.mMikktSpaceTangents)
+                    { //tangents
+                        F32* tangent = face.mMikktSpaceTangents[j].getF32ptr();
+
+                        for (U32 k = 0; k < 4; ++k)
+                        { //for each component
+                            //convert to 16-bit normalized
+                            U16 val = (U16)((tangent[k] + 1.f) * 0.5f * 65535);
+                            U8* buff = (U8*)&val;
+
+                            //write to binary buffer
+                            tangents[tan_idx++] = buff[0];
+                            tangents[tan_idx++] = buff[1];
+                        }
+                    }
 					
 					//texcoord
 					if (face.mTexCoords)
@@ -819,6 +855,11 @@ LLSD LLModel::writeModel(
 					mdl[model_names[idx]][i]["Normal"] = normals;
 				}
 
+                if (face.mMikktSpaceTangents)
+                {
+                    mdl[model_names[idx]][i]["Tangent"] = tangents;
+                }
+
 				if (face.mTexCoords)
 				{
 					mdl[model_names[idx]][i]["TexCoord0Domain"]["Min"] = min_tc.getValue();
diff --git a/indra/llprimitive/llmodel.h b/indra/llprimitive/llmodel.h
index 354ceb26b7e831361eb7467cbd80795ac683588f..ea97851ce85eca878a9138bb270e8efaac207ec4 100644
--- a/indra/llprimitive/llmodel.h
+++ b/indra/llprimitive/llmodel.h
@@ -182,6 +182,7 @@ class LLModel : public LLVolume
 	void addFace(const LLVolumeFace& face);
 
 	void sortVolumeFacesByMaterialName();
+    void preprocessVolumeFaces();
 	void normalizeVolumeFaces();
 	void trimVolumeFacesToSize(U32 new_count = LL_SCULPT_MESH_MAX_FACES, LLVolume::face_list_t* remainder = NULL);
     void remapVolumeFaces();
diff --git a/indra/newview/app_settings/shaders/class1/deferred/pbropaqueF.glsl b/indra/newview/app_settings/shaders/class1/deferred/pbropaqueF.glsl
index ca304f749a19bd82fbd7572e989a27dc42a507f1..f0f5208f52ef705b83e32d3dcf3cc988b906addd 100644
--- a/indra/newview/app_settings/shaders/class1/deferred/pbropaqueF.glsl
+++ b/indra/newview/app_settings/shaders/class1/deferred/pbropaqueF.glsl
@@ -94,6 +94,8 @@ void main()
     //col = vec3(0,0,0);
     //emissive = vary_tangent.xyz*0.5+0.5;
     //emissive = vec3(sign*0.5+0.5);
+    //emissive = vNt * 0.5 + 0.5;
+    //emissive = tnorm*0.5+0.5;
     // See: C++: addDeferredAttachments(), GLSL: softenLightF
     frag_data[0] = vec4(col, 0.0);                                                   // Diffuse
     frag_data[1] = vec4(emissive, vertex_color.a);                                   // PBR sRGB Emissive
diff --git a/indra/newview/llmodelpreview.cpp b/indra/newview/llmodelpreview.cpp
index c3fbada9db7d54524497fb2e75d4204f73bf59d3..2c0f0ae4432629fab225be905550a4cc9feda0b9 100644
--- a/indra/newview/llmodelpreview.cpp
+++ b/indra/newview/llmodelpreview.cpp
@@ -1308,9 +1308,10 @@ F32 LLModelPreview::genMeshOptimizerPerModel(LLModel *base_model, LLModel *targe
 
     // extra space for normals and text coords
     S32 tc_bytes_size = ((size_vertices * sizeof(LLVector2)) + 0xF) & ~0xF;
-    LLVector4a* combined_positions = (LLVector4a*)ll_aligned_malloc<64>(sizeof(LLVector4a) * 2 * size_vertices + tc_bytes_size);
+    LLVector4a* combined_positions = (LLVector4a*)ll_aligned_malloc<64>(sizeof(LLVector4a) * 3 * size_vertices + tc_bytes_size);
     LLVector4a* combined_normals = combined_positions + size_vertices;
-    LLVector2* combined_tex_coords = (LLVector2*)(combined_normals + size_vertices);
+    LLVector4a* combined_tangents = combined_normals + size_vertices;
+    LLVector2* combined_tex_coords = (LLVector2*)(combined_tangents + size_vertices);
 
     // copy indices and vertices into new buffers
     S32 combined_positions_shift = 0;
@@ -1320,6 +1321,9 @@ F32 LLModelPreview::genMeshOptimizerPerModel(LLModel *base_model, LLModel *targe
     {
         const LLVolumeFace &face = base_model->getVolumeFace(face_idx);
 
+        // ensure tangents have been generated or loaded
+        llassert(face.mMikktSpaceTangents);
+
         // Vertices
         S32 copy_bytes = face.mNumVertices * sizeof(LLVector4a);
         LLVector4a::memcpyNonAliased16((F32*)(combined_positions + combined_positions_shift), (F32*)face.mPositions, copy_bytes);
@@ -1327,6 +1331,9 @@ F32 LLModelPreview::genMeshOptimizerPerModel(LLModel *base_model, LLModel *targe
         // Normals
         LLVector4a::memcpyNonAliased16((F32*)(combined_normals + combined_positions_shift), (F32*)face.mNormals, copy_bytes);
 
+        // Tangents
+        LLVector4a::memcpyNonAliased16((F32*)(combined_tangents + combined_positions_shift), (F32*)face.mMikktSpaceTangents, copy_bytes);
+
         // Tex coords
         copy_bytes = face.mNumVertices * sizeof(LLVector2);
         memcpy((void*)(combined_tex_coords + combined_positions_shift), (void*)face.mTexCoords, copy_bytes);
@@ -1428,9 +1435,10 @@ F32 LLModelPreview::genMeshOptimizerPerModel(LLModel *base_model, LLModel *targe
 
     // IV. Repack back into individual faces
 
-    LLVector4a* buffer_positions = (LLVector4a*)ll_aligned_malloc<64>(sizeof(LLVector4a) * 2 * size_vertices + tc_bytes_size);
+    LLVector4a* buffer_positions = (LLVector4a*)ll_aligned_malloc<64>(sizeof(LLVector4a) * 3 * size_vertices + tc_bytes_size);
     LLVector4a* buffer_normals = buffer_positions + size_vertices;
-    LLVector2* buffer_tex_coords = (LLVector2*)(buffer_normals + size_vertices);
+    LLVector4a* buffer_tangents = buffer_normals + size_vertices;
+    LLVector2* buffer_tex_coords = (LLVector2*)(buffer_tangents + size_vertices);
     S32 buffer_idx_size = (size_indices * sizeof(U16) + 0xF) & ~0xF;
     U16* buffer_indices = (U16*)ll_aligned_malloc_16(buffer_idx_size);
     S32* old_to_new_positions_map = new S32[size_vertices];
@@ -1511,6 +1519,7 @@ F32 LLModelPreview::genMeshOptimizerPerModel(LLModel *base_model, LLModel *targe
                     // Copy vertice, normals, tcs
                     buffer_positions[buf_positions_copied] = combined_positions[idx];
                     buffer_normals[buf_positions_copied] = combined_normals[idx];
+                    buffer_tangents[buf_positions_copied] = combined_tangents[idx];
                     buffer_tex_coords[buf_positions_copied] = combined_tex_coords[idx];
 
                     old_to_new_positions_map[idx] = buf_positions_copied;
@@ -1549,12 +1558,13 @@ F32 LLModelPreview::genMeshOptimizerPerModel(LLModel *base_model, LLModel *targe
         {
             new_face.resizeIndices(buf_indices_copied);
             new_face.resizeVertices(buf_positions_copied);
-
+            new_face.allocateTangents(buf_positions_copied, true);
             S32 idx_size = (buf_indices_copied * sizeof(U16) + 0xF) & ~0xF;
             LLVector4a::memcpyNonAliased16((F32*)new_face.mIndices, (F32*)buffer_indices, idx_size);
 
             LLVector4a::memcpyNonAliased16((F32*)new_face.mPositions, (F32*)buffer_positions, buf_positions_copied * sizeof(LLVector4a));
             LLVector4a::memcpyNonAliased16((F32*)new_face.mNormals, (F32*)buffer_normals, buf_positions_copied * sizeof(LLVector4a));
+            LLVector4a::memcpyNonAliased16((F32*)new_face.mMikktSpaceTangents, (F32*)buffer_tangents, buf_positions_copied * sizeof(LLVector4a));
 
             U32 tex_size = (buf_positions_copied * sizeof(LLVector2) + 0xF)&~0xF;
             LLVector4a::memcpyNonAliased16((F32*)new_face.mTexCoords, (F32*)buffer_tex_coords, tex_size);