diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index 184cb878c2d93a935f40b6b5e62ecdeda8f22cf1..18bb473aef63f83241e286eabdd268c4af61ac67 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -50,7 +50,6 @@
 #include "llvector4a.h"
 #include "llmatrix4a.h"
 #include "lltimer.h"
-#include <meshoptimizer.h>
 
 #define DEBUG_SILHOUETTE_BINORMALS 0
 #define DEBUG_SILHOUETTE_NORMALS 0 // TomY: Use this to display normals using the silhouette
@@ -2370,9 +2369,7 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
 	U32 uzip_result = LLUZipHelper::unzip_llsd(mdl, is, size);
 	if (uzip_result != LLUZipHelper::ZR_OK)
 	{
-#if SHOW_DEBUG
 		LL_DEBUGS("MeshStreaming") << "Failed to unzip LLSD blob for LoD with code " << uzip_result << " , will probably fetch from sim again." << LL_ENDL;
-#endif
 		return false;
 	}
 	
@@ -3766,19 +3763,19 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
 
 						LLVector4a t;
 						mat.affineTransform(v[v1], t);
-						vertices.emplace_back(LLVector3(t[0], t[1], t[2]));
+						vertices.emplace_back(t[0], t[1], t[2]);
 
 						norm_mat.rotate(n[v1], t);
 
 						t.normalize3fast();
-						normals.emplace_back(LLVector3(t[0], t[1], t[2]));
+						normals.emplace_back(t[0], t[1], t[2]);
 
 						mat.affineTransform(v[v2], t);
-						vertices.emplace_back(LLVector3(t[0], t[1], t[2]));
+						vertices.emplace_back(t[0], t[1], t[2]);
 
 						norm_mat.rotate(n[v2], t);
 						t.normalize3fast();
-						normals.emplace_back(LLVector3(t[0], t[1], t[2]));
+						normals.emplace_back(t[0], t[1], t[2]);
 					}
 				}
 			}
@@ -3950,19 +3947,19 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
 						
 						LLVector4a t;
 						mat.affineTransform(v[v1], t);
-						vertices.emplace_back(LLVector3(t[0], t[1], t[2]));
+						vertices.emplace_back(t[0], t[1], t[2]);
 
 						norm_mat.rotate(n[v1], t);
 
 						t.normalize3fast();
-						normals.emplace_back(LLVector3(t[0], t[1], t[2]));
+						normals.emplace_back(t[0], t[1], t[2]);
 
 						mat.affineTransform(v[v2], t);
-						vertices.emplace_back(LLVector3(t[0], t[1], t[2]));
+						vertices.emplace_back(t[0], t[1], t[2]);
 						
 						norm_mat.rotate(n[v2], t);
 						t.normalize3fast();
-						normals.emplace_back(LLVector3(t[0], t[1], t[2]));
+						normals.emplace_back(t[0], t[1], t[2]);
 					}
 				}		
 			}
@@ -4658,10 +4655,6 @@ LLVolumeFace::LLVolumeFace() :
 	mTexCoords(NULL),
 	mIndices(NULL),
 	mWeights(NULL),
-#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
-    mJustWeights(NULL),
-    mJointIndices(NULL),
-#endif
     mWeightsScrubbed(FALSE),
 	mOctree(NULL),
 	mOptimized(FALSE)
@@ -4688,10 +4681,6 @@ LLVolumeFace::LLVolumeFace(const LLVolumeFace& src)
 	mTexCoords(NULL),
 	mIndices(NULL),
 	mWeights(NULL),
-#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
-    mJustWeights(NULL),
-    mJointIndices(NULL),
-#endif
     mWeightsScrubbed(FALSE),
 	mOctree(NULL),
 	mOptimized(FALSE)
@@ -5229,132 +5218,241 @@ bool LLVolumeFace::cacheOptimize()
 	llassert(!mOptimized);
 	mOptimized = TRUE;
 
+	LLVCacheLRU cache;
+	
 	if (mNumVertices < 3 || mNumIndices < 3)
 	{ //nothing to do
 		return true;
 	}
 
-	struct buffer_data_t {
-		void** dst;		// Double pointer to volume attribute data. Avoids fixup after reallocating buffers on resize.
-		void* scratch;	// Scratch buffer. Allocated with vert count from meshopt_generateVertexRemapMulti
-		size_t stride;	// Stride between continguous attributes
-	};
-	std::vector< meshopt_Stream > streams;	// Contains data necessary for meshopt_generateVertexRemapMulti call
-	std::vector< buffer_data_t > buffers;	// Contains data necessary for meshopt_remapVertexBuffer calls.
+	//mapping of vertices to triangles and indices
+	std::vector<LLVCacheVertexData> vertex_data;
+
+	//mapping of triangles to vertices
+	std::vector<LLVCacheTriangleData> triangle_data;
 
+	try
 	{
-		static struct { size_t offs; size_t size; size_t stride; } ref_streams[] = {
-			{ offsetof(LLVolumeFace, mPositions),	sizeof(float) * 3, sizeof(mPositions[0]) },
-			{ offsetof(LLVolumeFace, mNormals),		sizeof(float) * 3, sizeof(mNormals[0]) },	// Subsection of mPositions allocation
-			{ offsetof(LLVolumeFace, mTexCoords),	sizeof(float) * 2, sizeof(mTexCoords[0]) },	// Subsection of mPositions allocation
-			{ offsetof(LLVolumeFace, mWeights),		sizeof(float) * 3, sizeof(mWeights[0]) },
-			{ offsetof(LLVolumeFace, mTangents),	sizeof(float) * 3, sizeof(mTangents[0]) },
-		};
+		triangle_data.resize(mNumIndices / 3);
+		vertex_data.resize(mNumVertices);
+	}
+	catch (const std::bad_alloc&)
+	{
+		LL_WARNS("LLVOLUME") << "Resize failed" << LL_ENDL;
+		return false;
+	}
+
+	for (S32 i = 0; i < mNumIndices; i++)
+	{ //populate vertex data and triangle data arrays
+		U16 idx = mIndices[i];
+		U32 tri_idx = i/3;
+
+		vertex_data[idx].mTriangles.push_back(&(triangle_data[tri_idx]));
+		vertex_data[idx].mIdx = idx;
+		triangle_data[tri_idx].mVertex[i%3] = &(vertex_data[idx]);
+	}
 
-		for (size_t i = 0; i < sizeof(ref_streams) / sizeof(ref_streams[0]); ++i)
+	std::vector<size_t> v;
+	for (size_t j = 0; j < triangle_data.size(); ++j)
+		v.push_back(j);
+	
+	/*F32 pre_acmr = 1.f;
+	//measure cache misses from before rebuild
+	{
+		LLVCacheFIFO test_cache;
+		for (U32 i = 0; i < mNumIndices; ++i)
 		{
-			void** ptr = reinterpret_cast<void**>((char*)this + ref_streams[i].offs);
-			if (*ptr)
-			{
-				streams.push_back({ *ptr, ref_streams[i].size, ref_streams[i].stride });
-				buffers.push_back({ ptr, nullptr, ref_streams[i].stride });
-			}
+			test_cache.addVertex(&vertex_data[mIndices[i]]);
+		}
+
+		for (U32 i = 0; i < mNumVertices; i++)
+		{
+			vertex_data[i].mCacheTag = -1;
+		}
+
+		pre_acmr = (F32) test_cache.mMisses/(mNumIndices/3);
+	}*/
+
+	for (S32 i = 0; i < mNumVertices; i++)
+	{ //initialize score values (no cache -- might try a fifo cache here)
+		LLVCacheVertexData& data = vertex_data[i];
+
+		data.mScore = find_vertex_score(data);
+		data.mActiveTriangles = data.mTriangles.size();
+
+		for (U32 j = 0; j < data.mActiveTriangles; ++j)
+		{
+			data.mTriangles[j]->mScore += data.mScore;
 		}
 	}
 
-	std::vector<unsigned int> remap(mNumIndices);
-	std::vector<U16> indices(mNumIndices);
-	try
+	//sort triangle data by score
+	std::sort( v.begin(), v.end(),
+			   [&triangle_data](size_t rhs, size_t lhs )
+			   { return triangle_data[rhs].mScore > triangle_data[lhs].mScore; }
+			   );
+		
+	std::vector<U16> new_indices;
+
+	LLVCacheTriangleData* tri;
+
+	//prime pump by adding first triangle to cache;
+	tri = &(triangle_data[v[0]]);
+	
+	cache.addTriangle(tri);
+	new_indices.push_back(tri->mVertex[0]->mIdx);
+	new_indices.push_back(tri->mVertex[1]->mIdx);
+	new_indices.push_back(tri->mVertex[2]->mIdx);
+	tri->complete();
+
+	U32 breaks = 0;
+	for (S32 i = 1; i < mNumIndices/3; ++i)
 	{
-		remap.reserve(mNumIndices);
-		indices.reserve(mNumIndices);
+		cache.updateScores();
+		tri = cache.mBestTriangle;
+		if (!tri)
+		{
+			breaks++;
+			for (size_t j = 0; j < triangle_data.size(); ++j)
+			{
+				if (triangle_data[v[j]].mActive)
+				{
+					tri = &(triangle_data[v[j]]);
+					break;
+				}
+			}
+		}	
+		
+		cache.addTriangle(tri);
+		new_indices.push_back(tri->mVertex[0]->mIdx);
+		new_indices.push_back(tri->mVertex[1]->mIdx);
+		new_indices.push_back(tri->mVertex[2]->mIdx);
+		tri->complete();
 	}
-	catch (const std::bad_alloc&)
+
+	for (S32 i = 0; i < mNumIndices; ++i)
 	{
-		return false;
+		mIndices[i] = new_indices[i];
 	}
 
-	size_t total_vertices = meshopt_generateVertexRemapMulti(remap.data(), mIndices, mNumIndices, mNumVertices, streams.data(), streams.size());
-	meshopt_remapIndexBuffer(indices.data(), mIndices, mNumIndices, remap.data());
-	bool failed = false;
-	for (auto& entry : buffers)
+	/*F32 post_acmr = 1.f;
+	//measure cache misses from after rebuild
 	{
-		// Create scratch buffer for attribute data. Avoids extra allocs in meshopt_remapVertexBuffer calls
-		void* buf_tmp = ll_aligned_malloc_16(entry.stride * total_vertices);
-		if (!buf_tmp)
+		LLVCacheFIFO test_cache;
+		for (U32 i = 0; i < mNumVertices; i++)
 		{
-			failed = true;
-			break;
+			vertex_data[i].mCacheTag = -1;
 		}
-		entry.scratch = buf_tmp;
-		// Write to scratch buffer
-		meshopt_remapVertexBuffer(entry.scratch, *entry.dst, mNumVertices, entry.stride, remap.data());
-	}
-	if (failed)
+
+		for (U32 i = 0; i < mNumIndices; ++i)
+		{
+			test_cache.addVertex(&vertex_data[mIndices[i]]);
+		}
+		
+		post_acmr = (F32) test_cache.mMisses/(mNumIndices/3);
+	}*/
+
+	//optimize for pre-TnL cache
+	
+	//allocate space for new buffer
+	S32 num_verts = mNumVertices;
+
+	LLVector4a* old_pos = mPositions;
+	LLVector4a* old_norm = old_pos + num_verts;
+	LLVector2* old_tc = (LLVector2*)(old_norm + num_verts);
+	mPositions = nullptr;
+	if (old_pos)
 	{
-		for (auto& entry : buffers)
+		if (!allocateVertices(num_verts))
 		{
-			// Release scratch buffer
-			ll_aligned_free_16(entry.scratch);
+			return false;
 		}
+	}
+	else
+	{
+		LL_WARNS("LLVOLUME") << "Posititions vector was null" << LL_ENDL;
 		return false;
 	}
 
-	if (mNumAllocatedVertices != total_vertices)
+	LLVector4a* old_wght = mWeights;
+	mWeights = nullptr;
+	if (old_wght)
 	{
-		// New allocations will be transparently accessable through dereffing dest_buffers.
-		if (!allocateVertices(total_vertices))
+		if(!allocateWeights(num_verts))
 		{
-			for (auto& entry : buffers)
-			{
-				// Release scratch buffer
-				ll_aligned_free_16(entry.scratch);
-			}
 			allocateVertices(0);
 			allocateWeights(0);
 			allocateTangents(0);
 			return false;
 		}
+	}
 
-		if (mWeights && !allocateWeights(total_vertices))
+	LLVector4a* old_tangent = mTangents;
+	mTangents = nullptr;
+	if (old_tangent)
+	{
+		if (!allocateTangents(num_verts))
 		{
-			for (auto& entry : buffers)
-			{
-				// Release scratch buffer
-				ll_aligned_free_16(entry.scratch);
-			}
 			allocateVertices(0);
 			allocateWeights(0);
 			allocateTangents(0);
 			return false;
 		}
+	}
 
-		if (mTangents && !allocateTangents(total_vertices))
-		{
-			for (auto& entry : buffers)
+	//allocate mapping of old indices to new indices
+	std::vector<S32> new_idx;
+
+	try
+	{
+		new_idx.resize(mNumVertices, -1);
+	}
+	catch (const std::bad_alloc&)
+	{
+		allocateVertices(0);
+		allocateWeights(0);
+		allocateTangents(0);
+		LL_WARNS("LLVOLUME") << "Resize failed: " << mNumVertices << LL_ENDL;
+		return false;
+	}
+
+	S32 cur_idx = 0;
+	for (S32 i = 0; i < mNumIndices; ++i)
+	{
+		U16 idx = mIndices[i];
+		if (new_idx[idx] == -1)
+		{ //this vertex hasn't been added yet
+			new_idx[idx] = cur_idx;
+
+			//copy vertex data
+			mPositions[cur_idx] = old_pos[idx];
+			mNormals[cur_idx] = old_norm[idx];
+			mTexCoords[cur_idx] = old_tc[idx];
+			if (mWeights)
 			{
-				// Release scratch buffer
-				ll_aligned_free_16(entry.scratch);
+				mWeights[cur_idx] = old_wght[idx];
 			}
-			allocateVertices(0);
-			allocateWeights(0);
-			allocateTangents(0);
-			return false;
+			if (mTangents)
+			{
+				mTangents[cur_idx] = old_tangent[idx];
+			}
+
+			cur_idx++;
 		}
 	}
 
-	meshopt_optimizeVertexCache(mIndices, indices.data(), mNumIndices, total_vertices);
-	meshopt_optimizeOverdraw(indices.data(), mIndices, mNumIndices, (float*)buffers[0].scratch, total_vertices, buffers[0].stride, 1.05f);
-	meshopt_optimizeVertexFetchRemap(remap.data(), indices.data(), mNumIndices, total_vertices);
-	meshopt_remapIndexBuffer(mIndices, indices.data(), mNumIndices, remap.data());
-	for (auto& entry : buffers)
+	for (S32 i = 0; i < mNumIndices; ++i)
 	{
-		// Write to llvolume attribute buffer
-		meshopt_remapVertexBuffer(*entry.dst, entry.scratch, total_vertices, entry.stride, remap.data());
-		// Release scratch buffer
-		ll_aligned_free_16(entry.scratch);
+		mIndices[i] = new_idx[mIndices[i]];
 	}
-	mNumVertices = total_vertices;
+
+	ll_aligned_free<64>(old_pos);
+	// DO NOT free mNormals and mTexCoords as they are part of mPositions buffer
+	ll_aligned_free_16(old_wght);
+	ll_aligned_free_16(old_tangent);
+
+	//std::string result = llformat("ACMR pre/post: %.3f/%.3f  --  %d triangles %d breaks", pre_acmr, post_acmr, mNumIndices/3, breaks);
+	//LL_INFOS() << result << LL_ENDL;
 
 	return true;
 }
diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h
index b0cdcf76d9def69b25fef57ca8cfcbb8820657b8..8b65eb210c0e9f69a9531043e548913d1ad28df1 100644
--- a/indra/llmath/llvolume.h
+++ b/indra/llmath/llvolume.h
@@ -877,7 +877,7 @@ class LLVolumeFace
 	bool allocateTangents(S32 num_verts);
 	bool allocateWeights(S32 num_verts);
 	bool allocateVertices(S32 num_verts, bool copy = false);
-    bool allocateIndices(S32 num_indices, bool copy = false);
+	bool allocateIndices(S32 num_indices, bool copy = false);
 	bool resizeIndices(S32 num_indices);
 	void fillFromLegacyData(std::vector<LLVolumeFace::VertexData>& v, std::vector<U16>& idx);
 
@@ -892,6 +892,8 @@ class LLVolumeFace
 	class VertexMapData : public LLVolumeFace::VertexData
 	{
 	public:
+        VertexMapData() : mIndex(0) { }
+
 		U16 mIndex;
 
 		bool operator==(const LLVolumeFace::VertexData& rhs) const;
@@ -959,11 +961,6 @@ class LLVolumeFace
 	// mWeights.size() should be empty or match mVertices.size()  
 	LLVector4a* mWeights;
 
-#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
-    LLVector4a* mJustWeights;
-    U8* mJointIndices;
-#endif
-
     mutable BOOL mWeightsScrubbed;
 
     // Which joints are rigged to, and the bounding box of any rigged