diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index 4468ba6d9b1079e31dcb250566b14194b34e879b..f608bbe2be33855306d0e941be433ef83865a44c 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -50,6 +50,7 @@
 #include "llvector4a.h"
 #include "llmatrix4a.h"
 #include "lltimer.h"
+#include <meshoptimizer.h>
 
 #define DEBUG_SILHOUETTE_BINORMALS 0
 #define DEBUG_SILHOUETTE_NORMALS 0 // TomY: Use this to display normals using the silhouette
@@ -5228,236 +5229,132 @@ bool LLVolumeFace::cacheOptimize()
 	llassert(!mOptimized);
 	mOptimized = TRUE;
 
-	LLVCacheLRU cache;
-	
 	if (mNumVertices < 3 || mNumIndices < 3)
 	{ //nothing to do
 		return true;
 	}
 
-	//mapping of vertices to triangles and indices
-	std::vector<LLVCacheVertexData> vertex_data;
-
-	//mapping of triangles do vertices
-	std::vector<LLVCacheTriangleData> triangle_data;
-
-	try
-	{
-		triangle_data.resize(mNumIndices / 3);
-		vertex_data.resize(mNumVertices);
-	}
-	catch (const std::bad_alloc&)
-	{
-		LL_WARNS("LLVOLUME") << "Resize failed" << LL_ENDL;
-		return false;
-	}
-
-	for (U32 i = 0; i < mNumIndices; i++)
-	{ //populate vertex data and triangle data arrays
-		U16 idx = mIndices[i];
-		U32 tri_idx = i/3;
-
-		vertex_data[idx].mTriangles.push_back(&(triangle_data[tri_idx]));
-		vertex_data[idx].mIdx = idx;
-		triangle_data[tri_idx].mVertex[i%3] = &(vertex_data[idx]);
-	}
+	struct buffer_data_t {
+		void** dst;		// Double pointer to volume attribute data. Avoids fixup after reallocating buffers on resize.
+		void* scratch;	// Scratch buffer. Allocated with vert count from meshopt_generateVertexRemapMulti
+		size_t stride;	// Stride between continguous attributes
+	};
+	std::vector< meshopt_Stream > streams;	// Contains data necessary for meshopt_generateVertexRemapMulti call
+	std::vector< buffer_data_t > buffers;	// Contains data necessary for meshopt_remapVertexBuffer calls.
 
-	/*F32 pre_acmr = 1.f;
-	//measure cache misses from before rebuild
 	{
-		LLVCacheFIFO test_cache;
-		for (U32 i = 0; i < mNumIndices; ++i)
-		{
-			test_cache.addVertex(&vertex_data[mIndices[i]]);
-		}
-
-		for (U32 i = 0; i < mNumVertices; i++)
-		{
-			vertex_data[i].mCacheTag = -1;
-		}
-
-		pre_acmr = (F32) test_cache.mMisses/(mNumIndices/3);
-	}*/
-
-	for (U32 i = 0; i < mNumVertices; i++)
-	{ //initialize score values (no cache -- might try a fifo cache here)
-		LLVCacheVertexData& data = vertex_data[i];
-
-		data.mScore = find_vertex_score(data);
-		data.mActiveTriangles = data.mTriangles.size();
+		static struct { size_t offs; size_t size; size_t stride; } ref_streams[] = {
+			{ offsetof(LLVolumeFace, mPositions),	sizeof(float) * 3, sizeof(mPositions[0]) },
+			{ offsetof(LLVolumeFace, mNormals),		sizeof(float) * 3, sizeof(mNormals[0]) },	// Subsection of mPositions allocation
+			{ offsetof(LLVolumeFace, mTexCoords),	sizeof(float) * 2, sizeof(mTexCoords[0]) },	// Subsection of mPositions allocation
+			{ offsetof(LLVolumeFace, mWeights),		sizeof(float) * 3, sizeof(mWeights[0]) },
+			{ offsetof(LLVolumeFace, mTangents),	sizeof(float) * 3, sizeof(mTangents[0]) },
+		};
 
-		for (U32 j = 0; j < data.mActiveTriangles; ++j)
+		for (size_t i = 0; i < sizeof(ref_streams) / sizeof(ref_streams[0]); ++i)
 		{
-			data.mTriangles[j]->mScore += data.mScore;
+			void** ptr = reinterpret_cast<void**>((char*)this + ref_streams[i].offs);
+			if (*ptr)
+			{
+				streams.push_back({ *ptr, ref_streams[i].size, ref_streams[i].stride });
+				buffers.push_back({ ptr, nullptr, ref_streams[i].stride });
+			}
 		}
 	}
 
-	//sort triangle data by score
-	std::sort(triangle_data.begin(), triangle_data.end());
-
-	std::vector<U16> new_indices;
-
-	LLVCacheTriangleData* tri;
-
-	//prime pump by adding first triangle to cache;
-	tri = &(triangle_data[0]);
-	cache.addTriangle(tri);
-	new_indices.push_back(tri->mVertex[0]->mIdx);
-	new_indices.push_back(tri->mVertex[1]->mIdx);
-	new_indices.push_back(tri->mVertex[2]->mIdx);
-	tri->complete();
-
-	U32 breaks = 0;
-	for (U32 i = 1; i < mNumIndices/3; ++i)
+	std::vector<unsigned int> remap(mNumIndices);
+	std::vector<U16> indices(mNumIndices);
+	try
 	{
-		cache.updateScores();
-		tri = cache.mBestTriangle;
-		if (!tri)
-		{
-			breaks++;
-			for (U32 j = 0; j < triangle_data.size(); ++j)
-			{
-				if (triangle_data[j].mActive)
-				{
-					tri = &(triangle_data[j]);
-					break;
-				}
-			}
-		}	
-		
-		cache.addTriangle(tri);
-		new_indices.push_back(tri->mVertex[0]->mIdx);
-		new_indices.push_back(tri->mVertex[1]->mIdx);
-		new_indices.push_back(tri->mVertex[2]->mIdx);
-		tri->complete();
+		remap.reserve(mNumIndices);
+		indices.reserve(mNumIndices);
 	}
-
-	for (U32 i = 0; i < mNumIndices; ++i)
+	catch (const std::bad_alloc&)
 	{
-		mIndices[i] = new_indices[i];
+		return false;
 	}
 
-	/*F32 post_acmr = 1.f;
-	//measure cache misses from after rebuild
+	size_t total_vertices = meshopt_generateVertexRemapMulti(remap.data(), mIndices, mNumIndices, mNumVertices, streams.data(), streams.size());
+	meshopt_remapIndexBuffer(indices.data(), mIndices, mNumIndices, remap.data());
+	bool failed = false;
+	for (auto& entry : buffers)
 	{
-		LLVCacheFIFO test_cache;
-		for (U32 i = 0; i < mNumVertices; i++)
+		// Create scratch buffer for attribute data. Avoids extra allocs in meshopt_remapVertexBuffer calls
+		void* buf_tmp = ll_aligned_malloc_16(entry.stride * total_vertices);
+		if (!buf_tmp)
 		{
-			vertex_data[i].mCacheTag = -1;
+			failed = true;
+			break;
 		}
-
-		for (U32 i = 0; i < mNumIndices; ++i)
+		entry.scratch = buf_tmp;
+		// Write to scratch buffer
+		meshopt_remapVertexBuffer(entry.scratch, *entry.dst, mNumVertices, entry.stride, remap.data());
+	}
+	if (failed)
+	{
+		for (auto& entry : buffers)
 		{
-			test_cache.addVertex(&vertex_data[mIndices[i]]);
+			// Release scratch buffer
+			ll_aligned_free_16(entry.scratch);
 		}
-		
-		post_acmr = (F32) test_cache.mMisses/(mNumIndices/3);
-	}*/
-
-	//optimize for pre-TnL cache
-	
-	//allocate space for new buffer
-	S32 num_verts = mNumVertices;
-	S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF;
-	LLVector4a* pos = (LLVector4a*) ll_aligned_malloc<64>(sizeof(LLVector4a)*2*num_verts+size);
-	if (pos == NULL)
-	{
-		LL_WARNS("LLVOLUME") << "Allocation of positions vector[" << sizeof(LLVector4a) * 2 * num_verts + size  << "] failed. " << LL_ENDL;
 		return false;
 	}
-	LLVector4a* norm = pos + num_verts;
-	LLVector2* tc = (LLVector2*) (norm + num_verts);
 
-	LLVector4a* wght = NULL;
-	if (mWeights)
+	if (mNumAllocatedVertices != total_vertices)
 	{
-		wght = (LLVector4a*)ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
-		if (wght == NULL)
+		// New allocations will be transparently accessable through dereffing dest_buffers.
+		if (!allocateVertices(total_vertices))
 		{
-			ll_aligned_free<64>(pos);
-			LL_WARNS("LLVOLUME") << "Allocation of weights[" << sizeof(LLVector4a) * num_verts << "] failed" << LL_ENDL;
+			for (auto& entry : buffers)
+			{
+				// Release scratch buffer
+				ll_aligned_free_16(entry.scratch);
+			}
+			allocateVertices(0);
+			allocateWeights(0);
+			allocateTangents(0);
 			return false;
 		}
-	}
 
-	LLVector4a* binorm = NULL;
-	if (mTangents)
-	{
-		binorm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
-		if (binorm == NULL)
+		if (mWeights && !allocateWeights(total_vertices))
 		{
-			ll_aligned_free<64>(pos);
-			ll_aligned_free_16(wght);
-			LL_WARNS("LLVOLUME") << "Allocation of binormals[" << sizeof(LLVector4a)*num_verts << "] failed" << LL_ENDL;
+			for (auto& entry : buffers)
+			{
+				// Release scratch buffer
+				ll_aligned_free_16(entry.scratch);
+			}
+			allocateVertices(0);
+			allocateWeights(0);
+			allocateTangents(0);
 			return false;
 		}
-	}
 
-	//allocate mapping of old indices to new indices
-	std::vector<S32> new_idx;
-
-	try
-	{
-		new_idx.resize(mNumVertices, -1);
-	}
-	catch (const std::bad_alloc&)
-	{
-		ll_aligned_free<64>(pos);
-		ll_aligned_free_16(wght);
-		ll_aligned_free_16(binorm);
-		LL_WARNS("LLVOLUME") << "Resize failed: " << mNumVertices << LL_ENDL;
-		return false;
-	}
-
-	S32 cur_idx = 0;
-	for (U32 i = 0; i < mNumIndices; ++i)
-	{
-		U16 idx = mIndices[i];
-		if (new_idx[idx] == -1)
-		{ //this vertex hasn't been added yet
-			new_idx[idx] = cur_idx;
-
-			//copy vertex data
-			pos[cur_idx] = mPositions[idx];
-			norm[cur_idx] = mNormals[idx];
-			tc[cur_idx] = mTexCoords[idx];
-			if (mWeights)
-			{
-				wght[cur_idx] = mWeights[idx];
-			}
-			if (mTangents)
+		if (mTangents && !allocateTangents(total_vertices))
+		{
+			for (auto& entry : buffers)
 			{
-				binorm[cur_idx] = mTangents[idx];
+				// Release scratch buffer
+				ll_aligned_free_16(entry.scratch);
 			}
-
-			cur_idx++;
+			allocateVertices(0);
+			allocateWeights(0);
+			allocateTangents(0);
+			return false;
 		}
 	}
 
-	for (U32 i = 0; i < mNumIndices; ++i)
+	meshopt_optimizeVertexCache(mIndices, indices.data(), mNumIndices, total_vertices);
+	meshopt_optimizeOverdraw(indices.data(), mIndices, mNumIndices, (float*)buffers[0].scratch, total_vertices, buffers[0].stride, 1.05f);
+	meshopt_optimizeVertexFetchRemap(remap.data(), indices.data(), mNumIndices, total_vertices);
+	meshopt_remapIndexBuffer(mIndices, indices.data(), mNumIndices, remap.data());
+	for (auto& entry : buffers)
 	{
-		mIndices[i] = new_idx[mIndices[i]];
+		// Write to llvolume attribute buffer
+		meshopt_remapVertexBuffer(*entry.dst, entry.scratch, total_vertices, entry.stride, remap.data());
+		// Release scratch buffer
+		ll_aligned_free_16(entry.scratch);
 	}
-	
-	ll_aligned_free<64>(mPositions);
-	// DO NOT free mNormals and mTexCoords as they are part of mPositions buffer
-	ll_aligned_free_16(mWeights);
-	ll_aligned_free_16(mTangents);
-#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
-    ll_aligned_free_16(mJointIndices);
-    ll_aligned_free_16(mJustWeights);
-    mJustWeights = NULL;
-    mJointIndices = NULL; // filled in later as necessary by skinning code for acceleration
-#endif
-
-	mPositions = pos;
-	mNormals = norm;
-	mTexCoords = tc;
-	mWeights = wght;    
-	mTangents = binorm;
-
-	//std::string result = llformat("ACMR pre/post: %.3f/%.3f  --  %d triangles %d breaks", pre_acmr, post_acmr, mNumIndices/3, breaks);
-	//LL_INFOS() << result << LL_ENDL;
+	mNumVertices = total_vertices;
 
 	return true;
 }