From a710bf9067bd4c4217b9febc0ad277a1636ec882 Mon Sep 17 00:00:00 2001
From: Dave Parks <davep@lindenlab.com>
Date: Mon, 9 Jan 2023 15:18:57 -0600
Subject: [PATCH] SL-18869 Optimizations -- Use _mm_prefetch to cut down on
 cache misses when iterating over render batches.

---
 indra/newview/lldrawpool.cpp          | 100 ++++++++++++++------------
 indra/newview/lldrawpoolalpha.cpp     |   2 +-
 indra/newview/lldrawpoolbump.cpp      |   4 +-
 indra/newview/lldrawpoolmaterials.cpp |   4 +-
 indra/newview/llspatialpartition.cpp  |  40 -----------
 indra/newview/llspatialpartition.h    |  19 ++++-
 indra/newview/llvograss.cpp           |   3 -
 indra/newview/llvopartgroup.cpp       |   3 -
 indra/newview/llvovolume.cpp          |   4 --
 indra/newview/pipeline.cpp            |  22 +++---
 10 files changed, 89 insertions(+), 112 deletions(-)

diff --git a/indra/newview/lldrawpool.cpp b/indra/newview/lldrawpool.cpp
index dd6b9147833..2abbe2f2f81 100644
--- a/indra/newview/lldrawpool.cpp
+++ b/indra/newview/lldrawpool.cpp
@@ -449,10 +449,14 @@ void teardown_texture_matrix(LLDrawInfo& params)
 void LLRenderPass::pushGLTFBatches(U32 type, U32 mask)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL;
-    for (LLCullResult::drawinfo_iterator i = gPipeline.beginRenderMap(type); i != gPipeline.endRenderMap(type); ++i)
+    auto* begin = gPipeline.beginRenderMap(type);
+    auto* end = gPipeline.endRenderMap(type);
+    for (LLCullResult::drawinfo_iterator i = begin; i != end; )
     {
         LL_PROFILE_ZONE_NAMED_CATEGORY_DRAWPOOL("pushGLTFBatch");
         LLDrawInfo& params = **i;
+        LLCullResult::increment_iterator(i, end);
+
         auto& mat = params.mGLTFMaterial;
 
         mat->bind();
@@ -476,10 +480,14 @@ void LLRenderPass::pushRiggedGLTFBatches(U32 type, U32 mask)
     LLVOAvatar* lastAvatar = nullptr;
     U64 lastMeshId = 0;
     mask |= LLVertexBuffer::MAP_WEIGHT4;
-    for (LLCullResult::drawinfo_iterator i = gPipeline.beginRenderMap(type); i != gPipeline.endRenderMap(type); ++i)
+    auto* begin = gPipeline.beginRenderMap(type);
+    auto* end = gPipeline.endRenderMap(type);
+    for (LLCullResult::drawinfo_iterator i = begin; i != end; )
     {
         LL_PROFILE_ZONE_NAMED_CATEGORY_DRAWPOOL("pushRiggedGLTFBatch");
         LLDrawInfo& params = **i;
+        LLCullResult::increment_iterator(i, end);
+
         auto& mat = params.mGLTFMaterial;
 
         mat->bind();
@@ -507,13 +515,14 @@ void LLRenderPass::pushRiggedGLTFBatches(U32 type, U32 mask)
 void LLRenderPass::pushBatches(U32 type, U32 mask, BOOL texture, BOOL batch_textures)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL;
-	for (LLCullResult::drawinfo_iterator i = gPipeline.beginRenderMap(type); i != gPipeline.endRenderMap(type); ++i)	
-	{
-		LLDrawInfo* pparams = *i;
-		if (pparams) 
-		{
-			pushBatch(*pparams, mask, texture, batch_textures);
-		}
+    auto* begin = gPipeline.beginRenderMap(type);
+    auto* end = gPipeline.endRenderMap(type);
+    for (LLCullResult::drawinfo_iterator i = begin; i != end; )
+    {
+        LLDrawInfo* pparams = *i;
+        LLCullResult::increment_iterator(i, end);
+
+		pushBatch(*pparams, mask, texture, batch_textures);
 	}
 }
 
@@ -523,34 +532,35 @@ void LLRenderPass::pushRiggedBatches(U32 type, U32 mask, BOOL texture, BOOL batc
     LLVOAvatar* lastAvatar = nullptr;
     U64 lastMeshId = 0;
     mask |= LLVertexBuffer::MAP_WEIGHT4;
-    for (LLCullResult::drawinfo_iterator i = gPipeline.beginRenderMap(type); i != gPipeline.endRenderMap(type); ++i)
+    auto* begin = gPipeline.beginRenderMap(type);
+    auto* end = gPipeline.endRenderMap(type);
+    for (LLCullResult::drawinfo_iterator i = begin; i != end; )
     {
         LLDrawInfo* pparams = *i;
-        if (pparams)
-        {
-            if (pparams->mAvatar.notNull() && (lastAvatar != pparams->mAvatar || lastMeshId != pparams->mSkinInfo->mHash))
-            {
-                uploadMatrixPalette(*pparams);
-                lastAvatar = pparams->mAvatar;
-                lastMeshId = pparams->mSkinInfo->mHash;
-            }
+        LLCullResult::increment_iterator(i, end);
 
-            pushBatch(*pparams, mask, texture, batch_textures);
+        if (pparams->mAvatar.notNull() && (lastAvatar != pparams->mAvatar || lastMeshId != pparams->mSkinInfo->mHash))
+        {
+            uploadMatrixPalette(*pparams);
+            lastAvatar = pparams->mAvatar;
+            lastMeshId = pparams->mSkinInfo->mHash;
         }
+
+        pushBatch(*pparams, mask, texture, batch_textures);
     }
 }
 
 void LLRenderPass::pushMaskBatches(U32 type, U32 mask, BOOL texture, BOOL batch_textures)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL;
-	for (LLCullResult::drawinfo_iterator i = gPipeline.beginRenderMap(type); i != gPipeline.endRenderMap(type); ++i)	
+    auto* begin = gPipeline.beginRenderMap(type);
+    auto* end = gPipeline.endRenderMap(type);
+	for (LLCullResult::drawinfo_iterator i = begin; i != end; )
 	{
-		LLDrawInfo* pparams = *i;
-		if (pparams) 
-		{
-			LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(pparams->mAlphaMaskCutoff);
-			pushBatch(*pparams, mask, texture, batch_textures);
-		}
+        LLDrawInfo* pparams = *i;
+        LLCullResult::increment_iterator(i, end);
+		LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(pparams->mAlphaMaskCutoff);
+		pushBatch(*pparams, mask, texture, batch_textures);
 	}
 }
 
@@ -559,29 +569,31 @@ void LLRenderPass::pushRiggedMaskBatches(U32 type, U32 mask, BOOL texture, BOOL
     LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL;
     LLVOAvatar* lastAvatar = nullptr;
     U64 lastMeshId = 0;
-    for (LLCullResult::drawinfo_iterator i = gPipeline.beginRenderMap(type); i != gPipeline.endRenderMap(type); ++i)
+    auto* begin = gPipeline.beginRenderMap(type);
+    auto* end = gPipeline.endRenderMap(type);
+    for (LLCullResult::drawinfo_iterator i = begin; i != end; )
     {
         LLDrawInfo* pparams = *i;
-        if (pparams)
-        {
-            if (LLGLSLShader::sCurBoundShaderPtr)
-            {
-                LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(pparams->mAlphaMaskCutoff);
-            }
-            else
-            {
-                gGL.flush();
-            }
 
-            if (lastAvatar != pparams->mAvatar || lastMeshId != pparams->mSkinInfo->mHash)
-            {
-                uploadMatrixPalette(*pparams);
-                lastAvatar = pparams->mAvatar;
-                lastMeshId = pparams->mSkinInfo->mHash;
-            }
+        LLCullResult::increment_iterator(i, end);
 
-            pushBatch(*pparams, mask | LLVertexBuffer::MAP_WEIGHT4, texture, batch_textures);
+        if (LLGLSLShader::sCurBoundShaderPtr)
+        {
+            LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(pparams->mAlphaMaskCutoff);
+        }
+        else
+        {
+            gGL.flush();
         }
+
+        if (lastAvatar != pparams->mAvatar || lastMeshId != pparams->mSkinInfo->mHash)
+        {
+            uploadMatrixPalette(*pparams);
+            lastAvatar = pparams->mAvatar;
+            lastMeshId = pparams->mSkinInfo->mHash;
+        }
+
+        pushBatch(*pparams, mask | LLVertexBuffer::MAP_WEIGHT4, texture, batch_textures);
     }
 }
 
diff --git a/indra/newview/lldrawpoolalpha.cpp b/indra/newview/lldrawpoolalpha.cpp
index dc7e5f51df5..3ce2ced2558 100644
--- a/indra/newview/lldrawpoolalpha.cpp
+++ b/indra/newview/lldrawpoolalpha.cpp
@@ -634,7 +634,7 @@ void LLDrawPoolAlpha::renderAlpha(U32 mask, bool depth_only, bool rigged)
                     continue;
                 }
 
-                LL_PROFILE_ZONE_NAMED_CATEGORY_DRAWPOOL("ra - push batch")
+                LL_PROFILE_ZONE_NAMED_CATEGORY_DRAWPOOL("ra - push batch");
 
                 U32 have_mask = params.mVertexBuffer->getTypeMask() & mask;
 				if (have_mask != mask)
diff --git a/indra/newview/lldrawpoolbump.cpp b/indra/newview/lldrawpoolbump.cpp
index 6af4e2274ca..4379fdc603b 100644
--- a/indra/newview/lldrawpoolbump.cpp
+++ b/indra/newview/lldrawpoolbump.cpp
@@ -724,10 +724,12 @@ void LLDrawPoolBump::renderDeferred(S32 pass)
         LLVOAvatar* avatar = nullptr;
         U64 skin = 0;
 
-        for (LLCullResult::drawinfo_iterator i = begin; i != end; ++i)
+        for (LLCullResult::drawinfo_iterator i = begin; i != end; )
         {
             LLDrawInfo& params = **i;
 
+            LLCullResult::increment_iterator(i, end);
+
             LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(params.mAlphaMaskCutoff);
             LLDrawPoolBump::bindBumpMap(params, bump_channel);
 
diff --git a/indra/newview/lldrawpoolmaterials.cpp b/indra/newview/lldrawpoolmaterials.cpp
index ec1ac6a88cb..858fb871d3c 100644
--- a/indra/newview/lldrawpoolmaterials.cpp
+++ b/indra/newview/lldrawpoolmaterials.cpp
@@ -203,11 +203,13 @@ void LLDrawPoolMaterials::renderDeferred(S32 pass)
 
     LLVOAvatar* lastAvatar = nullptr;
 
-	for (LLCullResult::drawinfo_iterator i = begin; i != end; ++i)
+	for (LLCullResult::drawinfo_iterator i = begin; i != end; )
 	{
         LL_PROFILE_ZONE_NAMED_CATEGORY_MATERIAL("materials draw loop");
 		LLDrawInfo& params = **i;
 		
+        LLCullResult::increment_iterator(i, end);
+
         if (specular > -1 && params.mSpecColor != lastSpecular)
         {
             lastSpecular = params.mSpecColor;
diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp
index 113cd98164d..7653913c2b3 100644
--- a/indra/newview/llspatialpartition.cpp
+++ b/indra/newview/llspatialpartition.cpp
@@ -2872,42 +2872,6 @@ void renderBatchSize(LLDrawInfo* params)
     }
 }
 
-void renderShadowFrusta(LLDrawInfo* params)
-{
-	LLGLEnable blend(GL_BLEND);
-	gGL.setSceneBlendType(LLRender::BT_ADD);
-
-	LLVector4a center;
-	center.setAdd(params->mExtents[1], params->mExtents[0]);
-	center.mul(0.5f);
-	LLVector4a size;
-	size.setSub(params->mExtents[1],params->mExtents[0]);
-	size.mul(0.5f);
-
-	if (gPipeline.mShadowCamera[4].AABBInFrustum(center, size))
-	{
-		gGL.diffuseColor3f(1,0,0);
-		pushVerts(params, LLVertexBuffer::MAP_VERTEX);
-	}
-	if (gPipeline.mShadowCamera[5].AABBInFrustum(center, size))
-	{
-		gGL.diffuseColor3f(0,1,0);
-		pushVerts(params, LLVertexBuffer::MAP_VERTEX);
-	}
-	if (gPipeline.mShadowCamera[6].AABBInFrustum(center, size))
-	{
-		gGL.diffuseColor3f(0,0,1);
-		pushVerts(params, LLVertexBuffer::MAP_VERTEX);
-	}
-	if (gPipeline.mShadowCamera[7].AABBInFrustum(center, size))
-	{
-		gGL.diffuseColor3f(1,0,1);
-		pushVerts(params, LLVertexBuffer::MAP_VERTEX);
-	}
-
-	gGL.setSceneBlendType(LLRender::BT_ALPHA);
-}
-
 void renderTexelDensity(LLDrawable* drawable)
 {
 	if (LLViewerTexture::sDebugTexelsMode == LLViewerTexture::DEBUG_TEXELS_OFF
@@ -3492,10 +3456,6 @@ class LLOctreeRenderNonOccluded : public OctreeTraveler
 				{
 					renderBatchSize(draw_info);
 				}
-				if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_SHADOW_FRUSTA))
-				{
-					renderShadowFrusta(draw_info);
-				}
 			}
 		}
 	}
diff --git a/indra/newview/llspatialpartition.h b/indra/newview/llspatialpartition.h
index 020a0104059..b765bd16326 100644
--- a/indra/newview/llspatialpartition.h
+++ b/indra/newview/llspatialpartition.h
@@ -87,8 +87,6 @@ class LLDrawInfo : public LLRefCount
     // return mSkinHash->mHash, or 0 if mSkinHash is null
     U64 getSkinHash();
 
-	LLVector4a mExtents[2];
-	
 	LLPointer<LLVertexBuffer> mVertexBuffer;
 	LLPointer<LLViewerTexture>     mTexture;
 	std::vector<LLPointer<LLViewerTexture> > mTextureList;
@@ -505,6 +503,23 @@ class LLCullResult
 	typedef LLDrawInfo** drawinfo_iterator;
 	typedef LLDrawable** drawable_iterator;
 
+    // Helper function for taking advantage of _mm_prefetch when iterating over cull results
+    static inline void increment_iterator(LLCullResult::drawinfo_iterator& i, const LLCullResult::drawinfo_iterator& end)
+    {
+        ++i;
+
+        if (i != end)
+        {
+            _mm_prefetch((char*)(*i)->mVertexBuffer.get(), _MM_HINT_NTA);
+
+            auto* ni = i + 1;
+            if (ni != end)
+            {
+                _mm_prefetch((char*)*ni, _MM_HINT_NTA);
+            }
+        }
+    }
+
 	void clear();
 	
 	sg_iterator beginVisibleGroups();
diff --git a/indra/newview/llvograss.cpp b/indra/newview/llvograss.cpp
index 36d66cccefd..b4b2db5d511 100644
--- a/indra/newview/llvograss.cpp
+++ b/indra/newview/llvograss.cpp
@@ -731,9 +731,6 @@ void LLGrassPartition::getGeometry(LLSpatialGroup* group)
 				//facep->getTexture(),
 				buffer, object->isSelected(), fullbright);
 
-			const LLVector4a* exts = group->getObjectExtents();
-			info->mExtents[0] = exts[0];
-			info->mExtents[1] = exts[1];
 			info->mVSize = vsize;
 			draw_vec.push_back(info);
 			//for alpha sorting
diff --git a/indra/newview/llvopartgroup.cpp b/indra/newview/llvopartgroup.cpp
index 0c09cbf2c28..a5c65d6ed47 100644
--- a/indra/newview/llvopartgroup.cpp
+++ b/indra/newview/llvopartgroup.cpp
@@ -963,9 +963,6 @@ void LLParticlePartition::getGeometry(LLSpatialGroup* group)
 			LLDrawInfo* info = new LLDrawInfo(start,end,count,offset,facep->getTexture(), 
 				buffer, object->isSelected(), fullbright);
 
-			const LLVector4a* exts = group->getObjectExtents();
-			info->mExtents[0] = exts[0];
-			info->mExtents[1] = exts[1];
 			info->mVSize = vsize;
 			info->mBlendFuncDst = bf_dst;
 			info->mBlendFuncSrc = bf_src;
diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp
index afebf27d8bd..53158ee66fa 100644
--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@@ -5305,8 +5305,6 @@ void LLVolumeGeometryManager::registerFace(LLSpatialGroup* group, LLFace* facep,
             draw_vec[idx]->mTextureListVSize[index] = vsize;
 		}
 		draw_vec[idx]->validate();
-		update_min_max(draw_vec[idx]->mExtents[0], draw_vec[idx]->mExtents[1], facep->mExtents[0]);
-		update_min_max(draw_vec[idx]->mExtents[0], draw_vec[idx]->mExtents[1], facep->mExtents[1]);
 	}
 	else
 	{
@@ -5385,8 +5383,6 @@ void LLVolumeGeometryManager::registerFace(LLSpatialGroup* group, LLFace* facep,
 		{ //for alpha sorting
 			facep->setDrawInfo(draw_info);
 		}
-		draw_info->mExtents[0] = facep->mExtents[0];
-		draw_info->mExtents[1] = facep->mExtents[1];
 
 		if (index < FACE_DO_NOT_BATCH_TEXTURES)
 		{ //initialize texture list for texture batching
diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp
index 10c271cddcd..9851d4bc6ad 100644
--- a/indra/newview/pipeline.cpp
+++ b/indra/newview/pipeline.cpp
@@ -2863,6 +2863,8 @@ void LLPipeline::clearRebuildDrawables()
 void LLPipeline::rebuildPriorityGroups()
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE;
+    LL_PROFILE_GPU_ZONE("rebuildPriorityGroups");
+
 	LLTimer update_timer;
 	assertInitialized();
 
@@ -7412,18 +7414,7 @@ void LLPipeline::renderShadowSimple(U32 type)
     {
         LLDrawInfo& params = **i;
 
-        ++i;
-
-        if (i != end)
-        {
-            _mm_prefetch((char*) (*i)->mVertexBuffer.get(), _MM_HINT_NTA);
-
-            auto* ni = i + 1;
-            if (ni != end)
-            {
-                _mm_prefetch((char*)*ni, _MM_HINT_NTA);
-            }
-        }
+        LLCullResult::increment_iterator(i, end);
 
         LLVertexBuffer* vb = params.mVertexBuffer;
         if (vb != last_vb)
@@ -7448,11 +7439,16 @@ void LLPipeline::renderAlphaObjects(U32 mask, bool texture, bool batch_texture,
     U32 type = LLRenderPass::PASS_ALPHA;
     LLVOAvatar* lastAvatar = nullptr;
     U64 lastMeshId = 0;
-    for (LLCullResult::drawinfo_iterator i = gPipeline.beginRenderMap(type); i != gPipeline.endRenderMap(type); ++i)
+    auto* begin = gPipeline.beginRenderMap(type);
+    auto* end = gPipeline.endRenderMap(type);
+
+    for (LLCullResult::drawinfo_iterator i = begin; i != end; )
     {
         LLDrawInfo* pparams = *i;
         if (pparams)
         {
+            LLCullResult::increment_iterator(i, end);
+
             if (rigged)
             {
                 if (pparams->mAvatar != nullptr)
-- 
GitLab