diff --git a/indra/llcommon/llerror.cpp b/indra/llcommon/llerror.cpp
index 7c555d73ae7838883eb016948778c6935001484b..cce6c447480f933b6613fa7e621bcc4d292b4d4e 100644
--- a/indra/llcommon/llerror.cpp
+++ b/indra/llcommon/llerror.cpp
@@ -1617,6 +1617,7 @@ namespace LLError
 
 bool debugLoggingEnabled(const std::string& tag)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_APP;
     LLMutexTrylock lock(getMutex<LOG_MUTEX>(), 5);
     if (!lock.isLocked())
     {
diff --git a/indra/llrender/llglslshader.cpp b/indra/llrender/llglslshader.cpp
index 5f29cbe3c93540fa94126a496376cb7445e36dbb..d6218b77fb252d0dba37f590a90d9c85c1c368f5 100644
--- a/indra/llrender/llglslshader.cpp
+++ b/indra/llrender/llglslshader.cpp
@@ -244,9 +244,9 @@ void LLGLSLShader::stopProfile()
     }
 }
 
-void LLGLSLShader::placeProfileQuery()
+void LLGLSLShader::placeProfileQuery(bool for_runtime)
 {
-    if (sProfileEnabled)
+    if (sProfileEnabled || for_runtime)
     {
         if (mTimerQuery == 0)
         {
@@ -255,42 +255,70 @@ void LLGLSLShader::placeProfileQuery()
             glGenQueries(1, &mPrimitivesQuery);
         }
 
-        glBeginQuery(GL_SAMPLES_PASSED, mSamplesQuery);
         glBeginQuery(GL_TIME_ELAPSED, mTimerQuery);
-        glBeginQuery(GL_PRIMITIVES_GENERATED, mPrimitivesQuery);
+
+        if (!for_runtime)
+        {
+            glBeginQuery(GL_SAMPLES_PASSED, mSamplesQuery);
+            glBeginQuery(GL_PRIMITIVES_GENERATED, mPrimitivesQuery);
+        }
     }
 }
 
-void LLGLSLShader::readProfileQuery()
+bool LLGLSLShader::readProfileQuery(bool for_runtime, bool force_read)
 {
-    if (sProfileEnabled)
+    if (sProfileEnabled || for_runtime)
     {
-        glEndQuery(GL_TIME_ELAPSED);
-        glEndQuery(GL_SAMPLES_PASSED);
-        glEndQuery(GL_PRIMITIVES_GENERATED);
+        if (!mProfilePending) // no readback outstanding: this call closes the open queries
+        {
+            glEndQuery(GL_TIME_ELAPSED);
+            if (!for_runtime)
+            { // samples/primitives queries are only begun by placeProfileQuery for non-runtime profiles
+                glEndQuery(GL_SAMPLES_PASSED);
+                glEndQuery(GL_PRIMITIVES_GENERATED);
+            }
+            mProfilePending = for_runtime; // only runtime reads may be deferred to a later call
+        }
+
+        if (mProfilePending && for_runtime && !force_read) // non-blocking poll, skipped when force_read is set
+        {
+            GLuint64 result = 0;
+            glGetQueryObjectui64v(mTimerQuery, GL_QUERY_RESULT_AVAILABLE, &result);
+
+            if (result != GL_TRUE)
+            {
+                return false; // timer result not available yet, mProfilePending stays set so the caller can retry
+            }
+        }
 
         GLuint64 time_elapsed = 0;
         glGetQueryObjectui64v(mTimerQuery, GL_QUERY_RESULT, &time_elapsed);
+        mTimeElapsed += time_elapsed; // accumulated for both runtime and full profiles
+        mProfilePending = false;
 
-        GLuint64 samples_passed = 0;
-        glGetQueryObjectui64v(mSamplesQuery, GL_QUERY_RESULT, &samples_passed);
+        if (!for_runtime) // full profile only: sample, primitive and bind statistics plus the static totals
+        {
+            GLuint64 samples_passed = 0;
+            glGetQueryObjectui64v(mSamplesQuery, GL_QUERY_RESULT, &samples_passed);
 
-        GLuint64 primitives_generated = 0;
-        glGetQueryObjectui64v(mPrimitivesQuery, GL_QUERY_RESULT, &primitives_generated);
-        sTotalTimeElapsed += time_elapsed;
-        mTimeElapsed += time_elapsed;
+            GLuint64 primitives_generated = 0;
+            glGetQueryObjectui64v(mPrimitivesQuery, GL_QUERY_RESULT, &primitives_generated);
+            sTotalTimeElapsed += time_elapsed;
 
-        sTotalSamplesDrawn += samples_passed;
-        mSamplesDrawn += samples_passed;
+            sTotalSamplesDrawn += samples_passed;
+            mSamplesDrawn += samples_passed;
 
-        U32 tri_count = (U32)primitives_generated / 3;
+            U32 tri_count = (U32)primitives_generated / 3;
 
-        mTrianglesDrawn += tri_count;
-        sTotalTrianglesDrawn += tri_count;
+            mTrianglesDrawn += tri_count;
+            sTotalTrianglesDrawn += tri_count;
 
-        sTotalBinds++;
-        mBinds++;
+            sTotalBinds++;
+            mBinds++;
+        }
     }
+
+    return true; // result consumed, or profiling inactive and nothing to read
 }
 
 
diff --git a/indra/llrender/llglslshader.h b/indra/llrender/llglslshader.h
index 2801ac58a6abb4edf6dd8061e81e69858f7e07be..0f9291bcc3a96ca1c52bf91dd6fde0e5c9f6804a 100644
--- a/indra/llrender/llglslshader.h
+++ b/indra/llrender/llglslshader.h
@@ -171,8 +171,16 @@ class LLGLSLShader
     void unload();
     void clearStats();
     void dumpStats();
-    void placeProfileQuery();
-    void readProfileQuery();
+
+    // place query objects for profiling if profiling is enabled
+    // if for_runtime is true, places only the timer query, regardless of whether profiling is enabled
+    void placeProfileQuery(bool for_runtime = false);
+
+    // Readback query objects if profiling is enabled
+    // If for_runtime is true, will readback timer query iff query is available
+    // Will return false if a query is pending (try again later)
+    // If force_read is true, will force an immediate readback (severe performance penalty)
+    bool readProfileQuery(bool for_runtime = false, bool force_read = false);
 
     BOOL createShader(std::vector<LLStaticHashedString>* attributes,
         std::vector<LLStaticHashedString>* uniforms,
@@ -308,6 +316,7 @@ class LLGLSLShader
     defines_map_t mDefines;
 
     //statistics for profiling shader performance
+    bool mProfilePending = false;
     U32 mTimerQuery;
     U32 mSamplesQuery;
     U32 mPrimitivesQuery;
@@ -324,6 +333,9 @@ class LLGLSLShader
     // this pointer should be set to whichever shader represents this shader's rigged variant
     LLGLSLShader* mRiggedVariant = nullptr;
 
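+    // hacky flag used for optimization in LLDrawPoolAlpha: set once LLPipeline::bindDeferredShaderFast has done a full bindDeferredShader, reset by prepare_alpha_shader for deferred environments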
+    bool mCanBindFast = false;
+
 #ifdef LL_PROFILER_ENABLE_RENDER_DOC
     void setLabel(const char* label);
 #endif
diff --git a/indra/llwindow/llwindowwin32.cpp b/indra/llwindow/llwindowwin32.cpp
index 58cab4f178f54ac086dace816a5738f29b15a97f..d4fc920c5ae79635e2af5a3e75d6c95a665023c2 100644
--- a/indra/llwindow/llwindowwin32.cpp
+++ b/indra/llwindow/llwindowwin32.cpp
@@ -4958,8 +4958,6 @@ void LLWindowWin32::LLWindowWin32Thread::updateVRAMUsage()
         { // current usage is sometimes unreliable on Intel GPUs, fall back to estimated usage
             cu_mb = llmax((U32)1, eu_mb);
         }
-        //F32 eu_error = (F32)((S32)eu_mb - (S32)cu_mb) / (F32)cu_mb;
-
         U32 target_mb = budget_mb;
 
         if (target_mb > 4096)  // if 4GB are installed, try to leave 2GB free 
@@ -4973,19 +4971,17 @@ void LLWindowWin32::LLWindowWin32Thread::updateVRAMUsage()
 
         mAvailableVRAM = cu_mb < target_mb ? target_mb - cu_mb : 0;
 
-        //LL_INFOS("Window") << "\nLocal\nAFR: " << info.AvailableForReservation / 1024 / 1024
-        //    << "\nBudget: " << info.Budget / 1024 / 1024
-        //    << "\nCR: " << info.CurrentReservation / 1024 / 1024
-        //    << "\nCU: " << info.CurrentUsage / 1024 / 1024
-        //    << "\nEU: " << eu_mb << llformat(" (%.2f)", eu_error)
-        //    << "\nTU: " << target_mb
-        //    << "\nAM: " << mAvailableVRAM << LL_ENDL;
-
-        /*mDXGIAdapter->QueryVideoMemoryInfo(0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info);
-        LL_INFOS("Window") << "\nNon-Local\nAFR: " << info.AvailableForReservation / 1024 / 1024
+#if 0
+        
+        F32 eu_error = (F32)((S32)eu_mb - (S32)cu_mb) / (F32)cu_mb;
+        LL_INFOS("Window") << "\nLocal\nAFR: " << info.AvailableForReservation / 1024 / 1024
             << "\nBudget: " << info.Budget / 1024 / 1024
             << "\nCR: " << info.CurrentReservation / 1024 / 1024
-            << "\nCU: " << info.CurrentUsage / 1024 / 1024 << LL_ENDL;*/
+            << "\nCU: " << info.CurrentUsage / 1024 / 1024
+            << "\nEU: " << eu_mb << llformat(" (%.2f)", eu_error)
+            << "\nTU: " << target_mb
+            << "\nAM: " << mAvailableVRAM << LL_ENDL;
+#endif
     }
     else if (mD3DDevice != NULL)
     { // fallback to D3D9
diff --git a/indra/newview/lldrawable.cpp b/indra/newview/lldrawable.cpp
index 105f4bfccb8049472633737cb52be2df00fbf32d..bbedf2f86c407981a3b769e288ab0ba360ac87b6 100644
--- a/indra/newview/lldrawable.cpp
+++ b/indra/newview/lldrawable.cpp
@@ -907,14 +907,6 @@ void LLDrawable::updateDistance(LLCamera& camera, bool force_update)
                 LLVector3 cam_pos_from_agent = LLViewerCamera::getInstance()->getOrigin();
                 LLVector3 cam_to_box_offset = point_to_box_offset(cam_pos_from_agent, av_box);
                 mDistanceWRTCamera = llmax(0.01f, ll_round(cam_to_box_offset.magVec(), 0.01f));
-#ifdef SHOW_DEBUG
-                LL_DEBUGS("DynamicBox") << volume->getAvatar()->getFullname() 
-                                        << " pos (ignored) " << pos
-                                        << " cam pos " << cam_pos_from_agent
-                                        << " box " << av_box[0] << "," << av_box[1] 
-                                        << " -> dist " << mDistanceWRTCamera
-                                        << LL_ENDL;
-#endif
                 mVObjp->updateLOD();
                 return;
             }
diff --git a/indra/newview/lldrawpoolalpha.cpp b/indra/newview/lldrawpoolalpha.cpp
index 7796623b23aec862a568e1d64f99a7e91a72aee6..10c4f8b1892f43d20b1db00b86a895c9595ba99a 100644
--- a/indra/newview/lldrawpoolalpha.cpp
+++ b/indra/newview/lldrawpoolalpha.cpp
@@ -107,12 +107,10 @@ static void prepare_alpha_shader(LLGLSLShader* shader, bool textureGamma, bool d
     // i.e. shaders\class1\deferred\alphaF.glsl
     if (deferredEnvironment)
     {
-        gPipeline.bindDeferredShader( *shader );
-    }
-    else
-    {
-        shader->bind();
+        shader->mCanBindFast = false;
     }
+    
+    shader->bind();
     shader->uniform1f(LLShaderMgr::DISPLAY_GAMMA, (gamma > 0.1f) ? 1.0f / gamma : (1.0f / 2.2f));
 
     if (LLPipeline::sRenderingHUDs)
diff --git a/indra/newview/llmeshrepository.cpp b/indra/newview/llmeshrepository.cpp
index 316d11a7ec61bf25ba612fe1c04f823ded3b2ae5..533d8f91d60173165a5c7ea2b0d595ef65162181 100644
--- a/indra/newview/llmeshrepository.cpp
+++ b/indra/newview/llmeshrepository.cpp
@@ -4465,6 +4465,7 @@ LLMeshCostData::LLMeshCostData()
 
 bool LLMeshCostData::init(const LLSD& header)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_VOLUME;
     mSizeByLOD.resize(4);
     mEstTrisByLOD.resize(4);
 
diff --git a/indra/newview/llviewerobject.cpp b/indra/newview/llviewerobject.cpp
index 1661c60c0af5e4ecff0d7f7cb9bd003a092c96d0..49c6711b223e157abadadc466c51b29497076763 100644
--- a/indra/newview/llviewerobject.cpp
+++ b/indra/newview/llviewerobject.cpp
@@ -4024,6 +4024,7 @@ U32 LLViewerObject::recursiveGetTriangleCount(S32* vcount) const
 // prim's scale. Should revisit at some point.
 F32 LLViewerObject::recursiveGetScaledSurfaceArea() const
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_VOLUME;
     F32 area = 0.f;
     const LLDrawable* drawable = mDrawable;
     if (drawable)
diff --git a/indra/newview/llvoavatar.cpp b/indra/newview/llvoavatar.cpp
index 739509f71262ecc00a934af1d7c7e8569db5a628..5a8907a46d2293516601a7eee22e6c48c1a01f83 100644
--- a/indra/newview/llvoavatar.cpp
+++ b/indra/newview/llvoavatar.cpp
@@ -2751,6 +2751,10 @@ void LLVOAvatar::idleUpdate(LLAgent &agent, const F64 &time)
 
     if ((LLFrameTimer::getFrameCount() + mID.mData[0]) % compl_upd_freq == 0)
     {
+        // DEPRECATED 
+        // replace with LLPipeline::profileAvatar?
+        // Avatar profile takes ~ 0.5ms while idleUpdateRenderComplexity takes ~5ms
+        // (both are unacceptably costly)
         idleUpdateRenderComplexity();
     }
     idleUpdateDebugInfo();
@@ -11264,6 +11268,7 @@ void LLVOAvatar::accountRenderComplexityForObject(
     std::map<LLUUID, U32>& item_complexity,
     std::map<LLUUID, U32>& temp_item_complexity)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
     if (attached_object && !attached_object->isHUDAttachment())
 		{
         mAttachmentVisibleTriangleCount += attached_object->recursiveGetTriangleCount();
@@ -11408,7 +11413,6 @@ void LLVOAvatar::accountRenderComplexityForObject(
 // Calculations for mVisualComplexity value
 void LLVOAvatar::calculateUpdateRenderComplexity()
 {
-    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
     /*****************************************************************
      * This calculation should not be modified by third party viewers,
      * since it is used to limit rendering and should be uniform for
@@ -11931,3 +11935,58 @@ BOOL LLVOAvatar::isTextureVisible(LLAvatarAppearanceDefines::ETextureIndex type,
 	// non-self avatars don't have wearables
 	return FALSE;
 }
+
+// Begin a GL_TIME_ELAPSED query for this avatar, creating the query object on first use.
+// NOTE(review): if this is called again while mGPUProfilePending is still set,
+// readProfileQuery() will skip glEndQuery for the newly begun query -- confirm callers
+// never re-profile an avatar whose previous readback is still pending.
+void LLVOAvatar::placeProfileQuery()
+{
+    if (mGPUTimerQuery == 0)
+    {
+        glGenQueries(1, &mGPUTimerQuery);
+    }
+
+    glBeginQuery(GL_TIME_ELAPSED, mGPUTimerQuery);
+}
+
+// End the timer query begun by placeProfileQuery() (first call only) and try to
+// read it back without stalling.
+//  retries - number of attempts left; while the result is unavailable and attempts remain,
+//            another attempt is posted to the "mainloop" work queue. Once attempts
+//            run out the result is read regardless of availability.
+// On readback, mGPURenderTime is set to the elapsed time in ms.
+void LLVOAvatar::readProfileQuery(S32 retries)
+{
+    if (!mGPUProfilePending)
+    { // first attempt for this query, close it
+        glEndQuery(GL_TIME_ELAPSED);
+        mGPUProfilePending = true;
+    }
+
+    GLuint64 result = 0;
+    glGetQueryObjectui64v(mGPUTimerQuery, GL_QUERY_RESULT_AVAILABLE, &result);
+
+    if (result == GL_TRUE || --retries <= 0)
+    { // query available (or out of retries), readback result
+        GLuint64 time_elapsed = 0;
+        glGetQueryObjectui64v(mGPUTimerQuery, GL_QUERY_RESULT, &time_elapsed);
+        mGPURenderTime = time_elapsed / 1000000.f;
+        mGPUProfilePending = false;
+    }
+    else
+    { // wait until next frame
+        LLUUID id = getID();
+
+        LL::WorkQueue::getInstance("mainloop")->post([id, retries] {
+            // the avatar is looked up by id rather than captured; it may no longer
+            // be in gObjectList by the time this queued work runs
+            LLVOAvatar* avatar = (LLVOAvatar*) gObjectList.findObject(id);
+            if (avatar)
+            {
+                avatar->readProfileQuery(retries);
+            }
+            });
+    }
+}
+
diff --git a/indra/newview/llvoavatar.h b/indra/newview/llvoavatar.h
index 40f8b5f1915c521e669f79c0fa0ecd3da735dae9..bdf275308d8d9b144174670f36ed63726ecd4a7f 100644
--- a/indra/newview/llvoavatar.h
+++ b/indra/newview/llvoavatar.h
@@ -314,6 +314,9 @@ class LLVOAvatar :
 	static const U32 VISUAL_COMPLEXITY_UNKNOWN;
 	void			updateVisualComplexity();
 	
+    void placeProfileQuery();
+    void readProfileQuery(S32 retries);
+
     // get the GPU time in ms of rendering this avatar including all attachments
     // returns -1 if this avatar has not been profiled using gPipeline.profileAvatar
     F32             getGPURenderTime() { return mGPURenderTime; }
@@ -322,18 +325,11 @@ class LLVOAvatar :
     // return -1 if this avatar has not been profiled using gPipeline.mProfileAvatar
     F32             getCPURenderTime() { return mCPURenderTime; }
 
-    // get the number of samples passed during the avatar profile
-    // return -1 if this avatar has not been profiled using gPipeline.mProfileAvatar
-    S32             getGPUSamplesPassed() { return mGPUSamplesPassed; }
-
-    // get the number of triangles rendered during the avatar profile
-    // return -1 if this avatar has not been profiled using gPipeline.mProfileAvatar
-    S32             getGPUTrianglesRendered() { return mGPUTrianglesRendered; }
-
-    // DEPRECATED -- obsolete avatar render cost
+    
+    // avatar render cost
 	U32				getVisualComplexity()			{ return mVisualComplexity;				};
 
-    // DEPRECATED -- obsolete surface area calculation
+    // surface area calculation
 	F32				getAttachmentSurfaceArea()		{ return mAttachmentSurfaceArea;		};
 
 	U32				getReportedVisualComplexity()					{ return mReportedVisualComplexity;				};	// Numbers as reported by the SL server
@@ -569,20 +565,18 @@ class LLVOAvatar :
 	S32	 		mUpdatePeriod;
 	S32  		mNumInitFaces; //number of faces generated when creating the avatar drawable, does not inculde splitted faces due to long vertex buffer.
 
+    // profile handle
+    U32 mGPUTimerQuery = 0;
+
     // profile results
 
     // GPU render time in ms
     F32 mGPURenderTime = -1.f;
+    bool mGPUProfilePending = false;
 
     // CPU render time in ms
     F32 mCPURenderTime = -1.f;
 
-    // number of samples passed according to GPU
-    S32 mGPUSamplesPassed = -1;
-
-    // number of triangles rendered according to GPU
-    S32 mGPUTrianglesRendered = -1;
-
 	// the isTooComplex method uses these mutable values to avoid recalculating too frequently
     // DEPRECATED -- obsolete avatar render cost values
 	mutable U32  mVisualComplexity;
diff --git a/indra/newview/llvoavatarself.cpp b/indra/newview/llvoavatarself.cpp
index bf47bb441864e775168825cab4b682294244c40f..05f73df5df7704b72a2198e9064fdb4de8fcdf91 100644
--- a/indra/newview/llvoavatarself.cpp
+++ b/indra/newview/llvoavatarself.cpp
@@ -1239,6 +1239,7 @@ LLViewerJointAttachment* LLVOAvatarSelf::getWornAttachmentPoint(const LLUUID& id
 
 bool LLVOAvatarSelf::getAttachedPointName(const LLUUID& inv_item_id, std::string& name) const
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
 	if (!gInventory.getItem(inv_item_id))
 	{
 		name = "ATTACHMENT_MISSING_ITEM";
diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp
index f2532980d6224a4905c12fa4c374376eed1d671e..fdb88a49f2b9980404f373ee01d833aaec65eba3 100644
--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@@ -1652,7 +1652,9 @@ BOOL LLVOVolume::updateLOD()
 	{
 		return FALSE;
 	}
-	
+
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_VOLUME;
+
 	BOOL lod_changed = FALSE;
 
 	if (!LLSculptIDSize::instance().isUnloaded(getVolume()->getParams().getSculptID())) 
@@ -1666,19 +1668,6 @@ BOOL LLVOVolume::updateLOD()
 
 	if (lod_changed)
 	{
-#ifdef SHOW_DEBUG
-		static const bool enable_log = debugLoggingEnabled("AnimatedObjectsLinkset");
-        if (enable_log)
-        {
-            if (isAnimatedObject() && isRiggedMesh())
-            {
-                std::string vobj_name = llformat("Vol%p", this);
-                F32 est_tris = getEstTrianglesMax();
-                LL_DEBUGS("AnimatedObjectsLinkset") << vobj_name << " updateLOD to " << getLOD() << ", tris " << est_tris << LL_ENDL; 
-            }
-        }
-#endif
-
 		gPipeline.markRebuild(mDrawable, LLDrawable::REBUILD_VOLUME, FALSE);
 		mLODChanged = TRUE;
 	}
@@ -3970,6 +3959,7 @@ const LLMatrix4a& LLVOVolume::getRenderMatrix() const
 // children, and cost should only be increased for unique textures  -Nyx
 U32 LLVOVolume::getRenderCost(texture_cost_t &textures) const
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_VOLUME;
     /*****************************************************************
      * This calculation should not be modified by third party viewers,
      * since it is used to limit rendering and should be uniform for
@@ -4582,22 +4572,11 @@ const LLMatrix4a& LLVOVolume::getWorldMatrix(LLXformMatrix* xform) const
 
 void LLVOVolume::markForUpdate(BOOL priority)
 { 
-#ifdef SHOW_DEBUG
-	static const bool enable_log = debugLoggingEnabled("AnimatedObjectsLinkset");
-    if (enable_log)
-    {
-        if (isAnimatedObject() && isRiggedMesh())
-        {
-            std::string vobj_name = llformat("Vol%p", this);
-            F32 est_tris = getEstTrianglesMax();
-            LL_DEBUGS("AnimatedObjectsLinkset") << vobj_name << " markForUpdate, tris " << est_tris << LL_ENDL; 
-        }
-    }
-#endif
     if (mDrawable)
     {
         shrinkWrap();
     }
+
     LLViewerObject::markForUpdate(priority); 
     mVolumeChanged = TRUE; 
 }
@@ -6123,18 +6102,6 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)
 					
 					if (!vobj) continue;
 
-#ifdef SHOW_DEBUG
-					static const bool enable_log = debugLoggingEnabled("AnimatedObjectsLinkset");
-					if (enable_log)
-					{
-						if (vobj->isAnimatedObject() && vobj->isRiggedMesh())
-						{
-							std::string vobj_name = llformat("Vol%p", vobj);
-							F32 est_tris = vobj->getEstTrianglesMax();
-							LL_DEBUGS("AnimatedObjectsLinkset") << vobj_name << " rebuildMesh, tris " << est_tris << LL_ENDL;
-						}
-					}
-#endif
 					if (vobj->isNoLOD()) continue;
 
 					vobj->preRebuild();
diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp
index 2f7a86cc2145d71f5ab8cc1ff10d902d4dfd2c9f..009dfe739086f16a56b3a1e20a497f3329b89487 100644
--- a/indra/newview/pipeline.cpp
+++ b/indra/newview/pipeline.cpp
@@ -3123,21 +3123,6 @@ void LLPipeline::markRebuild(LLDrawable *drawablep, LLDrawable::EDrawableFlags f
 {
 	if (drawablep && !drawablep->isDead() && assertInitialized())
 	{
-#ifdef SHOW_DEBUG
-		static const bool enable_log = debugLoggingEnabled("AnimatedObjectsLinkset");
-        if (enable_log)
-        {
-            LLVOVolume *vol_obj = drawablep->getVOVolume();
-            if (vol_obj && vol_obj->isAnimatedObject() && vol_obj->isRiggedMesh())
-            {
-                std::string vobj_name = llformat("Vol%p", vol_obj);
-                F32 est_tris = vol_obj->getEstTrianglesMax();
-                LL_DEBUGS("AnimatedObjectsLinkset") << vobj_name << " markRebuild, tris " << est_tris 
-                                                    << " priority " << (S32) priority << " flag " << std::hex << flag << LL_ENDL; 
-            }
-        }
-#endif
-    
 		if (!drawablep->isState(LLDrawable::BUILT))
 		{
 			priority = true;
@@ -7795,10 +7780,18 @@ void LLPipeline::bindShadowMaps(LLGLSLShader& shader)
 
 void LLPipeline::bindDeferredShaderFast(LLGLSLShader& shader)
 {
-    shader.bind();
-    bindLightFunc(shader);
-    bindShadowMaps(shader);
-    bindReflectionProbes(shader);
+    if (shader.mCanBindFast)
+    { // shader already went through bindDeferredShader once: bind it and refresh only light func, shadow maps and reflection probes
+        shader.bind();
+        bindLightFunc(shader);
+        bindShadowMaps(shader);
+        bindReflectionProbes(shader);
+    }
+    else
+    { // no full bind yet (or mCanBindFast was reset elsewhere): use the slow path
+        bindDeferredShader(shader);
+        shader.mCanBindFast = true; // later calls may take the fast path
+    }
 }
 
 void LLPipeline::bindDeferredShader(LLGLSLShader& shader, LLRenderTarget* light_target)
@@ -10022,30 +10015,24 @@ void LLPipeline::renderRiggedGroups(LLRenderPass* pass, U32 type, bool texture)
     }
 }
 
-static LLTrace::BlockTimerStatHandle FTM_GENERATE_IMPOSTOR("Generate Impostor");
-
 void LLPipeline::profileAvatar(LLVOAvatar* avatar, bool profile_attachments)
 {
     if (gGLManager.mGLVersion < 3.25f)
     { // profiling requires GL 3.3 or later
         return;
     }
+
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE;
+
     LLGLSLShader* cur_shader = LLGLSLShader::sCurBoundShaderPtr;
 
     mRT->deferredScreen.bindTarget();
     mRT->deferredScreen.clear();
 
-    bool profile_enabled = LLGLSLShader::sProfileEnabled;
-    LLGLSLShader::sProfileEnabled = true;
-
     if (!profile_attachments)
     {
-        // profile entire avatar all at once
-        
-        // use gDebugProgram as a proxy for getting profile results
-        gDebugProgram.clearStats();
-        gDebugProgram.placeProfileQuery();
-        LLGLSLShader::sProfileEnabled = false;
+        // profile entire avatar all at once and readback asynchronously
+        avatar->placeProfileQuery();
 
         LLTimer cpu_timer;
 
@@ -10053,13 +10040,7 @@ void LLPipeline::profileAvatar(LLVOAvatar* avatar, bool profile_attachments)
 
         avatar->mCPURenderTime = (F32)cpu_timer.getElapsedTimeF32() * 1000.f;
 
-        LLGLSLShader::sProfileEnabled = true;
-        gDebugProgram.readProfileQuery();
-
-        avatar->mGPURenderTime = gDebugProgram.mTimeElapsed / 1000000.f;
-
-        avatar->mGPUSamplesPassed = gDebugProgram.mSamplesDrawn;
-        avatar->mGPUTrianglesRendered = gDebugProgram.mTrianglesDrawn;
+        avatar->readProfileQuery(5); // allow up to 5 frames of latency
     }
     else 
     { 
@@ -10080,23 +10061,19 @@ void LLPipeline::profileAvatar(LLVOAvatar* avatar, bool profile_attachments)
                 LLViewerObject* attached_object = attachment_iter->get();
                 if (attached_object)
                 {
+                    // use gDebugProgram to do the GPU queries
                     gDebugProgram.clearStats();
-                    gDebugProgram.placeProfileQuery();
-                    LLGLSLShader::sProfileEnabled = false;
+                    gDebugProgram.placeProfileQuery(true);
 
                     generateImpostor(avatar, false, true, attached_object);
-                    LLGLSLShader::sProfileEnabled = true;
-                    gDebugProgram.readProfileQuery();
+                    gDebugProgram.readProfileQuery(true, true);
 
                     attached_object->mGPURenderTime = gDebugProgram.mTimeElapsed / 1000000.f;
-
-                    // TODO: maybe also record triangles and samples
                 }
             }
         }
     }
 
-    LLGLSLShader::sProfileEnabled = profile_enabled;
     mRT->deferredScreen.flush();
 
     if (cur_shader)
@@ -10107,7 +10084,7 @@ void LLPipeline::profileAvatar(LLVOAvatar* avatar, bool profile_attachments)
 
 void LLPipeline::generateImpostor(LLVOAvatar* avatar, bool preview_avatar, bool for_profile, LLViewerObject* specific_attachment)
 {
-    LL_RECORD_BLOCK_TIME(FTM_GENERATE_IMPOSTOR);
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE;
     LL_PROFILE_GPU_ZONE("generateImpostor");
 	LLGLState::checkStates();
 
@@ -10155,6 +10132,7 @@ void LLPipeline::generateImpostor(LLVOAvatar* avatar, bool preview_avatar, bool
             RENDER_TYPE_TREE,
             RENDER_TYPE_VOIDWATER,
             RENDER_TYPE_WATER,
+            RENDER_TYPE_ALPHA_POST_WATER,
             RENDER_TYPE_PASS_GRASS,
             RENDER_TYPE_HUD,
             RENDER_TYPE_PARTICLES,
@@ -10346,9 +10324,9 @@ void LLPipeline::generateImpostor(LLVOAvatar* avatar, bool preview_avatar, bool
 		LLDrawPoolAvatar::sMinimumAlpha = 0.f;
 	}
 
-    if (preview_avatar)
+    if (preview_avatar || for_profile)
     {
-        // previews don't care about imposters
+        // previews and profiles don't care about imposters
         renderGeomDeferred(camera);
         renderGeomPostDeferred(camera);
     }
@@ -10456,7 +10434,7 @@ void LLPipeline::generateImpostor(LLVOAvatar* avatar, bool preview_avatar, bool
 	gGL.matrixMode(LLRender::MM_MODELVIEW);
 	gGL.popMatrix();
 
-    if (!preview_avatar)
+    if (!preview_avatar && !for_profile)
     {
         avatar->mNeedsImpostorUpdate = FALSE;
         avatar->cacheImpostorValues();
diff --git a/indra/newview/pipeline.h b/indra/newview/pipeline.h
index ba2d2983c04f7ceba21081c6ffca478a3c177102..1d0317dc24c8aa467d2650e5f0f3a6f311f85ed8 100644
--- a/indra/newview/pipeline.h
+++ b/indra/newview/pipeline.h
@@ -109,8 +109,17 @@ class LLPipeline
     bool allocateShadowBuffer(U32 resX, U32 resY);
 
 	void resetVertexBuffers(LLDrawable* drawable);
+
+    // perform a profile of the given avatar
+    // if profile_attachments is true, run a profile for each attachment
     void profileAvatar(LLVOAvatar* avatar, bool profile_attachments = false);
+
+    // generate an impostor for the given avatar
+    //  preview_avatar - if true, a preview window render is being performed
+    //  for_profile - if true, a profile is being performed, do not update actual impostor
+    //  specific_attachment - specific attachment to profile, or nullptr to profile entire avatar
 	void generateImpostor(LLVOAvatar* avatar, bool preview_avatar = false, bool for_profile = false, LLViewerObject* specific_attachment = nullptr);
+
 	void bindScreenToTexture();
 	void renderFinalize();
 	void copyScreenSpaceReflections(LLRenderTarget* src, LLRenderTarget* dst);