diff --git a/indra/llrender/llglslshader.cpp b/indra/llrender/llglslshader.cpp
index 394fcd2b2f0f93478f1028808f3d5104f40d7869..84eac00c65d65ae5f7034a3f7b5ae77f1cb36f3a 100644
--- a/indra/llrender/llglslshader.cpp
+++ b/indra/llrender/llglslshader.cpp
@@ -1017,7 +1017,7 @@ S32 LLGLSLShader::bindTexture(S32 uniform, LLTexture *texture, LLTexUnit::eTextu
     
     if (uniform > -1)
     {
-        gGL.getTexUnit(uniform)->bind(texture, mode);
+        gGL.getTexUnit(uniform)->bindFast(texture);
         gGL.getTexUnit(uniform)->setTextureColorSpace(colorspace);
     }
     
@@ -1048,7 +1048,7 @@ S32 LLGLSLShader::unbindTexture(S32 uniform, LLTexUnit::eTextureType mode)
     
     if (uniform > -1)
     {
-        gGL.getTexUnit(uniform)->unbind(mode);
+        gGL.getTexUnit(uniform)->unbindFast(mode);
     }
     
     return uniform;
diff --git a/indra/llrender/llgltexture.h b/indra/llrender/llgltexture.h
index 071912c2c23231080315806313c1586b95ab6fdc..028457c510174c45cbc61dc19800325ef305a86a 100644
--- a/indra/llrender/llgltexture.h
+++ b/indra/llrender/llgltexture.h
@@ -176,7 +176,7 @@ class LLGLTexture : public LLTexture
 protected:
 	void setTexelsPerImage();
 
-	//note: do not make this function public.
+public:
 	/*virtual*/ LLImageGL* getGLTexture() const ;
 
 protected:
diff --git a/indra/llrender/llrender.cpp b/indra/llrender/llrender.cpp
index b6711e44e32841a15cddc9fb8beb81934a530f91..669a09d3ceadfd897eeb204a7ba1c6c5235980b8 100644
--- a/indra/llrender/llrender.cpp
+++ b/indra/llrender/llrender.cpp
@@ -229,8 +229,20 @@ void LLTexUnit::disable(void)
 	}
 }
 
+void LLTexUnit::bindFast(LLTexture* texture) // unchecked bind for inner loops; see the assumptions listed in llrender.h
+{
+    LLImageGL* gl_tex = texture->getGLTexture(); // dereferenced without a null check
+
+    glActiveTextureARB(GL_TEXTURE0_ARB + mIndex); // select this unit in GL directly
+    gGL.mCurrTextureUnitIndex = mIndex; // keep gGL's record of the current unit index in step
+    mCurrTexture = gl_tex->getTexName(); // NOTE(review): mCurrTexType is left untouched, but unbindFast() tests it -- confirm that is intended
+    glBindTexture(sGLTextureType[gl_tex->getTarget()], mCurrTexture);
+    mHasMipMaps = gl_tex->mHasMipMaps; // copy the image's mip-map flag onto the unit
+}
+
 bool LLTexUnit::bind(LLTexture* texture, bool for_rendering, bool forceBind)
 {
+    LL_PROFILE_ZONE_SCOPED;
 	stop_glerror();
 	if (mIndex >= 0)
 	{
@@ -459,6 +471,28 @@ void LLTexUnit::unbind(eTextureType type)
 	}
 }
 
+// Unchecked counterpart of unbind(): activates this unit and, only when the
+// unit's recorded texture type equals `type`, replaces the current binding.
+void LLTexUnit::unbindFast(eTextureType type)
+{
+    activate();
+
+    // Binding-state caching is disabled here; only the type comparison remains.
+    if (mCurrTexType != type)
+    {
+        return;
+    }
+
+    mCurrTexture = 0;
+
+    // Reset the color space to linear: SRGB sampling should be opt-in, and this also prevents color space "popping".
+    mTexColorSpace = TCS_LINEAR;
+
+    // TT_TEXTURE falls back to sWhiteTexture; every other type is bound to texture name 0.
+    const bool use_white = (type == LLTexUnit::TT_TEXTURE);
+    glBindTexture(sGLTextureType[type], use_white ? sWhiteTexture : 0);
+}
+
 void LLTexUnit::setTextureAddressMode(eTextureAddressMode mode)
 {
 	if (mIndex < 0 || mCurrTexture == 0) return;
@@ -1243,8 +1277,6 @@ void LLRender::syncLightState()
 
 void LLRender::syncMatrices()
 {
-	stop_glerror();
-
 	static const U32 name[] = 
 	{
 		LLShaderMgr::MODELVIEW_MATRIX,
@@ -1415,8 +1447,6 @@ void LLRender::syncMatrices()
 			}
 		}
 	}
-
-	stop_glerror();
 }
 
 void LLRender::translatef(const GLfloat& x, const GLfloat& y, const GLfloat& z)
@@ -1927,6 +1957,7 @@ void LLRender::flush()
 {
 	if (mCount > 0)
 	{
+        LL_PROFILE_ZONE_SCOPED;
 		if (!mUIOffset.empty())
 		{
 			sUICalls++;
diff --git a/indra/llrender/llrender.h b/indra/llrender/llrender.h
index c08c2d6881eac56c8ba2307ba7ce7a82b0627f2d..6e2647a16bad3b777c2c4ce4ad5b012e434789d4 100644
--- a/indra/llrender/llrender.h
+++ b/indra/llrender/llrender.h
@@ -161,6 +161,17 @@ class LLTexUnit
 	bool bind(LLImageGL* texture, bool for_rendering = false, bool forceBind = false);
     bool bind(LLTexture* texture, bool for_rendering = false, bool forceBind = false);
 
+    // bind implementation for inner loops
+    // makes the following assumptions:
+    //  - No need for gGL.flush() 
+    //  - texture is not null
+    //  - gl_tex->getTexName() is not zero
+    //  - This texture is not being bound redundantly
+    //  - USE_SRGB_DECODE is disabled
+    //  - mTexOptionsDirty is false
+    //  - mCurrTexType is already correct for this texture (bindFast does not update it)
+    void bindFast(LLTexture* texture);
+
 	// Binds a cubemap to this texture unit 
 	// (automatically enables the texture unit for cubemaps)
 	bool bind(LLCubeMap* cubeMap);
@@ -177,6 +188,9 @@ class LLTexUnit
 	// (only if there's a texture of the given type currently bound)
 	void unbind(eTextureType type);
 
+    // Fast but unsafe version of unbind
+    void unbindFast(eTextureType type);
+
 	// Sets the addressing mode used to sample the texture
 	// Warning: this stays set for the bound texture forever, 
 	// make sure you want to permanently change the address mode  for the bound texture.
diff --git a/indra/llrender/lltexture.h b/indra/llrender/lltexture.h
index 41481fb8a722d23fd06e999ee9a9c7493ea02e59..256d85ce5ad808b6137810abc2f2a8dd38bf1c12 100644
--- a/indra/llrender/lltexture.h
+++ b/indra/llrender/lltexture.h
@@ -67,11 +67,9 @@ class LLTexture : public virtual LLRefCount, public LLTrace::MemTrackable<LLText
 	virtual S32	       getWidth(S32 discard_level = -1) const;
 	virtual S32	       getHeight(S32 discard_level = -1) const;
 	virtual bool       isActiveFetching();
+    virtual LLImageGL* getGLTexture() const;
 
 private:
-	//note: do not make this function public.
-	virtual LLImageGL* getGLTexture() const;
-
 	virtual void updateBindStatsForTester();
 };
 #endif
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index 0449ac392ca78d2b167e7055418692ac5d5893c6..103d5388d32e44a63df27d3a759c320167e86ddc 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -787,6 +787,18 @@ void LLVertexBuffer::drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indi
 	placeFence();
 }
 
+void LLVertexBuffer::drawRangeFast(U32 mode, U32 start, U32 end, U32 count, U32 indices_offset) const // unchecked indexed draw for inner loops
+{
+    mMappable = false; // written from a const method -- presumably declared mutable; confirm
+    gGL.syncMatrices();
+
+    U16* idx = ((U16*)(U8*)mAlignedIndexOffset) + indices_offset; // indices_offset advances in U16 elements, not bytes
+
+    LL_PROFILER_GPU_ZONEC("gl.DrawRangeElements", 0xFFFF00)
+        glDrawRangeElements(sGLMode[mode], start, end, count, GL_UNSIGNED_SHORT,
+            idx); // mode indexes sGLMode; indices are read as 16-bit unsigned
+}
+
 void LLVertexBuffer::draw(U32 mode, U32 count, U32 indices_offset) const
 {
 	llassert(!LLGLSLShader::sNoFixedFunction || LLGLSLShader::sCurBoundShaderPtr != NULL);
@@ -2272,6 +2284,21 @@ bool LLVertexBuffer::bindGLBuffer(bool force_bind)
 	return ret;
 }
 
+// Binds mGLBuffer to GL_ARRAY_BUFFER_ARB unless the cached static state shows
+// it is already the active buffer; returns true only when a bind was issued.
+bool LLVertexBuffer::bindGLBufferFast()
+{
+    if (mGLBuffer == sGLRenderBuffer && sVBOActive)
+    {
+        return false; // cached state is current, nothing to bind
+    }
+    glBindBufferARB(GL_ARRAY_BUFFER_ARB, mGLBuffer);
+    sGLRenderBuffer = mGLBuffer;
+    ++sBindCount;
+    sVBOActive = true;
+    return true;
+}
+
 static LLTrace::BlockTimerStatHandle FTM_BIND_GL_INDICES("Bind Indices");
 
 bool LLVertexBuffer::bindGLIndices(bool force_bind)
@@ -2297,6 +2324,21 @@ bool LLVertexBuffer::bindGLIndices(bool force_bind)
 	return ret;
 }
 
+// Binds mGLIndices to GL_ELEMENT_ARRAY_BUFFER_ARB unless the cached static state
+// shows it is already the active index buffer; returns true only when a bind was issued.
+bool LLVertexBuffer::bindGLIndicesFast()
+{
+    if (mGLIndices == sGLRenderIndices && sIBOActive)
+    {
+        return false; // cached state is current, nothing to bind
+    }
+    glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, mGLIndices);
+    sGLRenderIndices = mGLIndices;
+    ++sBindCount;
+    sIBOActive = true;
+    return true;
+}
+
 void LLVertexBuffer::flush()
 {
 	if (useVBOs())
@@ -2487,6 +2529,26 @@ void LLVertexBuffer::setBuffer(U32 data_mask)
 	}
 }
 
+// Fast-path setBuffer(): binds the vertex and index buffers, calls setupClientArrays(data_mask),
+// then re-issues the attribute pointers only if the mask or either binding changed.
+void LLVertexBuffer::setBufferFast(U32 data_mask)
+{
+    // Compare against sLastMask before setupClientArrays() runs (NOTE(review): presumably it updates sLastMask -- confirm).
+    const bool mask_changed = (sLastMask != data_mask);
+    // Both binds are always attempted; each reports whether it actually issued a GL bind.
+    const bool vbo_rebound = bindGLBufferFast();
+    const bool ibo_rebound = bindGLIndicesFast();
+
+    setupClientArrays(data_mask);
+
+    const bool pointers_stale = mask_changed || vbo_rebound || ibo_rebound;
+    if (pointers_stale && data_mask)
+    {
+        setupVertexBufferFast(data_mask);
+        ++sSetCount;
+    }
+}
+
 // virtual (default)
 void LLVertexBuffer::setupVertexBuffer(U32 data_mask)
 {
@@ -2644,6 +2706,99 @@ void LLVertexBuffer::setupVertexBuffer(U32 data_mask)
 	llglassertok();
 }
 
+void LLVertexBuffer::setupVertexBufferFast(U32 data_mask) // issues a vertex attribute pointer for every stream selected in data_mask
+{
+    U8* base = (U8*)mAlignedOffset; // every stream pointer below is base plus that stream's entry in mOffsets
+
+    if (data_mask & MAP_NORMAL)
+    {
+        S32 loc = TYPE_NORMAL; // the TYPE_* value itself is used as the attribute location throughout
+        void* ptr = (void*)(base + mOffsets[TYPE_NORMAL]);
+        glVertexAttribPointerARB(loc, 3, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_NORMAL], ptr);
+    }
+    if (data_mask & MAP_TEXCOORD3)
+    {
+        S32 loc = TYPE_TEXCOORD3;
+        void* ptr = (void*)(base + mOffsets[TYPE_TEXCOORD3]);
+        glVertexAttribPointerARB(loc, 2, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_TEXCOORD3], ptr);
+    }
+    if (data_mask & MAP_TEXCOORD2)
+    {
+        S32 loc = TYPE_TEXCOORD2;
+        void* ptr = (void*)(base + mOffsets[TYPE_TEXCOORD2]);
+        glVertexAttribPointerARB(loc, 2, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_TEXCOORD2], ptr);
+    }
+    if (data_mask & MAP_TEXCOORD1)
+    {
+        S32 loc = TYPE_TEXCOORD1;
+        void* ptr = (void*)(base + mOffsets[TYPE_TEXCOORD1]);
+        glVertexAttribPointerARB(loc, 2, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_TEXCOORD1], ptr);
+    }
+    if (data_mask & MAP_TANGENT)
+    {
+        S32 loc = TYPE_TANGENT;
+        void* ptr = (void*)(base + mOffsets[TYPE_TANGENT]);
+        glVertexAttribPointerARB(loc, 4, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_TANGENT], ptr);
+    }
+    if (data_mask & MAP_TEXCOORD0)
+    {
+        S32 loc = TYPE_TEXCOORD0;
+        void* ptr = (void*)(base + mOffsets[TYPE_TEXCOORD0]);
+        glVertexAttribPointerARB(loc, 2, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_TEXCOORD0], ptr);
+    }
+    if (data_mask & MAP_COLOR)
+    {
+        S32 loc = TYPE_COLOR;
+        //bind emissive instead of color pointer if emissive is present
+        void* ptr = (data_mask & MAP_EMISSIVE) ? (void*)(base + mOffsets[TYPE_EMISSIVE]) : (void*)(base + mOffsets[TYPE_COLOR]);
+        glVertexAttribPointerARB(loc, 4, GL_UNSIGNED_BYTE, GL_TRUE, LLVertexBuffer::sTypeSize[TYPE_COLOR], ptr); // stride stays sTypeSize[TYPE_COLOR] even when ptr targets the emissive stream
+    }
+    if (data_mask & MAP_EMISSIVE)
+    {
+        S32 loc = TYPE_EMISSIVE;
+        void* ptr = (void*)(base + mOffsets[TYPE_EMISSIVE]);
+        glVertexAttribPointerARB(loc, 4, GL_UNSIGNED_BYTE, GL_TRUE, LLVertexBuffer::sTypeSize[TYPE_EMISSIVE], ptr);
+
+        if (!(data_mask & MAP_COLOR))
+        { //map emissive to color channel when color is not also being bound to avoid unnecessary shader swaps
+            loc = TYPE_COLOR;
+            glVertexAttribPointerARB(loc, 4, GL_UNSIGNED_BYTE, GL_TRUE, LLVertexBuffer::sTypeSize[TYPE_EMISSIVE], ptr);
+        }
+    }
+    if (data_mask & MAP_WEIGHT)
+    {
+        S32 loc = TYPE_WEIGHT;
+        void* ptr = (void*)(base + mOffsets[TYPE_WEIGHT]);
+        glVertexAttribPointerARB(loc, 1, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_WEIGHT], ptr);
+    }
+    if (data_mask & MAP_WEIGHT4)
+    {
+        S32 loc = TYPE_WEIGHT4;
+        void* ptr = (void*)(base + mOffsets[TYPE_WEIGHT4]);
+        glVertexAttribPointerARB(loc, 4, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_WEIGHT4], ptr);
+    }
+    if (data_mask & MAP_CLOTHWEIGHT)
+    {
+        S32 loc = TYPE_CLOTHWEIGHT;
+        void* ptr = (void*)(base + mOffsets[TYPE_CLOTHWEIGHT]);
+        glVertexAttribPointerARB(loc, 4, GL_FLOAT, GL_TRUE, LLVertexBuffer::sTypeSize[TYPE_CLOTHWEIGHT], ptr); // NOTE(review): only float stream passed with normalized=GL_TRUE -- confirm it mirrors setupVertexBuffer()
+    }
+    if (data_mask & MAP_TEXTURE_INDEX)
+    {
+#if !LL_DARWIN
+        S32 loc = TYPE_TEXTURE_INDEX;
+        void* ptr = (void*)(base + mOffsets[TYPE_VERTEX] + 12); // read 12 bytes into the vertex stream, using the vertex stream's stride
+        glVertexAttribIPointer(loc, 1, GL_UNSIGNED_INT, LLVertexBuffer::sTypeSize[TYPE_VERTEX], ptr); // integer attribute, hence the IPointer variant
+#endif
+    }
+    if (data_mask & MAP_VERTEX)
+    {
+        S32 loc = TYPE_VERTEX;
+        void* ptr = (void*)(base + mOffsets[TYPE_VERTEX]);
+        glVertexAttribPointerARB(loc, 3, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_VERTEX], ptr);
+    }
+}
+
 LLVertexBuffer::MappedRegion::MappedRegion(S32 type, S32 index, S32 count)
 : mType(type), mIndex(index), mCount(count)
 { 
diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h
index 1d60970df4949017195e05069f13aeb63478a61a..51ed85510e1b1cc4ce7b31d82c7b0642f9705b1f 100644
--- a/indra/llrender/llvertexbuffer.h
+++ b/indra/llrender/llvertexbuffer.h
@@ -210,13 +210,17 @@ class LLVertexBuffer : public LLRefCount, public LLTrace::MemTrackable<LLVertexB
 
 	virtual ~LLVertexBuffer(); // use unref()
 
-	virtual void setupVertexBuffer(U32 data_mask); // pure virtual, called from mapBuffer()
+	virtual void setupVertexBuffer(U32 data_mask);
+    void setupVertexBufferFast(U32 data_mask);
+
 	void setupVertexArray();
 	
 	void	genBuffer(U32 size);
 	void	genIndices(U32 size);
 	bool	bindGLBuffer(bool force_bind = false);
+    bool	bindGLBufferFast();
 	bool	bindGLIndices(bool force_bind = false);
+    bool    bindGLIndicesFast();
 	bool	bindGLArray();
 	void	releaseBuffer();
 	void	releaseIndices();
@@ -239,6 +243,8 @@ class LLVertexBuffer : public LLRefCount, public LLTrace::MemTrackable<LLVertexB
 
 	// set for rendering
 	virtual void	setBuffer(U32 data_mask); 	// calls  setupVertexBuffer() if data_mask is not 0
+    void	setBufferFast(U32 data_mask); 	// calls setupVertexBufferFast(), assumes data_mask is not 0 among other assumptions
+
 	void flush(); //flush pending data to GL memory
 	// allocate buffer
 	bool	allocateBuffer(S32 nverts, S32 nindices, bool create);
@@ -290,6 +296,9 @@ class LLVertexBuffer : public LLRefCount, public LLTrace::MemTrackable<LLVertexB
 	void drawArrays(U32 mode, U32 offset, U32 count) const;
 	void drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indices_offset) const;
 
+    //implementation for inner loops that does no safety checking
+    void drawRangeFast(U32 mode, U32 start, U32 end, U32 count, U32 indices_offset) const;
+
 	//for debugging, validate data in given range is valid
 	void validateRange(U32 start, U32 end, U32 count, U32 offset) const;
 
diff --git a/indra/newview/lldrawable.cpp b/indra/newview/lldrawable.cpp
index 30c4a21e1cc76ca1808ec32222883a175d430167..495e06b6f7e4a55d4c1f6f61bade5aa862586960 100644
--- a/indra/newview/lldrawable.cpp
+++ b/indra/newview/lldrawable.cpp
@@ -234,8 +234,6 @@ void LLDrawable::markDead()
 
 LLVOVolume* LLDrawable::getVOVolume() const
 {
-	LL_PROFILE_ZONE_SCOPED
-
 	LLViewerObject* objectp = mVObjp;
 	if ( !isDead() && objectp && (objectp->getPCode() == LL_PCODE_VOLUME))
 	{
diff --git a/indra/newview/lldrawpool.cpp b/indra/newview/lldrawpool.cpp
index d583a692f9f26d4ca398ae88baeb75f9f9604d83..3e4f97e494a74b6ce2686be8e9141e4aaf99b2c1 100644
--- a/indra/newview/lldrawpool.cpp
+++ b/indra/newview/lldrawpool.cpp
@@ -404,6 +404,7 @@ void LLRenderPass::renderTexture(U32 type, U32 mask, BOOL batch_textures)
 
 void LLRenderPass::pushBatches(U32 type, U32 mask, BOOL texture, BOOL batch_textures)
 {
+    LL_PROFILE_ZONE_SCOPED;
 	for (LLCullResult::drawinfo_iterator i = gPipeline.beginRenderMap(type); i != gPipeline.endRenderMap(type); ++i)	
 	{
 		LLDrawInfo* pparams = *i;
@@ -452,6 +453,7 @@ void LLRenderPass::applyModelMatrix(const LLDrawInfo& params)
 
 void LLRenderPass::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL batch_textures)
 {
+    LL_PROFILE_ZONE_SCOPED;
     if (!params.mCount)
     {
         return;
@@ -469,7 +471,7 @@ void LLRenderPass::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL ba
 			{
 				if (params.mTextureList[i].notNull())
 				{
-					gGL.getTexUnit(i)->bind(params.mTextureList[i], TRUE);
+					gGL.getTexUnit(i)->bindFast(params.mTextureList[i]);
 				}
 			}
 		}
@@ -477,8 +479,7 @@ void LLRenderPass::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL ba
 		{ //not batching textures or batch has only 1 texture -- might need a texture matrix
 			if (params.mTexture.notNull())
 			{
-				params.mTexture->addTextureStats(params.mVSize);
-				gGL.getTexUnit(0)->bind(params.mTexture, TRUE) ;
+				gGL.getTexUnit(0)->bindFast(params.mTexture);
 				if (params.mTextureMatrix)
 				{
 					tex_setup = true;
@@ -490,24 +491,20 @@ void LLRenderPass::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL ba
 			}
 			else
 			{
-				gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE);
+				gGL.getTexUnit(0)->unbindFast(LLTexUnit::TT_TEXTURE);
 			}
 		}
 	}
 	
-	if (params.mVertexBuffer.notNull())
-	{
-		if (params.mGroup)
-		{
-			params.mGroup->rebuildMesh();
-		}
+    if (params.mGroup)
+    {
+        params.mGroup->rebuildMesh();
+    }
 
-		LLGLEnableFunc stencil_test(GL_STENCIL_TEST, params.mSelected, &LLGLCommonFunc::selected_stencil_test);
-	
-		params.mVertexBuffer->setBuffer(mask);
-		params.mVertexBuffer->drawRange(params.mDrawMode, params.mStart, params.mEnd, params.mCount, params.mOffset);
-		gPipeline.addTrianglesDrawn(params.mCount, params.mDrawMode);
-	}
+    LLGLEnableFunc stencil_test(GL_STENCIL_TEST, params.mSelected, &LLGLCommonFunc::selected_stencil_test);
+
+    params.mVertexBuffer->setBufferFast(mask);
+    params.mVertexBuffer->drawRangeFast(params.mDrawMode, params.mStart, params.mEnd, params.mCount, params.mOffset);
 
 	if (tex_setup)
 	{
diff --git a/indra/newview/lldrawpoolalpha.cpp b/indra/newview/lldrawpoolalpha.cpp
index 4ee08e869a087c650b66dff9dd3de387e0eba16a..369d7a6bb8ea3a5d27a55b780a93afc7dfcf9bd4 100644
--- a/indra/newview/lldrawpoolalpha.cpp
+++ b/indra/newview/lldrawpoolalpha.cpp
@@ -55,19 +55,7 @@ static BOOL deferred_render = FALSE;
 
 static LLTrace::BlockTimerStatHandle FTM_RENDER_ALPHA_SETUP("Alpha Setup");
 static LLTrace::BlockTimerStatHandle FTM_RENDER_ALPHA_GROUP_LOOP("Alpha Group");
-static LLTrace::BlockTimerStatHandle FTM_RENDER_ALPHA_PUSH("Alpha Push Verts");
 static LLTrace::BlockTimerStatHandle FTM_RENDER_ALPHA_DEFERRED("Alpha Deferred");
-static LLTrace::BlockTimerStatHandle FTM_RENDER_ALPHA_SETBUFFER("Alpha SetBuffer");
-static LLTrace::BlockTimerStatHandle FTM_RENDER_ALPHA_DRAW("Alpha Draw");
-static LLTrace::BlockTimerStatHandle FTM_RENDER_ALPHA_TEX_BINDS("Alpha Tex Binds");
-static LLTrace::BlockTimerStatHandle FTM_RENDER_ALPHA_MATS("Alpha Mat Tex Binds");
-static LLTrace::BlockTimerStatHandle FTM_RENDER_ALPHA_GLOW("Alpha Glow Binds");
-static LLTrace::BlockTimerStatHandle FTM_RENDER_ALPHA_SHADER_BINDS("Alpha Shader Binds");
-static LLTrace::BlockTimerStatHandle FTM_RENDER_ALPHA_DEFERRED_SHADER_BINDS("Alpha Def Binds");
-static LLTrace::BlockTimerStatHandle FTM_RENDER_ALPHA_DEFERRED_TEX_BINDS("Alpha Def Tex Binds");
-static LLTrace::BlockTimerStatHandle FTM_RENDER_ALPHA_MESH_REBUILD("Alpha Mesh Rebuild");
-static LLTrace::BlockTimerStatHandle FTM_RENDER_ALPHA_EMISSIVE("Alpha Emissive");
-static LLTrace::BlockTimerStatHandle FTM_RENDER_ALPHA_LIGHT_SETUP("Alpha Light Setup");
 
 LLDrawPoolAlpha::LLDrawPoolAlpha(U32 type) :
 		LLRenderPass(type), current_shader(NULL), target_shader(NULL),
@@ -86,6 +74,10 @@ LLDrawPoolAlpha::~LLDrawPoolAlpha()
 void LLDrawPoolAlpha::prerender()
 {
 	mShaderLevel = LLViewerShaderMgr::instance()->getShaderLevel(LLViewerShaderMgr::SHADER_OBJECT);
+
+    // TODO: is this even necessary?  These are probably set to never discard
+    LLViewerFetchedTexture::sFlatNormalImagep->addTextureStats(1024.f*1024.f);
+    LLViewerFetchedTexture::sWhiteImagep->addTextureStats(1024.f * 1024.f);
 }
 
 S32 LLDrawPoolAlpha::getNumPostDeferredPasses() 
@@ -309,7 +301,7 @@ void LLDrawPoolAlpha::render(S32 pass)
 		gGL.diffuseColor4f(1,0,0,1);
 				
 		LLViewerFetchedTexture::sSmokeImagep->addTextureStats(1024.f*1024.f);
-		gGL.getTexUnit(0)->bind(LLViewerFetchedTexture::sSmokeImagep, TRUE) ;
+		gGL.getTexUnit(0)->bindFast(LLViewerFetchedTexture::sSmokeImagep);
 		renderAlphaHighlight(LLVertexBuffer::MAP_VERTEX |
 							LLVertexBuffer::MAP_TEXCOORD0);
 
@@ -358,9 +350,8 @@ void LLDrawPoolAlpha::renderAlphaHighlight(U32 mask)
 				{
 					params.mGroup->rebuildMesh();
 				}
-				params.mVertexBuffer->setBuffer(mask);
-				params.mVertexBuffer->drawRange(params.mDrawMode, params.mStart, params.mEnd, params.mCount, params.mOffset);
-				gPipeline.addTrianglesDrawn(params.mCount, params.mDrawMode);
+				params.mVertexBuffer->setBufferFast(mask);
+				params.mVertexBuffer->drawRangeFast(params.mDrawMode, params.mStart, params.mEnd, params.mCount, params.mOffset);
 			}
 		}
 	}
@@ -383,27 +374,23 @@ inline bool IsEmissive(LLDrawInfo& params)
 
 inline void Draw(LLDrawInfo* draw, U32 mask)
 {
-    draw->mVertexBuffer->setBuffer(mask);
+    draw->mVertexBuffer->setBufferFast(mask);
     LLRenderPass::applyModelMatrix(*draw);
-	draw->mVertexBuffer->drawRange(draw->mDrawMode, draw->mStart, draw->mEnd, draw->mCount, draw->mOffset);                    
-    gPipeline.addTrianglesDrawn(draw->mCount, draw->mDrawMode);
+	draw->mVertexBuffer->drawRangeFast(draw->mDrawMode, draw->mStart, draw->mEnd, draw->mCount, draw->mOffset);                    
 }
 
-bool LLDrawPoolAlpha::TexSetup(LLDrawInfo* draw, bool use_shaders, bool use_material, LLGLSLShader* current_shader)
+bool LLDrawPoolAlpha::TexSetup(LLDrawInfo* draw, bool use_material, LLGLSLShader* current_shader)
 {
-    LL_RECORD_BLOCK_TIME(FTM_RENDER_ALPHA_TEX_BINDS);    
-
     bool tex_setup = false;
 
     if (deferred_render && use_material && current_shader)
     {
-        LL_RECORD_BLOCK_TIME(FTM_RENDER_ALPHA_DEFERRED_TEX_BINDS);
         if (draw->mNormalMap)
-		{            
+		{
 			draw->mNormalMap->addTextureStats(draw->mVSize);
 			current_shader->bindTexture(LLShaderMgr::BUMP_MAP, draw->mNormalMap);
 		} 
-						
+
 		if (draw->mSpecularMap)
 		{
 			draw->mSpecularMap->addTextureStats(draw->mVSize);
@@ -412,18 +399,16 @@ bool LLDrawPoolAlpha::TexSetup(LLDrawInfo* draw, bool use_shaders, bool use_mate
     }
     else if (current_shader == simple_shader)
     {
-        LLViewerFetchedTexture::sFlatNormalImagep->addTextureStats(draw->mVSize);	    
-	    LLViewerFetchedTexture::sWhiteImagep->addTextureStats(draw->mVSize);
-        current_shader->bindTexture(LLShaderMgr::BUMP_MAP, LLViewerFetchedTexture::sFlatNormalImagep);						
+        current_shader->bindTexture(LLShaderMgr::BUMP_MAP, LLViewerFetchedTexture::sFlatNormalImagep);
 	    current_shader->bindTexture(LLShaderMgr::SPECULAR_MAP, LLViewerFetchedTexture::sWhiteImagep);
     }
-	if (use_shaders && draw->mTextureList.size() > 1)
+	if (draw->mTextureList.size() > 1)
 	{
 		for (U32 i = 0; i < draw->mTextureList.size(); ++i)
 		{
 			if (draw->mTextureList[i].notNull())
 			{
-				gGL.getTexUnit(i)->bind(draw->mTextureList[i], TRUE);
+				gGL.getTexUnit(i)->bindFast(draw->mTextureList[i]);
 			}
 		}
 	}
@@ -431,16 +416,15 @@ bool LLDrawPoolAlpha::TexSetup(LLDrawInfo* draw, bool use_shaders, bool use_mate
 	{ //not batching textures or batch has only 1 texture -- might need a texture matrix
 		if (draw->mTexture.notNull())
 		{
-			draw->mTexture->addTextureStats(draw->mVSize);
-			if (use_shaders && use_material)
+			if (use_material)
 			{
 				current_shader->bindTexture(LLShaderMgr::DIFFUSE_MAP, draw->mTexture);
 			}
 			else
 			{
-			    gGL.getTexUnit(0)->bind(draw->mTexture, TRUE) ;
+			    gGL.getTexUnit(0)->bindFast(draw->mTexture);
 			}
-						
+
 			if (draw->mTextureMatrix)
 			{
 				tex_setup = true;
@@ -452,7 +436,7 @@ bool LLDrawPoolAlpha::TexSetup(LLDrawInfo* draw, bool use_shaders, bool use_mate
 		}
 		else
 		{
-			gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE);
+			gGL.getTexUnit(0)->unbindFast(LLTexUnit::TT_TEXTURE);
 		}
 	}
     
@@ -470,37 +454,15 @@ void LLDrawPoolAlpha::RestoreTexSetup(bool tex_setup)
 	}
 }
 
-void LLDrawPoolAlpha::renderSimples(U32 mask, std::vector<LLDrawInfo*>& simples)
-{
-    gPipeline.enableLightsDynamic();
-    simple_shader->bind();
-	simple_shader->bindTexture(LLShaderMgr::BUMP_MAP, LLViewerFetchedTexture::sFlatNormalImagep);
-	simple_shader->bindTexture(LLShaderMgr::SPECULAR_MAP, LLViewerFetchedTexture::sWhiteImagep);
-    simple_shader->uniform4f(LLShaderMgr::SPECULAR_COLOR, 1.0f, 1.0f, 1.0f, 1.0f);
-	simple_shader->uniform1f(LLShaderMgr::ENVIRONMENT_INTENSITY, 0.0f);
-    simple_shader->uniform1f(LLShaderMgr::EMISSIVE_BRIGHTNESS, 0.0f);
-    bool use_shaders = gPipeline.canUseVertexShaders();
-    for (LLDrawInfo* draw : simples)
-    {
-        bool tex_setup = TexSetup(draw, use_shaders, false, simple_shader);
-        LLGLEnableFunc stencil_test(GL_STENCIL_TEST, draw->mSelected, &LLGLCommonFunc::selected_stencil_test);
-		gGL.blendFunc((LLRender::eBlendFactor) draw->mBlendFuncSrc, (LLRender::eBlendFactor) draw->mBlendFuncDst, mAlphaSFactor, mAlphaDFactor);
-
-	    Draw(draw, mask);
-        RestoreTexSetup(tex_setup);
-    }
-    simple_shader->unbind();
-}
-
 void LLDrawPoolAlpha::renderFullbrights(U32 mask, std::vector<LLDrawInfo*>& fullbrights)
 {
     gPipeline.enableLightsFullbright();
     fullbright_shader->bind();
     fullbright_shader->uniform1f(LLShaderMgr::EMISSIVE_BRIGHTNESS, 1.0f);
-    bool use_shaders = gPipeline.canUseVertexShaders();
+    
     for (LLDrawInfo* draw : fullbrights)
     {
-        bool tex_setup = TexSetup(draw, use_shaders, false, fullbright_shader);
+        bool tex_setup = TexSetup(draw, false, fullbright_shader);
 
         LLGLEnableFunc stencil_test(GL_STENCIL_TEST, draw->mSelected, &LLGLCommonFunc::selected_stencil_test);
 		gGL.blendFunc((LLRender::eBlendFactor) draw->mBlendFuncSrc, (LLRender::eBlendFactor) draw->mBlendFuncDst, mAlphaSFactor, mAlphaDFactor);
@@ -511,65 +473,10 @@ void LLDrawPoolAlpha::renderFullbrights(U32 mask, std::vector<LLDrawInfo*>& full
     fullbright_shader->unbind();
 }
 
-void LLDrawPoolAlpha::renderMaterials(U32 mask, std::vector<LLDrawInfo*>& materials)
-{
-    LLGLSLShader::bindNoShader();
-    current_shader = NULL;
-
-    gPipeline.enableLightsDynamic();
-    bool use_shaders = gPipeline.canUseVertexShaders();
-    for (LLDrawInfo* draw : materials)
-    {
-        U32 mask = draw->mShaderMask;
-
-		llassert(mask < LLMaterial::SHADER_COUNT);
-		target_shader = (LLPipeline::sUnderWaterRender) ? &(gDeferredMaterialWaterProgram[mask]) : &(gDeferredMaterialProgram[mask]);
-
-		if (current_shader != target_shader)
-		{
-            LL_RECORD_BLOCK_TIME(FTM_RENDER_ALPHA_DEFERRED_SHADER_BINDS);
-            if (current_shader)
-            {
-                gPipeline.unbindDeferredShader(*current_shader);
-            }
-			gPipeline.bindDeferredShader(*target_shader);
-            current_shader = target_shader;
-		}
-        
-        bool tex_setup = TexSetup(draw, use_shaders, true, current_shader);
-
-        current_shader->uniform4f(LLShaderMgr::SPECULAR_COLOR, draw->mSpecColor.mV[0], draw->mSpecColor.mV[1], draw->mSpecColor.mV[2], draw->mSpecColor.mV[3]);						
-		current_shader->uniform1f(LLShaderMgr::ENVIRONMENT_INTENSITY, draw->mEnvIntensity);
-		current_shader->uniform1f(LLShaderMgr::EMISSIVE_BRIGHTNESS, draw->mFullbright ? 1.f : 0.f);
-
-        {
-            LL_RECORD_BLOCK_TIME(FTM_RENDER_ALPHA_DEFERRED_TEX_BINDS);
-			if (draw->mNormalMap)
-			{
-				draw->mNormalMap->addTextureStats(draw->mVSize);
-				current_shader->bindTexture(LLShaderMgr::BUMP_MAP, draw->mNormalMap);
-			} 
-						
-			if (draw->mSpecularMap)
-			{
-				draw->mSpecularMap->addTextureStats(draw->mVSize);
-				current_shader->bindTexture(LLShaderMgr::SPECULAR_MAP, draw->mSpecularMap);
-			}
-        }
-
-        LLGLEnableFunc stencil_test(GL_STENCIL_TEST, draw->mSelected, &LLGLCommonFunc::selected_stencil_test);
-		gGL.blendFunc((LLRender::eBlendFactor) draw->mBlendFuncSrc, (LLRender::eBlendFactor) draw->mBlendFuncDst, mAlphaSFactor, mAlphaDFactor);
-
-        Draw(draw, mask);
-        RestoreTexSetup(tex_setup);
-    }
-}
-
 void LLDrawPoolAlpha::drawEmissive(U32 mask, LLDrawInfo* draw)
 {
-    draw->mVertexBuffer->setBuffer((mask & ~LLVertexBuffer::MAP_COLOR) | LLVertexBuffer::MAP_EMISSIVE);
-	draw->mVertexBuffer->drawRange(draw->mDrawMode, draw->mStart, draw->mEnd, draw->mCount, draw->mOffset);
-	gPipeline.addTrianglesDrawn(draw->mCount, draw->mDrawMode);
+    draw->mVertexBuffer->setBufferFast((mask & ~LLVertexBuffer::MAP_COLOR) | LLVertexBuffer::MAP_EMISSIVE);
+	draw->mVertexBuffer->drawRangeFast(draw->mDrawMode, draw->mStart, draw->mEnd, draw->mCount, draw->mOffset);
 }
 
 void LLDrawPoolAlpha::drawEmissiveInline(U32 mask, LLDrawInfo* draw)
@@ -599,10 +506,10 @@ void LLDrawPoolAlpha::renderEmissives(U32 mask, std::vector<LLDrawInfo*>& emissi
     // install glow-accumulating blend mode
     // don't touch color, add to alpha (glow)
 	gGL.blendFunc(LLRender::BF_ZERO, LLRender::BF_ONE, LLRender::BF_ONE, LLRender::BF_ONE); 
-    bool use_shaders = gPipeline.canUseVertexShaders();
+ 
     for (LLDrawInfo* draw : emissives)
     {
-        bool tex_setup = TexSetup(draw, use_shaders, false, emissive_shader);
+        bool tex_setup = TexSetup(draw, false, emissive_shader);
         drawEmissive(mask, draw);
         RestoreTexSetup(tex_setup);
     }
@@ -620,8 +527,6 @@ void LLDrawPoolAlpha::renderAlpha(U32 mask, S32 pass)
 	BOOL initialized_lighting = FALSE;
 	BOOL light_enabled = TRUE;
 	
-	BOOL use_shaders = gPipeline.canUseVertexShaders();
-		
 	for (LLCullResult::sg_iterator i = gPipeline.beginAlphaGroups(); i != gPipeline.endAlphaGroups(); ++i)
 	{
 		LLSpatialGroup* group = *i;
@@ -631,8 +536,10 @@ void LLDrawPoolAlpha::renderAlpha(U32 mask, S32 pass)
 		if (group->getSpatialPartition()->mRenderByGroup &&
 		    !group->isDead())
 		{
-            std::vector<LLDrawInfo*> emissives;
-            std::vector<LLDrawInfo*> fullbrights;
+            static std::vector<LLDrawInfo*> emissives;
+            static std::vector<LLDrawInfo*> fullbrights;
+            emissives.resize(0);
+            fullbrights.resize(0);
 
 			bool is_particle_or_hud_particle = group->getSpatialPartition()->mPartitionType == LLViewerRegion::PARTITION_PARTICLE
 													  || group->getSpatialPartition()->mPartitionType == LLViewerRegion::PARTITION_HUD_PARTICLE;
@@ -649,6 +556,7 @@ void LLDrawPoolAlpha::renderAlpha(U32 mask, S32 pass)
 
 			for (LLSpatialGroup::drawmap_elem_t::iterator k = draw_info.begin(); k != draw_info.end(); ++k)	
 			{
+                LL_PROFILE_ZONE_NAMED("ra - push batch")
 				LLDrawInfo& params = **k;
                 U32 have_mask = params.mVertexBuffer->getTypeMask() & mask;
 				if (have_mask != mask)
@@ -696,34 +604,17 @@ void LLDrawPoolAlpha::renderAlpha(U32 mask, S32 pass)
 					// Turn off lighting if it hasn't already been so.
 					if (light_enabled || !initialized_lighting)
 					{
-                        LL_RECORD_BLOCK_TIME(FTM_RENDER_ALPHA_LIGHT_SETUP);
-
 						initialized_lighting = TRUE;
-						if (use_shaders) 
-						{
-							target_shader = fullbright_shader;
-						}
-						else
-						{
-							gPipeline.enableLightsFullbright();
-						}
+						target_shader = fullbright_shader;
+
 						light_enabled = FALSE;
 					}
 				}
 				// Turn on lighting if it isn't already.
 				else if (!light_enabled || !initialized_lighting)
 				{
-                    LL_RECORD_BLOCK_TIME(FTM_RENDER_ALPHA_LIGHT_SETUP);
-
 					initialized_lighting = TRUE;
-					if (use_shaders) 
-					{
-						target_shader = simple_shader;
-					}
-					else
-					{
-						gPipeline.enableLightsDynamic();
-					}
+					target_shader = simple_shader;
 					light_enabled = TRUE;
 				}
 
@@ -741,7 +632,6 @@ void LLDrawPoolAlpha::renderAlpha(U32 mask, S32 pass)
 
 					if (current_shader != target_shader)
 					{
-                        LL_RECORD_BLOCK_TIME(FTM_RENDER_ALPHA_DEFERRED_SHADER_BINDS);
 						gPipeline.bindDeferredShader(*target_shader);
                         current_shader = target_shader;
 					}
@@ -755,25 +645,19 @@ void LLDrawPoolAlpha::renderAlpha(U32 mask, S32 pass)
 					target_shader = fullbright_shader;
 				}
 				
-				if(use_shaders && (current_shader != target_shader))
+				if(current_shader != target_shader)
 				{// If we need shaders, and we're not ALREADY using the proper shader, then bind it
 				// (this way we won't rebind shaders unnecessarily).
-                    LL_RECORD_BLOCK_TIME(FTM_RENDER_ALPHA_SHADER_BINDS);
 					current_shader = target_shader;
 					current_shader->bind();
 				}
-				else if (!use_shaders && current_shader != NULL)
-				{
-					LLGLSLShader::bindNoShader();
-					current_shader = NULL;
-				}
 
                 LLVector4 spec_color(1, 1, 1, 1);
                 F32 env_intensity = 0.0f;
                 F32 brightness = 1.0f;
 
                 // We have a material.  Supply the appropriate data here.
-				if (use_shaders && mat && deferred_render)
+				if (mat && deferred_render)
 				{
 					spec_color    = params.mSpecColor;
                     env_intensity = params.mEnvIntensity;
@@ -792,20 +676,16 @@ void LLDrawPoolAlpha::renderAlpha(U32 mask, S32 pass)
 					params.mGroup->rebuildMesh();
 				}
 
-                bool tex_setup = TexSetup(&params, use_shaders, use_shaders && (mat != nullptr), current_shader);
+                bool tex_setup = TexSetup(&params, (mat != nullptr), current_shader);
 
 				{
-					LL_RECORD_BLOCK_TIME(FTM_RENDER_ALPHA_PUSH);
-
 					LLGLEnableFunc stencil_test(GL_STENCIL_TEST, params.mSelected, &LLGLCommonFunc::selected_stencil_test);
 
 					gGL.blendFunc((LLRender::eBlendFactor) params.mBlendFuncSrc, (LLRender::eBlendFactor) params.mBlendFuncDst, mAlphaSFactor, mAlphaDFactor);
-					params.mVertexBuffer->setBuffer(mask & ~(params.mFullbright ? (LLVertexBuffer::MAP_TANGENT | LLVertexBuffer::MAP_TEXCOORD1 | LLVertexBuffer::MAP_TEXCOORD2) : 0));
+					params.mVertexBuffer->setBufferFast(mask & ~(params.mFullbright ? (LLVertexBuffer::MAP_TANGENT | LLVertexBuffer::MAP_TEXCOORD1 | LLVertexBuffer::MAP_TEXCOORD2) : 0));
 
                     {
-                        LL_RECORD_BLOCK_TIME(FTM_RENDER_ALPHA_DRAW);
-					    params.mVertexBuffer->drawRange(params.mDrawMode, params.mStart, params.mEnd, params.mCount, params.mOffset);
-					    gPipeline.addTrianglesDrawn(params.mCount, params.mDrawMode);
+					    params.mVertexBuffer->drawRangeFast(params.mDrawMode, params.mStart, params.mEnd, params.mCount, params.mOffset);
                     }
 				}
 
@@ -814,8 +694,6 @@ void LLDrawPoolAlpha::renderAlpha(U32 mask, S32 pass)
 					draw_glow_for_this_partition &&
 					params.mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_EMISSIVE))
 				{
-                    LL_RECORD_BLOCK_TIME(FTM_RENDER_ALPHA_EMISSIVE);
-
                     if (batch_emissives)
                     {
                         emissives.push_back(&params);
@@ -835,19 +713,29 @@ void LLDrawPoolAlpha::renderAlpha(U32 mask, S32 pass)
 				}
 			}
 
+
+            bool rebind = false;
             if (batch_fullbrights)
             {
-                light_enabled = false;
-                renderFullbrights(mask, fullbrights);
+                if (!fullbrights.empty())
+                {
+                    light_enabled = false;
+                    renderFullbrights(mask, fullbrights);
+                    rebind = true;
+                }
             }
 
             if (batch_emissives)
             {
-                light_enabled = true;
-                renderEmissives(mask, emissives);
+                if (!emissives.empty())
+                {
+                    light_enabled = true;
+                    renderEmissives(mask, emissives);
+                    rebind = true;
+                }
             }
 
-            if (current_shader)
+            if (current_shader && rebind)
             {
                 current_shader->bind();
             }
diff --git a/indra/newview/lldrawpoolalpha.h b/indra/newview/lldrawpoolalpha.h
index a069f805e827c4d5b175a8a2224b8fa821da6c9d..a50b1d929e40dd51d7c80d74857e2f12e11cdcb9 100644
--- a/indra/newview/lldrawpoolalpha.h
+++ b/indra/newview/lldrawpoolalpha.h
@@ -75,15 +75,13 @@ class LLDrawPoolAlpha: public LLRenderPass
 	LLGLSLShader* fullbright_shader;	
 	LLGLSLShader* emissive_shader;
 
-    void renderSimples(U32 mask, std::vector<LLDrawInfo*>& simples);
     void renderFullbrights(U32 mask, std::vector<LLDrawInfo*>& fullbrights);
-    void renderMaterials(U32 mask, std::vector<LLDrawInfo*>& fullbrights);
     void renderEmissives(U32 mask, std::vector<LLDrawInfo*>& emissives);
 
     void drawEmissive(U32 mask, LLDrawInfo* draw);
     void drawEmissiveInline(U32 mask, LLDrawInfo* draw);
 
-    bool TexSetup(LLDrawInfo* draw, bool use_shaders, bool use_material, LLGLSLShader* current_shader);
+    bool TexSetup(LLDrawInfo* draw, bool use_material, LLGLSLShader* current_shader);
     void RestoreTexSetup(bool tex_setup);
 
 	// our 'normal' alpha blend function for this pass
diff --git a/indra/newview/lldrawpoolavatar.cpp b/indra/newview/lldrawpoolavatar.cpp
index 02ab3162568950be9cf3515ce67510927287bcfd..8dd8c15b876c77f0d6529080480041eaafacbc5c 100644
--- a/indra/newview/lldrawpoolavatar.cpp
+++ b/indra/newview/lldrawpoolavatar.cpp
@@ -1685,7 +1685,7 @@ void LLDrawPoolAvatar::renderAvatars(LLVOAvatar* single_avatar, S32 pass)
 				renderRigged(avatarp, RIGGED_MATERIAL_ALPHA_EMISSIVE);
 				renderRigged(avatarp, RIGGED_NORMMAP);
 				renderRigged(avatarp, RIGGED_NORMMAP_MASK);
-				renderRigged(avatarp, RIGGED_NORMMAP_EMISSIVE);	
+				renderRigged(avatarp, RIGGED_NORMMAP_EMISSIVE);
 				renderRigged(avatarp, RIGGED_SPECMAP);
 				renderRigged(avatarp, RIGGED_SPECMAP_MASK);
 				renderRigged(avatarp, RIGGED_SPECMAP_EMISSIVE);
@@ -2067,56 +2067,12 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(
 		LLVector4a* pos = (LLVector4a*) position.get();
 
 		LLVector4a* norm = has_normal ? (LLVector4a*) normal.get() : NULL;
-		
-        if (skin == nullptr)
-        {
-            skin = vobj->getSkinInfo();
-        }
 
-        const MatrixPaletteCache& mpc = updateSkinInfoMatrixPalette(avatar, skin);
-        const LLMatrix4a* mat = &(mpc.mMatrixPalette[0]);
+        const MatrixPaletteCache& mpc = updateSkinInfoMatrixPalette(avatar, vobj->getMeshID());
+        const LLMatrix4a* mat = mpc.mMatrixPalette.data(); // palette is empty while skin info is unloaded; data() is valid then, operator[](0) is not
+        const LLMatrix4a& bind_shape_matrix = mpc.mBindShapeMatrix;
 
-        LLSkinningUtil::checkSkinWeights(weights, buffer->getNumVerts(), skin);
-		const LLMatrix4a& bind_shape_matrix = skin->mBindShapeMatrix;
-
-#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
-        U8* joint_indices_cursor = vol_face.mJointIndices;
-        // fast path with joint indices separate from weights
-        if (joint_indices_cursor)
-        {
-            LLMatrix4a src[4];
-		    for (U32 j = 0; j < buffer->getNumVerts(); ++j)
-		    {
-			    LLMatrix4a final_mat;
-                //LLMatrix4a final_mat_correct;
-
-                F32* jw = just_weights[j].getF32ptr();
-
-                LLSkinningUtil::getPerVertexSkinMatrixWithIndices(jw, joint_indices_cursor, mat, final_mat, src);                
-
-                joint_indices_cursor += 4;
-
-			    LLVector4a& v = vol_face.mPositions[j];
-
-			    LLVector4a t;
-			    LLVector4a dst;
-			    bind_shape_matrix.affineTransform(v, t);
-			    final_mat.affineTransform(t, dst);
-			    pos[j] = dst;
-
-			    if (norm)
-			    {
-				    LLVector4a& n = vol_face.mNormals[j];
-				    bind_shape_matrix.rotate(n, t);
-				    final_mat.rotate(t, dst);
-				    dst.normalize3fast();
-				    norm[j] = dst;
-			    }
-		    }
-        }
-        // slow path with joint indices calculated from weights
-        else
-#endif
+        if (!mpc.mMatrixPalette.empty())
         {
             for (U32 j = 0; j < buffer->getNumVerts(); ++j)
 		    {
@@ -2152,9 +2108,7 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow)
 		return;
 	}
 
-	stop_glerror();
-
-    const LLMeshSkinInfo* lastSkin = nullptr;
+    LLUUID lastMeshId;
 
 	for (U32 i = 0; i < mRiggedFace[type].size(); ++i)
 	{
@@ -2188,19 +2142,6 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow)
 			continue;
 		}
 
-		const LLMeshSkinInfo* skin = vobj->getSkinInfo();
-		if (!skin)
-		{
-			continue;
-		}
-
-		//stop_glerror();
-
-		//const LLVolumeFace& vol_face = volume->getVolumeFace(te);
-		//updateRiggedFaceVertexBuffer(avatar, face, skin, volume, vol_face);
-		
-		//stop_glerror();
-
 		U32 data_mask = LLFace::getRiggedDataMask(type);
 
 		LLVertexBuffer* buff = face->getVertexBuffer();
@@ -2290,34 +2231,33 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow)
 		{
 			if (sShaderLevel > 0)
 			{
-                if (lastSkin != skin) // <== only upload matrix palette to GL if the skininfo changed
+                auto& meshId = vobj->getMeshID();
+                
+                if (lastMeshId != meshId) // <== only upload matrix palette to GL when the mesh id (and hence its cached skin palette) changed
                 {
                     // upload matrix palette to shader
-                    const MatrixPaletteCache& mpc = updateSkinInfoMatrixPalette(avatar, skin);
+                    const MatrixPaletteCache& mpc = updateSkinInfoMatrixPalette(avatar, meshId);
                     U32 count = mpc.mMatrixPalette.size();
 
-                    stop_glerror();
+                    if (count == 0)
+                    {
+                        //skin info not loaded yet, don't render
+                        continue;
+                    }
 
                     LLDrawPoolAvatar::sVertexProgram->uniformMatrix3x4fv(LLViewerShaderMgr::AVATAR_MATRIX,
                         count,
                         FALSE,
                         (GLfloat*) &(mpc.mGLMp[0]));
-
-                    stop_glerror();
                 }
+
+                lastMeshId = meshId;
 			}
 			else
 			{
 				data_mask &= ~LLVertexBuffer::MAP_WEIGHT4;
 			}
 
-            lastSkin = skin;
-
-			/*if (glow)
-			{
-				gGL.diffuseColor4f(0,0,0,face->getTextureEntry()->getGlow());
-			}*/
-
 			if (mat)
 			{
 				//order is important here LLRender::DIFFUSE_MAP should be last, becouse it change 
@@ -2332,13 +2272,17 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow)
                 {
                     specular = face->getTexture(LLRender::SPECULAR_MAP);
                 }
-                if (specular)
+                if (specular && specular_channel >= 0)
                 {
-                    gGL.getTexUnit(specular_channel)->bind(specular);
+                    gGL.getTexUnit(specular_channel)->bindFast(specular);
                 }
                 
-				gGL.getTexUnit(normal_channel)->bind(face->getTexture(LLRender::NORMAL_MAP));
-				gGL.getTexUnit(sDiffuseChannel)->bind(face->getTexture(LLRender::DIFFUSE_MAP), false, true);
+                // bindFast() dereferences its argument unconditionally, so only bind a normal map that exists
+                if (LLViewerTexture* normal_tex = (normal_channel >= 0) ? face->getTexture(LLRender::NORMAL_MAP) : nullptr)
+                {
+                    gGL.getTexUnit(normal_channel)->bindFast(normal_tex);
+                }
+				gGL.getTexUnit(sDiffuseChannel)->bindFast(face->getTexture(LLRender::DIFFUSE_MAP));
 
 
 				LLColor4 col = mat->getSpecularLightColor();
@@ -2369,23 +2313,28 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow)
 					sVertexProgram->setMinimumAlpha(0.f);
 				}
 
-				for (U32 i = 0; i < LLRender::NUM_TEXTURE_CHANNELS; ++i)
-				{
-					LLViewerTexture* tex = face->getTexture(i);
-					if (tex)
-					{
-						tex->addTextureStats(avatar->getPixelArea());
-					}
-				}
+                if (!LLPipeline::sShadowRender && !LLPipeline::sReflectionRender)
+                {
+                    for (U32 i = 0; i < LLRender::NUM_TEXTURE_CHANNELS; ++i)
+                    {
+                        LLViewerTexture* tex = face->getTexture(i);
+                        if (tex)
+                        {
+                            tex->addTextureStats(avatar->getPixelArea());
+                        }
+                    }
+                }
 			}
 			else
 			{
-				gGL.getTexUnit(sDiffuseChannel)->bind(face->getTexture());
 				sVertexProgram->setMinimumAlpha(0.f);
 				if (normal_channel > -1)
 				{
 					LLDrawPoolBump::bindBumpMap(face, normal_channel);
 				}
+
+                gGL.getTexUnit(sDiffuseChannel)->bindFast(face->getTexture());
+
 			}
 
 			if (face->mTextureMatrix && vobj->mTexAnimMode)
@@ -2399,8 +2348,8 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow)
 				    gGL.loadMatrix((F32*) face->mTextureMatrix->mMatrix);
                 }
 
-				buff->setBuffer(data_mask);
-				buff->drawRange(LLRender::TRIANGLES, start, end, count, offset);
+				buff->setBufferFast(data_mask);
+				buff->drawRangeFast(LLRender::TRIANGLES, start, end, count, offset);
 
                 if (tex_index <= 1)
                 {
@@ -2411,11 +2360,9 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow)
 			}
 			else
 			{
-				buff->setBuffer(data_mask);
-				buff->drawRange(LLRender::TRIANGLES, start, end, count, offset);		
+				buff->setBufferFast(data_mask);
+				buff->drawRangeFast(LLRender::TRIANGLES, start, end, count, offset);
 			}
-
-			gPipeline.addTrianglesDrawn(count, LLRender::TRIANGLES);
 		}
 	}
 }
@@ -2476,8 +2423,6 @@ void LLDrawPoolAvatar::updateRiggedVertexBuffers(LLVOAvatar* avatar)
 				continue;
 			}
 
-			stop_glerror();
-
 			LLVolumeFace& vol_face = volume->getVolumeFace(te);
 			updateRiggedFaceVertexBuffer(avatar, face, vobj, volume, vol_face);
 		}
@@ -2501,47 +2446,58 @@ void LLDrawPoolAvatar::updateSkinInfoMatrixPalettes(LLVOAvatar* avatarp)
     }
 }
 
-const LLDrawPoolAvatar::MatrixPaletteCache& LLDrawPoolAvatar::updateSkinInfoMatrixPalette(LLVOAvatar * avatarp, const LLMeshSkinInfo* skin)
+const LLDrawPoolAvatar::MatrixPaletteCache& LLDrawPoolAvatar::updateSkinInfoMatrixPalette(LLVOAvatar * avatarp, const LLUUID& meshId)
 {
-    MatrixPaletteCache& entry = mMatrixPaletteCache[skin];
+    MatrixPaletteCache& entry = mMatrixPaletteCache[meshId];
 
     if (entry.mFrame != gFrameCount)
     {
         LL_PROFILE_ZONE_SCOPED;
+
+        const LLMeshSkinInfo* skin = gMeshRepo.getSkinInfo(meshId);
         entry.mFrame = gFrameCount;
-        //build matrix palette
-        U32 count = LLSkinningUtil::getMeshJointCount(skin);
-        entry.mMatrixPalette.resize(count);
-        LLSkinningUtil::initSkinningMatrixPalette(&(entry.mMatrixPalette[0]), count, skin, avatarp);
 
-        const LLMatrix4a* mat = &(entry.mMatrixPalette[0]);
+        if (skin != nullptr)
+        {
+            entry.mBindShapeMatrix = skin->mBindShapeMatrix;
+
+            //build matrix palette
+            U32 count = LLSkinningUtil::getMeshJointCount(skin);
+            entry.mMatrixPalette.resize(count);
+            LLSkinningUtil::initSkinningMatrixPalette(&(entry.mMatrixPalette[0]), count, skin, avatarp);
 
-        stop_glerror();
-        
-        entry.mGLMp.resize(count * 12);
+            const LLMatrix4a* mat = &(entry.mMatrixPalette[0]);
 
-        F32* mp = &(entry.mGLMp[0]);
-        
-        for (U32 i = 0; i < count; ++i)
-        {
-            F32* m = (F32*)mat[i].mMatrix[0].getF32ptr();
+            entry.mGLMp.resize(count * 12);
+
+            F32* mp = &(entry.mGLMp[0]);
+
+            for (U32 i = 0; i < count; ++i)
+            {
+                F32* m = (F32*)mat[i].mMatrix[0].getF32ptr();
 
-            U32 idx = i * 12;
+                U32 idx = i * 12;
 
-            mp[idx + 0] = m[0];
-            mp[idx + 1] = m[1];
-            mp[idx + 2] = m[2];
-            mp[idx + 3] = m[12];
+                mp[idx + 0] = m[0];
+                mp[idx + 1] = m[1];
+                mp[idx + 2] = m[2];
+                mp[idx + 3] = m[12];
 
-            mp[idx + 4] = m[4];
-            mp[idx + 5] = m[5];
-            mp[idx + 6] = m[6];
-            mp[idx + 7] = m[13];
+                mp[idx + 4] = m[4];
+                mp[idx + 5] = m[5];
+                mp[idx + 6] = m[6];
+                mp[idx + 7] = m[13];
 
-            mp[idx + 8] = m[8];
-            mp[idx + 9] = m[9];
-            mp[idx + 10] = m[10];
-            mp[idx + 11] = m[14];
+                mp[idx + 8] = m[8];
+                mp[idx + 9] = m[9];
+                mp[idx + 10] = m[10];
+                mp[idx + 11] = m[14];
+            }
+        }
+        else
+        {
+            entry.mMatrixPalette.resize(0);
+            entry.mGLMp.resize(0);
         }
     }
 
diff --git a/indra/newview/lldrawpoolavatar.h b/indra/newview/lldrawpoolavatar.h
index 0c1ee2cced6d17249a788b261e5c0caab04412f4..800bbc5f62afec6f1f5aa28f1823b8386c8369d5 100644
--- a/indra/newview/lldrawpoolavatar.h
+++ b/indra/newview/lldrawpoolavatar.h
@@ -283,12 +283,13 @@ typedef enum
 
 	std::vector<LLFace*> mRiggedFace[NUM_RIGGED_PASSES];
 
+    LL_ALIGN_PREFIX(16)
     class MatrixPaletteCache
     {
     public:
         U32 mFrame;
         LLMeshSkinInfo::matrix_list_t mMatrixPalette;
-        
+        LL_ALIGN_16(LLMatrix4a mBindShapeMatrix);
         // Float array ready to be sent to GL
         std::vector<F32> mGLMp;
 
@@ -296,11 +297,11 @@ typedef enum
             mFrame(gFrameCount-1)
         {
         }
-    };
+    } LL_ALIGN_POSTFIX(16);
     
-    const MatrixPaletteCache& updateSkinInfoMatrixPalette(LLVOAvatar* avatarp, const LLMeshSkinInfo* skin);
+    const MatrixPaletteCache& updateSkinInfoMatrixPalette(LLVOAvatar* avatarp, const LLUUID& meshId);
 
-    typedef std::unordered_map<const LLMeshSkinInfo*, MatrixPaletteCache> matrix_palette_cache_t;
+    typedef std::unordered_map<LLUUID, MatrixPaletteCache> matrix_palette_cache_t;
     matrix_palette_cache_t mMatrixPaletteCache;
 
 	/*virtual*/ LLViewerTexture *getDebugTexture();
diff --git a/indra/newview/lldrawpoolbump.cpp b/indra/newview/lldrawpoolbump.cpp
index 14069fa6c2bbef8b940093d12efada767f464049..f316d121ab1772129908060e141c391f9b03eb82 100644
--- a/indra/newview/lldrawpoolbump.cpp
+++ b/indra/newview/lldrawpoolbump.cpp
@@ -677,6 +677,7 @@ BOOL LLDrawPoolBump::bindBumpMap(LLFace* face, S32 channel)
 //static
 BOOL LLDrawPoolBump::bindBumpMap(U8 bump_code, LLViewerTexture* texture, F32 vsize, S32 channel)
 {
+    LL_PROFILE_ZONE_SCOPED;
 	//Note: texture atlas does not support bump texture now.
 	LLViewerFetchedTexture* tex = LLViewerTextureManager::staticCastToFetchedTexture(texture) ;
 	if(!tex)
@@ -693,7 +694,7 @@ BOOL LLDrawPoolBump::bindBumpMap(U8 bump_code, LLViewerTexture* texture, F32 vsi
 		break;
 	case BE_BRIGHTNESS: 
 	case BE_DARKNESS:
-		bump = gBumpImageList.getBrightnessDarknessImage( tex, bump_code );		
+		bump = gBumpImageList.getBrightnessDarknessImage( tex, bump_code );
 		break;
 
 	default:
@@ -709,12 +710,12 @@ BOOL LLDrawPoolBump::bindBumpMap(U8 bump_code, LLViewerTexture* texture, F32 vsi
 	{
 		if (channel == -2)
 		{
-			gGL.getTexUnit(1)->bind(bump);
-			gGL.getTexUnit(0)->bind(bump);
+			gGL.getTexUnit(1)->bindFast(bump);
+			gGL.getTexUnit(0)->bindFast(bump);
 		}
 		else
 		{
-			gGL.getTexUnit(channel)->bind(bump);
+			gGL.getTexUnit(channel)->bindFast(bump);
 		}
 
 		return TRUE;
@@ -1497,6 +1498,7 @@ void LLDrawPoolBump::renderBump(U32 type, U32 mask)
 
 void LLDrawPoolBump::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL batch_textures)
 {
+    LL_PROFILE_ZONE_SCOPED;
 	applyModelMatrix(params);
 
 	bool tex_setup = false;
@@ -1507,7 +1509,7 @@ void LLDrawPoolBump::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL
 		{
 			if (params.mTextureList[i].notNull())
 			{
-				gGL.getTexUnit(i)->bind(params.mTextureList[i], TRUE);
+				gGL.getTexUnit(i)->bindFast(params.mTextureList[i]);
 			}
 		}
 	}
@@ -1522,13 +1524,6 @@ void LLDrawPoolBump::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL
 			}
 			else
 			{
-				if (!LLGLSLShader::sNoFixedFunction)
-				{
-					gGL.getTexUnit(1)->activate();
-					gGL.matrixMode(LLRender::MM_TEXTURE);
-					gGL.loadMatrix((GLfloat*) params.mTextureMatrix->mMatrix);
-				}
-
 				gGL.getTexUnit(0)->activate();
 				gGL.matrixMode(LLRender::MM_TEXTURE);
 				gGL.loadMatrix((GLfloat*) params.mTextureMatrix->mMatrix);
@@ -1545,8 +1540,7 @@ void LLDrawPoolBump::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL
 		{
 			if (params.mTexture.notNull())
 			{
-				gGL.getTexUnit(diffuse_channel)->bind(params.mTexture);
-				params.mTexture->addTextureStats(params.mVSize);		
+				gGL.getTexUnit(diffuse_channel)->bindFast(params.mTexture);
 			}
 			else
 			{
@@ -1559,10 +1553,10 @@ void LLDrawPoolBump::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL
 	{
 		params.mGroup->rebuildMesh();
 	}
-	params.mVertexBuffer->setBuffer(mask);
-	params.mVertexBuffer->drawRange(params.mDrawMode, params.mStart, params.mEnd, params.mCount, params.mOffset);
-	gPipeline.addTrianglesDrawn(params.mCount, params.mDrawMode);
-	if (tex_setup)
+	params.mVertexBuffer->setBufferFast(mask);
+	params.mVertexBuffer->drawRangeFast(params.mDrawMode, params.mStart, params.mEnd, params.mCount, params.mOffset);
+
+    if (tex_setup)
 	{
 		if (mShiny)
 		{
@@ -1570,12 +1564,6 @@ void LLDrawPoolBump::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL
 		}
 		else
 		{
-			if (!LLGLSLShader::sNoFixedFunction)
-			{
-				gGL.getTexUnit(1)->activate();
-				gGL.matrixMode(LLRender::MM_TEXTURE);
-				gGL.loadIdentity();
-			}
 			gGL.getTexUnit(0)->activate();
 			gGL.matrixMode(LLRender::MM_TEXTURE);
 		}
diff --git a/indra/newview/lldrawpoolbump.h b/indra/newview/lldrawpoolbump.h
index 476b1d41b7d40d85ac08d6fb353cabe18d4773b3..bab160c34da854e4f50576aec3b9c55890ad7d2b 100644
--- a/indra/newview/lldrawpoolbump.h
+++ b/indra/newview/lldrawpoolbump.h
@@ -32,6 +32,8 @@
 #include "lltextureentry.h"
 #include "lluuid.h"
 
+#include <unordered_map>
+
 class LLImageRaw;
 class LLSpatialGroup;
 class LLDrawInfo;
@@ -161,7 +163,7 @@ class LLBumpImageList
 	static void onSourceLoaded( BOOL success, LLViewerTexture *src_vi, LLImageRaw* src, LLUUID& source_asset_id, EBumpEffect bump );
 
 private:
-	typedef std::map<LLUUID, LLPointer<LLViewerTexture> > bump_image_map_t;
+	typedef std::unordered_map<LLUUID, LLPointer<LLViewerTexture> > bump_image_map_t;
 	bump_image_map_t mBrightnessEntries;
 	bump_image_map_t mDarknessEntries;
 };
diff --git a/indra/newview/lldrawpoolmaterials.cpp b/indra/newview/lldrawpoolmaterials.cpp
index 05b0c1f1a93c6c666bee6916611e082206dccdd9..d2a8757379925f23f1cc395e6dfde7ce8ff2afb0 100644
--- a/indra/newview/lldrawpoolmaterials.cpp
+++ b/indra/newview/lldrawpoolmaterials.cpp
@@ -106,6 +106,7 @@ void LLDrawPoolMaterials::endDeferredPass(S32 pass)
 
 void LLDrawPoolMaterials::renderDeferred(S32 pass)
 {
+    LL_PROFILE_ZONE_SCOPED;
 	static const U32 type_list[] = 
 	{
 		LLRenderPass::PASS_MATERIAL,
@@ -157,7 +158,10 @@ void LLDrawPoolMaterials::renderDeferred(S32 pass)
 		mShader->setMinimumAlpha(params.mAlphaMaskCutoff);
 		mShader->uniform1f(LLShaderMgr::EMISSIVE_BRIGHTNESS, params.mFullbright ? 1.f : 0.f);
 
-		pushBatch(params, mask, TRUE);
+        {
+            LL_PROFILE_ZONE_SCOPED;
+            pushMaterialsBatch(params, mask);
+        }
 	}
 }
 
@@ -171,49 +175,37 @@ void LLDrawPoolMaterials::bindNormalMap(LLViewerTexture* tex)
 	mShader->bindTexture(LLShaderMgr::BUMP_MAP, tex);
 }
 
-void LLDrawPoolMaterials::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL batch_textures)
+void LLDrawPoolMaterials::pushMaterialsBatch(LLDrawInfo& params, U32 mask)
 {
+    LL_PROFILE_ZONE_SCOPED;
 	applyModelMatrix(params);
 	
 	bool tex_setup = false;
 	
-	if (batch_textures && params.mTextureList.size() > 1)
+	//material batches no longer have a batched-texture path -- might need a texture matrix
+	if (params.mTextureMatrix)
 	{
-		for (U32 i = 0; i < params.mTextureList.size(); ++i)
+		//if (mShiny)
 		{
-			if (params.mTextureList[i].notNull())
-			{
-				gGL.getTexUnit(i)->bind(params.mTextureList[i], TRUE);
-			}
+			gGL.getTexUnit(0)->activate();
+			gGL.matrixMode(LLRender::MM_TEXTURE);
 		}
-	}
-	else
-	{ //not batching textures or batch has only 1 texture -- might need a texture matrix
-		if (params.mTextureMatrix)
-		{
-			//if (mShiny)
-			{
-				gGL.getTexUnit(0)->activate();
-				gGL.matrixMode(LLRender::MM_TEXTURE);
-			}
 			
-			gGL.loadMatrix((GLfloat*) params.mTextureMatrix->mMatrix);
-			gPipeline.mTextureMatrixOps++;
+		gGL.loadMatrix((GLfloat*) params.mTextureMatrix->mMatrix);
+		gPipeline.mTextureMatrixOps++;
 			
-			tex_setup = true;
-		}
+		tex_setup = true;
+	}
 		
-		if (mShaderLevel > 1 && texture)
+	if (mShaderLevel > 1)
+	{
+		if (params.mTexture.notNull())
+		{
+			gGL.getTexUnit(diffuse_channel)->bindFast(params.mTexture);
+		}
+		else
 		{
-			if (params.mTexture.notNull())
-			{
-				gGL.getTexUnit(diffuse_channel)->bind(params.mTexture);
-				params.mTexture->addTextureStats(params.mVSize);
-			}
-			else
-			{
-				gGL.getTexUnit(diffuse_channel)->unbind(LLTexUnit::TT_TEXTURE);
-			}
+			gGL.getTexUnit(diffuse_channel)->unbindFast(LLTexUnit::TT_TEXTURE);
 		}
 	}
 	
@@ -224,9 +216,9 @@ void LLDrawPoolMaterials::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture,
 
 	LLGLEnableFunc stencil_test(GL_STENCIL_TEST, params.mSelected, &LLGLCommonFunc::selected_stencil_test);
 
-	params.mVertexBuffer->setBuffer(mask);
-	params.mVertexBuffer->drawRange(params.mDrawMode, params.mStart, params.mEnd, params.mCount, params.mOffset);
-	gPipeline.addTrianglesDrawn(params.mCount, params.mDrawMode);
+	params.mVertexBuffer->setBufferFast(mask);
+	params.mVertexBuffer->drawRangeFast(params.mDrawMode, params.mStart, params.mEnd, params.mCount, params.mOffset);
+
 	if (tex_setup)
 	{
 		gGL.getTexUnit(0)->activate();
diff --git a/indra/newview/lldrawpoolmaterials.h b/indra/newview/lldrawpoolmaterials.h
index eae1aba87cd8ed375b7f278711ddbb5a56b53d78..6e39821b0771b5e1ff249b3b4c1ed2fa20ccde85 100644
--- a/indra/newview/lldrawpoolmaterials.h
+++ b/indra/newview/lldrawpoolmaterials.h
@@ -69,7 +69,7 @@ class LLDrawPoolMaterials : public LLRenderPass
 	void bindSpecularMap(LLViewerTexture* tex);
 	void bindNormalMap(LLViewerTexture* tex);
 	
-	/*virtual*/ void pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL batch_textures = FALSE);
+	void pushMaterialsBatch(LLDrawInfo& params, U32 mask); // draws one material batch; replaces the pushBatch() override this class used to declare
 };
 
 #endif //LL_LLDRAWPOOLMATERIALS_H
diff --git a/indra/newview/lldrawpoolsimple.cpp b/indra/newview/lldrawpoolsimple.cpp
index 74e6665a964cdd8b9ef17cfe0417a4188be93427..843288cfb03a4cd546c7f01f93ab8bc2f4c68ee5 100644
--- a/indra/newview/lldrawpoolsimple.cpp
+++ b/indra/newview/lldrawpoolsimple.cpp
@@ -150,13 +150,6 @@ void LLDrawPoolGlow::render(S32 pass)
 	}
 }
 
-void LLDrawPoolGlow::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL batch_textures)
-{
-	//gGL.diffuseColor4ubv(params.mGlowColor.mV);
-	LLRenderPass::pushBatch(params, mask, texture, batch_textures);
-}
-
-
 LLDrawPoolSimple::LLDrawPoolSimple() :
 	LLRenderPass(POOL_SIMPLE)
 {
@@ -471,6 +464,7 @@ void LLDrawPoolSimple::endDeferredPass(S32 pass)
 
 void LLDrawPoolSimple::renderDeferred(S32 pass)
 {
+    LL_PROFILE_ZONE_SCOPED;
 	LLGLDisable blend(GL_BLEND);
 	LLGLDisable alpha_test(GL_ALPHA_TEST);
 
diff --git a/indra/newview/lldrawpoolsimple.h b/indra/newview/lldrawpoolsimple.h
index 608ad9e1eb0856a2095769b117f99fe070ee5cf9..b27cc4babc658caaa536785a9b50c2610d8bbc88 100644
--- a/indra/newview/lldrawpoolsimple.h
+++ b/indra/newview/lldrawpoolsimple.h
@@ -187,7 +187,6 @@ class LLDrawPoolGlow : public LLRenderPass
 	/*virtual*/ S32 getNumPasses();
 
 	void render(S32 pass = 0);
-	void pushBatch(LLDrawInfo& params, U32 mask, BOOL texture = TRUE, BOOL batch_textures = FALSE);
 
 };
 
diff --git a/indra/newview/lldrawpooltree.cpp b/indra/newview/lldrawpooltree.cpp
index 0d5195bdbfcd67211519270c39e3d029658905ed..a1ff020068629c6933177b29797195193acf3a40 100644
--- a/indra/newview/lldrawpooltree.cpp
+++ b/indra/newview/lldrawpooltree.cpp
@@ -153,6 +153,7 @@ void LLDrawPoolTree::beginDeferredPass(S32 pass)
 
 void LLDrawPoolTree::renderDeferred(S32 pass)
 {
+    LL_PROFILE_ZONE_SCOPED;
 	render(pass);
 }
 
diff --git a/indra/newview/llmeshrepository.cpp b/indra/newview/llmeshrepository.cpp
index 8ac64dbd15580806639182a82b21d43b9214e52d..a19d6d0b19309128a3c6db173a7a39b4836605e9 100644
--- a/indra/newview/llmeshrepository.cpp
+++ b/indra/newview/llmeshrepository.cpp
@@ -4045,7 +4045,7 @@ S32 LLMeshRepository::getActualMeshLOD(const LLVolumeParams& mesh_params, S32 lo
 
 const LLMeshSkinInfo* LLMeshRepository::getSkinInfo(const LLUUID& mesh_id, const LLVOVolume* requesting_obj)
 {
-	LL_RECORD_BLOCK_TIME(FTM_MESH_FETCH);
+    LL_PROFILE_ZONE_SCOPED;
     if (mesh_id.notNull())
     {
         skin_map::iterator iter = mSkinMap.find(mesh_id);
@@ -4055,6 +4055,7 @@ const LLMeshSkinInfo* LLMeshRepository::getSkinInfo(const LLUUID& mesh_id, const
         }
 
         //no skin info known about given mesh, try to fetch it
+        if (requesting_obj != nullptr)
         {
             LLMutexLock lock(mMeshMutex);
             //add volume to list of loading meshes
diff --git a/indra/newview/llmeshrepository.h b/indra/newview/llmeshrepository.h
index c1698194cb1725ddac5676dc19c394314454fabc..c0e894fda4d5a0e2a39f22d5a3086b85f4f0c98e 100644
--- a/indra/newview/llmeshrepository.h
+++ b/indra/newview/llmeshrepository.h
@@ -586,7 +586,7 @@ class LLMeshRepository
 
 	S32 getActualMeshLOD(const LLVolumeParams& mesh_params, S32 lod);
 	static S32 getActualMeshLOD(LLSD& header, S32 lod);
-	const LLMeshSkinInfo* getSkinInfo(const LLUUID& mesh_id, const LLVOVolume* requesting_obj);
+	const LLMeshSkinInfo* getSkinInfo(const LLUUID& mesh_id, const LLVOVolume* requesting_obj = nullptr);
 	LLModel::Decomposition* getDecomposition(const LLUUID& mesh_id);
 	void fetchPhysicsShape(const LLUUID& mesh_id);
 	bool hasPhysicsShape(const LLUUID& mesh_id);
diff --git a/indra/newview/llspatialpartition.h b/indra/newview/llspatialpartition.h
index 919f386d29b5095dd3fa1f4a34ffc8301a138e07..6ef82fac9c3078a1d52aeb4a17639ff17764f4f2 100644
--- a/indra/newview/llspatialpartition.h
+++ b/indra/newview/llspatialpartition.h
@@ -41,6 +41,7 @@
 #include "llviewercamera.h"
 #include "llvector4a.h"
 #include <queue>
+#include <unordered_map>
 
 #define SG_STATE_INHERIT_MASK (OCCLUDED)
 #define SG_INITIAL_STATE_MASK (DIRTY | GEOM_DIRTY)
@@ -216,10 +217,10 @@ class LLSpatialGroup : public LLOcclusionCullingGroup
 	typedef std::vector<LLPointer<LLSpatialGroup> > sg_vector_t;
 	typedef std::vector<LLPointer<LLSpatialBridge> > bridge_list_t;
 	typedef std::vector<LLPointer<LLDrawInfo> > drawmap_elem_t; 
-	typedef std::map<U32, drawmap_elem_t > draw_map_t;	
+	typedef std::unordered_map<U32, drawmap_elem_t > draw_map_t;	
 	typedef std::vector<LLPointer<LLVertexBuffer> > buffer_list_t;
-	typedef std::map<LLFace*, buffer_list_t> buffer_texture_map_t;
-	typedef std::map<U32, buffer_texture_map_t> buffer_map_t;
+	typedef std::unordered_map<LLFace*, buffer_list_t> buffer_texture_map_t;
+	typedef std::unordered_map<U32, buffer_texture_map_t> buffer_map_t;
 
 	struct CompareDistanceGreater
 	{
diff --git a/indra/newview/llviewertexture.cpp b/indra/newview/llviewertexture.cpp
index 274f53a1603c0fbf3a5ef65a084132750bd285ba..949e71a4c98a886952607d8e2530be03d8bac4e2 100644
--- a/indra/newview/llviewertexture.cpp
+++ b/indra/newview/llviewertexture.cpp
@@ -845,14 +845,14 @@ void LLViewerTexture::addTextureStats(F32 virtual_size, BOOL needs_gltexture) co
 	{
 		//flag to reset the values because the old values are used.
 		resetMaxVirtualSizeResetCounter();
-		mMaxVirtualSize = virtual_size;		
-		mAdditionalDecodePriority = 0.f;	
+		mMaxVirtualSize = virtual_size;
+		mAdditionalDecodePriority = 0.f;
 		mNeedsGLTexture = needs_gltexture;
 	}
 	else if (virtual_size > mMaxVirtualSize)
 	{
 		mMaxVirtualSize = virtual_size;
-	}	
+	}
 }
 
 void LLViewerTexture::resetTextureStats()
diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp
index e5a4b0f37466af91f674d9cfcd857c3b807f346b..b86935b0816ccdc6d34cafb4753e11a77fbbbefb 100644
--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@@ -3555,7 +3555,7 @@ const LLMeshSkinInfo* LLVOVolume::getSkinInfo() const
 {
     if (getVolume())
     {
-        return gMeshRepo.getSkinInfo(getVolume()->getParams().getSculptID(), this);
+        return gMeshRepo.getSkinInfo(getMeshID(), this);
     }
     else
     {
diff --git a/indra/newview/llvovolume.h b/indra/newview/llvovolume.h
index ce400a34986b8c2e56e32def745e354cfefb1a9c..b8c6f47bbdd12a0e738b431c3671d14fc1a6a093 100644
--- a/indra/newview/llvovolume.h
+++ b/indra/newview/llvovolume.h
@@ -296,6 +296,9 @@ class LLVOVolume : public LLViewerObject
 	BOOL setIsFlexible(BOOL is_flexible);
 
     const LLMeshSkinInfo* getSkinInfo() const;
+
+    // Convenience accessor for the mesh ID, which is stored in the volume params' sculpt ID for legacy reasons.
+    const LLUUID& getMeshID() const { return getVolume()->getParams().getSculptID(); } // NOTE: getVolume() is dereferenced unchecked; callers must ensure it is non-null first
     
     // Extended Mesh Properties
     U32 getExtendedMeshFlags() const;
diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp
index c0b469af81a2e27d3d8ef793a5399c9aefd774e9..5ef3819de4c3c78c4e0a82ac1d33796146c64fcf 100644
--- a/indra/newview/pipeline.cpp
+++ b/indra/newview/pipeline.cpp
@@ -358,7 +358,6 @@ bool	LLPipeline::sRenderAttachedLights = true;
 bool	LLPipeline::sRenderAttachedParticles = true;
 bool	LLPipeline::sRenderDeferred = false;
 S32		LLPipeline::sVisibleLightCount = 0;
-F32		LLPipeline::sMinRenderSize = 0.f;
 bool	LLPipeline::sRenderingHUDs;
 F32     LLPipeline::sDistortionWaterClipPlaneMargin = 1.0125f;
 
@@ -2550,13 +2549,6 @@ void LLPipeline::markNotCulled(LLSpatialGroup* group, LLCamera& camera)
 		return;
 	}
 
-	const LLVector4a* bounds = group->getBounds();
-	if (sMinRenderSize > 0.f && 
-			llmax(llmax(bounds[1][0], bounds[1][1]), bounds[1][2]) < sMinRenderSize)
-	{
-		return;
-	}
-
 	assertInitialized();
 	
 	if (!group->getSpatialPartition()->mRenderByGroup)
@@ -3480,7 +3472,6 @@ void LLPipeline::stateSort(LLSpatialGroup* group, LLCamera& camera)
 			group->mLastUpdateDistance = group->mDistance;
 		}
 	}
-
 }
 
 void LLPipeline::stateSort(LLSpatialBridge* bridge, LLCamera& camera, BOOL fov_changed)
@@ -3787,6 +3778,27 @@ void renderSoundHighlights(LLDrawable* drawablep)
 }
 }
 
+void LLPipeline::touchTextures(LLDrawInfo* info) // refresh usage stats for the textures referenced by one draw info; postSort() calls this per pushed draw info outside shadow/reflection passes
+{
+    LL_PROFILE_ZONE_SCOPED;
+    for (auto& tex : info->mTextureList) // walk every texture entry of the draw info; null entries are skipped
+    {
+        if (tex.notNull())
+        {
+            LLImageGL* gl_tex = tex->getGLTexture(); // null-checked below before use
+            if (gl_tex && gl_tex->updateBindStats(gl_tex->mTextureMemory)) // presumably returns true when the bind stats were actually updated -- TODO confirm in LLImageGL
+            {
+                tex->setActive();
+            }
+        }
+    }
+
+    if (info->mTexture.notNull()) // mTexture may be unset
+    {
+        info->mTexture->addTextureStats(info->mVSize); // report mVSize as the virtual size for this texture
+    }
+}
+
 void LLPipeline::postSort(LLCamera& camera)
 {
 	LL_RECORD_BLOCK_TIME(FTM_STATESORT_POSTSORT);
@@ -3839,20 +3851,14 @@ void LLPipeline::postSort(LLCamera& camera)
 			
 			for (LLSpatialGroup::drawmap_elem_t::iterator k = src_vec.begin(); k != src_vec.end(); ++k)
 			{
-				if (sMinRenderSize > 0.f)
-				{
-					LLVector4a bounds;
-					bounds.setSub((*k)->mExtents[1],(*k)->mExtents[0]);
-
-					if (llmax(llmax(bounds[0], bounds[1]), bounds[2]) > sMinRenderSize)
-					{
-						sCull->pushDrawInfo(j->first, *k);
-					}
-				}
-				else
-				{
-					sCull->pushDrawInfo(j->first, *k);
-				}
+                LLDrawInfo* info = *k;
+				
+				sCull->pushDrawInfo(j->first, info);
+                if (!sShadowRender && !sReflectionRender)
+                {
+                    touchTextures(info);
+                    addTrianglesDrawn(info->mCount, info->mDrawMode);
+                }
 			}
 		}
 
diff --git a/indra/newview/pipeline.h b/indra/newview/pipeline.h
index 5605d2641097e5f841dfea78ead4e73e1067578a..8ffbddca210883f7d6891c41cdee5294f216bb56 100644
--- a/indra/newview/pipeline.h
+++ b/indra/newview/pipeline.h
@@ -265,6 +265,8 @@ class LLPipeline
 	void stateSort(LLSpatialBridge* bridge, LLCamera& camera, BOOL fov_changed = FALSE);
 	void stateSort(LLDrawable* drawablep, LLCamera& camera);
 	void postSort(LLCamera& camera);
+    // Update usage stats for the textures in the given DrawInfo: bind stats/active flag for each entry of mTextureList, and virtual size (mVSize) for mTexture.
+    void touchTextures(LLDrawInfo* info);
 	void forAllVisibleDrawables(void (*func)(LLDrawable*));
 
 	void renderObjects(U32 type, U32 mask, bool texture = true, bool batch_texture = false);
@@ -596,7 +598,6 @@ class LLPipeline
 	static bool				sRenderAttachedParticles;
 	static bool				sRenderDeferred;
 	static S32				sVisibleLightCount;
-	static F32				sMinRenderSize;
 	static bool				sRenderingHUDs;
     static F32              sDistortionWaterClipPlaneMargin;