diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h
index 76cf9de613e3fc3d6c98d82d819436506c4659ce..2e6f9e2f719e4745e03bf7b73e8fb4075b6f2bd5 100644
--- a/indra/llmath/llvolume.h
+++ b/indra/llmath/llvolume.h
@@ -54,6 +54,7 @@ class LLVolumeTriangle;
 #include "llstrider.h"
 #include "v4coloru.h"
 #include "llrefcount.h"
+#include "llpointer.h"
 #include "llfile.h"
 
 //============================================================================
@@ -919,6 +920,10 @@ class LLVolumeFace
 	LLVector2*  mTexCoords;
 	U16* mIndices;
 
+	//vertex buffer filled in by LLFace to cache this volume face geometry in vram 
+	// (declared as a LLPointer to LLRefCount to avoid dependency on LLVertexBuffer)
+	mutable LLPointer<LLRefCount> mVertexBuffer; 
+
 	std::vector<S32>	mEdge;
 
 	//list of skin weights for rigged volumes
diff --git a/indra/llrender/llcubemap.cpp b/indra/llrender/llcubemap.cpp
index 45a3b18179a8aff6b2646aaf02e5aea96bf05add..32e4c0d18e4906ac59a32d629bfb09b45e86507e 100644
--- a/indra/llrender/llcubemap.cpp
+++ b/indra/llrender/llcubemap.cpp
@@ -81,7 +81,7 @@ void LLCubeMap::initGL()
 		{
 			U32 texname = 0;
 			
-			LLImageGL::generateTextures(1, &texname);
+			LLImageGL::generateTextures(LLTexUnit::TT_CUBE_MAP, 1, &texname);
 
 			for (int i = 0; i < 6; i++)
 			{
diff --git a/indra/llrender/llgl.cpp b/indra/llrender/llgl.cpp
index 639d967853f03ea7c3be6553964ceed3fe0aa4be..3946c439299e9b7004837fa5254822702b725acd 100644
--- a/indra/llrender/llgl.cpp
+++ b/indra/llrender/llgl.cpp
@@ -249,6 +249,12 @@ PFNGLTEXIMAGE3DMULTISAMPLEPROC glTexImage3DMultisample = NULL;
 PFNGLGETMULTISAMPLEFVPROC glGetMultisamplefv = NULL;
 PFNGLSAMPLEMASKIPROC glSampleMaski = NULL;
 
+//transform feedback (entry points are core since GL 3.0; only enabled below for GL 4.0+)
+PFNGLBEGINTRANSFORMFEEDBACKPROC glBeginTransformFeedback = NULL;
+PFNGLENDTRANSFORMFEEDBACKPROC glEndTransformFeedback = NULL;
+PFNGLTRANSFORMFEEDBACKVARYINGSPROC glTransformFeedbackVaryings = NULL;
+PFNGLBINDBUFFERRANGEPROC glBindBufferRange = NULL;
+
 //GL_ARB_debug_output
 PFNGLDEBUGMESSAGECONTROLARBPROC glDebugMessageControlARB = NULL;
 PFNGLDEBUGMESSAGEINSERTARBPROC glDebugMessageInsertARB = NULL;
@@ -421,6 +427,7 @@ LLGLManager::LLGLManager() :
 	mHasDrawBuffers(FALSE),
 	mHasTextureRectangle(FALSE),
 	mHasTextureMultisample(FALSE),
+	mHasTransformFeedback(FALSE),
 	mMaxSampleMaskWords(0),
 	mMaxColorTextureSamples(0),
 	mMaxDepthTextureSamples(0),
@@ -969,6 +976,7 @@ void LLGLManager::initExtensions()
 	mHasTextureRectangle = ExtensionExists("GL_ARB_texture_rectangle", gGLHExts.mSysExts);
 	mHasTextureMultisample = ExtensionExists("GL_ARB_texture_multisample", gGLHExts.mSysExts);
 	mHasDebugOutput = ExtensionExists("GL_ARB_debug_output", gGLHExts.mSysExts);
+	mHasTransformFeedback = mGLVersion >= 4.f ? TRUE : FALSE;
 #if !LL_DARWIN
 	mHasPointParameters = !mIsATI && ExtensionExists("GL_ARB_point_parameters", gGLHExts.mSysExts);
 #endif
@@ -1208,7 +1216,14 @@ void LLGLManager::initExtensions()
 		glTexImage3DMultisample = (PFNGLTEXIMAGE3DMULTISAMPLEPROC) GLH_EXT_GET_PROC_ADDRESS("glTexImage3DMultisample");
 		glGetMultisamplefv = (PFNGLGETMULTISAMPLEFVPROC) GLH_EXT_GET_PROC_ADDRESS("glGetMultisamplefv");
 		glSampleMaski = (PFNGLSAMPLEMASKIPROC) GLH_EXT_GET_PROC_ADDRESS("glSampleMaski");
-	}	
+	}
+	if (mHasTransformFeedback)
+	{
+		glBeginTransformFeedback = (PFNGLBEGINTRANSFORMFEEDBACKPROC) GLH_EXT_GET_PROC_ADDRESS("glBeginTransformFeedback");
+		glEndTransformFeedback = (PFNGLENDTRANSFORMFEEDBACKPROC) GLH_EXT_GET_PROC_ADDRESS("glEndTransformFeedback");
+		glTransformFeedbackVaryings = (PFNGLTRANSFORMFEEDBACKVARYINGSPROC) GLH_EXT_GET_PROC_ADDRESS("glTransformFeedbackVaryings");
+		glBindBufferRange = (PFNGLBINDBUFFERRANGEPROC) GLH_EXT_GET_PROC_ADDRESS("glBindBufferRange");
+	}
 	if (mHasDebugOutput)
 	{
 		glDebugMessageControlARB = (PFNGLDEBUGMESSAGECONTROLARBPROC) GLH_EXT_GET_PROC_ADDRESS("glDebugMessageControlARB");
@@ -2433,3 +2448,65 @@ LLGLSquashToFarClip::~LLGLSquashToFarClip()
 	gGL.matrixMode(LLRender::MM_MODELVIEW);
 }
 
+
+	
+LLGLSyncFence::LLGLSyncFence()
+{ //no sync object exists until placeFence() creates one
+#ifdef GL_ARB_sync
+	mSync = 0;
+#endif
+}
+
+LLGLSyncFence::~LLGLSyncFence()
+{ //free the sync object if one is still held
+#ifdef GL_ARB_sync
+	if (mSync)
+	{
+		glDeleteSync(mSync);
+	}
+#endif
+}
+
+void LLGLSyncFence::placeFence()
+{ //drop any previous sync object and insert a new fence into the GL command stream
+#ifdef GL_ARB_sync
+	if (mSync)
+	{
+		glDeleteSync(mSync);
+	}
+	mSync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+#endif
+}
+
+bool LLGLSyncFence::isCompleted()
+{ //poll the fence; returns true when no fence was placed or GL_ARB_sync is not defined
+	bool ret = true;
+#ifdef GL_ARB_sync
+	if (mSync)
+	{
+		GLenum status = glClientWaitSync(mSync, 0, 1); //timeout argument is 1 nanosecond
+		if (status == GL_TIMEOUT_EXPIRED)
+		{ //only a timeout counts as "not completed"; every other status returns true
+			ret = false;
+		}
+	}
+#endif
+	return ret;
+}
+
+void LLGLSyncFence::wait()
+{ //block until the fence stops timing out, re-issuing the wait every FENCE_WAIT_TIME_NANOSECONDS
+#ifdef GL_ARB_sync
+	if (mSync)
+	{
+		while (glClientWaitSync(mSync, 0, FENCE_WAIT_TIME_NANOSECONDS) == GL_TIMEOUT_EXPIRED)
+		{ //track the number of times we've waited here
+			static S32 waits = 0;
+			waits++;
+		}
+	}
+#endif
+}
+
+
+
diff --git a/indra/llrender/llgl.h b/indra/llrender/llgl.h
index 9c3a47bd506293aa0d00281d92c7833ff2dc2933..c26b75eff79a800753248fa6506d676d3a5bfa37 100644
--- a/indra/llrender/llgl.h
+++ b/indra/llrender/llgl.h
@@ -104,6 +104,7 @@ class LLGLManager
 	BOOL mHasDepthClamp;
 	BOOL mHasTextureRectangle;
 	BOOL mHasTextureMultisample;
+	BOOL mHasTransformFeedback;
 	S32 mMaxSampleMaskWords;
 	S32 mMaxColorTextureSamples;
 	S32 mMaxDepthTextureSamples;
@@ -418,6 +419,37 @@ class LLGLUpdate
 	virtual void updateGL() = 0;
 };
 
+//timeout handed to each glClientWaitSync call in LLGLSyncFence::wait(), in nanoseconds
+const U32 FENCE_WAIT_TIME_NANOSECONDS = 10000;  //10 microseconds
+
+//abstract fence interface: place a fence, then poll it or block on it
+class LLGLFence
+{
+public:
+	//virtual so an implementation can be safely destroyed through an LLGLFence pointer
+	virtual ~LLGLFence() {}
+
+	virtual void placeFence() = 0;
+	virtual bool isCompleted() = 0;
+	virtual void wait() = 0;
+};
+
+//LLGLFence backed by a GLsync object; mSync only exists when GL_ARB_sync is defined
+class LLGLSyncFence : public LLGLFence
+{
+public:
+#ifdef GL_ARB_sync
+	GLsync mSync;
+#endif
+	
+	LLGLSyncFence();
+	virtual ~LLGLSyncFence();
+
+	void placeFence();
+	bool isCompleted();
+	void wait();
+};
+
 extern LLMatrix4 gGLObliqueProjectionInverse;
 
 #include "llglstates.h"
diff --git a/indra/llrender/llglheaders.h b/indra/llrender/llglheaders.h
index d61ec707f06c56f9c6c6ad3a56b0e0b480cfd5cd..a0727b8686dbbb2661a70b6e88a91d163c28924f 100644
--- a/indra/llrender/llglheaders.h
+++ b/indra/llrender/llglheaders.h
@@ -528,6 +528,13 @@ extern PFNGLTEXIMAGE3DMULTISAMPLEPROC glTexImage3DMultisample;
 extern PFNGLGETMULTISAMPLEFVPROC glGetMultisamplefv;
 extern PFNGLSAMPLEMASKIPROC glSampleMaski;
 
+//transform feedback (entry points are core since GL 3.0; LLGLManager enables them only on GL 4.0+)
+extern PFNGLBEGINTRANSFORMFEEDBACKPROC glBeginTransformFeedback;
+extern PFNGLENDTRANSFORMFEEDBACKPROC glEndTransformFeedback;
+extern PFNGLTRANSFORMFEEDBACKVARYINGSPROC glTransformFeedbackVaryings;
+extern PFNGLBINDBUFFERRANGEPROC glBindBufferRange;
+
+
 #elif LL_WINDOWS
 //----------------------------------------------------------------------------
 // LL_WINDOWS
@@ -759,6 +766,12 @@ extern PFNGLTEXIMAGE3DMULTISAMPLEPROC glTexImage3DMultisample;
 extern PFNGLGETMULTISAMPLEFVPROC glGetMultisamplefv;
 extern PFNGLSAMPLEMASKIPROC glSampleMaski;
 
+//transform feedback (entry points are core since GL 3.0; LLGLManager enables them only on GL 4.0+)
+extern PFNGLBEGINTRANSFORMFEEDBACKPROC glBeginTransformFeedback;
+extern PFNGLENDTRANSFORMFEEDBACKPROC glEndTransformFeedback;
+extern PFNGLTRANSFORMFEEDBACKVARYINGSPROC glTransformFeedbackVaryings;
+extern PFNGLBINDBUFFERRANGEPROC glBindBufferRange;
+
 //GL_ARB_debug_output
 extern PFNGLDEBUGMESSAGECONTROLARBPROC glDebugMessageControlARB;
 extern PFNGLDEBUGMESSAGEINSERTARBPROC glDebugMessageInsertARB;
diff --git a/indra/llrender/llglslshader.cpp b/indra/llrender/llglslshader.cpp
index 4b7e639aed38da9f71478844dd993d983c8a424e..149e8cc548691e4ae6b6c4d7acbc314b7fe27e62 100644
--- a/indra/llrender/llglslshader.cpp
+++ b/indra/llrender/llglslshader.cpp
@@ -129,7 +129,9 @@ void LLGLSLShader::unload()
 }
 
 BOOL LLGLSLShader::createShader(vector<string> * attributes,
-								vector<string> * uniforms)
+								vector<string> * uniforms,
+								U32 varying_count,
+								const char** varyings)
 {
 	//reloading, reset matrix hash values
 	for (U32 i = 0; i < LLRender::NUM_MATRIX_MODES; ++i)
@@ -172,6 +174,11 @@ BOOL LLGLSLShader::createShader(vector<string> * attributes,
 		mFeatures.mIndexedTextureChannels = llmin(mFeatures.mIndexedTextureChannels, 1);
 	}
 
+	if (varying_count > 0 && varyings)
+	{
+		glTransformFeedbackVaryings(mProgramObject, varying_count, varyings, GL_INTERLEAVED_ATTRIBS);
+	}
+
 	// Map attributes and uniforms
 	if (success)
 	{
diff --git a/indra/llrender/llglslshader.h b/indra/llrender/llglslshader.h
index 7873fe3c4ee8e8d27bc0834a82f4fe5c70442d23..5c68cb46eb3b79a9811bc16a9cb146af902a6dc5 100644
--- a/indra/llrender/llglslshader.h
+++ b/indra/llrender/llglslshader.h
@@ -76,7 +76,9 @@ class LLGLSLShader
 
 	void unload();
 	BOOL createShader(std::vector<std::string> * attributes,
-						std::vector<std::string> * uniforms);
+						std::vector<std::string> * uniforms,
+						U32 varying_count = 0,
+						const char** varyings = NULL);
 	BOOL attachObject(std::string object);
 	void attachObject(GLhandleARB object);
 	void attachObjects(GLhandleARB* objects = NULL, S32 count = 0);
diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp
index 17131c9d8a0184543e3740ac9c4d7237458d8db7..3bdee6cade0e18e86a0f59cad4a44f91998755e7 100644
--- a/indra/llrender/llimagegl.cpp
+++ b/indra/llrender/llimagegl.cpp
@@ -51,7 +51,8 @@ S32 LLImageGL::sGlobalTextureMemoryInBytes		= 0;
 S32 LLImageGL::sBoundTextureMemoryInBytes		= 0;
 S32 LLImageGL::sCurBoundTextureMemory	= 0;
 S32 LLImageGL::sCount					= 0;
-std::list<U32> LLImageGL::sDeadTextureList;
+std::list<U32> LLImageGL::sDeadTextureList[LLTexUnit::TT_NONE];
+U32 LLImageGL::sCurTexName = 1;
 
 BOOL LLImageGL::sGlobalUseAnisotropic	= FALSE;
 F32 LLImageGL::sLastFrameTime			= 0.f;
@@ -1093,23 +1094,49 @@ BOOL LLImageGL::setSubImageFromFrameBuffer(S32 fb_x, S32 fb_y, S32 x_pos, S32 y_
 }
 
 // static
-void LLImageGL::generateTextures(S32 numTextures, U32 *textures)
+void LLImageGL::generateTextures(LLTexUnit::eTextureType type, S32 numTextures, U32 *textures)
 {
-	glGenTextures(numTextures, (GLuint*)textures);
+	for (S32 i = 0; i < numTextures; ++i) //fills textures[0..numTextures) with names for 'type'
+	{ //NOTE(review): names no longer come from glGenTextures(); confirm this is valid on core-profile contexts
+		if (!sDeadTextureList[type].empty())
+		{ //reuse a name that deleteTextures() recycled for this texture type
+			textures[i] = sDeadTextureList[type].front();
+			sDeadTextureList[type].pop_front();
+		}
+		else
+		{ //nothing to recycle, hand out the next value of the shared counter
+			textures[i] = sCurTexName++;
+		}
+	}
 }
 
 // static
-void LLImageGL::deleteTextures(S32 numTextures, U32 *textures, bool immediate)
+void LLImageGL::deleteTextures(LLTexUnit::eTextureType type, S32 numTextures, U32 *textures, bool immediate)
 {
-	for (S32 i = 0; i < numTextures; i++)
-	{
-		sDeadTextureList.push_back(textures[i]);
-	}
+	for (S32 i = 0; i < numTextures; ++i)
+	{ //remove texture from VRAM by setting its size to zero
+		gGL.getTexUnit(0)->bindManual(type, textures[i]);
 
-	if (immediate)
+		if (type == LLTexUnit::TT_CUBE_MAP)
+		{ //a cube map has six face targets; each one is re-specified as 0x0
+			glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X, 0, GL_RGBA, 0, 0, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+			glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, 0, GL_RGBA, 0, 0, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+			glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, 0, GL_RGBA, 0, 0, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+			glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, 0, GL_RGBA, 0, 0, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+			glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, 0, GL_RGBA, 0, 0, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+			glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0, GL_RGBA, 0, 0, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+		}
+		else
+		{ //NOTE(review): glTexImage2D is used for every non-cube type; confirm it is valid for each target getInternalType() can return
+			glTexImage2D(LLTexUnit::getInternalType(type), 0, GL_RGBA, 0, 0, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+		}
+		sDeadTextureList[type].push_back(textures[i]); //the name is kept for reuse by generateTextures()
+	}
+	//'immediate' is currently ignored: the deleteDeadTextures() call below is commented out
+	/*if (immediate)
 	{
 		LLImageGL::deleteDeadTextures();
-	}
+	}*/
 }
 
 // static
@@ -1234,10 +1261,11 @@ BOOL LLImageGL::createGLTexture()
 
 	if(mTexName)
 	{
-		glDeleteTextures(1, (reinterpret_cast<GLuint*>(&mTexName))) ;
+		LLImageGL::deleteTextures(mBindTarget, 1, (reinterpret_cast<GLuint*>(&mTexName))) ;
 	}
 	
-	glGenTextures(1, (GLuint*)&mTexName);
+
+	LLImageGL::generateTextures(mBindTarget, 1, &mTexName);
 	stop_glerror();
 	if (!mTexName)
 	{
@@ -1350,7 +1378,7 @@ BOOL LLImageGL::createGLTexture(S32 discard_level, const U8* data_in, BOOL data_
 	}
 	else
 	{
-		LLImageGL::generateTextures(1, &mTexName);
+		LLImageGL::generateTextures(mBindTarget, 1, &mTexName);
 		stop_glerror();
 		{
 			llverify(gGL.getTexUnit(0)->bind(this));
@@ -1400,7 +1428,7 @@ BOOL LLImageGL::createGLTexture(S32 discard_level, const U8* data_in, BOOL data_
 			decTextureCounter(mTextureMemory, mComponents, mCategory) ;
 		}
 
-		LLImageGL::deleteTextures(1, &old_name);
+		LLImageGL::deleteTextures(mBindTarget,1, &old_name);
 
 		stop_glerror();
 	}
@@ -1533,7 +1561,7 @@ void LLImageGL::deleteDeadTextures()
 {
 	bool reset = false;
 
-	while (!sDeadTextureList.empty())
+	/*while (!sDeadTextureList.empty())
 	{
 		GLuint tex = sDeadTextureList.front();
 		sDeadTextureList.pop_front();
@@ -1555,7 +1583,7 @@ void LLImageGL::deleteDeadTextures()
 		
 		glDeleteTextures(1, &tex);
 		stop_glerror();
-	}
+	}*/
 
 	if (reset)
 	{
@@ -1577,7 +1605,7 @@ void LLImageGL::destroyGLTexture()
 			mTextureMemory = 0;
 		}
 		
-		LLImageGL::deleteTextures(1, &mTexName);			
+		LLImageGL::deleteTextures(mBindTarget, 1, &mTexName);			
 		mTexName = 0;
 		mCurrentDiscardLevel = -1 ; //invalidate mCurrentDiscardLevel.
 		mGLTextureCreated = FALSE ;
diff --git a/indra/llrender/llimagegl.h b/indra/llrender/llimagegl.h
index e23005fe29d53d3d9590977d11d03d6b4d98b9b0..8c9cea111ee11c01d2cc0e8300a6974a88f09ef9 100644
--- a/indra/llrender/llimagegl.h
+++ b/indra/llrender/llimagegl.h
@@ -45,8 +45,12 @@ class LLImageGL : public LLRefCount
 {
 	friend class LLTexUnit;
 public:
-	static std::list<U32> sDeadTextureList;
+	static U32 sCurTexName;
+	static std::list<U32> sDeadTextureList[LLTexUnit::TT_NONE];
 
+	// These 2 functions replace glGenTextures() and glDeleteTextures()
+	static void generateTextures(LLTexUnit::eTextureType type, S32 numTextures, U32 *textures);
+	static void deleteTextures(LLTexUnit::eTextureType type, S32 numTextures, U32 *textures, bool immediate = false);
 	static void deleteDeadTextures();
 
 	// Size calculation
@@ -96,10 +100,6 @@ class LLImageGL : public LLRefCount
 	void setComponents(S32 ncomponents) { mComponents = (S8)ncomponents ;}
 	void setAllowCompression(bool allow) { mAllowCompression = allow; }
 
-	// These 3 functions currently wrap glGenTextures(), glDeleteTextures(), and glTexImage2D() 
-	// for tracking purposes and will be deprecated in the future
-	static void generateTextures(S32 numTextures, U32 *textures);
-	static void deleteTextures(S32 numTextures, U32 *textures, bool immediate = false);
 	static void setManualImage(U32 target, S32 miplevel, S32 intformat, S32 width, S32 height, U32 pixformat, U32 pixtype, const void *pixels, bool allow_compression = true);
 
 	BOOL createGLTexture() ;
diff --git a/indra/llrender/llrendertarget.cpp b/indra/llrender/llrendertarget.cpp
index 780f1dc484ef082504a454416710e6432a3e43e1..f0dd6f3bd666bd4037d306ebe6529a3dbc8f97f6 100644
--- a/indra/llrender/llrendertarget.cpp
+++ b/indra/llrender/llrendertarget.cpp
@@ -135,7 +135,7 @@ bool LLRenderTarget::addColorAttachment(U32 color_fmt)
 	}
 
 	U32 tex;
-	LLImageGL::generateTextures(1, &tex);
+	LLImageGL::generateTextures(mUsage, 1, &tex);
 	gGL.getTexUnit(0)->bindManual(mUsage, tex);
 
 	stop_glerror();
@@ -217,7 +217,7 @@ bool LLRenderTarget::allocateDepth()
 	}
 	else
 	{
-		LLImageGL::generateTextures(1, &mDepth);
+		LLImageGL::generateTextures(mUsage, 1, &mDepth);
 		gGL.getTexUnit(0)->bindManual(mUsage, mDepth);
 		
 		U32 internal_type = LLTexUnit::getInternalType(mUsage);
@@ -294,7 +294,7 @@ void LLRenderTarget::release()
 		}
 		else
 		{
-			LLImageGL::deleteTextures(1, &mDepth, true);
+			LLImageGL::deleteTextures(mUsage, 1, &mDepth, true);
 			stop_glerror();
 		}
 		mDepth = 0;
@@ -326,7 +326,7 @@ void LLRenderTarget::release()
 	if (mTex.size() > 0)
 	{
 		sBytesAllocated -= mResX*mResY*4*mTex.size();
-		LLImageGL::deleteTextures(mTex.size(), &mTex[0], true);
+		LLImageGL::deleteTextures(mUsage, mTex.size(), &mTex[0], true);
 		mTex.clear();
 	}
 	
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index 7b12304967102e0baae9b68b566b57ae48b50473..6a218e7734358889ddca7f039eaebfa4deecc327 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -93,6 +93,11 @@ LLVBOPool LLVertexBuffer::sDynamicIBOPool(GL_DYNAMIC_DRAW_ARB, GL_ELEMENT_ARRAY_
 
 U32 LLVBOPool::sBytesPooled = 0;
 U32 LLVBOPool::sIndexBytesPooled = 0;
+U32 LLVBOPool::sCurGLName = 1;
+
+std::list<U32> LLVertexBuffer::sAvailableVAOName;
+U32 LLVertexBuffer::sCurVAOName = 1;
+
 U32 LLVertexBuffer::sAllocatedIndexBytes = 0;
 U32 LLVertexBuffer::sIndexCount = 0;
 
@@ -117,59 +122,38 @@ bool LLVertexBuffer::sUseStreamDraw = true;
 bool LLVertexBuffer::sUseVAO = false;
 bool LLVertexBuffer::sPreferStreamDraw = false;
 
-const U32 FENCE_WAIT_TIME_NANOSECONDS = 10000;  //1 ms
 
-class LLGLSyncFence : public LLGLFence
+U32 LLVBOPool::genBuffer()
 {
-public:
-#ifdef GL_ARB_sync
-	GLsync mSync;
-#endif
-	
-	LLGLSyncFence()
-	{
-#ifdef GL_ARB_sync
-		mSync = 0;
-#endif
-	}
+	U32 ret = 0;
 
-	virtual ~LLGLSyncFence()
+	if (mGLNamePool.empty())
 	{
-#ifdef GL_ARB_sync
-		if (mSync)
-		{
-			glDeleteSync(mSync);
-		}
-#endif
+		ret = sCurGLName++;
 	}
-
-	void placeFence()
+	else
 	{
-#ifdef GL_ARB_sync
-		if (mSync)
-		{
-			glDeleteSync(mSync);
-		}
-		mSync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
-#endif
+		ret = mGLNamePool.front();
+		mGLNamePool.pop_front();
 	}
 
-	void wait()
-	{
-#ifdef GL_ARB_sync
-		if (mSync)
-		{
-			while (glClientWaitSync(mSync, 0, FENCE_WAIT_TIME_NANOSECONDS) == GL_TIMEOUT_EXPIRED)
-			{ //track the number of times we've waited here
-				static S32 waits = 0;
-				waits++;
-			}
-		}
-#endif
-	}
+	return ret;
+}
+
+void LLVBOPool::deleteBuffer(U32 name)
+{ //empty the buffer's data store and return its name to mGLNamePool for reuse by genBuffer()
+	LLVertexBuffer::unbind();
 
+	glBindBufferARB(mType, name);
+	glBufferDataARB(mType, 0, NULL, mUsage); //re-specify with zero size instead of calling glDeleteBuffersARB
+
+	llassert(std::find(mGLNamePool.begin(), mGLNamePool.end(), name) == mGLNamePool.end()); //a name must not be pooled twice
+
+	mGLNamePool.push_back(name);
+
+	LLVertexBuffer::unbind();
+}
 
-};
 
 LLVBOPool::LLVBOPool(U32 vboUsage, U32 vboType)
 : mUsage(vboUsage), mType(vboType)
@@ -178,6 +162,9 @@ LLVBOPool::LLVBOPool(U32 vboUsage, U32 vboType)
 	std::fill(mMissCount.begin(), mMissCount.end(), 0);
 }
 
+static LLFastTimer::DeclareTimer FTM_VBO_GEN_BUFFER("gen buffers");
+static LLFastTimer::DeclareTimer FTM_VBO_BUFFER_DATA("glBufferData");
+
 
 volatile U8* LLVBOPool::allocate(U32& name, U32 size, bool for_seed)
 {
@@ -198,7 +185,10 @@ volatile U8* LLVBOPool::allocate(U32& name, U32 size, bool for_seed)
 	if (mFreeList[i].empty() || for_seed)
 	{
 		//make a new buffer
-		glGenBuffersARB(1, &name);
+		{
+			LLFastTimer t(FTM_VBO_GEN_BUFFER);
+			name = genBuffer();
+		}
 		glBindBufferARB(mType, name);
 
 		if (!for_seed && i < LL_VBO_POOL_SEED_COUNT)
@@ -222,6 +212,7 @@ volatile U8* LLVBOPool::allocate(U32& name, U32 size, bool for_seed)
 		}
 		else
 		{ //always use a true hint of static draw when allocating non-client-backed buffers
+			LLFastTimer t(FTM_VBO_BUFFER_DATA);
 			glBufferDataARB(mType, size, 0, GL_STATIC_DRAW_ARB);
 		}
 
@@ -324,7 +315,7 @@ void LLVBOPool::release(U32 name, volatile U8* buffer, U32 size)
 		mFreeList[i].push_back(rec);
 	}
 #else //no pooling
-	glDeleteBuffersARB(1, &name);
+	deleteBuffer(name);
 	ll_aligned_free_16((U8*) buffer);
 
 	if (mType == GL_ARRAY_BUFFER_ARB)
@@ -375,8 +366,8 @@ void LLVBOPool::cleanup()
 		{
 			Record& r = l.front();
 
-			glDeleteBuffersARB(1, &r.mGLName);
-
+			deleteBuffer(r.mGLName);
+			
 			if (r.mClientData)
 			{
 				ll_aligned_free_16((void*) r.mClientData);
@@ -434,6 +425,30 @@ U32 LLVertexBuffer::sGLMode[LLRender::NUM_MODES] =
 	GL_LINE_LOOP,
 };
 
+//static
+U32 LLVertexBuffer::getVAOName()
+{ //returns a recycled VAO name when one is pooled, otherwise a newly generated one
+	U32 ret = 0;
+	if (!sAvailableVAOName.empty())
+	{ //reuse a name handed back through releaseVAOName()
+		ret = sAvailableVAOName.front();
+		sAvailableVAOName.pop_front();
+	}
+	else
+	{ //same compile-time guard as the other VAO calls in this file; ret stays 0 without it
+#if GL_ARB_vertex_array_object
+		glGenVertexArrays(1, &ret);
+#endif
+	}
+	return ret;		
+}
+
+//static
+void LLVertexBuffer::releaseVAOName(U32 name)
+{ //the VAO is not deleted here; its name is queued for reuse by getVAOName()
+	sAvailableVAOName.push_back(name);
+}
+
 
 //static
 void LLVertexBuffer::seedPools()
@@ -1052,7 +1067,7 @@ LLVertexBuffer::~LLVertexBuffer()
 	if (mGLArray)
 	{
 #if GL_ARB_vertex_array_object
-		glDeleteVertexArrays(1, &mGLArray);
+		releaseVAOName(mGLArray);
 #endif
 	}
 
@@ -1337,7 +1352,7 @@ void LLVertexBuffer::allocateBuffer(S32 nverts, S32 nindices, bool create)
 		if (gGLManager.mHasVertexArrayObject && useVBOs() && (LLRender::sGLCoreProfile || sUseVAO))
 		{
 #if GL_ARB_vertex_array_object
-			glGenVertexArrays(1, &mGLArray);
+			mGLArray = getVAOName();
 #endif
 			setupVertexArray();
 		}
@@ -2207,6 +2222,14 @@ void LLVertexBuffer::flush()
 	}
 }
 
+// bind 'count' elements of attribute 'type', starting at element 'index', to feedback binding point 'channel' (quick 'n dirty)
+void LLVertexBuffer::bindForFeedback(U32 channel, U32 type, U32 index, U32 count)
+{
+	U32 offset = mOffsets[type] + sTypeSize[type]*index; //start of the attribute's data plus 'index' elements of that attribute
+	U32 size= (sTypeSize[type]*count); //length of the bound range: 'count' elements of that attribute
+	glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, channel, mGLBuffer, offset, size);
+}
+
 // Set for rendering
 void LLVertexBuffer::setBuffer(U32 data_mask)
 {
diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h
index a64daa1a90669f7dd3c6314084a831cd99ba4105..11fa4ab6a0da3b7862298801072572cf9dd0b3dd 100644
--- a/indra/llrender/llvertexbuffer.h
+++ b/indra/llrender/llvertexbuffer.h
@@ -57,6 +57,8 @@ class LLVBOPool
 	static U32 sBytesPooled;
 	static U32 sIndexBytesPooled;
 	
+	static U32 sCurGLName;
+
 	LLVBOPool(U32 vboUsage, U32 vboType);
 		
 	const U32 mUsage;
@@ -74,6 +76,9 @@ class LLVBOPool
 	//destroy all records in mFreeList
 	void cleanup();
 
+	U32 genBuffer();
+	void deleteBuffer(U32 name);
+
 	class Record
 	{
 	public:
@@ -81,18 +86,14 @@ class LLVBOPool
 		volatile U8* mClientData;
 	};
 
+	std::list<U32> mGLNamePool;
+
 	typedef std::list<Record> record_list_t;
 	std::vector<record_list_t> mFreeList;
 	std::vector<U32> mMissCount;
 
 };
 
-class LLGLFence
-{
-public:
-	virtual void placeFence() = 0;
-	virtual void wait() = 0;
-};
 
 //============================================================================
 // base class 
@@ -127,15 +128,22 @@ class LLVertexBuffer : public LLRefCount
 	static LLVBOPool sStreamIBOPool;
 	static LLVBOPool sDynamicIBOPool;
 
+	static std::list<U32> sAvailableVAOName;
+	static U32 sCurVAOName;
+
 	static bool	sUseStreamDraw;
 	static bool sUseVAO;
 	static bool	sPreferStreamDraw;
 
 	static void seedPools();
 
+	static U32 getVAOName();
+	static void releaseVAOName(U32 name);
+
 	static void initClass(bool use_vbo, bool no_vbo_mapping);
 	static void cleanupClass();
 	static void setupClientArrays(U32 data_mask);
+	static void pushPositions(U32 mode, const LLVector4a* pos, U32 count);
 	static void drawArrays(U32 mode, const std::vector<LLVector3>& pos, const std::vector<LLVector3>& norm);
 	static void drawElements(U32 mode, const LLVector4a* pos, const LLVector2* tc, S32 num_indices, const U16* indicesp);
 
@@ -212,7 +220,6 @@ class LLVertexBuffer : public LLRefCount
 	void 	destroyGLIndices();
 	void	updateNumVerts(S32 nverts);
 	void	updateNumIndices(S32 nindices); 
-	bool	useVBOs() const;
 	void	unmapBuffer();
 		
 public:
@@ -222,6 +229,8 @@ class LLVertexBuffer : public LLRefCount
 	volatile U8*		mapVertexBuffer(S32 type, S32 index, S32 count, bool map_range);
 	volatile U8*		mapIndexBuffer(S32 index, S32 count, bool map_range);
 
+	void bindForFeedback(U32 channel, U32 type, U32 index, U32 count);
+
 	// set for rendering
 	virtual void	setBuffer(U32 data_mask); 	// calls  setupVertexBuffer() if data_mask is not 0
 	void flush(); //flush pending data to GL memory
@@ -244,12 +253,14 @@ class LLVertexBuffer : public LLRefCount
 	bool getNormalStrider(LLStrider<LLVector3>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	bool getBinormalStrider(LLStrider<LLVector3>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	bool getColorStrider(LLStrider<LLColor4U>& strider, S32 index=0, S32 count = -1, bool map_range = false);
+	bool getTextureIndexStrider(LLStrider<LLColor4U>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	bool getEmissiveStrider(LLStrider<LLColor4U>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	bool getWeightStrider(LLStrider<F32>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	bool getWeight4Strider(LLStrider<LLVector4>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	bool getClothWeightStrider(LLStrider<LLVector4>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	
 
+	bool useVBOs() const;
 	bool isEmpty() const					{ return mEmpty; }
 	bool isLocked() const					{ return mVertexLocked || mIndexLocked; }
 	S32 getNumVerts() const					{ return mNumVerts; }
diff --git a/indra/newview/app_settings/settings.xml b/indra/newview/app_settings/settings.xml
index bd109120cff1533bd3bb6f50dda7feb66bb0289b..8cb456b4fd951ceed74c0d8ed7afa1fbd11578a7 100644
--- a/indra/newview/app_settings/settings.xml
+++ b/indra/newview/app_settings/settings.xml
@@ -9162,7 +9162,7 @@
     <key>RenderUseVAO</key>
     <map>
       <key>Comment</key>
-      <string>Use GL Vertex Array Objects</string>
+      <string>[EXPERIMENTAL] Use GL Vertex Array Objects</string>
       <key>Persist</key>
       <integer>1</integer>
       <key>Type</key>
@@ -9170,7 +9170,19 @@
       <key>Value</key>
       <integer>0</integer>
     </map>
-    <key>RenderVBOMappingDisable</key>
+  <key>RenderUseTransformFeedback</key>
+  <map>
+    <key>Comment</key>
+    <string>[EXPERIMENTAL] Use transform feedback shaders for LoD updates</string>
+    <key>Persist</key>
+    <integer>1</integer>
+    <key>Type</key>
+    <string>Boolean</string>
+    <key>Value</key>
+    <integer>0</integer>
+  </map>
+
+  <key>RenderVBOMappingDisable</key>
     <map>
       <key>Comment</key>
       <string>Disable VBO glMapBufferARB</string>
@@ -12140,6 +12152,17 @@
       <key>Value</key>
       <integer>1</integer>
     </map>
+  <key>RenderSynchronousOcclusion</key>
+  <map>
+    <key>Comment</key>
+    <string>Don't let occlusion queries get more than one frame behind (block until they complete).</string>
+    <key>Persist</key>
+    <integer>1</integer>
+    <key>Type</key>
+    <string>Boolean</string>
+    <key>Value</key>
+    <integer>1</integer>
+  </map>
     <key>RenderDelayVBUpdate</key>
     <map>
       <key>Comment</key>
diff --git a/indra/newview/lldrawpoolalpha.cpp b/indra/newview/lldrawpoolalpha.cpp
index 5b62dbc560c197bc558585544f54bda5a00343be..a56e71baa8a9f0342acf971a94cc729367e45dce 100644
--- a/indra/newview/lldrawpoolalpha.cpp
+++ b/indra/newview/lldrawpoolalpha.cpp
@@ -405,6 +405,12 @@ void LLDrawPoolAlpha::renderAlpha(U32 mask)
 			{
 				LLDrawInfo& params = **k;
 
+				if ((params.mVertexBuffer->getTypeMask() & mask) != mask)
+				{ //FIXME!
+					//llwarns << "Missing required components, skipping render batch." << llendl;
+					continue;
+				}
+
 				LLRenderPass::applyModelMatrix(params);
 
 				
diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp
index 97b832520d0d570baf99683849e48da88fcdbf14..2824b1a32d7868edda62b3566bec594bb2528be8 100644
--- a/indra/newview/llface.cpp
+++ b/indra/newview/llface.cpp
@@ -50,6 +50,8 @@
 #include "pipeline.h"
 #include "llviewerregion.h"
 #include "llviewerwindow.h"
+#include "llviewershadermgr.h"
+
 
 #define LL_MAX_INDICES_COUNT 1000000
 
@@ -57,7 +59,6 @@ BOOL LLFace::sSafeRenderSelect = TRUE; // FALSE
 
 #define DOTVEC(a,b) (a.mV[0]*b.mV[0] + a.mV[1]*b.mV[1] + a.mV[2]*b.mV[2])
 
-
 /*
 For each vertex, given:
 	B - binormal
@@ -1111,6 +1112,73 @@ bool LLFace::canRenderAsMask()
 }
 
 
+static LLFastTimer::DeclareTimer FTM_FACE_GEOM_VOLUME("Volume VB Cache");
+
+//static 
+void LLFace::cacheFaceInVRAM(const LLVolumeFace& vf)
+{ //build a static vertex buffer from vf's per-vertex arrays and store it in vf.mVertexBuffer
+	LLFastTimer t(FTM_FACE_GEOM_VOLUME);
+	U32 mask = LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD0 |
+				LLVertexBuffer::MAP_BINORMAL | LLVertexBuffer::MAP_NORMAL;
+	
+	if (vf.mWeights)
+	{ //skin weights are optional; only request that stream when the face has them
+		mask |= LLVertexBuffer::MAP_WEIGHT4;
+	}
+
+	LLVertexBuffer* buff = new LLVertexBuffer(mask, GL_STATIC_DRAW_ARB);
+	vf.mVertexBuffer = buff; //allowed on a const face because mVertexBuffer is declared mutable
+
+	buff->allocateBuffer(vf.mNumVertices, 0, true); //vertex data only, zero indices
+
+	LLStrider<LLVector4a> f_vert;
+	LLStrider<LLVector3> f_binorm;
+	LLStrider<LLVector3> f_norm;
+	LLStrider<LLVector2> f_tc;
+
+	buff->getBinormalStrider(f_binorm);
+	buff->getVertexStrider(f_vert);
+	buff->getNormalStrider(f_norm);
+	buff->getTexCoord0Strider(f_tc);
+
+	for (U32 i = 0; i < vf.mNumVertices; ++i)
+	{ //NOTE(review): mBinormals is read unconditionally; presumably callers must generate binormals first -- confirm
+		*f_vert++ = vf.mPositions[i];
+		(*f_binorm++).set(vf.mBinormals[i].getF32ptr());
+		*f_tc++ = vf.mTexCoords[i];
+		(*f_norm++).set(vf.mNormals[i].getF32ptr());
+	}
+
+	if (vf.mWeights)
+	{ //second pass copies the per-vertex weights into the weight4 stream
+		LLStrider<LLVector4> f_wght;
+		buff->getWeight4Strider(f_wght);
+		for (U32 i = 0; i < vf.mNumVertices; ++i)
+		{
+			(*f_wght++).set(vf.mWeights[i].getF32ptr());
+		}
+	}
+
+	buff->flush();
+}
+
+//helper function for pushing primitives for transform shaders and cleaning up
+//uninitialized data on the tail, plus tracking number of expected primitives
+void push_for_transform(LLVertexBuffer* buff, U32 source_count, U32 dest_count)
+{ //draws source_count points from buff, then pads with repeats of the last point up to dest_count
+	if (source_count > 0 && dest_count >= source_count) //protect against possible U32 wrapping
+	{
+		//push source primitives
+		buff->drawArrays(LLRender::POINTS, 0, source_count);
+		U32 tail = dest_count-source_count; //number of padding points still needed
+		for (U32 i = 0; i < tail; ++i)
+		{ //copy last source primitive into each element in tail
+			buff->drawArrays(LLRender::POINTS, source_count-1, 1); //one single-point draw per padding element
+		}
+		gPipeline.mTransformFeedbackPrimitives += dest_count; //source points plus padding
+	}
+}
+
 static LLFastTimer::DeclareTimer FTM_FACE_GET_GEOM("Face Geom");
 static LLFastTimer::DeclareTimer FTM_FACE_GEOM_POSITION("Position");
 static LLFastTimer::DeclareTimer FTM_FACE_GEOM_NORMAL("Normal");
@@ -1128,7 +1196,6 @@ static LLFastTimer::DeclareTimer FTM_FACE_TEX_DEFAULT("Default");
 static LLFastTimer::DeclareTimer FTM_FACE_TEX_QUICK("Quick");
 static LLFastTimer::DeclareTimer FTM_FACE_TEX_QUICK_NO_XFORM("No Xform");
 static LLFastTimer::DeclareTimer FTM_FACE_TEX_QUICK_XFORM("Xform");
-
 static LLFastTimer::DeclareTimer FTM_FACE_TEX_QUICK_PLANAR("Quick Planar");
 
 BOOL LLFace::getGeometryVolume(const LLVolume& volume,
@@ -1301,17 +1368,10 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
 	LLMatrix4a mat_normal;
 	mat_normal.loadu(mat_norm_in);
 	
-	//if it's not fullbright and has no normals, bake sunlight based on face normal
-	//bool bake_sunlight = !getTextureEntry()->getFullbright() &&
-	//  !mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_NORMAL);
-
 	F32 r = 0, os = 0, ot = 0, ms = 0, mt = 0, cos_ang = 0, sin_ang = 0;
-
+	bool do_xform = false;
 	if (rebuild_tcoord)
 	{
-		LLFastTimer t(FTM_FACE_GEOM_TEXTURE);
-		bool do_xform;
-			
 		if (tep)
 		{
 			r  = tep->getRotation();
@@ -1340,599 +1400,757 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
 		{
 			do_xform = false;
 		}
+	}
+	
+	static LLCachedControl<bool> use_transform_feedback(gSavedSettings, "RenderUseTransformFeedback");
+
+	if (use_transform_feedback &&
+		gTransformPositionProgram.mProgramObject && //transform shaders are loaded
+		mVertexBuffer->useVBOs() && //target buffer is in VRAM
+		!rebuild_weights && //TODO: add support for weights
+		!volume.isUnique()) //source volume is NOT flexi
+	{ //use transform feedback to pack vertex buffer
+
+		LLVertexBuffer* buff = (LLVertexBuffer*) vf.mVertexBuffer.get();
+
+		if (vf.mVertexBuffer.isNull() || buff->getNumVerts() != vf.mNumVertices)
+		{
+			mVObjp->getVolume()->genBinormals(f);
+			LLFace::cacheFaceInVRAM(vf);
+			buff = (LLVertexBuffer*) vf.mVertexBuffer.get();
+		}		
+
+		LLGLSLShader* cur_shader = LLGLSLShader::sCurBoundShaderPtr;
+		
+		gGL.pushMatrix();
+		gGL.loadMatrix((GLfloat*) mat_vert_in.mMatrix);
+
+		if (rebuild_pos)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_POSITION);
+			gTransformPositionProgram.bind();
+
+			mVertexBuffer->bindForFeedback(0, LLVertexBuffer::TYPE_VERTEX, mGeomIndex, mGeomCount);
+
+			U8 index = mTextureIndex < 255 ? mTextureIndex : 0;
+
+			S32 val = 0.f;
+			U8* vp = (U8*) &val;
+			vp[0] = index;
+			vp[1] = 0;
+			vp[2] = 0;
+			vp[3] = 0;
+			
+			gTransformPositionProgram.uniform1i("texture_index_in", val);
+			glBeginTransformFeedback(GL_POINTS);
+			buff->setBuffer(LLVertexBuffer::MAP_VERTEX);
+
+			push_for_transform(buff, vf.mNumVertices, mGeomCount);
+
+			glEndTransformFeedback();
+		}
+
+		if (rebuild_color)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_COLOR);
+			gTransformColorProgram.bind();
+			
+			mVertexBuffer->bindForFeedback(0, LLVertexBuffer::TYPE_COLOR, mGeomIndex, mGeomCount);
+
+			S32 val = *((S32*) color.mV);
+
+			gTransformColorProgram.uniform1i("color_in", val);
+			glBeginTransformFeedback(GL_POINTS);
+			buff->setBuffer(LLVertexBuffer::MAP_VERTEX);
+			push_for_transform(buff, vf.mNumVertices, mGeomCount);
+			glEndTransformFeedback();
+		}
+
+		if (rebuild_emissive)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_EMISSIVE);
+			gTransformColorProgram.bind();
+			
+			mVertexBuffer->bindForFeedback(0, LLVertexBuffer::TYPE_EMISSIVE, mGeomIndex, mGeomCount);
+
+			U8 glow = (U8) llclamp((S32) (getTextureEntry()->getGlow()*255), 0, 255);
+
+			S32 glow32 = glow |
+						 (glow << 8) |
+						 (glow << 16) |
+						 (glow << 24);
+
+			gTransformColorProgram.uniform1i("color_in", glow32);
+			glBeginTransformFeedback(GL_POINTS);
+			buff->setBuffer(LLVertexBuffer::MAP_VERTEX);
+			push_for_transform(buff, vf.mNumVertices, mGeomCount);
+			glEndTransformFeedback();
+		}
+
+		if (rebuild_normal)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_NORMAL);
+			gTransformNormalProgram.bind();
+			
+			mVertexBuffer->bindForFeedback(0, LLVertexBuffer::TYPE_NORMAL, mGeomIndex, mGeomCount);
 						
-		//bump setup
-		LLVector4a binormal_dir( -sin_ang, cos_ang, 0.f );
-		LLVector4a bump_s_primary_light_ray(0.f, 0.f, 0.f);
-		LLVector4a bump_t_primary_light_ray(0.f, 0.f, 0.f);
+			glBeginTransformFeedback(GL_POINTS);
+			buff->setBuffer(LLVertexBuffer::MAP_NORMAL);
+			push_for_transform(buff, vf.mNumVertices, mGeomCount);
+			glEndTransformFeedback();
+		}
 
-		LLQuaternion bump_quat;
-		if (mDrawablep->isActive())
+		if (rebuild_binormal)
 		{
-			bump_quat = LLQuaternion(mDrawablep->getRenderMatrix());
+			LLFastTimer t(FTM_FACE_GEOM_BINORMAL);
+			gTransformBinormalProgram.bind();
+			
+			mVertexBuffer->bindForFeedback(0, LLVertexBuffer::TYPE_BINORMAL, mGeomIndex, mGeomCount);
+						
+			glBeginTransformFeedback(GL_POINTS);
+			buff->setBuffer(LLVertexBuffer::MAP_BINORMAL);
+			push_for_transform(buff, vf.mNumVertices, mGeomCount);
+			glEndTransformFeedback();
 		}
-		
-		if (bump_code)
+
+		if (rebuild_tcoord)
 		{
-			mVObjp->getVolume()->genBinormals(f);
-			F32 offset_multiple; 
-			switch( bump_code )
-			{
-				case BE_NO_BUMP:
-				offset_multiple = 0.f;
-				break;
-				case BE_BRIGHTNESS:
-				case BE_DARKNESS:
-				if( mTexture.notNull() && mTexture->hasGLTexture())
-				{
-					// Offset by approximately one texel
-					S32 cur_discard = mTexture->getDiscardLevel();
-					S32 max_size = llmax( mTexture->getWidth(), mTexture->getHeight() );
-					max_size <<= cur_discard;
-					const F32 ARTIFICIAL_OFFSET = 2.f;
-					offset_multiple = ARTIFICIAL_OFFSET / (F32)max_size;
-				}
-				else
-				{
-					offset_multiple = 1.f/256;
-				}
-				break;
+			LLFastTimer t(FTM_FACE_GEOM_TEXTURE);
+			gTransformTexCoordProgram.bind();
+			
+			mVertexBuffer->bindForFeedback(0, LLVertexBuffer::TYPE_TEXCOORD0, mGeomIndex, mGeomCount);
+						
+			glBeginTransformFeedback(GL_POINTS);
+			buff->setBuffer(LLVertexBuffer::MAP_TEXCOORD0);
+			push_for_transform(buff, vf.mNumVertices, mGeomCount);
+			glEndTransformFeedback();
 
-				default:  // Standard bumpmap textures.  Assumed to be 256x256
-				offset_multiple = 1.f / 256;
-				break;
-			}
+			bool do_bump = bump_code && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_TEXCOORD1);
 
-			F32 s_scale = 1.f;
-			F32 t_scale = 1.f;
-			if( tep )
+			if (do_bump)
 			{
-				tep->getScale( &s_scale, &t_scale );
-			}
-			// Use the nudged south when coming from above sun angle, such
-			// that emboss mapping always shows up on the upward faces of cubes when 
-			// it's noon (since a lot of builders build with the sun forced to noon).
-			LLVector3   sun_ray  = gSky.mVOSkyp->mBumpSunDir;
-			LLVector3   moon_ray = gSky.getMoonDirection();
-			LLVector3& primary_light_ray = (sun_ray.mV[VZ] > 0) ? sun_ray : moon_ray;
-
-			bump_s_primary_light_ray.load3((offset_multiple * s_scale * primary_light_ray).mV);
-			bump_t_primary_light_ray.load3((offset_multiple * t_scale * primary_light_ray).mV);
+				mVertexBuffer->bindForFeedback(0, LLVertexBuffer::TYPE_TEXCOORD1, mGeomIndex, mGeomCount);
+				glBeginTransformFeedback(GL_POINTS);
+				buff->setBuffer(LLVertexBuffer::MAP_TEXCOORD0);
+				push_for_transform(buff, vf.mNumVertices, mGeomCount);
+				glEndTransformFeedback();
+			}				
 		}
 
-		U8 texgen = getTextureEntry()->getTexGen();
-		if (rebuild_tcoord && texgen != LLTextureEntry::TEX_GEN_DEFAULT)
-		{ //planar texgen needs binormals
-			mVObjp->getVolume()->genBinormals(f);
+		glBindBufferARB(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
+
+		gGL.popMatrix();
+
+		if (cur_shader)
+		{
+			cur_shader->bind();
 		}
+	}
+	else
+	{
+		//if it's not fullbright and has no normals, bake sunlight based on face normal
+		//bool bake_sunlight = !getTextureEntry()->getFullbright() &&
+		//  !mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_NORMAL);
 
-		U8 tex_mode = 0;
-	
-		if (isState(TEXTURE_ANIM))
+		if (rebuild_tcoord)
 		{
-			LLVOVolume* vobj = (LLVOVolume*) (LLViewerObject*) mVObjp;	
-			tex_mode = vobj->mTexAnimMode;
+			LLFastTimer t(FTM_FACE_GEOM_TEXTURE);
+									
+			//bump setup
+			LLVector4a binormal_dir( -sin_ang, cos_ang, 0.f );
+			LLVector4a bump_s_primary_light_ray(0.f, 0.f, 0.f);
+			LLVector4a bump_t_primary_light_ray(0.f, 0.f, 0.f);
 
-			if (!tex_mode)
+			LLQuaternion bump_quat;
+			if (mDrawablep->isActive())
 			{
-				clearState(TEXTURE_ANIM);
+				bump_quat = LLQuaternion(mDrawablep->getRenderMatrix());
 			}
-			else
+		
+			if (bump_code)
 			{
-				os = ot = 0.f;
-				r = 0.f;
-				cos_ang = 1.f;
-				sin_ang = 0.f;
-				ms = mt = 1.f;
+				mVObjp->getVolume()->genBinormals(f);
+				F32 offset_multiple; 
+				switch( bump_code )
+				{
+					case BE_NO_BUMP:
+					offset_multiple = 0.f;
+					break;
+					case BE_BRIGHTNESS:
+					case BE_DARKNESS:
+					if( mTexture.notNull() && mTexture->hasGLTexture())
+					{
+						// Offset by approximately one texel
+						S32 cur_discard = mTexture->getDiscardLevel();
+						S32 max_size = llmax( mTexture->getWidth(), mTexture->getHeight() );
+						max_size <<= cur_discard;
+						const F32 ARTIFICIAL_OFFSET = 2.f;
+						offset_multiple = ARTIFICIAL_OFFSET / (F32)max_size;
+					}
+					else
+					{
+						offset_multiple = 1.f/256;
+					}
+					break;
 
-				do_xform = false;
+					default:  // Standard bumpmap textures.  Assumed to be 256x256
+					offset_multiple = 1.f / 256;
+					break;
+				}
+
+				F32 s_scale = 1.f;
+				F32 t_scale = 1.f;
+				if( tep )
+				{
+					tep->getScale( &s_scale, &t_scale );
+				}
+				// Use the nudged south when coming from above sun angle, such
+				// that emboss mapping always shows up on the upward faces of cubes when 
+				// it's noon (since a lot of builders build with the sun forced to noon).
+				LLVector3   sun_ray  = gSky.mVOSkyp->mBumpSunDir;
+				LLVector3   moon_ray = gSky.getMoonDirection();
+				LLVector3& primary_light_ray = (sun_ray.mV[VZ] > 0) ? sun_ray : moon_ray;
+
+				bump_s_primary_light_ray.load3((offset_multiple * s_scale * primary_light_ray).mV);
+				bump_t_primary_light_ray.load3((offset_multiple * t_scale * primary_light_ray).mV);
 			}
 
-			if (getVirtualSize() >= MIN_TEX_ANIM_SIZE)
-			{ //don't override texture transform during tc bake
-				tex_mode = 0;
+			U8 texgen = getTextureEntry()->getTexGen();
+			if (rebuild_tcoord && texgen != LLTextureEntry::TEX_GEN_DEFAULT)
+			{ //planar texgen needs binormals
+				mVObjp->getVolume()->genBinormals(f);
 			}
-		}
 
-		LLVector4a scalea;
-		scalea.load3(scale.mV);
+			U8 tex_mode = 0;
+	
+			if (isState(TEXTURE_ANIM))
+			{
+				LLVOVolume* vobj = (LLVOVolume*) (LLViewerObject*) mVObjp;	
+				tex_mode = vobj->mTexAnimMode;
+
+				if (!tex_mode)
+				{
+					clearState(TEXTURE_ANIM);
+				}
+				else
+				{
+					os = ot = 0.f;
+					r = 0.f;
+					cos_ang = 1.f;
+					sin_ang = 0.f;
+					ms = mt = 1.f;
 
-		bool do_bump = bump_code && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_TEXCOORD1);
-		bool do_tex_mat = tex_mode && mTextureMatrix;
+					do_xform = false;
+				}
 
-		if (!in_atlas && !do_bump)
-		{ //not in atlas or not bump mapped, might be able to do a cheap update
-			mVertexBuffer->getTexCoord0Strider(tex_coords, mGeomIndex, mGeomCount);
+				if (getVirtualSize() >= MIN_TEX_ANIM_SIZE)
+				{ //don't override texture transform during tc bake
+					tex_mode = 0;
+				}
+			}
 
-			if (texgen != LLTextureEntry::TEX_GEN_PLANAR)
-			{
-				LLFastTimer t(FTM_FACE_TEX_QUICK);
-				if (!do_tex_mat)
+			LLVector4a scalea;
+			scalea.load3(scale.mV);
+
+			bool do_bump = bump_code && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_TEXCOORD1);
+			bool do_tex_mat = tex_mode && mTextureMatrix;
+
+			if (!in_atlas && !do_bump)
+			{ //not in atlas or not bump mapped, might be able to do a cheap update
+				mVertexBuffer->getTexCoord0Strider(tex_coords, mGeomIndex, mGeomCount);
+
+				if (texgen != LLTextureEntry::TEX_GEN_PLANAR)
 				{
-					if (!do_xform)
+					LLFastTimer t(FTM_FACE_TEX_QUICK);
+					if (!do_tex_mat)
 					{
-						LLFastTimer t(FTM_FACE_TEX_QUICK_NO_XFORM);
-						LLVector4a::memcpyNonAliased16((F32*) tex_coords.get(), (F32*) vf.mTexCoords, num_vertices*2*sizeof(F32));
-					}
-					else
-					{
-						LLFastTimer t(FTM_FACE_TEX_QUICK_XFORM);
-						F32* dst = (F32*) tex_coords.get();
-						LLVector4a* src = (LLVector4a*) vf.mTexCoords;
+						if (!do_xform)
+						{
+							LLFastTimer t(FTM_FACE_TEX_QUICK_NO_XFORM);
+							LLVector4a::memcpyNonAliased16((F32*) tex_coords.get(), (F32*) vf.mTexCoords, num_vertices*2*sizeof(F32));
+						}
+						else
+						{
+							LLFastTimer t(FTM_FACE_TEX_QUICK_XFORM);
+							F32* dst = (F32*) tex_coords.get();
+							LLVector4a* src = (LLVector4a*) vf.mTexCoords;
 
-						LLVector4a trans;
-						trans.splat(-0.5f);
+							LLVector4a trans;
+							trans.splat(-0.5f);
 
-						LLVector4a rot0;
-						rot0.set(cos_ang, -sin_ang, cos_ang, -sin_ang);
+							LLVector4a rot0;
+							rot0.set(cos_ang, -sin_ang, cos_ang, -sin_ang);
 
-						LLVector4a rot1;
-						rot1.set(sin_ang, cos_ang, sin_ang, cos_ang);
+							LLVector4a rot1;
+							rot1.set(sin_ang, cos_ang, sin_ang, cos_ang);
 
-						LLVector4a scale;
-						scale.set(ms, mt, ms, mt);
+							LLVector4a scale;
+							scale.set(ms, mt, ms, mt);
 
-						LLVector4a offset;
-						offset.set(os+0.5f, ot+0.5f, os+0.5f, ot+0.5f);
+							LLVector4a offset;
+							offset.set(os+0.5f, ot+0.5f, os+0.5f, ot+0.5f);
 
-						LLVector4Logical mask;
-						mask.clear();
-						mask.setElement<2>();
-						mask.setElement<3>();
+							LLVector4Logical mask;
+							mask.clear();
+							mask.setElement<2>();
+							mask.setElement<3>();
 
-						U32 count = num_vertices/2 + num_vertices%2;
+							U32 count = num_vertices/2 + num_vertices%2;
 
-						for (S32 i = 0; i < count; i++)
+							for (S32 i = 0; i < count; i++)
+							{	
+								LLVector4a res = *src++;
+								xform4a(res, trans, mask, rot0, rot1, offset, scale);
+								res.store4a(dst);
+								dst += 4;
+							}
+						}
+					}
+					else
+					{ //do tex mat, no texgen, no atlas, no bump
+						for (S32 i = 0; i < num_vertices; i++)
 						{	
-							LLVector4a res = *src++;
-							xform4a(res, trans, mask, rot0, rot1, offset, scale);
-							res.store4a(dst);
-							dst += 4;
+							LLVector2 tc(vf.mTexCoords[i]);
+							//LLVector4a& norm = vf.mNormals[i];
+							//LLVector4a& center = *(vf.mCenter);
+
+							LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f);
+							tmp = tmp * *mTextureMatrix;
+							tc.mV[0] = tmp.mV[0];
+							tc.mV[1] = tmp.mV[1];
+							*tex_coords++ = tc;	
 						}
 					}
 				}
 				else
-				{ //do tex mat, no texgen, no atlas, no bump
-					for (S32 i = 0; i < num_vertices; i++)
-					{	
-						LLVector2 tc(vf.mTexCoords[i]);
-						//LLVector4a& norm = vf.mNormals[i];
-						//LLVector4a& center = *(vf.mCenter);
-
-						LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f);
-						tmp = tmp * *mTextureMatrix;
-						tc.mV[0] = tmp.mV[0];
-						tc.mV[1] = tmp.mV[1];
-						*tex_coords++ = tc;	
-					}
-				}
-			}
-			else
-			{ //no bump, no atlas, tex gen planar
-				LLFastTimer t(FTM_FACE_TEX_QUICK_PLANAR);
-				if (do_tex_mat)
-				{
-					for (S32 i = 0; i < num_vertices; i++)
-					{	
-						LLVector2 tc(vf.mTexCoords[i]);
-						LLVector4a& norm = vf.mNormals[i];
-						LLVector4a& center = *(vf.mCenter);
-						LLVector4a vec = vf.mPositions[i];	
-						vec.mul(scalea);
-						planarProjection(tc, norm, center, vec);
+				{ //no bump, no atlas, tex gen planar
+					LLFastTimer t(FTM_FACE_TEX_QUICK_PLANAR);
+					if (do_tex_mat)
+					{
+						for (S32 i = 0; i < num_vertices; i++)
+						{	
+							LLVector2 tc(vf.mTexCoords[i]);
+							LLVector4a& norm = vf.mNormals[i];
+							LLVector4a& center = *(vf.mCenter);
+							LLVector4a vec = vf.mPositions[i];	
+							vec.mul(scalea);
+							planarProjection(tc, norm, center, vec);
 						
-						LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f);
-						tmp = tmp * *mTextureMatrix;
-						tc.mV[0] = tmp.mV[0];
-						tc.mV[1] = tmp.mV[1];
+							LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f);
+							tmp = tmp * *mTextureMatrix;
+							tc.mV[0] = tmp.mV[0];
+							tc.mV[1] = tmp.mV[1];
 				
-						*tex_coords++ = tc;	
+							*tex_coords++ = tc;	
+						}
 					}
-				}
-				else
-				{
-					for (S32 i = 0; i < num_vertices; i++)
-					{	
-						LLVector2 tc(vf.mTexCoords[i]);
-						LLVector4a& norm = vf.mNormals[i];
-						LLVector4a& center = *(vf.mCenter);
-						LLVector4a vec = vf.mPositions[i];	
-						vec.mul(scalea);
-						planarProjection(tc, norm, center, vec);
+					else
+					{
+						for (S32 i = 0; i < num_vertices; i++)
+						{	
+							LLVector2 tc(vf.mTexCoords[i]);
+							LLVector4a& norm = vf.mNormals[i];
+							LLVector4a& center = *(vf.mCenter);
+							LLVector4a vec = vf.mPositions[i];	
+							vec.mul(scalea);
+							planarProjection(tc, norm, center, vec);
 						
-						xform(tc, cos_ang, sin_ang, os, ot, ms, mt);
+							xform(tc, cos_ang, sin_ang, os, ot, ms, mt);
 
-						*tex_coords++ = tc;	
+							*tex_coords++ = tc;	
+						}
 					}
 				}
-			}
 
-			if (map_range)
-			{
-				mVertexBuffer->flush();
+				if (map_range)
+				{
+					mVertexBuffer->flush();
+				}
 			}
-		}
-		else
-		{ //either bump mapped or in atlas, just do the whole expensive loop
-			LLFastTimer t(FTM_FACE_TEX_DEFAULT);
-			mVertexBuffer->getTexCoord0Strider(tex_coords, mGeomIndex, mGeomCount, map_range);
+			else
+			{ //either bump mapped or in atlas, just do the whole expensive loop
+				LLFastTimer t(FTM_FACE_TEX_DEFAULT);
+				mVertexBuffer->getTexCoord0Strider(tex_coords, mGeomIndex, mGeomCount, map_range);
 
-			std::vector<LLVector2> bump_tc;
+				std::vector<LLVector2> bump_tc;
 		
-			for (S32 i = 0; i < num_vertices; i++)
-			{	
-				LLVector2 tc(vf.mTexCoords[i]);
+				for (S32 i = 0; i < num_vertices; i++)
+				{	
+					LLVector2 tc(vf.mTexCoords[i]);
 			
-				LLVector4a& norm = vf.mNormals[i];
+					LLVector4a& norm = vf.mNormals[i];
 				
-				LLVector4a& center = *(vf.mCenter);
+					LLVector4a& center = *(vf.mCenter);
 		   
-				if (texgen != LLTextureEntry::TEX_GEN_DEFAULT)
-				{
-					LLVector4a vec = vf.mPositions[i];
+					if (texgen != LLTextureEntry::TEX_GEN_DEFAULT)
+					{
+						LLVector4a vec = vf.mPositions[i];
 				
-					vec.mul(scalea);
+						vec.mul(scalea);
+
+						switch (texgen)
+						{
+							case LLTextureEntry::TEX_GEN_PLANAR:
+								planarProjection(tc, norm, center, vec);
+								break;
+							case LLTextureEntry::TEX_GEN_SPHERICAL:
+								sphericalProjection(tc, norm, center, vec);
+								break;
+							case LLTextureEntry::TEX_GEN_CYLINDRICAL:
+								cylindricalProjection(tc, norm, center, vec);
+								break;
+							default:
+								break;
+						}		
+					}
 
-					switch (texgen)
+					if (tex_mode && mTextureMatrix)
 					{
-						case LLTextureEntry::TEX_GEN_PLANAR:
-							planarProjection(tc, norm, center, vec);
+						LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f);
+						tmp = tmp * *mTextureMatrix;
+						tc.mV[0] = tmp.mV[0];
+						tc.mV[1] = tmp.mV[1];
+					}
+					else
+					{
+						xform(tc, cos_ang, sin_ang, os, ot, ms, mt);
+					}
+
+					if(in_atlas)
+					{
+						//
+						//manually calculate tex-coord per vertex for varying address modes.
+						//should be removed if shader can handle this.
+						//
+
+						S32 int_part = 0 ;
+						switch(mTexture->getAddressMode())
+						{
+						case LLTexUnit::TAM_CLAMP:
+							if(tc.mV[0] < 0.f)
+							{
+								tc.mV[0] = 0.f ;
+							}
+							else if(tc.mV[0] > 1.f)
+							{
+								tc.mV[0] = 1.f;
+							}
+
+							if(tc.mV[1] < 0.f)
+							{
+								tc.mV[1] = 0.f ;
+							}
+							else if(tc.mV[1] > 1.f)
+							{
+								tc.mV[1] = 1.f;
+							}
 							break;
-						case LLTextureEntry::TEX_GEN_SPHERICAL:
-							sphericalProjection(tc, norm, center, vec);
+						case LLTexUnit::TAM_MIRROR:
+							if(tc.mV[0] < 0.f)
+							{
+								tc.mV[0] = -tc.mV[0] ;
+							}
+							int_part = (S32)tc.mV[0] ;
+							if(int_part & 1) //odd number
+							{
+								tc.mV[0] = int_part + 1 - tc.mV[0] ;
+							}
+							else //even number
+							{
+								tc.mV[0] -= int_part ;
+							}
+
+							if(tc.mV[1] < 0.f)
+							{
+								tc.mV[1] = -tc.mV[1] ;
+							}
+							int_part = (S32)tc.mV[1] ;
+							if(int_part & 1) //odd number
+							{
+								tc.mV[1] = int_part + 1 - tc.mV[1] ;
+							}
+							else //even number
+							{
+								tc.mV[1] -= int_part ;
+							}
 							break;
-						case LLTextureEntry::TEX_GEN_CYLINDRICAL:
-							cylindricalProjection(tc, norm, center, vec);
+						case LLTexUnit::TAM_WRAP:
+							if(tc.mV[0] > 1.f)
+								tc.mV[0] -= (S32)(tc.mV[0] - 0.00001f) ;
+							else if(tc.mV[0] < -1.f)
+								tc.mV[0] -= (S32)(tc.mV[0] + 0.00001f) ;
+
+							if(tc.mV[1] > 1.f)
+								tc.mV[1] -= (S32)(tc.mV[1] - 0.00001f) ;
+							else if(tc.mV[1] < -1.f)
+								tc.mV[1] -= (S32)(tc.mV[1] + 0.00001f) ;
+
+							if(tc.mV[0] < 0.f)
+							{
+								tc.mV[0] = 1.0f + tc.mV[0] ;
+							}
+							if(tc.mV[1] < 0.f)
+							{
+								tc.mV[1] = 1.0f + tc.mV[1] ;
+							}
 							break;
 						default:
 							break;
-					}		
-				}
+						}
+				
+						tc.mV[0] = tcoord_xoffset + tcoord_xscale * tc.mV[0] ;
+						tc.mV[1] = tcoord_yoffset + tcoord_yscale * tc.mV[1] ;
+					}
+				
 
-				if (tex_mode && mTextureMatrix)
-				{
-					LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f);
-					tmp = tmp * *mTextureMatrix;
-					tc.mV[0] = tmp.mV[0];
-					tc.mV[1] = tmp.mV[1];
+					*tex_coords++ = tc;
+					if (do_bump)
+					{
+						bump_tc.push_back(tc);
+					}
 				}
-				else
+
+				if (map_range)
 				{
-					xform(tc, cos_ang, sin_ang, os, ot, ms, mt);
+					mVertexBuffer->flush();
 				}
 
-				if(in_atlas)
+				if (do_bump)
 				{
-					//
-					//manually calculate tex-coord per vertex for varying address modes.
-					//should be removed if shader can handle this.
-					//
-
-					S32 int_part = 0 ;
-					switch(mTexture->getAddressMode())
+					mVertexBuffer->getTexCoord1Strider(tex_coords2, mGeomIndex, mGeomCount, map_range);
+		
+					for (S32 i = 0; i < num_vertices; i++)
 					{
-					case LLTexUnit::TAM_CLAMP:
-						if(tc.mV[0] < 0.f)
-						{
-							tc.mV[0] = 0.f ;
-						}
-						else if(tc.mV[0] > 1.f)
-						{
-							tc.mV[0] = 1.f;
-						}
-
-						if(tc.mV[1] < 0.f)
-						{
-							tc.mV[1] = 0.f ;
-						}
-						else if(tc.mV[1] > 1.f)
-						{
-							tc.mV[1] = 1.f;
-						}
-						break;
-					case LLTexUnit::TAM_MIRROR:
-						if(tc.mV[0] < 0.f)
-						{
-							tc.mV[0] = -tc.mV[0] ;
-						}
-						int_part = (S32)tc.mV[0] ;
-						if(int_part & 1) //odd number
-						{
-							tc.mV[0] = int_part + 1 - tc.mV[0] ;
-						}
-						else //even number
+						LLVector4a tangent;
+						tangent.setCross3(vf.mBinormals[i], vf.mNormals[i]);
+
+						LLMatrix4a tangent_to_object;
+						tangent_to_object.setRows(tangent, vf.mBinormals[i], vf.mNormals[i]);
+						LLVector4a t;
+						tangent_to_object.rotate(binormal_dir, t);
+						LLVector4a binormal;
+						mat_normal.rotate(t, binormal);
+						
+						//VECTORIZE THIS
+						if (mDrawablep->isActive())
 						{
-							tc.mV[0] -= int_part ;
+							LLVector3 t;
+							t.set(binormal.getF32ptr());
+							t *= bump_quat;
+							binormal.load3(t.mV);
 						}
 
-						if(tc.mV[1] < 0.f)
-						{
-							tc.mV[1] = -tc.mV[1] ;
-						}
-						int_part = (S32)tc.mV[1] ;
-						if(int_part & 1) //odd number
-						{
-							tc.mV[1] = int_part + 1 - tc.mV[1] ;
-						}
-						else //even number
-						{
-							tc.mV[1] -= int_part ;
-						}
-						break;
-					case LLTexUnit::TAM_WRAP:
-						if(tc.mV[0] > 1.f)
-							tc.mV[0] -= (S32)(tc.mV[0] - 0.00001f) ;
-						else if(tc.mV[0] < -1.f)
-							tc.mV[0] -= (S32)(tc.mV[0] + 0.00001f) ;
-
-						if(tc.mV[1] > 1.f)
-							tc.mV[1] -= (S32)(tc.mV[1] - 0.00001f) ;
-						else if(tc.mV[1] < -1.f)
-							tc.mV[1] -= (S32)(tc.mV[1] + 0.00001f) ;
-
-						if(tc.mV[0] < 0.f)
-						{
-							tc.mV[0] = 1.0f + tc.mV[0] ;
-						}
-						if(tc.mV[1] < 0.f)
-						{
-							tc.mV[1] = 1.0f + tc.mV[1] ;
-						}
-						break;
-					default:
-						break;
+						binormal.normalize3fast();
+						LLVector2 tc = bump_tc[i];
+						tc += LLVector2( bump_s_primary_light_ray.dot3(tangent).getF32(), bump_t_primary_light_ray.dot3(binormal).getF32() );
+					
+						*tex_coords2++ = tc;
 					}
-				
-					tc.mV[0] = tcoord_xoffset + tcoord_xscale * tc.mV[0] ;
-					tc.mV[1] = tcoord_yoffset + tcoord_yscale * tc.mV[1] ;
-				}
-				
-
-				*tex_coords++ = tc;
-				if (do_bump)
-				{
-					bump_tc.push_back(tc);
-				}
-			}
-
-			if (map_range)
-			{
-				mVertexBuffer->flush();
-			}
 
-			if (do_bump)
-			{
-				mVertexBuffer->getTexCoord1Strider(tex_coords2, mGeomIndex, mGeomCount, map_range);
-		
-				for (S32 i = 0; i < num_vertices; i++)
-				{
-					LLVector4a tangent;
-					tangent.setCross3(vf.mBinormals[i], vf.mNormals[i]);
-
-					LLMatrix4a tangent_to_object;
-					tangent_to_object.setRows(tangent, vf.mBinormals[i], vf.mNormals[i]);
-					LLVector4a t;
-					tangent_to_object.rotate(binormal_dir, t);
-					LLVector4a binormal;
-					mat_normal.rotate(t, binormal);
-						
-					//VECTORIZE THIS
-					if (mDrawablep->isActive())
+					if (map_range)
 					{
-						LLVector3 t;
-						t.set(binormal.getF32ptr());
-						t *= bump_quat;
-						binormal.load3(t.mV);
+						mVertexBuffer->flush();
 					}
-
-					binormal.normalize3fast();
-					LLVector2 tc = bump_tc[i];
-					tc += LLVector2( bump_s_primary_light_ray.dot3(tangent).getF32(), bump_t_primary_light_ray.dot3(binormal).getF32() );
-					
-					*tex_coords2++ = tc;
-				}
-
-				if (map_range)
-				{
-					mVertexBuffer->flush();
 				}
 			}
 		}
-	}
 
-	if (rebuild_pos)
-	{
-		LLFastTimer t(FTM_FACE_GEOM_POSITION);
-		llassert(num_vertices > 0);
+		if (rebuild_pos)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_POSITION);
+			llassert(num_vertices > 0);
 		
-		mVertexBuffer->getVertexStrider(vert, mGeomIndex, mGeomCount, map_range);
+			mVertexBuffer->getVertexStrider(vert, mGeomIndex, mGeomCount, map_range);
 			
 
-		LLMatrix4a mat_vert;
-		mat_vert.loadu(mat_vert_in);
+			LLMatrix4a mat_vert;
+			mat_vert.loadu(mat_vert_in);
 
-		LLVector4a* src = vf.mPositions;
-		volatile F32* dst = (volatile F32*) vert.get();
+			LLVector4a* src = vf.mPositions;
+			volatile F32* dst = (volatile F32*) vert.get();
 
-		volatile F32* end = dst+num_vertices*4;
-		LLVector4a res;
+			volatile F32* end = dst+num_vertices*4;
+			LLVector4a res;
 
-		LLVector4a texIdx;
+			LLVector4a texIdx;
 
-		U8 index = mTextureIndex < 255 ? mTextureIndex : 0;
+			U8 index = mTextureIndex < 255 ? mTextureIndex : 0;
 
-		F32 val = 0.f;
-		U8* vp = (U8*) &val;
-		vp[0] = index;
-		vp[1] = 0;
-		vp[2] = 0;
-		vp[3] = 0;
+			F32 val = 0.f;
+			U8* vp = (U8*) &val;
+			vp[0] = index;
+			vp[1] = 0;
+			vp[2] = 0;
+			vp[3] = 0;
 
-		llassert(index <= LLGLSLShader::sIndexedTextureChannels-1);
+			llassert(index <= LLGLSLShader::sIndexedTextureChannels-1);
 
-		LLVector4Logical mask;
-		mask.clear();
-		mask.setElement<3>();
+			LLVector4Logical mask;
+			mask.clear();
+			mask.setElement<3>();
 		
-		texIdx.set(0,0,0,val);
-
-		{
-			LLFastTimer t(FTM_FACE_POSITION_STORE);
-			LLVector4a tmp;
+			texIdx.set(0,0,0,val);
 
-			do
-			{	
-				mat_vert.affineTransform(*src++, res);
-				tmp.setSelectWithMask(mask, texIdx, res);
-				tmp.store4a((F32*) dst);
-				dst += 4;
+			{
+				LLFastTimer t(FTM_FACE_POSITION_STORE);
+				LLVector4a tmp;
+
+				do
+				{	
+					mat_vert.affineTransform(*src++, res);
+					tmp.setSelectWithMask(mask, texIdx, res);
+					tmp.store4a((F32*) dst);
+					dst += 4;
+				}
+				while(dst < end);
 			}
-			while(dst < end);
-		}
 
-		{
-			LLFastTimer t(FTM_FACE_POSITION_PAD);
-			S32 aligned_pad_vertices = mGeomCount - num_vertices;
-			res.set(res[0], res[1], res[2], 0.f);
+			{
+				LLFastTimer t(FTM_FACE_POSITION_PAD);
+				S32 aligned_pad_vertices = mGeomCount - num_vertices;
+				res.set(res[0], res[1], res[2], 0.f);
+
+				while (aligned_pad_vertices > 0)
+				{
+					--aligned_pad_vertices;
+					res.store4a((F32*) dst);
+					dst += 4;
+				}
+			}
 
-			while (aligned_pad_vertices > 0)
+			if (map_range)
 			{
-				--aligned_pad_vertices;
-				res.store4a((F32*) dst);
-				dst += 4;
+				mVertexBuffer->flush();
 			}
 		}
 
-		if (map_range)
-		{
-			mVertexBuffer->flush();
-		}
-	}
 		
-	if (rebuild_normal)
-	{
-		LLFastTimer t(FTM_FACE_GEOM_NORMAL);
-		mVertexBuffer->getNormalStrider(norm, mGeomIndex, mGeomCount, map_range);
-		F32* normals = (F32*) norm.get();
+		if (rebuild_normal)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_NORMAL);
+			mVertexBuffer->getNormalStrider(norm, mGeomIndex, mGeomCount, map_range);
+			F32* normals = (F32*) norm.get();
 	
-		for (S32 i = 0; i < num_vertices; i++)
-		{	
-			LLVector4a normal;
-			mat_normal.rotate(vf.mNormals[i], normal);
-			normal.normalize3fast();
-			normal.store4a(normals);
-			normals += 4;
-		}
+			for (S32 i = 0; i < num_vertices; i++)
+			{	
+				LLVector4a normal;
+				mat_normal.rotate(vf.mNormals[i], normal);
+				normal.normalize3fast();
+				normal.store4a(normals);
+				normals += 4;
+			}
 
-		if (map_range)
-		{
-			mVertexBuffer->flush();
+			if (map_range)
+			{
+				mVertexBuffer->flush();
+			}
 		}
-	}
 		
-	if (rebuild_binormal)
-	{
-		LLFastTimer t(FTM_FACE_GEOM_BINORMAL);
-		mVertexBuffer->getBinormalStrider(binorm, mGeomIndex, mGeomCount, map_range);
-		F32* binormals = (F32*) binorm.get();
+		if (rebuild_binormal)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_BINORMAL);
+			mVertexBuffer->getBinormalStrider(binorm, mGeomIndex, mGeomCount, map_range);
+			F32* binormals = (F32*) binorm.get();
 		
-		for (S32 i = 0; i < num_vertices; i++)
-		{	
-			LLVector4a binormal;
-			mat_normal.rotate(vf.mBinormals[i], binormal);
-			binormal.normalize3fast();
-			binormal.store4a(binormals);
-			binormals += 4;
-		}
+			for (S32 i = 0; i < num_vertices; i++)
+			{	
+				LLVector4a binormal;
+				mat_normal.rotate(vf.mBinormals[i], binormal);
+				binormal.normalize3fast();
+				binormal.store4a(binormals);
+				binormals += 4;
+			}
 
-		if (map_range)
-		{
-			mVertexBuffer->flush();
+			if (map_range)
+			{
+				mVertexBuffer->flush();
+			}
 		}
-	}
 	
-	if (rebuild_weights && vf.mWeights)
-	{
-		LLFastTimer t(FTM_FACE_GEOM_WEIGHTS);
-		mVertexBuffer->getWeight4Strider(wght, mGeomIndex, mGeomCount, map_range);
-		F32* weights = (F32*) wght.get();
-		LLVector4a::memcpyNonAliased16(weights, (F32*) vf.mWeights, num_vertices*4*sizeof(F32));
-		if (map_range)
+		if (rebuild_weights && vf.mWeights)
 		{
-			mVertexBuffer->flush();
+			LLFastTimer t(FTM_FACE_GEOM_WEIGHTS);
+			mVertexBuffer->getWeight4Strider(wght, mGeomIndex, mGeomCount, map_range);
+			F32* weights = (F32*) wght.get();
+			LLVector4a::memcpyNonAliased16(weights, (F32*) vf.mWeights, num_vertices*4*sizeof(F32));
+			if (map_range)
+			{
+				mVertexBuffer->flush();
+			}
 		}
-	}
 
-	if (rebuild_color && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_COLOR) )
-	{
-		LLFastTimer t(FTM_FACE_GEOM_COLOR);
-		mVertexBuffer->getColorStrider(colors, mGeomIndex, mGeomCount, map_range);
+		if (rebuild_color && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_COLOR) )
+		{
+			LLFastTimer t(FTM_FACE_GEOM_COLOR);
+			mVertexBuffer->getColorStrider(colors, mGeomIndex, mGeomCount, map_range);
 
-		LLVector4a src;
+			LLVector4a src;
 
-		U32 vec[4];
-		vec[0] = vec[1] = vec[2] = vec[3] = color.mAll;
+			U32 vec[4];
+			vec[0] = vec[1] = vec[2] = vec[3] = color.mAll;
 		
-		src.loadua((F32*) vec);
+			src.loadua((F32*) vec);
 
-		F32* dst = (F32*) colors.get();
-		S32 num_vecs = num_vertices/4;
-		if (num_vertices%4 > 0)
-		{
-			++num_vecs;
-		}
+			F32* dst = (F32*) colors.get();
+			S32 num_vecs = num_vertices/4;
+			if (num_vertices%4 > 0)
+			{
+				++num_vecs;
+			}
 
-		for (S32 i = 0; i < num_vecs; i++)
-		{	
-			src.store4a(dst);
-			dst += 4;
-		}
+			for (S32 i = 0; i < num_vecs; i++)
+			{	
+				src.store4a(dst);
+				dst += 4;
+			}
 
-		if (map_range)
-		{
-			mVertexBuffer->flush();
+			if (map_range)
+			{
+				mVertexBuffer->flush();
+			}
 		}
-	}
 
-	if (rebuild_emissive)
-	{
-		LLFastTimer t(FTM_FACE_GEOM_EMISSIVE);
-		LLStrider<LLColor4U> emissive;
-		mVertexBuffer->getEmissiveStrider(emissive, mGeomIndex, mGeomCount, map_range);
+		if (rebuild_emissive)
+		{
+			LLFastTimer t(FTM_FACE_GEOM_EMISSIVE);
+			LLStrider<LLColor4U> emissive;
+			mVertexBuffer->getEmissiveStrider(emissive, mGeomIndex, mGeomCount, map_range);
 
-		U8 glow = (U8) llclamp((S32) (getTextureEntry()->getGlow()*255), 0, 255);
+			U8 glow = (U8) llclamp((S32) (getTextureEntry()->getGlow()*255), 0, 255);
 
-		LLVector4a src;
+			LLVector4a src;
 
 		
-		U32 glow32 = glow |
-					 (glow << 8) |
-					 (glow << 16) |
-					 (glow << 24);
+			U32 glow32 = glow |
+						 (glow << 8) |
+						 (glow << 16) |
+						 (glow << 24);
 
-		U32 vec[4];
-		vec[0] = vec[1] = vec[2] = vec[3] = glow32;
+			U32 vec[4];
+			vec[0] = vec[1] = vec[2] = vec[3] = glow32;
 		
-		src.loadua((F32*) vec);
+			src.loadua((F32*) vec);
 
-		F32* dst = (F32*) emissive.get();
-		S32 num_vecs = num_vertices/4;
-		if (num_vertices%4 > 0)
-		{
-			++num_vecs;
-		}
+			F32* dst = (F32*) emissive.get();
+			S32 num_vecs = num_vertices/4;
+			if (num_vertices%4 > 0)
+			{
+				++num_vecs;
+			}
 
-		for (S32 i = 0; i < num_vecs; i++)
-		{	
-			src.store4a(dst);
-			dst += 4;
-		}
+			for (S32 i = 0; i < num_vecs; i++)
+			{	
+				src.store4a(dst);
+				dst += 4;
+			}
 
-		if (map_range)
-		{
-			mVertexBuffer->flush();
+			if (map_range)
+			{
+				mVertexBuffer->flush();
+			}
 		}
 	}
+
 	if (rebuild_tcoord)
 	{
 		mTexExtents[0].setVec(0,0);
diff --git a/indra/newview/llface.h b/indra/newview/llface.h
index 82e4ab61b7d9d5695afbe08f0c929327eca019e6..c31f35769396b5151f6129fbc03edfb18d8c20cf 100644
--- a/indra/newview/llface.h
+++ b/indra/newview/llface.h
@@ -83,6 +83,8 @@ class LLFace
 
 	static void initClass();
 
+	static void cacheFaceInVRAM(const LLVolumeFace& vf);
+
 public:
 	LLFace(LLDrawable* drawablep, LLViewerObject* objp)   { init(drawablep, objp); }
 	~LLFace()  { destroy(); }
diff --git a/indra/newview/llfloatermodelpreview.cpp b/indra/newview/llfloatermodelpreview.cpp
index 7448f2bb2a9fc5ade568d00f43c4ba45de0a393f..b362fa340c539c8bec84770501b6d75d9eb9ccb5 100755
--- a/indra/newview/llfloatermodelpreview.cpp
+++ b/indra/newview/llfloatermodelpreview.cpp
@@ -540,7 +540,7 @@ LLFloaterModelPreview::~LLFloaterModelPreview()
 
 	if (mGLName)
 	{
-		LLImageGL::deleteTextures(1, &mGLName );
+		LLImageGL::deleteTextures(LLTexUnit::TT_TEXTURE, 1, &mGLName );
 	}
 
 	delete mStatusLock;
diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp
index e886f83671be3bebec59921cd420615418a42f3e..06604fba6ff267658ea71455d1b59dc01341e925 100644
--- a/indra/newview/llspatialpartition.cpp
+++ b/indra/newview/llspatialpartition.cpp
@@ -85,12 +85,32 @@ static F32 sCurMaxTexPriority = 1.f;
 
 class LLOcclusionQueryPool : public LLGLNamePool
 {
+public:
+	LLOcclusionQueryPool()
+	{ //start the name counter at 1; allocateName hands out mCurQuery++ when it has nothing to recycle
+		mCurQuery = 1;
+	}
+
 protected:
+
+	std::list<GLuint> mAvailableName; //names handed back through releaseName, reused front-first by allocateName
+	GLuint mCurQuery; //next never-issued name; set to 1 by the constructor and post-incremented by allocateName
+		
 	virtual GLuint allocateName()
 	{
-		GLuint name;
-		glGenQueriesARB(1, &name);
-		return name;
+		GLuint ret = 0; //name to return; no GL call is made here, it comes from the free list or the counter
+
+		if (!mAvailableName.empty())
+		{ //prefer a recycled name: take the one at the front of the list
+			ret = mAvailableName.front();
+			mAvailableName.pop_front();
+		}
+		else
+		{ //nothing to recycle: issue the current counter value and advance it
+			ret = mCurQuery++;
+		}
+
+		return ret;
 	}
 
 	virtual void releaseName(GLuint name)
@@ -98,7 +118,8 @@ class LLOcclusionQueryPool : public LLGLNamePool
 #if LL_TRACK_PENDING_OCCLUSION_QUERIES
 		LLSpatialGroup::sPendingQueries.erase(name);
 #endif
-		glDeleteQueriesARB(1, &name);
+		llassert(std::find(mAvailableName.begin(), mAvailableName.end(), name) == mAvailableName.end());
+		mAvailableName.push_back(name);
 	}
 };
 
@@ -687,6 +708,11 @@ void LLSpatialGroup::rebuildGeom()
 	if (!isDead())
 	{
 		mSpatialPartition->rebuildGeom(this);
+
+		if (isState(LLSpatialGroup::MESH_DIRTY))
+		{
+			gPipeline.markMeshDirty(this);
+		}
 	}
 }
 
@@ -1587,7 +1613,7 @@ BOOL LLSpatialGroup::rebound()
 }
 
 static LLFastTimer::DeclareTimer FTM_OCCLUSION_READBACK("Readback Occlusion");
-static LLFastTimer::DeclareTimer FTM_OCCLUSION_WAIT("Wait");
+static LLFastTimer::DeclareTimer FTM_OCCLUSION_WAIT("Occlusion Wait");
 
 void LLSpatialGroup::checkOcclusion()
 {
@@ -1607,7 +1633,9 @@ void LLSpatialGroup::checkOcclusion()
 			{
 				glGetQueryObjectuivARB(mOcclusionQuery[LLViewerCamera::sCurCameraID], GL_QUERY_RESULT_AVAILABLE_ARB, &available);
 
-				if (mOcclusionIssued[LLViewerCamera::sCurCameraID] < gFrameCount)
+				static LLCachedControl<bool> wait_for_query(gSavedSettings, "RenderSynchronousOcclusion");
+
+				if (wait_for_query && mOcclusionIssued[LLViewerCamera::sCurCameraID] < gFrameCount)
 				{ //query was issued last frame, wait until it's available
 					S32 max_loop = 1024;
 					LLFastTimer t(FTM_OCCLUSION_WAIT);
diff --git a/indra/newview/llviewerdisplay.cpp b/indra/newview/llviewerdisplay.cpp
index 001d8e184a4122c90f0d1f791fa2d80922a11fc0..99ed04d1affa1b64cca0c6676498a48ed27e6473 100644
--- a/indra/newview/llviewerdisplay.cpp
+++ b/indra/newview/llviewerdisplay.cpp
@@ -755,12 +755,12 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot)
 				gTextureList.updateImages(max_image_decode_time);
 			}
 
-			{
+			/*{
 				LLFastTimer t(FTM_IMAGE_UPDATE_DELETE);
 				//remove dead textures from GL
 				LLImageGL::deleteDeadTextures();
 				stop_glerror();
-			}
+			}*/
 		}
 
 		LLGLState::checkStates();
diff --git a/indra/newview/llviewershadermgr.cpp b/indra/newview/llviewershadermgr.cpp
index 0473e2b7c05b34983a84ab9c793e4a5d2cbee7e6..485e3af62da4755270b5d4f9f5e94c2202264d7c 100644
--- a/indra/newview/llviewershadermgr.cpp
+++ b/indra/newview/llviewershadermgr.cpp
@@ -63,6 +63,13 @@ bool				LLViewerShaderMgr::sSkipReload = false;
 
 LLVector4			gShinyOrigin;
 
+//transform shaders
+LLGLSLShader			gTransformPositionProgram;
+LLGLSLShader			gTransformTexCoordProgram;
+LLGLSLShader			gTransformNormalProgram;
+LLGLSLShader			gTransformColorProgram;
+LLGLSLShader			gTransformBinormalProgram;
+
 //utility shaders
 LLGLSLShader	gOcclusionProgram;
 LLGLSLShader	gCustomAlphaProgram;
@@ -438,7 +445,8 @@ void LLViewerShaderMgr::setShaders()
 		S32 wl_class = 2;
 		S32 water_class = 2;
 		S32 deferred_class = 0;
-		
+		S32 transform_class = gGLManager.mHasTransformFeedback ? 1 : 0;
+
 		if (LLFeatureManager::getInstance()->isFeatureAvailable("RenderDeferred") &&
 		    gSavedSettings.getBOOL("RenderDeferred") &&
 			gSavedSettings.getBOOL("RenderAvatarVP") &&
@@ -476,6 +484,7 @@ void LLViewerShaderMgr::setShaders()
 			gSky.mVOSkyp->forceSkyUpdate();
 		}
 
+		
 		// Load lighting shaders
 		mVertexShaderLevel[SHADER_LIGHTING] = light_class;
 		mVertexShaderLevel[SHADER_INTERFACE] = light_class;
@@ -485,6 +494,7 @@ void LLViewerShaderMgr::setShaders()
 		mVertexShaderLevel[SHADER_EFFECT] = effect_class;
 		mVertexShaderLevel[SHADER_WINDLIGHT] = wl_class;
 		mVertexShaderLevel[SHADER_DEFERRED] = deferred_class;
+		mVertexShaderLevel[SHADER_TRANSFORM] = transform_class;
 
 		BOOL loaded = loadBasicShaders();
 
@@ -516,6 +526,11 @@ void LLViewerShaderMgr::setShaders()
 				loaded = loadShadersInterface();
 			}
 			
+			if (loaded)
+			{
+				loaded = loadTransformShaders();
+			}
+
 			if (loaded)
 			{
 				// Load max avatar shaders to set the max level
@@ -733,6 +748,12 @@ void LLViewerShaderMgr::unloadShaders()
 	gDeferredSkinnedBumpProgram.unload();
 	gDeferredSkinnedAlphaProgram.unload();
 
+	gTransformPositionProgram.unload();
+	gTransformTexCoordProgram.unload();
+	gTransformNormalProgram.unload();
+	gTransformColorProgram.unload();
+	gTransformBinormalProgram.unload();
+
 	mVertexShaderLevel[SHADER_LIGHTING] = 0;
 	mVertexShaderLevel[SHADER_OBJECT] = 0;
 	mVertexShaderLevel[SHADER_AVATAR] = 0;
@@ -741,6 +762,7 @@ void LLViewerShaderMgr::unloadShaders()
 	mVertexShaderLevel[SHADER_INTERFACE] = 0;
 	mVertexShaderLevel[SHADER_EFFECT] = 0;
 	mVertexShaderLevel[SHADER_WINDLIGHT] = 0;
+	mVertexShaderLevel[SHADER_TRANSFORM] = 0;
 
 	gPipeline.mVertexShadersLoaded = 0;
 }
@@ -2763,6 +2785,95 @@ BOOL LLViewerShaderMgr::loadShadersWindLight()
 	return success;
 }
 
+BOOL LLViewerShaderMgr::loadTransformShaders()
+{ //builds the five gTransform*Program shaders; returns TRUE on success or when SHADER_TRANSFORM level is below 1
+	BOOL success = TRUE;
+	
+	if (mVertexShaderLevel[SHADER_TRANSFORM] < 1)
+	{ //level below 1: unload all five programs and return TRUE without building anything
+		gTransformPositionProgram.unload();
+		gTransformTexCoordProgram.unload();
+		gTransformNormalProgram.unload();
+		gTransformColorProgram.unload();
+		gTransformBinormalProgram.unload();
+		return TRUE;
+	}
+
+	if (success)
+	{ //position program (success is always TRUE here); a single vertex shader file, no other stage is added
+		gTransformPositionProgram.mName = "Position Transform Shader";
+		gTransformPositionProgram.mShaderFiles.clear();
+		gTransformPositionProgram.mShaderFiles.push_back(make_pair("transform/positionV.glsl", GL_VERTEX_SHADER_ARB));
+		gTransformPositionProgram.mShaderLevel = mVertexShaderLevel[SHADER_TRANSFORM];
+
+		const char* varyings[] = { //NOTE(review): presumably the outputs captured by transform feedback - confirm in createShader
+			"position_out",
+			"texture_index_out",
+		};
+	
+		success = gTransformPositionProgram.createShader(NULL, NULL, 2, varyings); //2 == number of entries in varyings
+	}
+
+	if (success)
+	{ //texcoord program; only attempted when the position program was created
+		gTransformTexCoordProgram.mName = "TexCoord Transform Shader";
+		gTransformTexCoordProgram.mShaderFiles.clear();
+		gTransformTexCoordProgram.mShaderFiles.push_back(make_pair("transform/texcoordV.glsl", GL_VERTEX_SHADER_ARB));
+		gTransformTexCoordProgram.mShaderLevel = mVertexShaderLevel[SHADER_TRANSFORM];
+
+		const char* varyings[] = {
+			"texcoord_out",
+		};
+	
+		success = gTransformTexCoordProgram.createShader(NULL, NULL, 1, varyings); //1 == number of entries in varyings
+	}
+
+	if (success)
+	{ //normal program; only attempted when every earlier program was created
+		gTransformNormalProgram.mName = "Normal Transform Shader";
+		gTransformNormalProgram.mShaderFiles.clear();
+		gTransformNormalProgram.mShaderFiles.push_back(make_pair("transform/normalV.glsl", GL_VERTEX_SHADER_ARB));
+		gTransformNormalProgram.mShaderLevel = mVertexShaderLevel[SHADER_TRANSFORM];
+
+		const char* varyings[] = {
+			"normal_out",
+		};
+	
+		success = gTransformNormalProgram.createShader(NULL, NULL, 1, varyings); //1 == number of entries in varyings
+	}
+
+	if (success)
+	{ //color program; only attempted when every earlier program was created
+		gTransformColorProgram.mName = "Color Transform Shader";
+		gTransformColorProgram.mShaderFiles.clear();
+		gTransformColorProgram.mShaderFiles.push_back(make_pair("transform/colorV.glsl", GL_VERTEX_SHADER_ARB));
+		gTransformColorProgram.mShaderLevel = mVertexShaderLevel[SHADER_TRANSFORM];
+
+		const char* varyings[] = {
+			"color_out",
+		};
+	
+		success = gTransformColorProgram.createShader(NULL, NULL, 1, varyings); //1 == number of entries in varyings
+	}
+
+	if (success)
+	{ //binormal program; only attempted when every earlier program was created
+		gTransformBinormalProgram.mName = "Binormal Transform Shader";
+		gTransformBinormalProgram.mShaderFiles.clear();
+		gTransformBinormalProgram.mShaderFiles.push_back(make_pair("transform/binormalV.glsl", GL_VERTEX_SHADER_ARB));
+		gTransformBinormalProgram.mShaderLevel = mVertexShaderLevel[SHADER_TRANSFORM];
+
+		const char* varyings[] = {
+			"binormal_out",
+		};
+	
+		success = gTransformBinormalProgram.createShader(NULL, NULL, 1, varyings); //1 == number of entries in varyings
+	}
+
+	
+	return success; //result of the last createShader call that ran; later programs are skipped after a failure
+}
+
 std::string LLViewerShaderMgr::getShaderDirPrefix(void)
 {
 	return gDirUtilp->getExpandedFilename(LL_PATH_APP_SETTINGS, "shaders/class");
diff --git a/indra/newview/llviewershadermgr.h b/indra/newview/llviewershadermgr.h
index d10aba85c7c3bbcd2a028bcee85fffa057f85407..03d686e07ecd64ef5aa45be576b13508a35e09cc 100644
--- a/indra/newview/llviewershadermgr.h
+++ b/indra/newview/llviewershadermgr.h
@@ -54,6 +54,7 @@ class LLViewerShaderMgr: public LLShaderMgr
 	BOOL loadShadersWater();
 	BOOL loadShadersInterface();
 	BOOL loadShadersWindLight();
+	BOOL loadTransformShaders();
 
 	std::vector<S32> mVertexShaderLevel;
 	S32	mMaxAvatarShaderLevel;
@@ -69,6 +70,7 @@ class LLViewerShaderMgr: public LLShaderMgr
 		SHADER_WINDLIGHT,
 		SHADER_WATER,
 		SHADER_DEFERRED,
+		SHADER_TRANSFORM,
 		SHADER_COUNT
 	};
 
@@ -209,6 +211,15 @@ inline bool operator != (LLViewerShaderMgr::shader_iter const & a, LLViewerShade
 
 extern LLVector4			gShinyOrigin;
 
+//transform shaders
+extern LLGLSLShader			gTransformPositionProgram;
+extern LLGLSLShader			gTransformTexCoordProgram;
+extern LLGLSLShader			gTransformNormalProgram;
+extern LLGLSLShader			gTransformColorProgram;
+extern LLGLSLShader			gTransformBinormalProgram;
+
+
+
 //utility shaders
 extern LLGLSLShader			gOcclusionProgram;
 extern LLGLSLShader			gCustomAlphaProgram;
diff --git a/indra/newview/llvoavatar.cpp b/indra/newview/llvoavatar.cpp
index c221c7fdd8bcc4fae8c34f330916d9f0974949f5..083ad622cd44e6b7ff028bef9d93d0d4a12b2fbe 100644
--- a/indra/newview/llvoavatar.cpp
+++ b/indra/newview/llvoavatar.cpp
@@ -896,7 +896,7 @@ void LLVOAvatar::deleteLayerSetCaches(bool clearAll)
 		}
 		if (mBakedTextureDatas[i].mMaskTexName)
 		{
-			glDeleteTextures(1, (GLuint*)&(mBakedTextureDatas[i].mMaskTexName));
+			LLImageGL::deleteTextures(LLTexUnit::TT_TEXTURE, 1, (GLuint*)&(mBakedTextureDatas[i].mMaskTexName));
 			mBakedTextureDatas[i].mMaskTexName = 0 ;
 		}
 	}
@@ -7394,7 +7394,7 @@ void LLVOAvatar::onBakedTextureMasksLoaded( BOOL success, LLViewerFetchedTexture
 			}
 
 			U32 gl_name;
-			LLImageGL::generateTextures(1, &gl_name );
+			LLImageGL::generateTextures(LLTexUnit::TT_TEXTURE, 1, &gl_name );
 			stop_glerror();
 
 			gGL.getTexUnit(0)->bindManual(LLTexUnit::TT_TEXTURE, gl_name);
@@ -7431,7 +7431,7 @@ void LLVOAvatar::onBakedTextureMasksLoaded( BOOL success, LLViewerFetchedTexture
 						maskData->mLastDiscardLevel = discard_level;
 						if (self->mBakedTextureDatas[baked_index].mMaskTexName)
 						{
-							LLImageGL::deleteTextures(1, &(self->mBakedTextureDatas[baked_index].mMaskTexName));
+							LLImageGL::deleteTextures(LLTexUnit::TT_TEXTURE, 1, &(self->mBakedTextureDatas[baked_index].mMaskTexName));
 						}
 						self->mBakedTextureDatas[baked_index].mMaskTexName = gl_name;
 						found_texture_id = true;
diff --git a/indra/newview/llvoavatarself.cpp b/indra/newview/llvoavatarself.cpp
index f063653cc5555ab84f3ee05f2317e5a1d00b67b0..b6d4d6b56f46dd4f069a2ce871d244ecdcefde29 100644
--- a/indra/newview/llvoavatarself.cpp
+++ b/indra/newview/llvoavatarself.cpp
@@ -2605,7 +2605,7 @@ void LLVOAvatarSelf::deleteScratchTextures()
 		 namep; 
 		 namep = sScratchTexNames.getNextData() )
 	{
-		LLImageGL::deleteTextures(1, (U32 *)namep );
+		LLImageGL::deleteTextures(LLTexUnit::TT_TEXTURE, 1, (U32 *)namep );
 		stop_glerror();
 	}
 
diff --git a/indra/newview/llvopartgroup.cpp b/indra/newview/llvopartgroup.cpp
index b6adc776ccd4cb511a8c6572a246f7782525641d..fca97987a2b88f9fbebde4fc7307638446283114 100644
--- a/indra/newview/llvopartgroup.cpp
+++ b/indra/newview/llvopartgroup.cpp
@@ -125,7 +125,7 @@ S32 LLVOPartGroup::findAvailableVBSlot()
 void LLVOPartGroup::freeVBSlot(S32 idx)
 {
 	llassert(idx < LL_MAX_PARTICLE_COUNT && idx >= 0);
-	llassert(sVBSlotCursor > sVBSlotFree);
+	//llassert(sVBSlotCursor > sVBSlotFree);
 
 	if (sVBSlotCursor > sVBSlotFree)
 	{
diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp
index 6d4565ec2fdca6a8342e4a5aa52a712e52fd92b1..6bb4e9e63b795c09f81e0af9071243920ad076ee 100644
--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@@ -1089,9 +1089,33 @@ BOOL LLVOVolume::setVolume(const LLVolumeParams &params_in, const S32 detail, bo
 			}
 		}
 
+
+		static LLCachedControl<bool> use_transform_feedback(gSavedSettings, "RenderUseTransformFeedback");
+
+		bool cache_in_vram = use_transform_feedback && gTransformPositionProgram.mProgramObject &&
+			(!mVolumeImpl || !mVolumeImpl->isVolumeUnique());
+
+		if (cache_in_vram)
+		{ //this volume might be used as source data for a transform object, put it in vram
+			LLVolume* volume = getVolume();
+			for (S32 i = 0; i < volume->getNumFaces(); ++i)
+			{
+				const LLVolumeFace& face = volume->getVolumeFace(i);
+				if (face.mVertexBuffer.notNull())
+				{ //already cached
+					break;
+				}
+				volume->genBinormals(i);
+				LLFace::cacheFaceInVRAM(face);
+			}
+		}
+		
+
 		return TRUE;
 	}
 
+
+
 	return FALSE;
 }
 
diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp
index a691302cb98f03ca8a97485164a7ec4a72f0cb0c..a76a32f834e91dd1d0db90ff3ed5dba7047b64a1 100644
--- a/indra/newview/pipeline.cpp
+++ b/indra/newview/pipeline.cpp
@@ -51,6 +51,10 @@
 // newview includes
 #include "llagent.h"
 #include "llagentcamera.h"
+#include "llappviewer.h"
+#include "lltexturecache.h"
+#include "lltexturefetch.h"
+#include "llimageworker.h"
 #include "lldrawable.h"
 #include "lldrawpoolalpha.h"
 #include "lldrawpoolavatar.h"
@@ -403,9 +407,11 @@ LLPipeline::LLPipeline() :
 	mInitialized(FALSE),
 	mVertexShadersEnabled(FALSE),
 	mVertexShadersLoaded(0),
+	mTransformFeedbackPrimitives(0),
 	mRenderDebugFeatureMask(0),
 	mRenderDebugMask(0),
 	mOldRenderDebugMask(0),
+	mMeshDirtyQueryObject(0),
 	mGroupQ1Locked(false),
 	mGroupQ2Locked(false),
 	mResetVertexBuffers(false),
@@ -693,6 +699,12 @@ void LLPipeline::destroyGL()
 	{
 		LLVertexBuffer::sEnableVBOs = FALSE;
 	}
+
+	if (mMeshDirtyQueryObject)
+	{
+		glDeleteQueriesARB(1, &mMeshDirtyQueryObject);
+		mMeshDirtyQueryObject = 0;
+	}
 }
 
 static LLFastTimer::DeclareTimer FTM_RESIZE_SCREEN_TEXTURE("Resize Screen Texture");
@@ -1028,19 +1040,19 @@ void LLPipeline::releaseGLBuffers()
 	
 	if (mNoiseMap)
 	{
-		LLImageGL::deleteTextures(1, &mNoiseMap);
+		LLImageGL::deleteTextures(LLTexUnit::TT_TEXTURE, 1, &mNoiseMap);
 		mNoiseMap = 0;
 	}
 
 	if (mTrueNoiseMap)
 	{
-		LLImageGL::deleteTextures(1, &mTrueNoiseMap);
+		LLImageGL::deleteTextures(LLTexUnit::TT_TEXTURE, 1, &mTrueNoiseMap);
 		mTrueNoiseMap = 0;
 	}
 
 	if (mLightFunc)
 	{
-		LLImageGL::deleteTextures(1, &mLightFunc);
+		LLImageGL::deleteTextures(LLTexUnit::TT_TEXTURE, 1, &mLightFunc);
 		mLightFunc = 0;
 	}
 
@@ -1131,7 +1143,7 @@ void LLPipeline::createGLBuffers()
 				noise[i].mV[2] = ll_frand()*scaler+1.f-scaler/2.f;
 			}
 
-			LLImageGL::generateTextures(1, &mNoiseMap);
+			LLImageGL::generateTextures(LLTexUnit::TT_TEXTURE, 1, &mNoiseMap);
 			
 			gGL.getTexUnit(0)->bindManual(LLTexUnit::TT_TEXTURE, mNoiseMap);
 			LLImageGL::setManualImage(LLTexUnit::getInternalType(LLTexUnit::TT_TEXTURE), 0, GL_RGB16F_ARB, noiseRes, noiseRes, GL_RGB, GL_FLOAT, noise, false);
@@ -1147,7 +1159,7 @@ void LLPipeline::createGLBuffers()
 				noise[i] = ll_frand()*2.0-1.0;
 			}
 
-			LLImageGL::generateTextures(1, &mTrueNoiseMap);
+			LLImageGL::generateTextures(LLTexUnit::TT_TEXTURE, 1, &mTrueNoiseMap);
 			gGL.getTexUnit(0)->bindManual(LLTexUnit::TT_TEXTURE, mTrueNoiseMap);
 			LLImageGL::setManualImage(LLTexUnit::getInternalType(LLTexUnit::TT_TEXTURE), 0, GL_RGB16F_ARB, noiseRes, noiseRes, GL_RGB,GL_FLOAT, noise, false);
 			gGL.getTexUnit(0)->setTextureFilteringOption(LLTexUnit::TFO_POINT);
@@ -1183,7 +1195,7 @@ void LLPipeline::createGLBuffers()
 				}
 			}
 
-			LLImageGL::generateTextures(1, &mLightFunc);
+			LLImageGL::generateTextures(LLTexUnit::TT_TEXTURE, 1, &mLightFunc);
 			gGL.getTexUnit(0)->bindManual(LLTexUnit::TT_TEXTURE, mLightFunc);
 			LLImageGL::setManualImage(LLTexUnit::getInternalType(LLTexUnit::TT_TEXTURE), 0, GL_R8, lightResX, lightResY, GL_RED, GL_UNSIGNED_BYTE, lg, false);
 			gGL.getTexUnit(0)->setTextureAddressMode(LLTexUnit::TAM_CLAMP);
@@ -2829,6 +2841,11 @@ void LLPipeline::processPartitionQ()
 	mPartitionQ.clear();
 }
 
+void LLPipeline::markMeshDirty(LLSpatialGroup* group)
+{ //queue the group; postSort calls rebuildMesh() on every queued group and then clears the queue
+	mMeshDirtyGroup.push_back(group); //no duplicate check is done here
+}
+
 void LLPipeline::markRebuild(LLSpatialGroup* group, BOOL priority)
 {
 	LLMemType mt(LLMemType::MTYPE_PIPELINE);
@@ -3437,15 +3454,43 @@ void LLPipeline::postSort(LLCamera& camera)
 			}
 		}
 	}
+	
+	//flush particle VB
+	LLVOPartGroup::sVB->flush();
+
+	/*bool use_transform_feedback = gTransformPositionProgram.mProgramObject && !mMeshDirtyGroup.empty();
+
+	if (use_transform_feedback)
+	{ //place a query around potential transform feedback code for synchronization
+		mTransformFeedbackPrimitives = 0;
+
+		if (!mMeshDirtyQueryObject)
+		{
+			glGenQueriesARB(1, &mMeshDirtyQueryObject);
+		}
+
 		
+		glBeginQueryARB(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN, mMeshDirtyQueryObject);
+	}*/
+
+	//pack vertex buffers for groups that chose to delay their updates
+	for (LLSpatialGroup::sg_vector_t::iterator iter = mMeshDirtyGroup.begin(); iter != mMeshDirtyGroup.end(); ++iter)
+	{
+		(*iter)->rebuildMesh();
+	}
+
+	/*if (use_transform_feedback)
+	{
+		glEndQueryARB(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN);
+	}*/
+	
+	mMeshDirtyGroup.clear();
+
 	if (!sShadowRender)
 	{
 		std::sort(sCull->beginAlphaGroups(), sCull->endAlphaGroups(), LLSpatialGroup::CompareDepthGreater());
 	}
 
-	//flush particle VB
-	LLVOPartGroup::sVB->flush();
-
 	llpushcallstacks ;
 	// only render if the flag is set. The flag is only set if we are in edit mode or the toggle is set in the menus
 	if (LLFloaterReg::instanceVisible("beacons") && !sShadowRender)
@@ -3532,6 +3577,33 @@ void LLPipeline::postSort(LLCamera& camera)
 		}
 	}
 
+	/*static LLFastTimer::DeclareTimer FTM_TRANSFORM_WAIT("Transform Fence");
+	static LLFastTimer::DeclareTimer FTM_TRANSFORM_DO_WORK("Transform Work");
+	if (use_transform_feedback)
+	{ //using transform feedback, wait for transform feedback to complete
+		LLFastTimer t(FTM_TRANSFORM_WAIT);
+
+		S32 done = 0;
+		//glGetQueryivARB(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN, GL_CURRENT_QUERY, &count);
+		
+		glGetQueryObjectivARB(mMeshDirtyQueryObject, GL_QUERY_RESULT_AVAILABLE, &done);
+		
+		while (!done)
+		{ 
+			{
+				LLFastTimer t(FTM_TRANSFORM_DO_WORK);
+				F32 max_time = llmin(gFrameIntervalSeconds*10.f, 1.f);
+				//do some useful work while we wait
+				LLAppViewer::getTextureCache()->update(max_time); // unpauses the texture cache thread
+				LLAppViewer::getImageDecodeThread()->update(max_time); // unpauses the image thread
+				LLAppViewer::getTextureFetch()->update(max_time); // unpauses the texture fetch thread
+			}
+			glGetQueryObjectivARB(mMeshDirtyQueryObject, GL_QUERY_RESULT_AVAILABLE, &done);
+		}
+
+		mTransformFeedbackPrimitives = 0;
+	}*/
+						
 	//LLSpatialGroup::sNoDelete = FALSE;
 	llpushcallstacks ;
 }
diff --git a/indra/newview/pipeline.h b/indra/newview/pipeline.h
index 252fe1346ce026bc0fb205080497049b98b96d38..9eebe3831fa7842302e109adbbf4bf7ef897fb15 100644
--- a/indra/newview/pipeline.h
+++ b/indra/newview/pipeline.h
@@ -161,6 +161,7 @@ class LLPipeline
 	void		markRebuild(LLSpatialGroup* group, BOOL priority = FALSE);
 	void        markRebuild(LLDrawable *drawablep, LLDrawable::EDrawableFlags flag = LLDrawable::REBUILD_ALL, BOOL priority = FALSE);
 	void		markPartitionMove(LLDrawable* drawablep);
+	void		markMeshDirty(LLSpatialGroup* group);
 
 	//get the object between start and end that's closest to start.
 	LLViewerObject* lineSegmentIntersectInWorld(const LLVector3& start, const LLVector3& end,
@@ -592,6 +593,7 @@ class LLPipeline
 	BOOL					mVertexShadersEnabled;
 	S32						mVertexShadersLoaded; // 0 = no, 1 = yes, -1 = failed
 
+	U32						mTransformFeedbackPrimitives; //number of primitives expected to be generated by transform feedback
 protected:
 	BOOL					mRenderTypeEnabled[NUM_RENDER_TYPES];
 	std::stack<std::string> mRenderTypeEnableStack;
@@ -649,6 +651,9 @@ class LLPipeline
 	LLSpatialGroup::sg_vector_t		mGroupQ1; //priority
 	LLSpatialGroup::sg_vector_t		mGroupQ2; // non-priority
 
+	LLSpatialGroup::sg_vector_t		mMeshDirtyGroup; //groups that need rebuildMesh called
+	U32 mMeshDirtyQueryObject;
+
 	LLDrawable::drawable_list_t		mPartitionQ; //drawables that need to update their spatial partition radius 
 
 	bool mGroupQ2Locked;