diff --git a/indra/llrender/llrender.cpp b/indra/llrender/llrender.cpp
index 8ab8d3151b749db13203959be5bc16d0a9542fb2..7c974934aec142a1cac356c613d3a15243e969ec 100644
--- a/indra/llrender/llrender.cpp
+++ b/indra/llrender/llrender.cpp
@@ -35,6 +35,7 @@
 #include "llrendertarget.h"
 #include "lltexture.h"
 #include "llshadermgr.h"
+#include "llmd5.h"
 
 #if LL_WINDOWS
 extern void APIENTRY gl_debug_callback(GLenum source,
@@ -66,6 +67,14 @@ LLVector2 LLRender::sUIGLScaleFactor = LLVector2(1.f, 1.f);
 static const U32 LL_NUM_TEXTURE_LAYERS = 32; 
 static const U32 LL_NUM_LIGHT_UNITS = 8;
 
+struct LLVBCache // One entry of sVBCache: a vertex buffer plus the time it was last used.
+{
+    LLPointer<LLVertexBuffer> vb; // buffer that LLRender::flush() draws from when the hash of the pending vertices matches
+    std::chrono::steady_clock::time_point touched; // set on insertion and on every cache hit; entries idle for more than 1s are erased during cache cleaning
+};
+
+static std::unordered_map<std::size_t, LLVBCache> sVBCache; // keyed by the first size_t of the MD5 digest computed in LLRender::flush(); NOTE(review): only part of the digest is used as the key, so distinct vertex streams could collide
+
 static const GLenum sGLTextureType[] =
 {
 	GL_TEXTURE_2D,
@@ -1635,24 +1644,105 @@ void LLRender::flush()
 
         if (mBuffer)
         {
-            if (mBuffer->useVBOs() && !mBuffer->isLocked())
-            { //hack to only flush the part of the buffer that was updated (relies on stream draw using buffersubdata)
-                mBuffer->getVertexStrider(mVerticesp, 0, count);
-                mBuffer->getTexCoord0Strider(mTexcoordsp, 0, count);
-                mBuffer->getColorStrider(mColorsp, 0, count);
+
+            LLMD5 hash;
+            U32 attribute_mask = LLGLSLShader::sCurBoundShaderPtr->mAttributeMask;
+
+            {
+                LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb cache hash");
+
+                hash.update((U8*)mVerticesp.get(), count * sizeof(LLVector4a));
+                if (attribute_mask & LLVertexBuffer::MAP_TEXCOORD0)
+                {
+                    hash.update((U8*)mTexcoordsp.get(), count * sizeof(LLVector2));
+                }
+
+                if (attribute_mask & LLVertexBuffer::MAP_COLOR)
+                {
+                    hash.update((U8*)mColorsp.get(), count * sizeof(LLColor4U));
+                }
+
+                hash.finalize();
+            }
+            
+            size_t vhash[2];
+            hash.raw_digest((unsigned char*) vhash);
+
+            // check the VB cache before making a new vertex buffer
+            // This is a giant hack to deal with (mostly) our terrible UI rendering code
+            // that was built on top of OpenGL immediate mode.  Huge performance wins
+            // can be had by not uploading geometry to VRAM unless absolutely necessary.
+            // Most of our usage of the "immediate mode" style draw calls is actually
+            // sending the same geometry over and over again.
+            // To leverage this, we maintain a running hash of the vertex stream being
+            // built up before a flush, and then check that hash against a VB 
+            // cache just before creating a vertex buffer in VRAM
+            auto& cache = sVBCache.find(vhash[0]);
+
+            LLPointer<LLVertexBuffer> vb;
+
+            if (cache != sVBCache.end())
+            {
+                LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb cache hit");
+                // cache hit, just use the cached buffer
+                vb = cache->second.vb;
+                cache->second.touched = std::chrono::steady_clock::now();
+            }
+            else
+            {
+                LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb cache miss");
+                vb = new LLVertexBuffer(attribute_mask, GL_STATIC_DRAW);
+                vb->allocateBuffer(count, 0, true);
+                vb->setPositionData((LLVector4a*) mVerticesp.get());
+
+                if (attribute_mask & LLVertexBuffer::MAP_TEXCOORD0)
+                {
+                    vb->setTexCoordData(mTexcoordsp.get());
+                }
+
+                if (attribute_mask & LLVertexBuffer::MAP_COLOR)
+                {
+                    vb->setColorData(mColorsp.get());
+                }
+
+                vb->unbind();
+
+                sVBCache[vhash[0]] = { vb , std::chrono::steady_clock::now() };
+
+                static U32 miss_count = 0;
+                miss_count++;
+                if (miss_count > 1024)
+                {
+                    LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb cache clean");
+                    miss_count = 0;
+                    auto now = std::chrono::steady_clock::now();
+
+                    using namespace std::chrono_literals;
+                    // every 1024 misses, clean the cache of any VBs that haven't been touched in the last second
+                    for (auto& iter = sVBCache.begin(); iter != sVBCache.end(); )
+                    {
+                        if (now - iter->second.touched > 1s)
+                        {
+                            iter = sVBCache.erase(iter);
+                        }
+                        else
+                        {
+                            ++iter;
+                        }
+                    }
+                }
             }
 
-            mBuffer->flush();
-            mBuffer->setBuffer(immediate_mask);
+            vb->setBuffer(immediate_mask);
 
             if (mMode == LLRender::QUADS && sGLCoreProfile)
             {
-                mBuffer->drawArrays(LLRender::TRIANGLES, 0, count);
+                vb->drawArrays(LLRender::TRIANGLES, 0, count);
                 mQuadCycle = 1;
             }
             else
             {
-                mBuffer->drawArrays(mMode, 0, count);
+                vb->drawArrays(mMode, 0, count);
             }
         }
         else
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index 20261dcb8a2f3622e04ed6712ab2c5f051ba1ba0..57be21cf6e62b1e435f9a98597fc43ea3179c689 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -1180,7 +1180,6 @@ bool expand_region(LLVertexBuffer::MappedRegion& region, S32 start, S32 end)
 U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, bool map_range)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
-	bindGLBuffer(true);
 	if (mFinal)
 	{
 		LL_ERRS() << "LLVertexBuffer::mapVeretxBuffer() called on a finalized buffer." << LL_ENDL;
@@ -1256,7 +1255,6 @@ U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, bool map_ran
 U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
-	bindGLIndices(true);
 	if (mFinal)
 	{
 		LL_ERRS() << "LLVertexBuffer::mapIndexBuffer() called on a finalized buffer." << LL_ENDL;
@@ -2070,6 +2068,24 @@ void LLVertexBuffer::setupVertexBufferFast(U32 data_mask)
         void* ptr = (void*)(base + mOffsets[TYPE_VERTEX]);
         glVertexAttribPointer(loc, 3, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_VERTEX], ptr);
     }
-	}
+}
+
+void LLVertexBuffer::setPositionData(const LLVector4a* data) // Hands `data` to flush_vbo as the position data for all getNumVerts() vertices.
+{
+    bindGLBuffer(); // presumably makes this buffer the current GL_ARRAY_BUFFER so the write below targets it - confirm
+    flush_vbo(GL_ARRAY_BUFFER, 0, sizeof(LLVector4a) * getNumVerts(), (U8*) data); // byte range starts at 0 and is sized for getNumVerts() LLVector4a entries; NOTE(review): confirm whether flush_vbo treats its third argument as an inclusive or exclusive end offset
+}
+
+void LLVertexBuffer::setTexCoordData(const LLVector2* data) // Hands `data` to flush_vbo as the TYPE_TEXCOORD0 data for all getNumVerts() vertices.
+{
+    bindGLBuffer(); // presumably makes this buffer the current GL_ARRAY_BUFFER so the write below targets it - confirm
+    flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TEXCOORD0], mOffsets[TYPE_TEXCOORD0] + sTypeSize[TYPE_TEXCOORD0] * getNumVerts(), (U8*)data); // range begins at the TYPE_TEXCOORD0 offset and spans sTypeSize[TYPE_TEXCOORD0] bytes per vertex; NOTE(review): confirm flush_vbo's end-offset convention (inclusive vs exclusive)
+}
+
+void LLVertexBuffer::setColorData(const LLColor4U* data) // Hands `data` to flush_vbo as the TYPE_COLOR data for all getNumVerts() vertices.
+{
+    bindGLBuffer(); // presumably makes this buffer the current GL_ARRAY_BUFFER so the write below targets it - confirm
+    flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_COLOR], mOffsets[TYPE_COLOR] + sTypeSize[TYPE_COLOR] * getNumVerts(), (U8*) data); // range begins at the TYPE_COLOR offset and spans sTypeSize[TYPE_COLOR] bytes per vertex; NOTE(review): confirm flush_vbo's end-offset convention (inclusive vs exclusive)
+}
 
 
diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h
index 74b951884ddad7e8d360960d899f6b8b7932b4ec..926d37b0523278eff7b2d38b5f8f379c6952ddd2 100644
--- a/indra/llrender/llvertexbuffer.h
+++ b/indra/llrender/llvertexbuffer.h
@@ -212,6 +212,10 @@ class LLVertexBuffer : public LLRefCount
     bool getMetallicRoughnessTexcoordStrider(LLStrider<LLVector2>& strider, S32 index=0, S32 count = -1, bool map_range = false);
     bool getEmissiveTexcoordStrider(LLStrider<LLVector2>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	
+    void setPositionData(const LLVector4a* data); // passes `data` to flush_vbo for a byte range sized for getNumVerts() positions, starting at offset 0
+    void setTexCoordData(const LLVector2* data); // passes `data` to flush_vbo for a byte range sized for getNumVerts() entries, starting at the TYPE_TEXCOORD0 offset
+    void setColorData(const LLColor4U* data); // passes `data` to flush_vbo for a byte range sized for getNumVerts() entries, starting at the TYPE_COLOR offset
+
 
 	bool useVBOs() const;
 	bool isEmpty() const					{ return mEmpty; }