diff --git a/indra/llrender/llgl.cpp b/indra/llrender/llgl.cpp
index 59c63d846563ab9dfe4998cac101ea4a2c24cb97..189a460001d530241712542188c74f4baf015d59 100644
--- a/indra/llrender/llgl.cpp
+++ b/indra/llrender/llgl.cpp
@@ -650,7 +650,7 @@ bool LLGLManager::initGL()
 
 	if (LLRender::sGLCoreProfile)
 	{
-		mNumTextureUnits = mNumTextureImageUnits;
+		mNumTextureUnits = llmin(mNumTextureImageUnits, MAX_GL_TEXTURE_UNITS); // core profile: cap the unit count at MAX_GL_TEXTURE_UNITS (assuming llmin yields the smaller argument)
 	}
 	else if (mHasMultitexture)
 	{
diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp
index 12089e5ad3d0560acad83e6008e2db64cd0a79e0..cbdb8f83f68b9f2743cf5680c46bf782cd47abf6 100644
--- a/indra/llrender/llimagegl.cpp
+++ b/indra/llrender/llimagegl.cpp
@@ -725,9 +725,12 @@ void LLImageGL::setImage(const U8* data_in, BOOL data_hasmips)
 		}
 		else if (!is_compressed)
 		{
-			if (mAutoGenMips && !LLRender::sGLCoreProfile) //auto-generating mipmaps is deprecated in GL 3.0
+			if (mAutoGenMips)
 			{
-				glTexParameteri(LLTexUnit::getInternalType(mBindTarget), GL_GENERATE_MIPMAP_SGIS, TRUE);
+				if (!glGenerateMipmap) // entry point is null: fall back to the GL_GENERATE_MIPMAP_SGIS texture parameter
+				{
+					glTexParameteri(LLTexUnit::getInternalType(mBindTarget), GL_GENERATE_MIPMAP_SGIS, TRUE);
+				}
 				stop_glerror();
 				{
 // 					LLFastTimer t2(FTM_TEMP4);
@@ -756,6 +759,11 @@ void LLImageGL::setImage(const U8* data_in, BOOL data_hasmips)
 						stop_glerror();
 					}
 				}
+
+				if (glGenerateMipmap) // entry point available: request mipmap generation explicitly for this bind target
+				{
+					glGenerateMipmap(LLTexUnit::getInternalType(mBindTarget));
+				}
 			}
 			else
 			{
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index 1b7b0cdf3e57f59d2e9174e9310e9c1c0ebf74f6..199699449adbd0b3de2c01719cd2f49a00694133 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -34,6 +34,7 @@
 #include "llmemtype.h"
 #include "llrender.h"
 #include "llvector4a.h"
+#include "llshadermgr.h"
 #include "llglslshader.h"
 #include "llmemory.h"
 
@@ -121,6 +122,7 @@ class LLGLSyncFence : public LLGLFence
 
 };
 
+//NOTE: each component must be AT LEAST 4 bytes in size to avoid a performance penalty on AMD hardware
 S32 LLVertexBuffer::sTypeSize[LLVertexBuffer::TYPE_MAX] =
 {
 	sizeof(LLVector4), // TYPE_VERTEX,
@@ -130,7 +132,7 @@ S32 LLVertexBuffer::sTypeSize[LLVertexBuffer::TYPE_MAX] =
 	sizeof(LLVector2), // TYPE_TEXCOORD2,
 	sizeof(LLVector2), // TYPE_TEXCOORD3,
 	sizeof(LLColor4U), // TYPE_COLOR,
-	sizeof(U8),		   // TYPE_EMISSIVE
+	sizeof(LLColor4U), // TYPE_EMISSIVE, only alpha is used currently
 	sizeof(LLVector4), // TYPE_BINORMAL,
 	sizeof(F32),	   // TYPE_WEIGHT,
 	sizeof(LLVector4), // TYPE_WEIGHT4,
@@ -1071,7 +1073,7 @@ void LLVertexBuffer::setupVertexArray()
 		2, //TYPE_TEXCOORD2,
 		2, //TYPE_TEXCOORD3,
 		4, //TYPE_COLOR,
-		1, //TYPE_EMISSIVE,
+		4, //TYPE_EMISSIVE,
 		3, //TYPE_BINORMAL,
 		1, //TYPE_WEIGHT,
 		4, //TYPE_WEIGHT4,
@@ -1842,9 +1844,9 @@ bool LLVertexBuffer::getColorStrider(LLStrider<LLColor4U>& strider, S32 index, S
 {
 	return VertexBufferStrider<LLColor4U,TYPE_COLOR>::get(*this, strider, index, count, map_range);
 }
-bool LLVertexBuffer::getEmissiveStrider(LLStrider<U8>& strider, S32 index, S32 count, bool map_range)
+bool LLVertexBuffer::getEmissiveStrider(LLStrider<LLColor4U>& strider, S32 index, S32 count, bool map_range) // TYPE_EMISSIVE strider, typed as LLColor4U to match sTypeSize[TYPE_EMISSIVE]
 {
-	return VertexBufferStrider<U8,TYPE_EMISSIVE>::get(*this, strider, index, count, map_range);
+	return VertexBufferStrider<LLColor4U,TYPE_EMISSIVE>::get(*this, strider, index, count, map_range);
 }
 bool LLVertexBuffer::getWeightStrider(LLStrider<F32>& strider, S32 index, S32 count, bool map_range)
 {
@@ -1994,18 +1996,17 @@ void LLVertexBuffer::setBuffer(U32 data_mask)
 		if (shader)
 		{
 			U32 required_mask = 0;
-			for (U32 i = 0; i < LLVertexBuffer::TYPE_MAX; ++i)
+			for (U32 i = 0; i < LLVertexBuffer::TYPE_TEXTURE_INDEX; ++i)
 			{
 				if (shader->getAttribLocation(i) > -1)
 				{
 					U32 required = 1 << i;
 					if ((data_mask & required) == 0)
 					{
-						llwarns << "Missing attribute: " << i << llendl;
+						llwarns << "Missing attribute: " << LLShaderMgr::instance()->mReservedAttribs[i] << llendl; // log the mReservedAttribs entry (presumably the attribute name) instead of the raw index
 					}
 
 					required_mask |= required;
-
 				}
 			}
 
@@ -2186,7 +2187,7 @@ void LLVertexBuffer::setupVertexBuffer(U32 data_mask)
 		{
 			S32 loc = TYPE_EMISSIVE;
 			void* ptr = (void*)(base + mOffsets[TYPE_EMISSIVE]);
-			glVertexAttribPointerARB(loc, 1, GL_UNSIGNED_BYTE, GL_TRUE, LLVertexBuffer::sTypeSize[TYPE_EMISSIVE], ptr);
+			glVertexAttribPointerARB(loc, 4, GL_UNSIGNED_BYTE, GL_TRUE, LLVertexBuffer::sTypeSize[TYPE_EMISSIVE], ptr); // 4 normalized unsigned bytes per element; stride is sTypeSize[TYPE_EMISSIVE]
 		}
 		if (data_mask & MAP_WEIGHT)
 		{
diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h
index d116a552faff27a4d5d3c4b0a47633ca534004e1..98cab8b1628d714a0681c5fed5433877d519ebf7 100644
--- a/indra/llrender/llvertexbuffer.h
+++ b/indra/llrender/llvertexbuffer.h
@@ -228,7 +228,7 @@ class LLVertexBuffer : public LLRefCount
 	bool getNormalStrider(LLStrider<LLVector3>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	bool getBinormalStrider(LLStrider<LLVector3>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	bool getColorStrider(LLStrider<LLColor4U>& strider, S32 index=0, S32 count = -1, bool map_range = false);
-	bool getEmissiveStrider(LLStrider<U8>& strider, S32 index=0, S32 count = -1, bool map_range = false);
+	bool getEmissiveStrider(LLStrider<LLColor4U>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	bool getWeightStrider(LLStrider<F32>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	bool getWeight4Strider(LLStrider<LLVector4>& strider, S32 index=0, S32 count = -1, bool map_range = false);
 	bool getClothWeightStrider(LLStrider<LLVector4>& strider, S32 index=0, S32 count = -1, bool map_range = false);
diff --git a/indra/newview/app_settings/shaders/class1/environment/waterFogF.glsl b/indra/newview/app_settings/shaders/class1/environment/waterFogF.glsl
index e4db326bede85aedd7cc8b24939d0725d9388521..57b3a6d001a1420204cd8824c6b167f465bfc6ab 100644
--- a/indra/newview/app_settings/shaders/class1/environment/waterFogF.glsl
+++ b/indra/newview/app_settings/shaders/class1/environment/waterFogF.glsl
@@ -26,6 +26,9 @@
 
 VARYING float fog_depth;
 
+uniform vec4 waterFogColor;
+uniform float waterFogEnd;
+
 vec4 applyWaterFog(vec4 color)
 {
 	// GL_EXP2 Fog
@@ -33,9 +36,9 @@ vec4 applyWaterFog(vec4 color)
 	// GL_EXP Fog
 	// float fog = exp(-gl_Fog.density * fog_depth);
 	// GL_LINEAR Fog
-	float fog = (gl_Fog.end - fog_depth) * gl_Fog.scale;
+	float fog = (waterFogEnd - fog_depth) * gl_Fog.scale; // NOTE(review): end comes from the waterFogEnd uniform but scale is still read from gl_Fog - confirm that built-in is available wherever this shader is compiled
 	fog = clamp(fog, 0.0, 1.0);
-	color.rgb = mix(gl_Fog.color.rgb, color.rgb, fog);
+	color.rgb = mix(waterFogColor.rgb, color.rgb, fog); // fog == 0.0 selects waterFogColor, fog == 1.0 keeps the incoming color
 	return color;
 }
 
diff --git a/indra/newview/lldrawpoolsky.cpp b/indra/newview/lldrawpoolsky.cpp
index 8a3871b6b423df18b477afdbeaed1bcd10034557..7f7d9f65c60c048a26d02438e278dd21893f190f 100644
--- a/indra/newview/lldrawpoolsky.cpp
+++ b/indra/newview/lldrawpoolsky.cpp
@@ -76,13 +76,14 @@ void LLDrawPoolSky::render(S32 pass)
 		return;
 	}
 	
-	// use a shader only underwater
+	// don't render sky under water (background just gets cleared to fog color)
 	if(mVertexShaderLevel > 0 && LLPipeline::sUnderWaterRender)
 	{
-		mShader = &gObjectFullbrightWaterProgram;
-		mShader->bind();
+		return; // exits render() before the shader selection that follows
 	}
-	else if (LLGLSLShader::sNoFixedFunction)
+
+
+	if (LLGLSLShader::sNoFixedFunction)
 	{ //just use the UI shader (generic single texture no lighting)
 		gOneTextureNoColorProgram.bind();
 	}
diff --git a/indra/newview/lldrawpoolwater.cpp b/indra/newview/lldrawpoolwater.cpp
index 5de0b8c7960b6417af932210cba53dfd6d9079ce..f6fe96877d261aad50f54a05ace23c491132088a 100644
--- a/indra/newview/lldrawpoolwater.cpp
+++ b/indra/newview/lldrawpoolwater.cpp
@@ -59,6 +59,8 @@ BOOL LLDrawPoolWater::sSkipScreenCopy = FALSE;
 BOOL LLDrawPoolWater::sNeedsReflectionUpdate = TRUE;
 BOOL LLDrawPoolWater::sNeedsDistortionUpdate = TRUE;
 LLColor4 LLDrawPoolWater::sWaterFogColor = LLColor4(0.2f, 0.5f, 0.5f, 0.f);
+F32 LLDrawPoolWater::sWaterFogEnd = 0.f; // assigned in LLVOSky::updateFog; uploaded as the "waterFogEnd" uniform by LLWaterParamManager::updateShaderUniforms
+
 LLVector3 LLDrawPoolWater::sLightDir;
 
 LLDrawPoolWater::LLDrawPoolWater() :
diff --git a/indra/newview/lldrawpoolwater.h b/indra/newview/lldrawpoolwater.h
index 99b541ca5a384d1de939c9795e96b908f0265abd..aeeba179d6ca134c240ea7cf70ec4768a95cea20 100644
--- a/indra/newview/lldrawpoolwater.h
+++ b/indra/newview/lldrawpoolwater.h
@@ -49,6 +49,7 @@ class LLDrawPoolWater: public LLFacePool
 	static LLVector3 sLightDir;
 
 	static LLColor4 sWaterFogColor;
+	static F32 sWaterFogEnd; // fog end distance handed to shaders in place of gl_Fog.end
 
 	enum
 	{
diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp
index d36379b0e7546a756164113290fc43c37a837595..36b88ebbd47fc3dbcf825813d690a2515ef0c64c 100644
--- a/indra/newview/llface.cpp
+++ b/indra/newview/llface.cpp
@@ -1764,7 +1764,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
 	if (rebuild_emissive)
 	{
 		LLFastTimer t(FTM_FACE_GEOM_EMISSIVE);
-		LLStrider<U8> emissive;
+		LLStrider<LLColor4U> emissive;
 		mVertexBuffer->getEmissiveStrider(emissive, mGeomIndex, mGeomCount, map_range);
 
 		U8 glow = (U8) llclamp((S32) (getTextureEntry()->getGlow()*255), 0, 255);
@@ -1783,8 +1783,8 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
 		src.loadua((F32*) vec);
 
 		LLVector4a* dst = (LLVector4a*) emissive.get();
-		S32 num_vecs = num_vertices/16;
-		if (num_vertices%16 > 0)
+		S32 num_vecs = num_vertices/4; // 4 emissive entries per LLVector4a element of dst (assumes a 16-byte LLVector4a and a 4-byte LLColor4U)
+		if (num_vertices%4 > 0) // round up so a trailing partial group is still covered
 		{
 			++num_vecs;
 		}
diff --git a/indra/newview/llviewerdisplay.cpp b/indra/newview/llviewerdisplay.cpp
index 4f3127805f804adb11bfaa3a96cdebd1281b4eee..efe93b7f48aa546b0fe86f535a1bde3d9a52409a 100644
--- a/indra/newview/llviewerdisplay.cpp
+++ b/indra/newview/llviewerdisplay.cpp
@@ -865,6 +865,11 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot)
 			else
 			{
 				gPipeline.mScreen.bindTarget();
+				if (LLPipeline::sUnderWaterRender && !gPipeline.canUseWindLightShaders())
+				{
+					const LLColor4 &col = LLDrawPoolWater::sWaterFogColor;
+					glClearColor(col.mV[0], col.mV[1], col.mV[2], 0.f); // clear to the water fog RGB with zero alpha; NOTE(review): the clear color is not reset in this hunk - confirm later clears set their own
+				}
 				gPipeline.mScreen.clear();
 			}
 			
diff --git a/indra/newview/llvosky.cpp b/indra/newview/llvosky.cpp
index d90c3be6c78efcf7b2f84ad40f46aed8537955ed..e9db37821b1895ee29c0cccb0877da2be45f33b9 100644
--- a/indra/newview/llvosky.cpp
+++ b/indra/newview/llvosky.cpp
@@ -2040,9 +2040,12 @@ void LLVOSky::updateFog(const F32 distance)
 {
 	if (!gPipeline.hasRenderDebugFeatureMask(LLPipeline::RENDER_DEBUG_FEATURE_FOG))
 	{
-		glFogf(GL_FOG_DENSITY, 0);
-		glFogfv(GL_FOG_COLOR, (F32 *) &LLColor4::white.mV);
-		glFogf(GL_FOG_END, 1000000.f);
+		if (!LLGLSLShader::sNoFixedFunction) // the glFog* calls are skipped when sNoFixedFunction is set
+		{
+			glFogf(GL_FOG_DENSITY, 0);
+			glFogfv(GL_FOG_COLOR, (F32 *) &LLColor4::white.mV);
+			glFogf(GL_FOG_END, 1000000.f);
+		}
 		return;
 	}
 
@@ -2112,7 +2115,10 @@ void LLVOSky::updateFog(const F32 distance)
 	if (camera_height > water_height)
 	{
 		LLColor4 fog(render_fog_color);
-		glFogfv(GL_FOG_COLOR, fog.mV);
+		if (!LLGLSLShader::sNoFixedFunction) // only the GL call is gated; mGLFogCol below is updated either way
+		{
+			glFogfv(GL_FOG_COLOR, fog.mV);
+		}
 		mGLFogCol = fog;
 
 		if (hide_clip_plane)
@@ -2120,13 +2126,19 @@ void LLVOSky::updateFog(const F32 distance)
 			// For now, set the density to extend to the cull distance.
 			const F32 f_log = 2.14596602628934723963618357029f; // sqrt(fabs(log(0.01f)))
 			fog_density = f_log/fog_distance;
-			glFogi(GL_FOG_MODE, GL_EXP2);
+			if (!LLGLSLShader::sNoFixedFunction)
+			{
+				glFogi(GL_FOG_MODE, GL_EXP2);
+			}
 		}
 		else
 		{
 			const F32 f_log = 4.6051701859880913680359829093687f; // fabs(log(0.01f))
 			fog_density = (f_log)/fog_distance;
-			glFogi(GL_FOG_MODE, GL_EXP);
+			if (!LLGLSLShader::sNoFixedFunction)
+			{
+				glFogi(GL_FOG_MODE, GL_EXP);
+			}
 		}
 	}
 	else
@@ -2146,24 +2158,27 @@ void LLVOSky::updateFog(const F32 distance)
 		fogCol.setAlpha(1);
 
 		// set the gl fog color
-		glFogfv(GL_FOG_COLOR, (F32 *) &fogCol.mV);
 		mGLFogCol = fogCol;
 
 		// set the density based on what the shaders use
 		fog_density = water_fog_density * gSavedSettings.getF32("WaterGLFogDensityScale");
-		glFogi(GL_FOG_MODE, GL_EXP2);
+
+		if (!LLGLSLShader::sNoFixedFunction) // mGLFogCol and fog_density are still computed above; only the GL state calls are gated
+		{
+			glFogfv(GL_FOG_COLOR, (F32 *) &fogCol.mV);
+			glFogi(GL_FOG_MODE, GL_EXP2);
+		}
 	}
 
 	mFogColor = sky_fog_color;
 	mFogColor.setAlpha(1);
-	LLGLSFog gls_fog;
-
-	glFogf(GL_FOG_END, fog_distance*2.2f);
-
-	glFogf(GL_FOG_DENSITY, fog_density);
+	LLDrawPoolWater::sWaterFogEnd = fog_distance*2.2f; // stored outside the fixed-function branch; LLWaterParamManager::updateShaderUniforms uploads it as "waterFogEnd"
 
 	if (!LLGLSLShader::sNoFixedFunction)
 	{
+		LLGLSFog gls_fog; // scoped to the fixed-function branch only
+		glFogf(GL_FOG_END, fog_distance*2.2f); // same expression as the value stored in LLDrawPoolWater::sWaterFogEnd
+		glFogf(GL_FOG_DENSITY, fog_density);
 		glHint(GL_FOG_HINT, GL_NICEST);
 	}
 	stop_glerror();
diff --git a/indra/newview/llwaterparammanager.cpp b/indra/newview/llwaterparammanager.cpp
index 1a98d4c6c25e6043e947ae43758a60f688b9a3e6..20b34637b8cb1ad4beb2b925c2aca7437b813d32 100644
--- a/indra/newview/llwaterparammanager.cpp
+++ b/indra/newview/llwaterparammanager.cpp
@@ -190,6 +190,7 @@ void LLWaterParamManager::updateShaderUniforms(LLGLSLShader * shader)
 		shader->uniform4fv(LLViewerShaderMgr::LIGHTNORM, 1, LLWLParamManager::getInstance()->getRotatedLightDir().mV);
 		shader->uniform3fv("camPosLocal", 1, LLViewerCamera::getInstance()->getOrigin().mV);
 		shader->uniform4fv("waterFogColor", 1, LLDrawPoolWater::sWaterFogColor.mV);
+		shader->uniform1f("waterFogEnd", LLDrawPoolWater::sWaterFogEnd); // feeds the "uniform float waterFogEnd" declared in waterFogF.glsl
 		shader->uniform4fv("waterPlane", 1, mWaterPlane.mV);
 		shader->uniform1f("waterFogDensity", getFogDensity());
 		shader->uniform1f("waterFogKS", mWaterFogKS);