SL-18190 Faster better stronger radiance/irradiance maps

04d3a29a · David Parks · 4f7c86a1 · 04d3a29a · 04d3a29a · 04d3a29a
Commit 04d3a29a authored 2 years ago by David Parks
--- a/indra/llrender/llglheaders.h
+++ b/indra/llrender/llglheaders.h
@@ -1061,13 +1061,6 @@ extern void glGetBufferPointervARB (GLenum, GLenum, GLvoid* *);
 #endif

 #if defined(TRACY_ENABLE) && LL_PROFILER_ENABLE_TRACY_OPENGL
-    // Tracy uses the following:
-    //    glGenQueries
-    //    glGetQueryiv
-    //    glGetQueryObjectiv
-    #define glGenQueries        glGenQueriesARB
-    #define glGetQueryiv        glGetQueryivARB
-    #define glGetQueryObjectiv  glGetQueryObjectivARB
    #include <tracy/TracyOpenGL.hpp>
 #endif
    

--- a/indra/llrender/llrendertarget.cpp
+++ b/indra/llrender/llrendertarget.cpp
@@ -471,6 +471,7 @@ void LLRenderTarget::release()

 void LLRenderTarget::bindTarget()
 {
+    LL_PROFILE_GPU_ZONE("bindTarget");
    llassert(mFBO);

 	if (mFBO)
@@ -577,6 +578,7 @@ void LLRenderTarget::bindTexture(U32 index, S32 channel, LLTexUnit::eTextureFilt

 void LLRenderTarget::flush(bool fetch_depth)
 {
+    LL_PROFILE_GPU_ZONE("rt flush");
 	gGL.flush();
    llassert(mFBO);
 	if (!mFBO)

--- a/indra/newview/app_settings/shaders/class1/deferred/deferredUtil.glsl
+++ b/indra/newview/app_settings/shaders/class1/deferred/deferredUtil.glsl
@@ -366,7 +366,7 @@ vec3 pbrIbl(vec3 diffuseColor,
 	vec2 brdf = BRDF(clamp(nv, 0, 1), 1.0-perceptualRough);
 	vec3 diffuseLight = irradiance;
 	vec3 specularLight = radiance;
-
+    
 	vec3 diffuse = diffuseLight * diffuseColor;
 	vec3 specular = specularLight * (specularColor * brdf.x + brdf.y);


--- a/indra/newview/app_settings/shaders/class1/interface/irradianceGenF.glsl
+++ b/indra/newview/app_settings/shaders/class1/interface/irradianceGenF.glsl
@@ -38,63 +38,190 @@ uniform int sourceIdx;

 VARYING vec3 vary_dir;

-// =============================================================================================================
-// Parts of this file are (c) 2018 Sascha Willems
-// SNIPPED FROM https://github.com/SaschaWillems/Vulkan-glTF-PBR/blob/master/data/shaders/irradiancecube.frag
-/*
-MIT License

-Copyright (c) 2018 Sascha Willems
+// Code below is derived from the Khronos GLTF Sample viewer:
+// https://github.com/KhronosGroup/glTF-Sample-Viewer/blob/master/source/shaders/ibl_filtering.frag

-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:

-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
+#define MATH_PI 3.1415926535897932384626433832795

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-*/
-// =============================================================================================================
+// Filtering parameters, declared as plain initialized globals (not uniforms).
+// NOTE(review): u_roughness is not referenced anywhere in the visible shader code.
+float u_roughness = 1.0;
+// number of importance samples taken per output texel (loop bound in filterColor)
+int u_sampleCount = 16;
+// constant added to the lod returned by computeLod before the cube map is sampled
+float u_lodBias = 2.0;
+// width used by computeLod in its 6 * width * width term
+// (presumably the source cube face width in texels -- TODO confirm)
+int u_width = 64;

+// Hammersley Points on the Hemisphere
+// CC BY 3.0 (Holger Dammertz)
+// http://holger.dammertz.org/stuff/notes_HammersleyOnHemisphere.html
+// with adapted interface
+float radicalInverse_VdC(uint bits)
+{
+    // Reverse the order of the 32 bits: swap the 16-bit halves, then adjacent
+    // single bits, bit pairs, nibbles and finally bytes.
+    bits = (bits << 16u) | (bits >> 16u);
+    bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u);
+    bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u);
+    bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u);
+    bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u);
+    // scale the reversed integer by 2^-32 to turn it into a fraction
+    return float(bits) * 2.3283064365386963e-10; // / 0x100000000
+}
+
+// hammersley2d returns point i of an N point sequence in the 2d unit square [0,1)^2
+// that can be used for quasi Monte Carlo integration:
+// x is the regular subdivision i/N, y is the radical inverse of i
+vec2 hammersley2d(int i, int N) {
+    float u = float(i) / float(N);
+    float v = radicalInverse_VdC(uint(i));
+    return vec2(u, v);
+}
+
+// Hemisphere Sample
+
+// TBN generates a tangent bitangent normal coordinate frame from the normal
+// (the normal must be normalized)
+// Returns mat3(tangent, bitangent, normal), built around a fixed +Y reference axis.
+mat3 generateTBN(vec3 normal)
+{
+    vec3 bitangent = vec3(0.0, 1.0, 0.0);
+
+    // NOTE(review): NdotUp and epsilon are only consumed by the disabled block below.
+    float NdotUp = dot(normal, vec3(0.0, 1.0, 0.0));
+    float epsilon = 0.0000001;
+    // NOTE(review): with this guard commented out, a normal parallel to the Y axis
+    // makes cross(bitangent, normal) a zero vector, and normalizing that is undefined.
+    /*if (1.0 - abs(NdotUp) <= epsilon)
+    {
+        // Sampling +Y or -Y, so we need a more robust bitangent.
+        if (NdotUp > 0.0)
+        {
+            bitangent = vec3(0.0, 0.0, 1.0);
+        }
+        else
+        {
+            bitangent = vec3(0.0, 0.0, -1.0);
+        }
+    }*/
+
+    vec3 tangent = normalize(cross(bitangent, normal));
+    // recompute the bitangent from normal and tangent so it is perpendicular to both
+    bitangent = cross(normal, tangent);
+
+    return mat3(tangent, bitangent, normal);
+}
+
+// One hemisphere sample expressed in spherical terms, as filled in by Lambertian().
+struct MicrofacetDistributionSample
+{
+    // probability density of the sample (evaluated for solid angle in Lambertian)
+    float pdf;
+    // cosine of the sample's polar angle; used as the local z component
+    float cosTheta;
+    // sine of the sample's polar angle; scales the local x/y components
+    float sinTheta;
+    // azimuth angle in radians
+    float phi;
+};
+
+// Cosine weighted hemisphere sample for the unit square point xi.
+// The roughness argument is accepted but not used.
+// http://www.pbr-book.org/3ed-2018/Monte_Carlo_Integration/2D_Sampling_with_Multidimensional_Transformations.html#Cosine-WeightedHemisphereSampling
+MicrofacetDistributionSample Lambertian(vec2 xi, float roughness)
+{
+    float cosTheta = sqrt(1.0 - xi.y);
+
+    MicrofacetDistributionSample result;
+    result.cosTheta = cosTheta;
+    result.sinTheta = sqrt(xi.y); // equivalent to `sqrt(1.0 - cosTheta*cosTheta)`
+    result.phi = 2.0 * MATH_PI * xi.x;
+
+    // density is evaluated for solid angle, so there is no sinTheta factor
+    result.pdf = cosTheta / MATH_PI;
+
+    return result;
+}
+
+
+// getImportanceSample returns an importance sample direction with pdf in the .w component
+// (roughness is only forwarded to Lambertian)
+vec4 getImportanceSample(int sampleIndex, vec3 N, float roughness)
+{
+    // quasi monte carlo point in the unit square [0,1)^2
+    vec2 xi = hammersley2d(sampleIndex, u_sampleCount);
+
+    // map the point onto the hemisphere with the cosine weighted (lambertian) distribution
+    MicrofacetDistributionSample hemi = Lambertian(xi, roughness);
+
+    // direction in the local frame, where z is the hemisphere axis
+    float localX = hemi.sinTheta * cos(hemi.phi);
+    float localY = hemi.sinTheta * sin(hemi.phi);
+    vec3 localSpaceDirection = normalize(vec3(localX, localY, hemi.cosTheta));
+
+    // rotate the hemisphere so that its axis lines up with N
+    vec3 direction = generateTBN(N) * localSpaceDirection;
+
+    return vec4(direction, hemi.pdf);
+}
+
+// Mipmap Filtered Samples (GPU Gems 3, 20.4)
+// https://developer.nvidia.com/gpugems/gpugems3/part-iii-rendering/chapter-20-gpu-based-importance-sampling
+// https://cgg.mff.cuni.cz/~jaroslav/papers/2007-sketch-fis/Final_sap_0073.pdf
+float computeLod(float pdf)
+{
+    // Returns the mip level to sample for an importance sample with the given pdf;
+    // a smaller pdf yields a larger lod.
+    // NOTE(review): the result is not clamped here and pdf == 0 is not guarded.
+    // // Solid angle of current sample -- bigger for less likely samples
+    // float omegaS = 1.0 / (float(u_sampleCount) * pdf);
+    // // Solid angle of texel
+    // // note: the factor of 4.0 * MATH_PI 
+    // float omegaP = 4.0 * MATH_PI / (6.0 * float(u_width) * float(u_width));
+    // // Mip level is determined by the ratio of our sample's solid angle to a texel's solid angle 
+    // // note that 0.5 * log2 is equivalent to log4
+    // float lod = 0.5 * log2(omegaS / omegaP);
+
+    // babylon introduces a factor of K (=4) to the solid angle ratio
+    // this helps to avoid undersampling the environment map
+    // this does not appear in the original formulation by Jaroslav Krivanek and Mark Colbert
+    // log4(4) == 1
+    // lod += 1.0;
+
+    // We achieved good results by using the original formulation from Krivanek & Colbert adapted to cubemaps

+    // https://cgg.mff.cuni.cz/~jaroslav/papers/2007-sketch-fis/Final_sap_0073.pdf
+    float lod = 0.5 * log2( 6.0 * float(u_width) * float(u_width) / (float(u_sampleCount) * pdf));
+
+
+    return lod;
+}
+
+// filterColor returns the average of u_sampleCount cosine weighted samples of the
+// reflectionProbes cube map array (layer sourceIdx) taken around direction N.
+// N may have any non-zero length: it is normalized here because generateTBN
+// requires a normalized normal and main() passes the interpolated vary_dir.
+vec3 filterColor(vec3 N)
+{
+    //return  textureLod(uCubeMap, N, 3.0).rgb;
+    vec3 normal = normalize(N);
+    vec3 color = vec3(0.0);

-#define PI 3.1415926535897932384626433832795
+    for(int i = 0; i < u_sampleCount; ++i)
+    {
+        vec4 importanceSample = getImportanceSample(i, normal, 1.0);

+        vec3 H = vec3(importanceSample.xyz);
+        float pdf = importanceSample.w;
+
+        // mipmap filtered samples (GPU Gems 3, 20.4), with the lod bias applied
+        float lod = computeLod(pdf) + u_lodBias;
+
+        // float bounds so that clamp() resolves without int -> float conversions
+        lod = clamp(lod, 0.0, 7.0);
+
+        // sample lambertian at a lower resolution to avoid fireflies
+        color += textureLod(reflectionProbes, vec4(H, sourceIdx), lod).rgb;
+    }
+
+    // samples are summed unweighted, so the result is their plain mean
+    // (the former `weight` accumulator was never written and has been removed)
+    return color / float(u_sampleCount);
+}
+
+// entry point
+// writes the filtered color for the interpolated direction vary_dir to frag_color
 void main()
 {
-    float deltaPhi = (2.0 * PI) / 11.25;
-	float deltaTheta = (0.5 * PI) / 4.0;
-    float mipLevel = 2;
-
-	vec3 N = normalize(vary_dir);
-	vec3 up = vec3(0.0, 1.0, 0.0);
-	vec3 right = normalize(cross(up, N));
-	up = normalize(cross(N, right));
-
-	const float TWO_PI = PI * 2.0;
-	const float HALF_PI = PI * 0.5;
-
-	vec3 color = vec3(0.0);
-	uint sampleCount = 0u;
-	for (float phi = 0.0; phi < TWO_PI; phi += deltaPhi) {
-		for (float theta = 0.0; theta < HALF_PI; theta += deltaTheta) {
-			vec3 tempVec = cos(phi) * right + sin(phi) * up;
-			vec3 sampleVector = cos(theta) * N + sin(theta) * tempVec;
-			color += textureLod(reflectionProbes, vec4(sampleVector, sourceIdx), mipLevel).rgb * cos(theta) * sin(theta);
-			sampleCount++;
-		}
-	}
-	frag_color = vec4(PI * color / float(sampleCount), 1.0);
+    vec3 color = vec3(0);
+
+    // NOTE(review): unlike the removed code above, vary_dir is not normalized in this function
+    color = filterColor(vary_dir);
+    
+    frag_color = vec4(color,1.0);
 }
-// =============================================================================================================

--- a/indra/newview/app_settings/shaders/class1/interface/radianceGenF.glsl
+++ b/indra/newview/app_settings/shaders/class1/interface/radianceGenF.glsl
@@ -66,7 +66,7 @@ SOFTWARE.
 // =============================================================================================================


-uniform float roughness;
+//uniform float roughness;

 uniform float mipLevel;

@@ -123,14 +123,18 @@ float D_GGX(float dotNH, float roughness)
 	return (alpha2)/(PI * denom*denom); 
 }

-vec3 prefilterEnvMap(vec3 R, float roughness)
+vec3 prefilterEnvMap(vec3 R)
 {
 	vec3 N = R;
 	vec3 V = R;
 	vec3 color = vec3(0.0);
 	float totalWeight = 0.0;
 	float envMapDim = 256.0;
-    int numSamples = 32/max(int(mipLevel), 1);
+    int numSamples = 8;
+    
+    float numMips = 7.0;
+
+    float roughness = (mipLevel+1)/numMips;

 	for(uint i = 0u; i < numSamples; i++) {
 		vec2 Xi = hammersley2d(i, numSamples);
@@ -150,8 +154,9 @@ vec3 prefilterEnvMap(vec3 R, float roughness)
 			// Solid angle of 1 pixel across all cube faces
 			float omegaP = 4.0 * PI / (6.0 * envMapDim * envMapDim);
 			// Biased (+1.0) mip level for better result
-			//float mipLevel = roughness == 0.0 ? 0.0 : max(0.5 * log2(omegaS / omegaP) + 1.0, 0.0f);
-			color += textureLod(reflectionProbes, vec4(L,sourceIdx), mipLevel).rgb * dotNL;
+			//float mip = roughness == 0.0 ? 0.0 : max(0.5 * log2(omegaS / omegaP) + 1.0, 0.0f);
+            float mip = clamp(0.5 * log2(omegaS / omegaP) + 1.0, 0.0f, 7.f);
+			color += textureLod(reflectionProbes, vec4(L,sourceIdx), mip).rgb * dotNL;
 			totalWeight += dotNL;

 		}
@@ -162,7 +167,7 @@ vec3 prefilterEnvMap(vec3 R, float roughness)
 void main()
 {		
 	vec3 N = normalize(vary_dir);
-	frag_color = vec4(prefilterEnvMap(N, roughness), 1.0);
+	// roughness is no longer passed in; prefilterEnvMap derives it from the mipLevel uniform
+	frag_color = vec4(prefilterEnvMap(N), 1.0);
 }
 // =============================================================================================================

--- a/indra/newview/app_settings/shaders/class1/interface/reflectionmipF.glsl
+++ b/indra/newview/app_settings/shaders/class1/interface/reflectionmipF.glsl
@@ -39,6 +39,7 @@ VARYING vec2 vary_texcoord0;

 void main() 
 {
+#if 0
    float w[9];

    float c = 1.0/16.0;  //corner weight
@@ -72,4 +73,7 @@ void main()
    //color /= wsum;

    frag_color = vec4(color, 1.0);
+#else
+    frag_color = vec4(texture2DRect(screenMap, vary_texcoord0.xy).rgb, 1.0);
+#endif
 }
--- a/indra/newview/llreflectionmapmanager.cpp
+++ b/indra/newview/llreflectionmapmanager.cpp
@@ -410,8 +410,7 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face)

        S32 mips = log2((F32)LL_REFLECTION_PROBE_RESOLUTION) + 0.5f;

-        //for (int i = 0; i < mMipChain.size(); ++i)
-        for (int i = 0; i < 1; ++i)
+        for (int i = 0; i < mMipChain.size(); ++i)
        {
            LL_PROFILE_GPU_ZONE("probe mip");
            mMipChain[i].bindTarget();
@@ -447,10 +446,14 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face)

            if (mip >= 0)
            {
+                LL_PROFILE_GPU_ZONE("probe mip copy");
                mTexture->bind(0);
                //glCopyTexSubImage3D(GL_TEXTURE_CUBE_MAP_ARRAY, mip, 0, 0, probe->mCubeIndex * 6 + face, 0, 0, res, res);
                glCopyTexSubImage3D(GL_TEXTURE_CUBE_MAP_ARRAY, mip, 0, 0, targetIdx * 6 + face, 0, 0, res, res);
-                glCopyTexSubImage3D(GL_TEXTURE_CUBE_MAP_ARRAY, mip, 0, 0, probe->mCubeIndex * 6 + face, 0, 0, res, res);
+                if (i == 0)
+                {
+                    glCopyTexSubImage3D(GL_TEXTURE_CUBE_MAP_ARRAY, mip, 0, 0, probe->mCubeIndex * 6 + face, 0, 0, res, res);
+                }
                mTexture->unbind();
            }
            mMipChain[i].flush();
@@ -474,8 +477,12 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face)

        static LLStaticHashedString sMipLevel("mipLevel");

+        mMipChain[1].bindTarget();
+        U32 res = mMipChain[1].getWidth();
+
        for (int i = 1; i < mMipChain.size(); ++i)
        {
+            LL_PROFILE_GPU_ZONE("probe radiance gen");
            for (int cf = 0; cf < 6; ++cf)
            { // for each cube face
                LLCoordFrame frame;
@@ -485,15 +492,11 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face)
                frame.getOpenGLRotation(mat);
                gGL.loadMatrix(mat);

-                mMipChain[i].bindTarget();
                static LLStaticHashedString sRoughness("roughness");

                gRadianceGenProgram.uniform1f(sRoughness, (F32)i / (F32)(mMipChain.size() - 1));
                gRadianceGenProgram.uniform1f(sMipLevel, llmax((F32)(i - 1), 0.f));
-                if (i > 0)
-                {
-                    gRadianceGenProgram.uniform1i(sSourceIdx, probe->mCubeIndex);
-                }
+                
                gGL.begin(gGL.QUADS);
                gGL.vertex3f(-1, -1, -1);
                gGL.vertex3f(1, -1, -1);
@@ -501,12 +504,17 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face)
                gGL.vertex3f(-1, 1, -1);
                gGL.end();
                gGL.flush();
-
-                S32 res = mMipChain[i].getWidth();
+                
                glCopyTexSubImage3D(GL_TEXTURE_CUBE_MAP_ARRAY, i, 0, 0, probe->mCubeIndex * 6 + cf, 0, 0, res, res);
-                mMipChain[i].flush();
+            }
+
+            if (i != mMipChain.size() - 1)
+            {
+                res /= 2;
+                glViewport(0, 0, res, res);
            }
        }
+
        gRadianceGenProgram.unbind();

        //generate irradiance map
@@ -514,7 +522,7 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face)
        channel = gIrradianceGenProgram.enableTexture(LLShaderMgr::REFLECTION_PROBES, LLTexUnit::TT_CUBE_MAP_ARRAY);
        mTexture->bind(channel);

-        gIrradianceGenProgram.uniform1i(sSourceIdx, probe->mCubeIndex);
+        gIrradianceGenProgram.uniform1i(sSourceIdx, targetIdx);

        int start_mip = 0;
        // find the mip target to start with based on irradiance map resolution
@@ -528,6 +536,8 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face)

        for (int i = start_mip; i < mMipChain.size(); ++i)
        {
+            LL_PROFILE_GPU_ZONE("probe irradiance gen");
+            glViewport(0, 0, mMipChain[i].getWidth(), mMipChain[i].getHeight());
            for (int cf = 0; cf < 6; ++cf)
            { // for each cube face
                LLCoordFrame frame;
@@ -537,8 +547,6 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face)
                frame.getOpenGLRotation(mat);
                gGL.loadMatrix(mat);

-                mMipChain[i].bindTarget();
-
                gGL.begin(gGL.QUADS);
                gGL.vertex3f(-1, -1, -1);
                gGL.vertex3f(1, -1, -1);
@@ -551,9 +559,11 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face)
                mIrradianceMaps->bind(channel);
                glCopyTexSubImage3D(GL_TEXTURE_CUBE_MAP_ARRAY, i - start_mip, 0, 0, probe->mCubeIndex * 6 + cf, 0, 0, res, res);
                mTexture->bind(channel);
-                mMipChain[i].flush();
            }
        }
+
+        mMipChain[1].flush();
+
        gIrradianceGenProgram.unbind();
    }
 }