From 6c59ca80ead7210c58c96234fb1f2a951335266e Mon Sep 17 00:00:00 2001
From: Rye Mutt <rye@alchemyviewer.org>
Date: Sun, 31 Oct 2021 16:59:10 -0400
Subject: [PATCH] Optimize setupSpotLight

---
 indra/llmath/alglmath.h    | 77 ++++++++++++++++++++++++++++++++++++++
 indra/newview/pipeline.cpp | 50 +++++++++++++------------
 2 files changed, 104 insertions(+), 23 deletions(-)

diff --git a/indra/llmath/alglmath.h b/indra/llmath/alglmath.h
index 0ad8cb30756..8f63c276f3a 100644
--- a/indra/llmath/alglmath.h
+++ b/indra/llmath/alglmath.h
@@ -7,6 +7,12 @@
 
 namespace ALGLMath
 {
+	// Bias matrix: 0.5 scale on x/y/z with a 0.5 offset in the fourth row. Deliberately not `static`:
+	// the inline genNDCtoWC() returns a reference to it, so every TU must name the same object (ODR).
+	inline const LLMatrix4a TRANS_MAT = LLMatrix4a(
+		LLVector4a(.5f, 0, 0, 0), LLVector4a(0, .5f, 0, 0),
+		LLVector4a(0, 0, .5f, 0), LLVector4a(.5f, .5f, .5f, 1.f));
+
 	inline LLMatrix4a genRot(const float a, const LLVector4a& axis)
 	{
 		F32 r = a * DEG_TO_RAD;
@@ -44,6 +50,77 @@ namespace ALGLMath
 
 	inline LLMatrix4a genRot(const float a, const float x, const float y, const float z) { return genRot(a, LLVector4a(x, y, z)); }
 
+	inline LLMatrix4a genOrtho(const GLfloat& left, const GLfloat& right, const GLfloat& bottom, const GLfloat& top, const GLfloat& zNear, const GLfloat& zFar)
+	{
+		const GLfloat width = right - left, height = top - bottom, depth = zFar - zNear; // volume extents, shared by the scale rows (0-2) and the offset row (3)
+		LLMatrix4a result;
+		result.setRow<0>(LLVector4a(2.f / width, 0, 0));
+		result.setRow<1>(LLVector4a(0, 2.f / height, 0));
+		result.setRow<2>(LLVector4a(0, 0, -2.f / depth));
+		result.setRow<3>(LLVector4a(-(right + left) / width, -(top + bottom) / height, -(zFar + zNear) / depth, 1));
+		return result;
+	}
+
+	// Builds a perspective projection from a field of view `fovy` given in degrees, an aspect ratio and two clip distances.
+	inline LLMatrix4a genPersp(const GLfloat& fovy, const GLfloat& aspect, const GLfloat& zNear, const GLfloat& zFar)
+	{
+		const GLfloat cot_half_fov = 1.f / tanf(DEG_TO_RAD * fovy / 2.f); // fovy is converted to radians here
+		const GLfloat depth = zNear - zFar; // common denominator of both z terms
+		LLMatrix4a result;
+		result.setRow<0>(LLVector4a(cot_half_fov / aspect, 0, 0));
+		result.setRow<1>(LLVector4a(0, cot_half_fov, 0));
+		result.setRow<2>(LLVector4a(0, 0, (zFar + zNear) / depth, -1.f));
+		result.setRow<3>(LLVector4a(0, 0, (2.f * zFar * zNear) / depth, 0));
+		return result;
+	}
+
+	// Builds a look-at style matrix from an eye position, a viewing direction and an up vector.
+	// Rows are filled as (side, -side.eye), (up, -up.eye), (-forward, forward.eye), (0,0,0,1), then transposed.
+	inline LLMatrix4a genLook(const LLVector3& pos_in, const LLVector3& dir_in, const LLVector3& up_in)
+	{
+		const LLVector4a eye(pos_in.mV[VX], pos_in.mV[VY], pos_in.mV[VZ], 1.f);
+		LLVector4a forward(dir_in.mV[VX], dir_in.mV[VY], dir_in.mV[VZ]);
+		const LLVector4a up_hint(up_in.mV[VX], up_in.mV[VY], up_in.mV[VZ]);
+
+		// Both cross products consume the direction as passed in; it is only normalized afterwards.
+		LLVector4a side;
+		side.setCross3(forward, up_hint);
+		side.normalize3fast();
+		LLVector4a true_up;
+		true_up.setCross3(side, forward);
+		true_up.normalize3fast();
+		forward.normalize3fast();
+
+		// Eye position projected onto each axis; the side and up terms are negated, the forward term is not.
+		LLVector4a side_offset;
+		side_offset.setAllDot3(side, eye);
+		side_offset.negate();
+		LLVector4a up_offset;
+		up_offset.setAllDot3(true_up, eye);
+		up_offset.negate();
+		LLVector4a forward_offset;
+		forward_offset.setAllDot3(forward, eye);
+		forward.negate(); // flipped only after its dot product has been taken
+
+		LLMatrix4a view;
+		view.setRow<0>(side);
+		view.getRow<0>().copyComponent<3>(side_offset);
+		view.setRow<1>(true_up);
+		view.getRow<1>().copyComponent<3>(up_offset);
+		view.setRow<2>(forward);
+		view.getRow<2>().copyComponent<3>(forward_offset);
+		view.setRow<3>(LLVector4a(0, 0, 0, 1));
+		view.transpose();
+
+		return view;
+	}
+
+	// Accessor for the TRANS_MAT constant defined at the top of this namespace.
+	// The matrix is returned by const reference, so no new matrix is built per call
+	// even though the name follows the gen* pattern of the helpers above.
+	inline const LLMatrix4a& genNDCtoWC() { return TRANS_MAT; }
+
+
 	inline bool projectf(const LLVector3& object, const LLMatrix4a& modelview, const LLMatrix4a& projection, const LLRect& viewport, LLVector3& windowCoordinate)
 	{
 		//Begin SSE intrinsics
diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp
index 7045e6d6b1f..ee648d2ae70 100644
--- a/indra/newview/pipeline.cpp
+++ b/indra/newview/pipeline.cpp
@@ -29,6 +29,7 @@
 #include "pipeline.h"
 
 // library includes
+#include "alglmath.h"
 #include "llaudioengine.h" // For debugging.
 #include "llerror.h"
 #include "llviewercontrol.h"
@@ -9376,12 +9377,17 @@ void LLPipeline::setupSpotLight(LLGLSLShader& shader, LLDrawable* drawablep)
 	LLVector3 origin = np - at_axis*dist;
 
 	//matrix from volume space to agent space
-	LLMatrix4 light_mat(quat, LLVector4(origin,1.f));
+	LLMatrix4 light_mat_(quat, LLVector4(origin,1.f));
 
-	glh::matrix4f light_to_agent((F32*) light_mat.mMatrix);
-	glh::matrix4f light_to_screen = get_current_modelview() * light_to_agent;
+	LLMatrix4a cur_modelview;
+	cur_modelview.loadu(gGLModelView);
 
-	glh::matrix4f screen_to_light = light_to_screen.inverse();
+	LLMatrix4a light_mat;
+	light_mat.loadu(light_mat_.mMatrix[0]);
+	LLMatrix4a light_to_screen;
+	light_to_screen.setMul(cur_modelview,light_mat);
+	LLMatrix4a screen_to_light = light_to_screen;
+	screen_to_light.invert();
 
 	F32 s = volume->getLightRadius()*1.5f;
 	F32 near_clip = dist;
@@ -9392,30 +9398,28 @@ void LLPipeline::setupSpotLight(LLGLSLShader& shader, LLDrawable* drawablep)
 	F32 fovy = fov * RAD_TO_DEG;
 	F32 aspect = width/height;
 
-	glh::matrix4f trans(0.5f, 0.f, 0.f, 0.5f,
-				0.f, 0.5f, 0.f, 0.5f,
-				0.f, 0.f, 0.5f, 0.5f,
-				0.f, 0.f, 0.f, 1.f);
+	LLVector4a p1(0, 0, -(near_clip+0.01f));
+	LLVector4a p2(0, 0, -(near_clip+1.f));
 
-	glh::vec3f p1(0, 0, -(near_clip+0.01f));
-	glh::vec3f p2(0, 0, -(near_clip+1.f));
+	LLVector4a screen_origin(LLVector4a::getZero());
 
-	glh::vec3f screen_origin(0, 0, 0);
+	light_to_screen.affineTransform(p1,p1);
+	light_to_screen.affineTransform(p2,p2);
+	light_to_screen.affineTransform(screen_origin,screen_origin);
 
-	light_to_screen.mult_matrix_vec(p1);
-	light_to_screen.mult_matrix_vec(p2);
-	light_to_screen.mult_matrix_vec(screen_origin);
+	LLVector4a n;
+	n.setSub(p2,p1);
+	n.normalize3fast();
 
-	glh::vec3f n = p2-p1;
-	n.normalize();
-	
 	F32 proj_range = far_clip - near_clip;
-	glh::matrix4f light_proj = gl_perspective(fovy, aspect, near_clip, far_clip);
-	screen_to_light = trans * light_proj * screen_to_light;
-	shader.uniformMatrix4fv(LLShaderMgr::PROJECTOR_MATRIX, 1, FALSE, screen_to_light.m);
-	shader.uniform3fv(LLShaderMgr::PROJECTOR_P, 1, p1.v);
-	shader.uniform3fv(LLShaderMgr::PROJECTOR_N, 1, n.v);
-	shader.uniform3fv(LLShaderMgr::PROJECTOR_ORIGIN, 1, screen_origin.v);
+	LLMatrix4a light_proj = ALGLMath::genPersp(fovy, aspect, near_clip, far_clip);
+	light_proj.setMul(ALGLMath::genNDCtoWC(),light_proj);
+	screen_to_light.setMul(light_proj,screen_to_light);
+
+	shader.uniformMatrix4fv(LLShaderMgr::PROJECTOR_MATRIX, 1, FALSE, screen_to_light.getF32ptr());
+	shader.uniform3fv(LLShaderMgr::PROJECTOR_P, 1, p1.getF32ptr());
+	shader.uniform3fv(LLShaderMgr::PROJECTOR_N, 1, n.getF32ptr());
+	shader.uniform3fv(LLShaderMgr::PROJECTOR_ORIGIN, 1, screen_origin.getF32ptr());
 	shader.uniform1f(LLShaderMgr::PROJECTOR_RANGE, proj_range);
 	shader.uniform1f(LLShaderMgr::PROJECTOR_AMBIANCE, params.mV[2]);
 
-- 
GitLab