diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h index 5e8abaa73f0b2fc5f46c3224b78b09eb77266aec..ce978bf4b958ac297db51d3f86fccb9cb0c1b2be 100644 --- a/indra/llmath/llmatrix4a.h +++ b/indra/llmath/llmatrix4a.h @@ -66,11 +66,42 @@ public: mMatrix[3] = rhs.getRow<3>(); } + LLMatrix4a(const LLQuad& q1,const LLQuad& q2,const LLQuad& q3,const LLQuad& q4) /* stores q1..q4 verbatim as rows 0..3 */ + { + mMatrix[0] = q1; + mMatrix[1] = q2; + mMatrix[2] = q3; + mMatrix[3] = q4; + } + LLMatrix4a(const LLMatrix4& rhs) { loadu(rhs); } + LLMatrix4a(const LLQuaternion2& quat) /* builds this matrix from a quaternion: two helper matrices are filled from shuffled/negated components and combined with setMul(matb,mata); intentionally non-explicit, callers assign an LLQuaternion2 directly */ + { + const LLVector4a& xyzw = quat.getVector4a(); + LLVector4a nyxwz = _mm_shuffle_ps(xyzw, xyzw, _MM_SHUFFLE(2,3,0,1)); /* lanes are (y,x,w,z) here */ + nyxwz.negate(); /* now (-y,-x,-w,-z) */ + const LLVector4a xnyynx = _mm_unpacklo_ps(xyzw,nyxwz); /* (x,-y,y,-x) */ + const LLVector4a znwwnz = _mm_unpackhi_ps(xyzw,nyxwz); /* (z,-w,w,-z) */ + + LLMatrix4a mata; + mata.setRow<0>(_mm_shuffle_ps(xyzw, xnyynx, _MM_SHUFFLE(0,1,2,3))); /* (w,z,-y,x) */ + mata.setRow<1>(_mm_shuffle_ps(znwwnz, xyzw, _MM_SHUFFLE(1,0,2,3))); /* (-z,w,x,y) */ + mata.setRow<2>(_mm_shuffle_ps(xnyynx, xyzw, _MM_SHUFFLE(2,3,3,2))); /* (y,-x,w,z) */ + mata.setRow<3>(_mm_shuffle_ps(xnyynx, znwwnz, _MM_SHUFFLE(2,3,1,3))); /* (-x,-y,-z,w) */ + + LLMatrix4a matb; + matb.setRow<0>(_mm_shuffle_ps(xyzw, xnyynx, _MM_SHUFFLE(3,1,2,3))); /* (w,z,-y,-x) */ + matb.setRow<1>(_mm_shuffle_ps(znwwnz, xnyynx, _MM_SHUFFLE(1,0,2,3))); /* (-z,w,x,-y) */ + matb.setRow<2>(_mm_shuffle_ps(xnyynx, znwwnz, _MM_SHUFFLE(3,2,3,2))); /* (y,-x,w,-z) */ + matb.setRow<3>(xyzw); /* (x,y,z,w) */ + + setMul(matb,mata); + } + // Do NOT add aditional operators without consulting someone with SSE experience inline const LLMatrix4a& operator= (const LLMatrix4a& rhs) { @@ -509,7 +540,153 @@ public: return ret; } - //======================Logic==================== + //=============Affine transformation matrix only========================= + + //Multiply matrix with a pure translation matrix. 
+ inline void applyTranslation_affine(const F32& x, const F32& y, const F32& z) + { + const LLVector4a xyz0(x,y,z,0); //load + LLVector4a xxxx; + xxxx.splat<0>(xyz0); + LLVector4a yyyy; + yyyy.splat<1>(xyz0); + LLVector4a zzzz; + zzzz.splat<2>(xyz0); + + LLVector4a sum1; + LLVector4a sum2; + LLVector4a sum3; + + sum1.setMul(xxxx,mMatrix[0]); + sum2.setMul(yyyy,mMatrix[1]); + sum3.setMul(zzzz,mMatrix[2]); + + mMatrix[3].add(sum1); + mMatrix[3].add(sum2); + mMatrix[3].add(sum3); /* net effect: row3 += x*row0 + y*row1 + z*row2; rows 0-2 are not modified */ + } + + //Multiply matrix with a pure translation matrix. + inline void applyTranslation_affine(const LLVector3& trans) + { + applyTranslation_affine(trans.mV[VX],trans.mV[VY],trans.mV[VZ]); /* forwards to the three-float overload */ + } + + //Multiply matrix with a pure scale matrix. + inline void applyScale_affine(const F32& x, const F32& y, const F32& z) + { + const LLVector4a xyz0(x,y,z,0); //load + LLVector4a xxxx; + xxxx.splat<0>(xyz0); + LLVector4a yyyy; + yyyy.splat<1>(xyz0); + LLVector4a zzzz; + zzzz.splat<2>(xyz0); + + mMatrix[0].mul(xxxx); + mMatrix[1].mul(yyyy); + mMatrix[2].mul(zzzz); /* rows 0,1,2 are scaled by x,y,z respectively (all four lanes); row3 is not touched */ + } + + //Multiply matrix with a pure scale matrix. + inline void applyScale_affine(const LLVector3& scale) + { + applyScale_affine(scale.mV[VX],scale.mV[VY],scale.mV[VZ]); /* forwards to the three-float overload */ + } + + //Multiply matrix with a pure scale matrix. + inline void applyScale_affine(const F32& s) + { + const LLVector4a scale(s); //load + mMatrix[0].mul(scale); + mMatrix[1].mul(scale); + mMatrix[2].mul(scale); /* uniform scale of rows 0-2; row3 is not touched */ + } + + //Direct addition to row3. + inline void translate_affine(const LLVector3& trans) + { + LLVector4a translation; + translation.load3(trans.mV); /* load3's w argument is left at its default of 0, so the add below leaves row3.w unchanged */ + mMatrix[3].add(translation); + } + + //Direct assignment of row3. 
+ inline void setTranslate_affine(const LLVector3& trans) + { + static const LLVector4Logical mask = _mm_load_ps((F32*)&S_V4LOGICAL_MASK_TABLE[3*4]); /* NOTE(review): entry 3 of the table is presumably the W-lane mask - confirm against llvector4logical.h */ + + LLVector4a translation; + translation.load3(trans.mV); + + mMatrix[3].setSelectWithMask(mask,mMatrix[3],translation); /* presumably keeps the masked lane(s) of the current row3 and takes the remaining lanes from translation - confirm setSelectWithMask argument order */ + } + + inline void mul_affine(const LLMatrix4a& rhs) + { + LLVector4a x0,y0,z0; + LLVector4a x1,y1,z1; + LLVector4a x2,y2,z2; + LLVector4a x3,y3,z3; + + //12 shuffles + x0.splat<0>(rhs.mMatrix[0]); + x1.splat<0>(rhs.mMatrix[1]); + x2.splat<0>(rhs.mMatrix[2]); + x3.splat<0>(rhs.mMatrix[3]); + + y0.splat<1>(rhs.mMatrix[0]); + y1.splat<1>(rhs.mMatrix[1]); + y2.splat<1>(rhs.mMatrix[2]); + y3.splat<1>(rhs.mMatrix[3]); + + z0.splat<2>(rhs.mMatrix[0]); + z1.splat<2>(rhs.mMatrix[1]); + z2.splat<2>(rhs.mMatrix[2]); + z3.splat<2>(rhs.mMatrix[3]); + + //12 muls + x0.mul(mMatrix[0]); + x1.mul(mMatrix[0]); + x2.mul(mMatrix[0]); + x3.mul(mMatrix[0]); + + y0.mul(mMatrix[1]); + y1.mul(mMatrix[1]); + y2.mul(mMatrix[1]); + y3.mul(mMatrix[1]); + + z0.mul(mMatrix[2]); + z1.mul(mMatrix[2]); + z2.mul(mMatrix[2]); + z3.mul(mMatrix[2]); + + //9 adds + x0.add(y0); + + x1.add(y1); + + x2.add(y2); + + x3.add(y3); + z3.add(mMatrix[3]); /* the W lanes of rhs are never read: rhs rows 0-2 are treated as w=0 and row 3 as w=1, so only the row-3 result picks up this matrix's row3 */ + + mMatrix[0].setAdd(x0,z0); + mMatrix[1].setAdd(x1,z1); + mMatrix[2].setAdd(x2,z2); + mMatrix[3].setAdd(x3,z3); /* new row N = rhs[N].x*row0 + rhs[N].y*row1 + rhs[N].z*row2 (+ row3 for N=3), using the rows as they were on entry */ + } + + inline void extractRotation_affine() + { + static const LLVector4Logical mask = _mm_load_ps((F32*)&S_V4LOGICAL_MASK_TABLE[3*4]); /* same table entry as setTranslate_affine; NOTE(review): presumably the W-lane mask - confirm */ + mMatrix[0].setSelectWithMask(mask,_mm_setzero_ps(),mMatrix[0]); + mMatrix[1].setSelectWithMask(mask,_mm_setzero_ps(),mMatrix[1]); + mMatrix[2].setSelectWithMask(mask,_mm_setzero_ps(),mMatrix[2]); /* rows 0-2: masked lane(s) replaced by zero, other lanes kept */ + mMatrix[3].setSelectWithMask(mask,LLVector4a(1.f),_mm_setzero_ps()); /* row3: masked lane(s) become 1, all other lanes become 0 */ + } + + //======================Logic==================== private: template<bool mins> inline void init_foos(LLMatrix4a& foos) const { diff --git a/indra/llmath/llquaternion2.h b/indra/llmath/llquaternion2.h index 
fc32165b092c16c12a69e16d52cc2bdbacd4e769..6cfe91a024a4cf784af664dc3364f89d5b65dc0a 100644 --- a/indra/llmath/llquaternion2.h +++ b/indra/llmath/llquaternion2.h @@ -85,6 +85,8 @@ public: // Quantize this quaternion to 16 bit precision inline void quantize16(); + inline void mul(const LLQuaternion2& b); /* in-place product: *this is overwritten with the combination of *this (A) and b (B) */ + ///////////////////////// // Quaternion inspection ///////////////////////// diff --git a/indra/llmath/llquaternion2.inl b/indra/llmath/llquaternion2.inl index 2a6987552d2949dcf392c1c00603ff70a97a86ea..52d67620f13d40a037fa986809900a3a8bc3517d 100644 --- a/indra/llmath/llquaternion2.inl +++ b/indra/llmath/llquaternion2.inl @@ -50,6 +50,39 @@ inline LLVector4a& LLQuaternion2::getVector4aRw() return mQ; } +inline void LLQuaternion2::mul(const LLQuaternion2& b) +{ + static LL_ALIGN_16(const unsigned int signMask[4]) = { 0x0, 0x0, 0x0, 0x80000000 }; /* sign bit set in the W lane only */ + + LLVector4a sum1, sum2, prod1, prod2, prod3, prod4; + const LLVector4a& va = mQ; + const LLVector4a& vb = b.getVector4a(); + + // [VX] [VY] [VZ] [VW] + //prod1: +wx +wy +wz +ww Bwwww*Axyzw + //prod2: +xw +yw +zw -xx Bxyzx*Awwwx [VW] sign flip + //prod3: +yz +zx +xy -yy Byzxy*Azxyy [VW] sign flip + //prod4: -zy -xz -yx -zz Bzxyz*Ayzxz + + const LLVector4a Bwwww = _mm_shuffle_ps(vb,vb,_MM_SHUFFLE(3,3,3,3)); + const LLVector4a Bxyzx = _mm_shuffle_ps(vb,vb,_MM_SHUFFLE(0,2,1,0)); + const LLVector4a Awwwx = _mm_shuffle_ps(va,va,_MM_SHUFFLE(0,3,3,3)); + const LLVector4a Byzxy = _mm_shuffle_ps(vb,vb,_MM_SHUFFLE(1,0,2,1)); + const LLVector4a Azxyy = _mm_shuffle_ps(va,va,_MM_SHUFFLE(1,1,0,2)); + const LLVector4a Bzxyz = _mm_shuffle_ps(vb,vb,_MM_SHUFFLE(2,1,0,2)); + const LLVector4a Ayzxz = _mm_shuffle_ps(va,va,_MM_SHUFFLE(2,0,2,1)); + + prod1.setMul(Bwwww,va); + prod2.setMul(Bxyzx,Awwwx); + prod3.setMul(Byzxy,Azxyy); + prod4.setMul(Bzxyz,Ayzxz); + + sum1.setAdd(prod2,prod3); + sum1 = _mm_xor_ps(sum1, _mm_load_ps((const float*)signMask)); /* negates only the W lane of prod2+prod3 (the "[VW] sign flip" in the table above) */ + sum2.setSub(prod1,prod4); /* prod4 enters with a minus sign in every lane */ + mQ.setAdd(sum1,sum2); +} + ///////////////////////// // Quaternion 
modification ///////////////////////// diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index dd1a71efe9528fea7f7ac6a65f2f25976827f800..e553119a37207511714833ff6ef1084bd2a84d88 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -128,7 +128,7 @@ public: inline void loadua(const F32* src); // Load only three floats beginning at address 'src'. Slowest method. - inline void load3(const F32* src); + inline void load3(const F32* src, const F32 w=0.f); /* w is written to the fourth lane; the 0.f default matches the value the one-argument version stored there */ // Store to a 16-byte aligned memory address inline void store4a(F32* dst) const; @@ -285,6 +285,8 @@ public: void quantize8( const LLVector4a& low, const LLVector4a& high ); void quantize16( const LLVector4a& low, const LLVector4a& high ); + void negate(); /* flips the sign of all four lanes in place */ + //////////////////////////////////// // LOGICAL //////////////////////////////////// diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index f7062b6e367aecc343b3d2b3f5bb1497b3603e72..4eba273471ebd5d5f7db062cc837b82edbc9c0c3 100644 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -41,11 +41,11 @@ inline void LLVector4a::loadua(const F32* src) } // Load only three floats beginning at address 'src'. Slowest method. 
-inline void LLVector4a::load3(const F32* src) +inline void LLVector4a::load3(const F32* src, const F32 w) { // mQ = { 0.f, src[2], src[1], src[0] } = { W, Z, Y, X } // NB: This differs from the convention of { Z, Y, X, W } - mQ = _mm_set_ps(0.f, src[2], src[1], src[0]); + mQ = _mm_set_ps(w, src[2], src[1], src[0]); /* W lane now holds the caller-supplied w; the comment two lines up still shows the old hard-coded 0.f */ } // Store to a 16-byte aligned memory address @@ -536,6 +536,11 @@ inline void LLVector4a::clamp( const LLVector4a& low, const LLVector4a& high ) setSelectWithMask( lowMask, low, *this ); } +inline void LLVector4a::negate() +{ + static LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 }; /* IEEE-754 sign bit for each of the four lanes */ + mQ = _mm_xor_ps(*reinterpret_cast<const LLQuad*>(signMask), mQ); /* XOR with the sign bit flips the sign of every lane */ +} //////////////////////////////////// // LOGICAL diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index c6ceaefcbe34326a9fb5dcd474e52bb544981e7c..8953d79c2ce5c90690132fabdedc6f4f2cf7c2f7 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -3661,15 +3661,12 @@ S32 LLVolume::getNumTriangles(S32* vcount) const void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, std::vector<LLVector3> &normals, const LLVector3& obj_cam_vec_in, - const LLMatrix4& mat_in, - const LLMatrix3& norm_mat_in, + const LLMatrix4a& mat_in, + const LLMatrix4a& norm_mat_in, S32 face_mask) { - LLMatrix4a mat; - mat.loadu(mat_in); - - LLMatrix4a norm_mat; - norm_mat.loadu(norm_mat_in); + const LLMatrix4a& mat = mat_in; /* plain alias: the parameter is already an LLMatrix4a, so the old loadu() conversion is gone */ + const LLMatrix4a& norm_mat = norm_mat_in; /* likewise; note the normal matrix is now passed as a 4x4 instead of LLMatrix3 */ LLVector4a obj_cam_vec; obj_cam_vec.load3(obj_cam_vec_in.mV); diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 7bb31f929cadd2385945ac6630f4312718fba50b..fac694cb4cfdd49281ca9d374bf84cdb2bfedb4c 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -1025,8 +1025,8 @@ public: void generateSilhouetteVertices(std::vector<LLVector3> &vertices, std::vector<LLVector3> &normals, const LLVector3& view_vec, - const LLMatrix4& mat, - const LLMatrix3& 
norm_mat, + const LLMatrix4a& mat, + const LLMatrix4a& norm_mat, S32 face_index); //get the face index of the face that intersects with the given line segment at the point diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp index 241d5da4133a771b76e396fe87dbc51d1f5b03b5..6f09e54b3deba99f9c8ba0d7c985d4de95e30c43 100644 --- a/indra/newview/llface.cpp +++ b/indra/newview/llface.cpp @@ -578,7 +578,7 @@ void LLFace::renderSelected(LLViewerTexture *imagep, const LLColor4& color) // called when selecting a face during edit of a mesh object LLGLEnable offset(GL_POLYGON_OFFSET_FILL); glPolygonOffset(-1.f, -1.f); - gGL.multMatrix((F32*) volume->getRelativeXform().mMatrix); + gGL.multMatrix(volume->getRelativeXform().getF32ptr()); /* getF32ptr() replaces the C-style cast of mMatrix now that the getter returns an LLMatrix4a */ const LLVolumeFace& vol_face = rigged->getVolumeFace(getTEOffset()); LLVertexBuffer::drawElements(LLRender::TRIANGLES, vol_face.mNumVertices, vol_face.mPositions, vol_face.mTexCoords, vol_face.mNumIndices, vol_face.mIndices); } @@ -807,14 +807,13 @@ bool less_than_max_mag(const LLVector4a& vec) } BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f, - const LLMatrix4& mat_vert_in, BOOL global_volume) + const LLMatrix4a& mat_vert_in, BOOL global_volume) { //get bounding box if (mDrawablep->isState(LLDrawable::REBUILD_VOLUME | LLDrawable::REBUILD_POSITION | LLDrawable::REBUILD_RIGGED)) { //VECTORIZE THIS - LLMatrix4a mat_vert; - mat_vert.loadu(mat_vert_in); + const LLMatrix4a& mat_vert = mat_vert_in; /* alias only: the caller already supplies an LLMatrix4a, so no loadu() copy is made */ LLVector4a min,max; diff --git a/indra/newview/llface.h b/indra/newview/llface.h index 0997c186cc4d6b918696151c5bff3bef5c26d01f..cbe2834fb837a28fd8a151f82a7f91df7a8a6509 100644 --- a/indra/newview/llface.h +++ b/indra/newview/llface.h @@ -187,7 +187,7 @@ public: void setSize(S32 numVertices, S32 num_indices = 0, bool align = false); BOOL genVolumeBBoxes(const LLVolume &volume, S32 f, - const LLMatrix4& mat_vert_in, BOOL global_volume = FALSE); + const LLMatrix4a& mat_vert_in, BOOL global_volume = FALSE); /* parameter type must stay in sync with the definition in llface.cpp */ void init(LLDrawable* drawablep, 
LLViewerObject* objp); void destroy(); @@ -233,7 +233,7 @@ public: void notifyAboutMissingAsset(LLViewerTexture *texture); public: //aligned members - LLVector4a mExtents[2]; + LL_ALIGN_16(LLVector4a mExtents[2]); /* explicit 16-byte alignment request on the member itself */ private: F32 adjustPartialOverlapPixelArea(F32 cos_angle_to_view_dir, F32 radius ); diff --git a/indra/newview/llflexibleobject.cpp b/indra/newview/llflexibleobject.cpp index db42fb161777484f7d421a05ed8fbd0d8b88449a..ea05fa04ca0b8d954880a325fecb0372b7af3627 100644 --- a/indra/newview/llflexibleobject.cpp +++ b/indra/newview/llflexibleobject.cpp @@ -905,32 +905,35 @@ LLQuaternion LLVolumeImplFlexible::getEndRotation() void LLVolumeImplFlexible::updateRelativeXform(bool force_identity) { - LLQuaternion delta_rot; - LLVector3 delta_pos, delta_scale; + LLVOVolume* vo = (LLVOVolume*) mVO; bool use_identity = vo->mDrawable->isSpatialRoot() || force_identity; + vo->mRelativeXform.setIdentity(); /* start from identity; rows 0-2 are rotated and row 3 is overwritten further down */ + //matrix from local space to parent relative/global space - delta_rot = use_identity ? LLQuaternion() : vo->mDrawable->getRotation(); - delta_pos = use_identity ? 
LLVector3(0,0,0) : vo->mDrawable->getPosition(); - delta_scale = LLVector3(1,1,1); + LLVector4a delta_pos; + LLQuaternion2 delta_rot; + if(use_identity) + { + delta_pos.set(0,0,0,1.f); /* zero translation with w=1 */ + delta_rot.getVector4aRw() = delta_pos; /* the same (0,0,0,1) vector doubles as the quaternion components (x,y,z,w); delta_rot is not used on this path */ + } + else + { + delta_pos.load3(vo->mDrawable->getPosition().mV,1.f); /* w=1 so delta_pos can be stored as row 3 as-is */ + delta_rot.getVector4aRw().loadua(vo->mDrawable->getRotation().mQ); + vo->mRelativeXform.getRow<0>().setRotated(delta_rot,vo->mRelativeXform.getRow<0>()); /* presumably rotates each identity row by delta_rot in place - confirm getRow<N>() returns a mutable reference */ + vo->mRelativeXform.getRow<1>().setRotated(delta_rot,vo->mRelativeXform.getRow<1>()); + vo->mRelativeXform.getRow<2>().setRotated(delta_rot,vo->mRelativeXform.getRow<2>()); + } - // Vertex transform (4x4) - LLVector3 x_axis = LLVector3(delta_scale.mV[VX], 0.f, 0.f) * delta_rot; - LLVector3 y_axis = LLVector3(0.f, delta_scale.mV[VY], 0.f) * delta_rot; - LLVector3 z_axis = LLVector3(0.f, 0.f, delta_scale.mV[VZ]) * delta_rot; + vo->mRelativeXform.setRow<3>(delta_pos); - vo->mRelativeXform.initRows(LLVector4(x_axis, 0.f), - LLVector4(y_axis, 0.f), - LLVector4(z_axis, 0.f), - LLVector4(delta_pos, 1.f)); - - x_axis.normVec(); - y_axis.normVec(); - z_axis.normVec(); - - vo->mRelativeXformInvTrans.setRows(x_axis, y_axis, z_axis); + vo->mRelativeXformInvTrans = vo->mRelativeXform; + vo->mRelativeXformInvTrans.invert(); + vo->mRelativeXformInvTrans.transpose(); /* inverse-transpose of the full 4x4 replaces the old 3x3 built from normalised axes */ } const LLMatrix4& LLVolumeImplFlexible::getWorldMatrix(LLXformMatrix* xform) const diff --git a/indra/newview/llselectmgr.cpp b/indra/newview/llselectmgr.cpp index 61eb159ae24097468c0f8db14b1ef29ecdcff9b9..a3c82450653fd745cbda2cdcbc7f0af8f6210fb8 100644 --- a/indra/newview/llselectmgr.cpp +++ b/indra/newview/llselectmgr.cpp @@ -6257,7 +6257,7 @@ void pushWireframe(LLDrawable* drawable) { LLVertexBuffer::unbind(); gGL.pushMatrix(); - gGL.multMatrix((F32*) vobj->getRelativeXform().mMatrix); + gGL.multMatrix(vobj->getRelativeXform().getF32ptr()); /* getF32ptr() replaces the C-style cast of mMatrix */ LLVolume* volume = NULL; diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp index 
0d33447fc53e3ce695d454d2364f458469085f97..298c57ac58e7f7b432bc933097aa1550bcd2eccf 100644 --- a/indra/newview/llspatialpartition.cpp +++ b/indra/newview/llspatialpartition.cpp @@ -2078,7 +2078,7 @@ void renderNormals(LLDrawable* drawablep) { LLVolume* volume = vol->getVolume(); gGL.pushMatrix(); - gGL.multMatrix((F32*) vol->getRelativeXform().mMatrix); + gGL.multMatrix(vol->getRelativeXform().getF32ptr()); /* getF32ptr() replaces the C-style cast of mMatrix */ gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE); @@ -2230,7 +2230,7 @@ void renderPhysicsShape(LLDrawable* drawable, LLVOVolume* volume) LLVector3 size(0.25f,0.25f,0.25f); gGL.pushMatrix(); - gGL.multMatrix((F32*) volume->getRelativeXform().mMatrix); + gGL.multMatrix(volume->getRelativeXform().getF32ptr()); if (type == LLPhysicsShapeBuilderUtil::PhysicsShapeSpecification::USER_MESH) { @@ -2963,7 +2963,7 @@ void renderRaycast(LLDrawable* drawablep) gGL.pushMatrix(); gGL.translatef(trans.mV[0], trans.mV[1], trans.mV[2]); - gGL.multMatrix((F32*) vobj->getRelativeXform().mMatrix); + gGL.multMatrix(vobj->getRelativeXform().getF32ptr()); LLVector4a start, end; if (transform) diff --git a/indra/newview/llviewercamera.cpp b/indra/newview/llviewercamera.cpp index 50398ffeb15d411d3b282c1a5272113866b3431a..fb24dbe31c4570ac1ad3684c2758c3d0d3742338 100644 --- a/indra/newview/llviewercamera.cpp +++ b/indra/newview/llviewercamera.cpp @@ -716,14 +716,12 @@ BOOL LLViewerCamera::areVertsVisible(LLViewerObject* volumep, BOOL all_verts) LLVOVolume* vo_volume = (LLVOVolume*) volumep; vo_volume->updateRelativeXform(); - LLMatrix4 mat = vo_volume->getRelativeXform(); LLMatrix4 render_mat(vo_volume->getRenderRotation(), LLVector4(vo_volume->getRenderPosition())); LLMatrix4a render_mata; render_mata.loadu(render_mat); - LLMatrix4a mata; - mata.loadu(mat); + const LLMatrix4a& mata = vo_volume->getRelativeXform(); /* bound by reference: the getter now returns an LLMatrix4a, so the intermediate LLMatrix4 copy and loadu() are gone */ num_faces = volume->getNumVolumeFaces(); for (i = 0; i < num_faces; i++) diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp index 
b72228121ef3993a8911f9d3c83e15fa8ec41dee..ef50150f71c170d1a7981c5aa9dff57a74ab075d 100644 --- a/indra/newview/llvovolume.cpp +++ b/indra/newview/llvovolume.cpp @@ -1522,93 +1522,53 @@ void LLVOVolume::updateRelativeXform(bool force_identity) { //rigged volume (which is in agent space) is used for generating bounding boxes etc //inverse of render matrix should go to partition space mRelativeXform = getRenderMatrix(); - - F32* dst = (F32*) mRelativeXformInvTrans.mMatrix; - F32* src = (F32*) mRelativeXform.mMatrix; - dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; - dst[3] = src[4]; dst[4] = src[5]; dst[5] = src[6]; - dst[6] = src[8]; dst[7] = src[9]; dst[8] = src[10]; - + mRelativeXformInvTrans = mRelativeXform; /* copy is taken before mRelativeXform is inverted on the next line. NOTE(review): this copies the whole 4x4 where the removed code copied only the upper 3x3, so after transpose() the row-3 values end up in the W lanes of rows 0-2 - confirm consumers of the normal matrix ignore W */ mRelativeXform.invert(); mRelativeXformInvTrans.transpose(); } else if (drawable->isActive() || force_identity) { // setup relative transforms - LLQuaternion delta_rot; - LLVector3 delta_pos, delta_scale; - //matrix from local space to parent relative/global space bool use_identity = force_identity || drawable->isSpatialRoot(); - delta_rot = use_identity ? LLQuaternion() : mDrawable->getRotation(); - delta_pos = use_identity ? 
LLVector3(0,0,0) : mDrawable->getPosition(); - delta_scale = mDrawable->getScale(); - - // Vertex transform (4x4) - LLVector3 x_axis = LLVector3(delta_scale.mV[VX], 0.f, 0.f) * delta_rot; - LLVector3 y_axis = LLVector3(0.f, delta_scale.mV[VY], 0.f) * delta_rot; - LLVector3 z_axis = LLVector3(0.f, 0.f, delta_scale.mV[VZ]) * delta_rot; - - mRelativeXform.initRows(LLVector4(x_axis, 0.f), - LLVector4(y_axis, 0.f), - LLVector4(z_axis, 0.f), - LLVector4(delta_pos, 1.f)); - - // compute inverse transpose for normals - // mRelativeXformInvTrans.setRows(x_axis, y_axis, z_axis); - // mRelativeXformInvTrans.invert(); - // mRelativeXformInvTrans.setRows(x_axis, y_axis, z_axis); - // grumble - invert is NOT a matrix invert, so we do it by hand: - - LLMatrix3 rot_inverse = LLMatrix3(~delta_rot); - - LLMatrix3 scale_inverse; - scale_inverse.setRows(LLVector3(1.0, 0.0, 0.0) / delta_scale.mV[VX], - LLVector3(0.0, 1.0, 0.0) / delta_scale.mV[VY], - LLVector3(0.0, 0.0, 1.0) / delta_scale.mV[VZ]); - - - mRelativeXformInvTrans = rot_inverse * scale_inverse; + if(use_identity) + { + mRelativeXform.setIdentity(); + mRelativeXform.applyScale_affine(mDrawable->getScale()); /* identity rotation and zero translation: only the scale is applied */ + } + else + { + mRelativeXform = LLQuaternion2(mDrawable->getRotation()); /* relies on the implicit LLMatrix4a(const LLQuaternion2&) constructor */ + mRelativeXform.applyScale_affine(mDrawable->getScale()); /* scales rows 0-2 by x,y,z, matching the removed per-axis x_axis/y_axis/z_axis construction */ + mRelativeXform.setTranslate_affine(mDrawable->getPosition()); + } + mRelativeXformInvTrans = mRelativeXform; + mRelativeXformInvTrans.invert(); /* inverse-transpose now comes from a full 4x4 invert instead of the hand-built rot_inverse * scale_inverse */ mRelativeXformInvTrans.transpose(); } else { - LLVector3 pos = getPosition(); - LLVector3 scale = getScale(); - LLQuaternion rot = getRotation(); - + LLVector4a pos; + pos.load3(getPosition().mV); + LLQuaternion2 rot(getRotation()); if (mParent) { - pos *= mParent->getRotation(); - pos += mParent->getPosition(); - rot *= mParent->getRotation(); + LLMatrix4a lrot = LLQuaternion2(mParent->getRotation()); /* parent rotation as a matrix; stands in for the removed pos *= mParent->getRotation() */ + lrot.rotate(pos,pos); + LLVector4a lpos; + lpos.load3(mParent->getPosition().mV); + pos.add(lpos); /* stands in for the removed pos += mParent->getPosition() */ + 
rot.mul(LLQuaternion2(mParent->getRotation())); /* stands in for the removed rot *= mParent->getRotation(); note the parent rotation is converted to LLQuaternion2 a second time here (first time for lrot) */ } - - //LLViewerRegion* region = getRegion(); - //pos += region->getOriginAgent(); - - LLVector3 x_axis = LLVector3(scale.mV[VX], 0.f, 0.f) * rot; - LLVector3 y_axis = LLVector3(0.f, scale.mV[VY], 0.f) * rot; - LLVector3 z_axis = LLVector3(0.f, 0.f, scale.mV[VZ]) * rot; - - mRelativeXform.initRows(LLVector4(x_axis, 0.f), - LLVector4(y_axis, 0.f), - LLVector4(z_axis, 0.f), - LLVector4(pos, 1.f)); - - // compute inverse transpose for normals - LLMatrix3 rot_inverse = LLMatrix3(~rot); - - LLMatrix3 scale_inverse; - scale_inverse.setRows(LLVector3(1.0, 0.0, 0.0) / scale.mV[VX], - LLVector3(0.0, 1.0, 0.0) / scale.mV[VY], - LLVector3(0.0, 0.0, 1.0) / scale.mV[VZ]); - - - mRelativeXformInvTrans = rot_inverse * scale_inverse; + mRelativeXform = rot; /* implicit LLQuaternion2 -> LLMatrix4a conversion */ + mRelativeXform.applyScale_affine(getScale()); + mRelativeXform.setTranslate_affine(LLVector3(pos.getF32ptr())); /* round-trips the SSE position through an LLVector3 because setTranslate_affine only accepts LLVector3 */ + + mRelativeXformInvTrans = mRelativeXform; + mRelativeXformInvTrans.invert(); mRelativeXformInvTrans.transpose(); } } @@ -3285,10 +3245,10 @@ void LLVOVolume::generateSilhouette(LLSelectNode* nodep, const LLVector3& view_p } updateRelativeXform(); - LLMatrix4 trans_mat = mRelativeXform; + LLMatrix4a trans_mat = mRelativeXform; /* local copy so the region offset below does not modify the member */ if (mDrawable->isStatic()) { - trans_mat.translate(getRegion()->getOriginAgent()); + trans_mat.translate_affine(getRegion()->getOriginAgent()); /* adds the region origin directly to row 3 */ } volume->generateSilhouetteVertices(nodep->mSilhouetteVertices, nodep->mSilhouetteNormals, view_vector, trans_mat, mRelativeXformInvTrans, nodep->getTESelectMask()); @@ -5554,11 +5514,8 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group) { llassert(!face->isState(LLFace::RIGGED)); - LLMatrix4a temprelxformmat(vobj->getRelativeXform()); - LLMatrix4a temprelxformmatinv; - temprelxformmatinv.loadu(vobj->getRelativeXformInvTrans()); if (!face->getGeometryVolume(*volume, face->getTEOffset(), - temprelxformmat, temprelxformmatinv, face->getGeomIndex())) + vobj->getRelativeXform(), 
vobj->getRelativeXformInvTrans(), face->getGeomIndex())) /* matrices are passed straight from the getters; the temporary LLMatrix4a copies are gone */ { //something's gone wrong with the vertex buffer accounting, rebuild this group group->dirtyGeom(); gPipeline.markRebuild(group, TRUE); @@ -6496,11 +6453,8 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, LLFac U32 te_idx = facep->getTEOffset(); llassert(!facep->isState(LLFace::RIGGED)); - LLMatrix4a temprelxformmat(vobj->getRelativeXform()); - LLMatrix4a temprelxformmatinv; - temprelxformmatinv.loadu(vobj->getRelativeXformInvTrans()); if (!facep->getGeometryVolume(*volume, te_idx, - temprelxformmat, temprelxformmatinv, index_offset,true)) + vobj->getRelativeXform(), vobj->getRelativeXformInvTrans(), index_offset,true)) /* same simplification as in rebuildMesh: no temporary matrix copies */ { LL_WARNS() << "Failed to get geometry for face!" << LL_ENDL; } diff --git a/indra/newview/llvovolume.h b/indra/newview/llvovolume.h index 6eb04f649ce8f9ce30f733a3ebbf0541eb7b0715..edd983b498318d192a63a60fa71e1dd544f1f1b7 100644 --- a/indra/newview/llvovolume.h +++ b/indra/newview/llvovolume.h @@ -27,6 +27,7 @@ #ifndef LL_LLVOVOLUME_H #define LL_LLVOVOLUME_H +#include "llmemory.h" #include "llviewerobject.h" #include "llviewertexture.h" #include "llviewermedia.h" @@ -126,8 +127,8 @@ public: /*virtual*/ BOOL setParent(LLViewerObject* parent) override; S32 getLOD() const override { return mLOD; } const LLVector3 getPivotPositionAgent() const override; - const LLMatrix4& getRelativeXform() const { return mRelativeXform; } - const LLMatrix3& getRelativeXformInvTrans() const { return mRelativeXformInvTrans; } + const LLMatrix4a& getRelativeXform() const { return mRelativeXform; } /* return type changed from LLMatrix4 to LLMatrix4a */ + const LLMatrix4a& getRelativeXformInvTrans() const { return mRelativeXformInvTrans; } /* return type changed from LLMatrix3 to a full LLMatrix4a */ /*virtual*/ const LLMatrix4 getRenderMatrix() const override; typedef std::map<LLUUID, S32> texture_cost_t; U32 getRenderCost(texture_cost_t &textures) const; @@ -361,8 +362,8 @@ private: BOOL mLODChanged; BOOL mSculptChanged; F32 mSpotLightPriority; - LLMatrix4 mRelativeXform; - LLMatrix3 
mRelativeXformInvTrans; + LL_ALIGN_16(LLMatrix4a mRelativeXform); /* was LLMatrix4; now an SSE matrix with an explicit 16-byte alignment request */ + LL_ALIGN_16(LLMatrix4a mRelativeXformInvTrans); /* was LLMatrix3; now a full 4x4 holding the inverse-transpose */ BOOL mVolumeChanged; F32 mVObjRadius; LLVolumeInterface *mVolumeImpl;