Skip to content
Snippets Groups Projects
Commit 9656a9e9 authored by Rye Mutt :bread:
Browse files

Enable newer SSE instructions when code generation flags are enabled

parent d022964a
No related branches found
No related tags found
No related merge requests found
...@@ -92,7 +92,7 @@ class LL_COMMON_API LLUUID ...@@ -92,7 +92,7 @@ class LL_COMMON_API LLUUID
*/ */
LL_FORCE_INLINE __m128i load_unaligned_si128(const U8* p) const LL_FORCE_INLINE __m128i load_unaligned_si128(const U8* p) const
{ {
#if defined(AL_AVX) #if defined(AL_AVX) || defined(__SSE3__)
return _mm_lddqu_si128(reinterpret_cast<const __m128i*>(p)); return _mm_lddqu_si128(reinterpret_cast<const __m128i*>(p));
#else #else
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)); return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
...@@ -102,7 +102,7 @@ class LL_COMMON_API LLUUID ...@@ -102,7 +102,7 @@ class LL_COMMON_API LLUUID
BOOL isNull() const // Faster than comparing to LLUUID::null. BOOL isNull() const // Faster than comparing to LLUUID::null.
{ {
__m128i mm = load_unaligned_si128(mData); __m128i mm = load_unaligned_si128(mData);
#if defined(AL_AVX) #if defined(AL_AVX) || defined(__SSE4_1__)
return _mm_test_all_zeros(mm, mm) != 0; return _mm_test_all_zeros(mm, mm) != 0;
#else #else
mm = _mm_cmpeq_epi8(mm, _mm_setzero_si128()); mm = _mm_cmpeq_epi8(mm, _mm_setzero_si128());
...@@ -126,7 +126,7 @@ class LL_COMMON_API LLUUID ...@@ -126,7 +126,7 @@ class LL_COMMON_API LLUUID
__m128i mm_right = load_unaligned_si128(rhs.mData); __m128i mm_right = load_unaligned_si128(rhs.mData);
__m128i mm_cmp = _mm_cmpeq_epi32(mm_left, mm_right); __m128i mm_cmp = _mm_cmpeq_epi32(mm_left, mm_right);
#if defined(AL_AVX) #if defined(AL_AVX) || defined(__SSE4_1__)
return _mm_test_all_ones(mm_cmp); return _mm_test_all_ones(mm_cmp);
#else #else
return _mm_movemask_epi8(mm_cmp) == 0xFFFF; return _mm_movemask_epi8(mm_cmp) == 0xFFFF;
......
...@@ -248,7 +248,7 @@ inline void LLVector4a::setCross3(const LLVector4a& a, const LLVector4a& b) ...@@ -248,7 +248,7 @@ inline void LLVector4a::setCross3(const LLVector4a& a, const LLVector4a& b)
// Set all elements to the dot product of the x, y, and z elements in a and b // Set all elements to the dot product of the x, y, and z elements in a and b
inline void LLVector4a::setAllDot3(const LLVector4a& a, const LLVector4a& b) inline void LLVector4a::setAllDot3(const LLVector4a& a, const LLVector4a& b)
{ {
#if AL_AVX #if AL_AVX || defined(__SSE4_1__)
mQ = _mm_dp_ps(a.mQ, b.mQ, 0x7f); mQ = _mm_dp_ps(a.mQ, b.mQ, 0x7f);
#else #else
// ab = { a[W]*b[W], a[Z]*b[Z], a[Y]*b[Y], a[X]*b[X] } // ab = { a[W]*b[W], a[Z]*b[Z], a[Y]*b[Y], a[X]*b[X] }
...@@ -269,7 +269,7 @@ inline void LLVector4a::setAllDot3(const LLVector4a& a, const LLVector4a& b) ...@@ -269,7 +269,7 @@ inline void LLVector4a::setAllDot3(const LLVector4a& a, const LLVector4a& b)
// Set all elements to the dot product of the x, y, z, and w elements in a and b // Set all elements to the dot product of the x, y, z, and w elements in a and b
inline void LLVector4a::setAllDot4(const LLVector4a& a, const LLVector4a& b) inline void LLVector4a::setAllDot4(const LLVector4a& a, const LLVector4a& b)
{ {
#if AL_AVX #if AL_AVX || defined(__SSE4_1__)
mQ = _mm_dp_ps(a.mQ, b.mQ, 0xff); mQ = _mm_dp_ps(a.mQ, b.mQ, 0xff);
#else #else
// ab = { a[W]*b[W], a[Z]*b[Z], a[Y]*b[Y], a[X]*b[X] } // ab = { a[W]*b[W], a[Z]*b[Z], a[Y]*b[Y], a[X]*b[X] }
...@@ -290,7 +290,7 @@ inline void LLVector4a::setAllDot4(const LLVector4a& a, const LLVector4a& b) ...@@ -290,7 +290,7 @@ inline void LLVector4a::setAllDot4(const LLVector4a& a, const LLVector4a& b)
// Return the 3D dot product of this vector and b // Return the 3D dot product of this vector and b
inline LLSimdScalar LLVector4a::dot3(const LLVector4a& b) const inline LLSimdScalar LLVector4a::dot3(const LLVector4a& b) const
{ {
#if AL_AVX #if AL_AVX || defined(__SSE4_1__)
return _mm_dp_ps(mQ, b.mQ, 0x7f); return _mm_dp_ps(mQ, b.mQ, 0x7f);
#else #else
const LLQuad ab = _mm_mul_ps( mQ, b.mQ ); const LLQuad ab = _mm_mul_ps( mQ, b.mQ );
...@@ -304,7 +304,7 @@ inline LLSimdScalar LLVector4a::dot3(const LLVector4a& b) const ...@@ -304,7 +304,7 @@ inline LLSimdScalar LLVector4a::dot3(const LLVector4a& b) const
// Return the 4D dot product of this vector and b // Return the 4D dot product of this vector and b
inline LLSimdScalar LLVector4a::dot4(const LLVector4a& b) const inline LLSimdScalar LLVector4a::dot4(const LLVector4a& b) const
{ {
#if AL_AVX #if AL_AVX || defined(__SSE4_1__)
return _mm_dp_ps(mQ, b.mQ, 0xff); return _mm_dp_ps(mQ, b.mQ, 0xff);
#else #else
// ab = { w, z, y, x } // ab = { w, z, y, x }
......
...@@ -54,7 +54,7 @@ class LLMaterialID ...@@ -54,7 +54,7 @@ class LLMaterialID
*/ */
LL_FORCE_INLINE __m128i load_unaligned_si128(const U8* p) const LL_FORCE_INLINE __m128i load_unaligned_si128(const U8* p) const
{ {
#if defined(AL_AVX) #if defined(AL_AVX) || defined(__SSE3__)
return _mm_lddqu_si128(reinterpret_cast<const __m128i*>(p)); return _mm_lddqu_si128(reinterpret_cast<const __m128i*>(p));
#else #else
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)); return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
...@@ -67,7 +67,7 @@ class LLMaterialID ...@@ -67,7 +67,7 @@ class LLMaterialID
__m128i mm_right = load_unaligned_si128(rhs.mID); __m128i mm_right = load_unaligned_si128(rhs.mID);
__m128i mm_cmp = _mm_cmpeq_epi32(mm_left, mm_right); __m128i mm_cmp = _mm_cmpeq_epi32(mm_left, mm_right);
#if defined(AL_AVX) #if defined(AL_AVX) || defined(__SSE4_1__)
return _mm_test_all_ones(mm_cmp); return _mm_test_all_ones(mm_cmp);
#else #else
return _mm_movemask_epi8(mm_cmp) == 0xFFFF; return _mm_movemask_epi8(mm_cmp) == 0xFFFF;
...@@ -131,7 +131,7 @@ class LLMaterialID ...@@ -131,7 +131,7 @@ class LLMaterialID
bool isNull() const bool isNull() const
{ {
__m128i mm = load_unaligned_si128(mID); __m128i mm = load_unaligned_si128(mID);
#if defined(AL_AVX) #if defined(AL_AVX) || defined(__SSE4_1__)
return _mm_test_all_zeros(mm, mm) != 0; return _mm_test_all_zeros(mm, mm) != 0;
#else #else
mm = _mm_cmpeq_epi8(mm, _mm_setzero_si128()); mm = _mm_cmpeq_epi8(mm, _mm_setzero_si128());
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment