diff --git a/indra/llcommon/lluuid.h b/indra/llcommon/lluuid.h index 916d53274be0284d795036d11305ec8e6ea2bb7c..b750378eee7b29ffad71856700ecd2b3dc2f13cc 100644 --- a/indra/llcommon/lluuid.h +++ b/indra/llcommon/lluuid.h @@ -92,7 +92,7 @@ class LL_COMMON_API LLUUID */ LL_FORCE_INLINE __m128i load_unaligned_si128(const U8* p) const { -#if defined(AL_AVX) +#if defined(AL_AVX) || defined(__SSE3__) return _mm_lddqu_si128(reinterpret_cast<const __m128i*>(p)); #else return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)); @@ -102,7 +102,7 @@ class LL_COMMON_API LLUUID BOOL isNull() const // Faster than comparing to LLUUID::null. { __m128i mm = load_unaligned_si128(mData); -#if defined(AL_AVX) +#if defined(AL_AVX) || defined(__SSE4_1__) return _mm_test_all_zeros(mm, mm) != 0; #else mm = _mm_cmpeq_epi8(mm, _mm_setzero_si128()); @@ -126,7 +126,7 @@ class LL_COMMON_API LLUUID __m128i mm_right = load_unaligned_si128(rhs.mData); __m128i mm_cmp = _mm_cmpeq_epi32(mm_left, mm_right); -#if defined(AL_AVX) +#if defined(AL_AVX) || defined(__SSE4_1__) return _mm_test_all_ones(mm_cmp); #else return _mm_movemask_epi8(mm_cmp) == 0xFFFF; diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index aef6a258208846e139d4c5f25b5070dd3b2ecfd3..1949dd457e9bf25e410cecd0f96828552f68a93c 100644 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -248,7 +248,7 @@ inline void LLVector4a::setCross3(const LLVector4a& a, const LLVector4a& b) // Set all elements to the dot product of the x, y, and z elements in a and b inline void LLVector4a::setAllDot3(const LLVector4a& a, const LLVector4a& b) { -#if AL_AVX +#if AL_AVX || defined(__SSE4_1__) mQ = _mm_dp_ps(a.mQ, b.mQ, 0x7f); #else // ab = { a[W]*b[W], a[Z]*b[Z], a[Y]*b[Y], a[X]*b[X] } @@ -269,7 +269,7 @@ inline void LLVector4a::setAllDot3(const LLVector4a& a, const LLVector4a& b) // Set all elements to the dot product of the x, y, z, and w elements in a and b inline void LLVector4a::setAllDot4(const LLVector4a& a, const LLVector4a& b) { -#if AL_AVX +#if AL_AVX || defined(__SSE4_1__) mQ = _mm_dp_ps(a.mQ, b.mQ, 0xff); #else // ab = { a[W]*b[W], a[Z]*b[Z], a[Y]*b[Y], a[X]*b[X] } @@ -290,7 +290,7 @@ inline void LLVector4a::setAllDot4(const LLVector4a& a, const LLVector4a& b) // Return the 3D dot product of this vector and b inline LLSimdScalar LLVector4a::dot3(const LLVector4a& b) const { -#if AL_AVX +#if AL_AVX || defined(__SSE4_1__) return _mm_dp_ps(mQ, b.mQ, 0x7f); #else const LLQuad ab = _mm_mul_ps( mQ, b.mQ ); @@ -304,7 +304,7 @@ inline LLSimdScalar LLVector4a::dot3(const LLVector4a& b) const // Return the 4D dot product of this vector and b inline LLSimdScalar LLVector4a::dot4(const LLVector4a& b) const { -#if AL_AVX +#if AL_AVX || defined(__SSE4_1__) return _mm_dp_ps(mQ, b.mQ, 0xff); #else // ab = { w, z, y, x } diff --git a/indra/llprimitive/llmaterialid.h b/indra/llprimitive/llmaterialid.h index 5a27eeb89b7ed812a07bad1aedf8085ccb18017b..aba4edb62d4a173f1f9dfa9afe36dedc3147f89e 100644 --- a/indra/llprimitive/llmaterialid.h +++ b/indra/llprimitive/llmaterialid.h @@ -54,7 +54,7 @@ class LLMaterialID */ LL_FORCE_INLINE __m128i load_unaligned_si128(const U8* p) const { -#if defined(AL_AVX) +#if defined(AL_AVX) || defined(__SSE3__) return _mm_lddqu_si128(reinterpret_cast<const __m128i*>(p)); #else return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)); @@ -67,7 +67,7 @@ class LLMaterialID __m128i mm_right = load_unaligned_si128(rhs.mID); __m128i mm_cmp = _mm_cmpeq_epi32(mm_left, mm_right); -#if defined(AL_AVX) +#if defined(AL_AVX) || defined(__SSE4_1__) return _mm_test_all_ones(mm_cmp); #else return _mm_movemask_epi8(mm_cmp) == 0xFFFF; @@ -131,7 +131,7 @@ class LLMaterialID bool isNull() const { __m128i mm = load_unaligned_si128(mID); -#if defined(AL_AVX) +#if defined(AL_AVX) || defined(__SSE4_1__) return _mm_test_all_zeros(mm, mm) != 0; #else mm = _mm_cmpeq_epi8(mm, _mm_setzero_si128());