Skip to content
Snippets Groups Projects
Commit 705f7277 authored by Rye Mutt's avatar Rye Mutt :bread:
Browse files

SIMD uuid parsing for SSE 4.2 enabled builds

parent 1dae1a70
No related branches found
No related tags found
No related merge requests found
...@@ -45,7 +45,8 @@ set(CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Release;Debug" CACHE STRING ...@@ -45,7 +45,8 @@ set(CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Release;Debug" CACHE STRING
"Supported build types." FORCE) "Supported build types." FORCE)
# SIMD config # SIMD config
option(USE_SSE41 "Enable usage of the SSE4.2 instruction set" OFF) option(USE_SSE41 "Enable usage of the SSE4.1 instruction set" OFF)
option(USE_SSE42 "Enable usage of the SSE4.2 instruction set" OFF)
option(USE_AVX "Enable usage of the AVX instruction set" OFF) option(USE_AVX "Enable usage of the AVX instruction set" OFF)
option(USE_AVX2 "Enable usage of the AVX2 instruction set" OFF) option(USE_AVX2 "Enable usage of the AVX2 instruction set" OFF)
if((USE_SSE41 AND USE_AVX) OR (USE_SSE41 AND USE_AVX AND USE_AVX2) OR (USE_AVX AND USE_AVX2)) if((USE_SSE41 AND USE_AVX) OR (USE_SSE41 AND USE_AVX AND USE_AVX2) OR (USE_AVX AND USE_AVX2))
...@@ -150,6 +151,8 @@ if (WINDOWS) ...@@ -150,6 +151,8 @@ if (WINDOWS)
elseif (USE_AVX) elseif (USE_AVX)
set(GLOBAL_CXX_FLAGS "${GLOBAL_CXX_FLAGS} /arch:AVX") set(GLOBAL_CXX_FLAGS "${GLOBAL_CXX_FLAGS} /arch:AVX")
add_definitions(/DAL_AVX=1) add_definitions(/DAL_AVX=1)
elseif (USE_SSE42)
add_definitions(/D__SSE3__=1 /D__SSSE3__=1 /D__SSE4__=1 /D__SSE4_1__=1 /D__SSE4_2__=1)
elseif (USE_SSE41) elseif (USE_SSE41)
add_definitions(/D__SSE3__=1 /D__SSSE3__=1 /D__SSE4__=1 /D__SSE4_1__=1) add_definitions(/D__SSE3__=1 /D__SSSE3__=1 /D__SSE4__=1 /D__SSE4_1__=1)
elseif (ADDRESS_SIZE EQUAL 32) elseif (ADDRESS_SIZE EQUAL 32)
......
...@@ -208,42 +208,8 @@ BOOL LLUUID::set(const char* in_string, BOOL emit) ...@@ -208,42 +208,8 @@ BOOL LLUUID::set(const char* in_string, BOOL emit)
return set(absl::NullSafeStringView(in_string),emit); return set(absl::NullSafeStringView(in_string),emit);
} }
BOOL LLUUID::set(const std::string_view in_string, BOOL emit) BOOL LLUUID::parseInternalScalar(const char* in_string, bool broken_format, bool emit)
{
BOOL broken_format = FALSE;
// empty strings should make NULL uuid
if (in_string.empty())
{
setNull();
return TRUE;
}
if (in_string.length() != (UUID_STR_LENGTH - 1)) /* Flawfinder: ignore */
{
// I'm a moron. First implementation didn't have the right UUID format.
// Shouldn't see any of these any more
if (in_string.length() == (UUID_STR_LENGTH - 2)) /* Flawfinder: ignore */
{ {
if(emit)
{
LL_WARNS() << "Warning! Using broken UUID string format" << LL_ENDL;
}
broken_format = TRUE;
}
else
{
// Bad UUID string. Spam as INFO, as most cases we don't care.
if(emit)
{
//don't spam the logs because a resident can't spell.
LL_WARNS() << "Bad UUID string: " << in_string << LL_ENDL;
}
setNull();
return FALSE;
}
}
U8 cur_pos = 0; U8 cur_pos = 0;
S32 i; S32 i;
for (i = 0; i < UUID_BYTES; i++) for (i = 0; i < UUID_BYTES; i++)
...@@ -308,26 +274,129 @@ BOOL LLUUID::set(const std::string_view in_string, BOOL emit) ...@@ -308,26 +274,129 @@ BOOL LLUUID::set(const std::string_view in_string, BOOL emit)
} }
cur_pos++; cur_pos++;
} }
return TRUE;
}
#if defined(__SSE4_2__)
/**
 * Parse a canonical 36-character hyphenated UUID string
 * (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx) into mData using SSE4.2.
 *
 * @param in_string  Pointer to the UUID text. Bytes [0, 35] are read
 *                   unconditionally, so at least 36 bytes must be readable.
 * @param emit       When true, a warning is logged if the string is rejected.
 * @return TRUE when all 32 digit positions hold hexadecimal characters and
 *         mData was written; FALSE (after setNull()) otherwise.
 *
 * The characters at the separator positions (8, 13, 18, 23) are skipped and
 * not inspected.
 */
BOOL LLUUID::parseInternalSIMD(const char* in_string, bool emit)
{
    __m128i mm_lower_mask_1, mm_lower_mask_2, mm_upper_mask_1, mm_upper_mask_2;

    // Two unaligned 16-byte loads: characters [0, 15] and [19, 34].
    const __m128i mm_lower = _mm_loadu_si128(reinterpret_cast<const __m128i *>(in_string));
    const __m128i mm_upper = _mm_loadu_si128(reinterpret_cast<const __m128i *>(in_string + UUID_BYTES + 3));

    // Gather the first character of every byte pair into *_mask_1 and the
    // second character into *_mask_2, skipping the hyphen positions.
    // A shuffle index of -1 zeroes the destination lane.
    mm_lower_mask_1 = _mm_shuffle_epi8(mm_lower, _mm_setr_epi8(0, 2, 4, 6, 9, 11, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1));
    mm_lower_mask_2 = _mm_shuffle_epi8(mm_lower, _mm_setr_epi8(1, 3, 5, 7, 10, 12, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1));
    mm_upper_mask_1 = _mm_shuffle_epi8(mm_upper, _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 0, 2, 5, 7, 9, 11, 13, -1));
    mm_upper_mask_2 = _mm_shuffle_epi8(mm_upper, _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 3, 6, 8, 10, 12, 14, -1));

    // Because of the hyphens the string is 36 characters long, which does not
    // fit in two 16-character loads; therefore the characters the shuffles
    // could not place are inserted manually here.
    mm_lower_mask_1 = _mm_insert_epi8(mm_lower_mask_1, in_string[16], 7);
    mm_lower_mask_2 = _mm_insert_epi8(mm_lower_mask_2, in_string[17], 7);
    mm_upper_mask_1 = _mm_insert_epi8(mm_upper_mask_1, in_string[34], 15);
    mm_upper_mask_2 = _mm_insert_epi8(mm_upper_mask_2, in_string[35], 15);

    // Merge [aaaaaaaa|aaaaaaaa|00000000|00000000] | [00000000|00000000|bbbbbbbb|bbbbbbbb] -> [aaaaaaaa|aaaaaaaa|bbbbbbbb|bbbbbbbb]
    __m128i mm_mask_merge_1 = _mm_or_si128(mm_lower_mask_1, mm_upper_mask_1);
    __m128i mm_mask_merge_2 = _mm_or_si128(mm_lower_mask_2, mm_upper_mask_2);

    // Check that every character is a hexadecimal digit: 0-9, A-F or a-f.
    // Accepting the whole alphabet here would let 'g'..'z' through, which
    // decode to values above 15 and corrupt neighbouring bytes when the high
    // nibble is shifted below. The zero byte at index 6 terminates the
    // implicit-length range list after the three pairs.
    const __m128i mm_allowed_char_range = _mm_setr_epi8('0', '9', 'A', 'F', 'a', 'f', 0, -1, 0, -1, 0, -1, 0, -1, 0, -1);

    // With negative polarity the returned index is the first character that
    // is NOT inside any range; 16 (== UUID_BYTES) means no such character.
    const int cmp_lower = _mm_cmpistri(mm_allowed_char_range, mm_mask_merge_1, _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | _SIDD_NEGATIVE_POLARITY);
    const int cmp_upper = _mm_cmpistri(mm_allowed_char_range, mm_mask_merge_2, _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | _SIDD_NEGATIVE_POLARITY);
    if (cmp_lower != UUID_BYTES || cmp_upper != UUID_BYTES)
    {
        if(emit)
        {
            // NOTE(review): in_string is streamed as a C string here; confirm
            // that callers always pass NUL-terminated storage.
            LL_WARNS() << "Invalid UUID string: " << in_string << LL_ENDL;
        }
        setNull();
        return FALSE;
    }

    // Lanes holding a letter compare greater than '9'.
    const __m128i nine = _mm_set1_epi8('9');
    const __m128i mm_above_nine_mask_1 = _mm_cmpgt_epi8(mm_mask_merge_1, nine);
    const __m128i mm_above_nine_mask_2 = _mm_cmpgt_epi8(mm_mask_merge_2, nine);
    __m128i mm_letter_mask_1 = _mm_and_si128(mm_mask_merge_1, mm_above_nine_mask_1);
    __m128i mm_letter_mask_2 = _mm_and_si128(mm_mask_merge_2, mm_above_nine_mask_2);

    // Convert all letters to lower case first
    const __m128i toLowerCase = _mm_set1_epi8(0x20);
    mm_letter_mask_1 = _mm_or_si128(mm_letter_mask_1, toLowerCase);
    mm_letter_mask_2 = _mm_or_si128(mm_letter_mask_2, toLowerCase);

    // Rebase letters so that 'a' lands on '0' + 10; the common '0' is
    // subtracted from every lane further down.
    const __m128i toHex = _mm_set1_epi8('a' - 10 - '0');
    const __m128i fixedUppercase1 = _mm_sub_epi8(mm_letter_mask_1, toHex);
    const __m128i fixedUppercase2 = _mm_sub_epi8(mm_letter_mask_2, toHex);

    // Take the rebased value for letter lanes and the raw digit otherwise.
    const __m128i mm_blended_high = _mm_blendv_epi8(mm_mask_merge_1, fixedUppercase1, mm_above_nine_mask_1);
    const __m128i mm_blended_low = _mm_blendv_epi8(mm_mask_merge_2, fixedUppercase2, mm_above_nine_mask_2);

    const __m128i zero = _mm_set1_epi8('0');
    __m128i lo = _mm_sub_epi8(mm_blended_low, zero);
    __m128i hi = _mm_sub_epi8(mm_blended_high, zero);

    // Every lane is now in [0, 15], so the 16-bit shift cannot carry a bit
    // from one byte into the next.
    hi = _mm_slli_epi16(hi, 4);

    // The nibbles occupy disjoint bits, so XOR combines them like OR.
    _mm_storeu_si128(reinterpret_cast<__m128i *>(mData), _mm_xor_si128(hi, lo));
    return TRUE;
}
#endif
BOOL LLUUID::validate(const std::string_view in_string) BOOL LLUUID::set(const std::string_view in_string, BOOL emit)
{
// empty strings should make NULL uuid
if (in_string.empty())
{ {
setNull();
return TRUE;
}
BOOL broken_format = FALSE; BOOL broken_format = FALSE;
if (in_string.length() != (UUID_STR_LENGTH - 1)) /* Flawfinder: ignore */ if (in_string.length() != (UUID_STR_LENGTH - 1)) /* Flawfinder: ignore */
{ {
// I'm a moron. First implementation didn't have the right UUID format. // I'm a moron. First implementation didn't have the right UUID format.
// Shouldn't see any of these any more
if (in_string.length() == (UUID_STR_LENGTH - 2)) /* Flawfinder: ignore */ if (in_string.length() == (UUID_STR_LENGTH - 2)) /* Flawfinder: ignore */
{ {
if(emit)
{
LL_WARNS() << "Warning! Using broken UUID string format" << LL_ENDL;
}
broken_format = TRUE; broken_format = TRUE;
} }
else else
{ {
// Bad UUID string. Spam as INFO, as most cases we don't care.
if(emit)
{
//don't spam the logs because a resident can't spell.
LL_WARNS() << "Bad UUID string: " << in_string << LL_ENDL;
}
setNull();
return FALSE; return FALSE;
} }
} }
#if defined(__SSE4_2__)
if(broken_format)
{
return parseInternalScalar(in_string.data(), broken_format, emit);
}
else
{
return parseInternalSIMD(in_string.data(), emit);
}
#else
return parseInternalScalar(in_string.data(), broken_format, emit);
#endif
return TRUE;
}
BOOL validate_internal_scalar(const char* str_ptr, bool broken_format)
{
U8 cur_pos = 0; U8 cur_pos = 0;
for (U32 i = 0; i < 16; i++) for (U32 i = 0; i < 16; i++)
{ {
...@@ -341,38 +410,92 @@ BOOL LLUUID::validate(const std::string_view in_string) ...@@ -341,38 +410,92 @@ BOOL LLUUID::validate(const std::string_view in_string)
} }
} }
if ((in_string[cur_pos] >= '0') && (in_string[cur_pos] <= '9')) if (((str_ptr[cur_pos] >= '0') && (str_ptr[cur_pos] <= '9'))
|| ((str_ptr[cur_pos] >= 'a') && (str_ptr[cur_pos] <='f'))
|| ((str_ptr[cur_pos] >= 'A') && (str_ptr[cur_pos] <='F')))
{ {
} }
else if ((in_string[cur_pos] >= 'a') && (in_string[cur_pos] <='f')) else
{ {
return FALSE;
} }
else if ((in_string[cur_pos] >= 'A') && (in_string[cur_pos] <='F'))
cur_pos++;
if (((str_ptr[cur_pos] >= '0') && (str_ptr[cur_pos] <= '9'))
|| ((str_ptr[cur_pos] >= 'a') && (str_ptr[cur_pos] <='f'))
|| ((str_ptr[cur_pos] >= 'A') && (str_ptr[cur_pos] <='F')))
{ {
} }
else else
{ {
return FALSE; return FALSE;
} }
cur_pos++; cur_pos++;
}
return TRUE;
}
if ((in_string[cur_pos] >= '0') && (in_string[cur_pos] <= '9')) #if defined(__SSE4_2__)
BOOL validate_internal_simd(const char* str_ptr)
{ {
} __m128i mm_lower_mask_1, mm_lower_mask_2, mm_upper_mask_1, mm_upper_mask_2;
else if ((in_string[cur_pos] >= 'a') && (in_string[cur_pos] <='f')) const __m128i mm_lower = _mm_loadu_si128(reinterpret_cast<const __m128i *>(str_ptr));
const __m128i mm_upper = _mm_loadu_si128(reinterpret_cast<const __m128i *>(str_ptr + UUID_BYTES + 3));
mm_lower_mask_1 = _mm_shuffle_epi8(mm_lower, _mm_setr_epi8(0, 2, 4, 6, 9, 11, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1));
mm_lower_mask_2 = _mm_shuffle_epi8(mm_lower, _mm_setr_epi8(1, 3, 5, 7, 10, 12, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1));
mm_upper_mask_1 = _mm_shuffle_epi8(mm_upper, _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 0, 2, 5, 7, 9, 11, 13, -1));
mm_upper_mask_2 = _mm_shuffle_epi8(mm_upper, _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 3, 6, 8, 10, 12, 14, -1));
// Since there are hyphens between the character groups we have 36 characters, which do not fit in two 16-char loads;
// therefore we must manually insert the remaining characters here
mm_lower_mask_1 = _mm_insert_epi8(mm_lower_mask_1, str_ptr[16], 7);
mm_lower_mask_2 = _mm_insert_epi8(mm_lower_mask_2, str_ptr[17], 7);
mm_upper_mask_1 = _mm_insert_epi8(mm_upper_mask_1, str_ptr[34], 15);
mm_upper_mask_2 = _mm_insert_epi8(mm_upper_mask_2, str_ptr[35], 15);
// Merge [aaaaaaaa|aaaaaaaa|00000000|00000000] | [00000000|00000000|bbbbbbbb|bbbbbbbb] -> [aaaaaaaa|aaaaaaaa|bbbbbbbb|bbbbbbbb]
__m128i mm_mask_merge_1 = _mm_or_si128(mm_lower_mask_1, mm_upper_mask_1);
__m128i mm_mask_merge_2 = _mm_or_si128(mm_lower_mask_2, mm_upper_mask_2);
// Check if all characters are between 0-9, A-Z or a-z
const __m128i mm_allowed_char_range = _mm_setr_epi8('0', '9', 'A', 'Z', 'a', 'z', 0, -1, 0, -1, 0, -1, 0, -1, 0, -1);
const int cmp_lower = _mm_cmpistri(mm_allowed_char_range, mm_mask_merge_1, _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | _SIDD_NEGATIVE_POLARITY);
const int cmp_upper = _mm_cmpistri(mm_allowed_char_range, mm_mask_merge_2, _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | _SIDD_NEGATIVE_POLARITY);
if (cmp_lower != UUID_BYTES || cmp_upper != UUID_BYTES)
{ {
return FALSE;
} }
else if ((in_string[cur_pos] >= 'A') && (in_string[cur_pos] <='F'))
{ return TRUE;
} }
else #endif
BOOL LLUUID::validate(std::string_view in_string)
{
if (in_string.empty())
{ {
return FALSE; return FALSE;
} }
cur_pos++;
static constexpr auto HYPEN_UUID = 36;
static constexpr auto BROKEN_UUID = 35;
size_t in_str_size = in_string.size();
if(in_str_size == HYPEN_UUID)
{
#if defined(__SSE4_2__)
return validate_internal_simd(in_string.data());
#else
return validate_internal_scalar(in_string.data(), false);
#endif
} }
return TRUE; else if (in_str_size == BROKEN_UUID)
{
return validate_internal_scalar(in_string.data(), true);
}
return FALSE;
} }
const LLUUID& LLUUID::operator^=(const LLUUID& rhs) const LLUUID& LLUUID::operator^=(const LLUUID& rhs)
......
...@@ -71,6 +71,12 @@ class LL_COMMON_API LLUUID ...@@ -71,6 +71,12 @@ class LL_COMMON_API LLUUID
static LLUUID generateNewID(); static LLUUID generateNewID();
static LLUUID generateNewID(const std::string& stream); //static version of above for use in initializer expressions such as constructor params, etc. static LLUUID generateNewID(const std::string& stream); //static version of above for use in initializer expressions such as constructor params, etc.
private:
BOOL parseInternalScalar(const char* in_string, bool broken_format, bool emit);
#if defined(__SSE4_2__)
BOOL parseInternalSIMD(const char* in_string, bool emit);
#endif
public:
BOOL set(const char *in_string, BOOL emit = TRUE); // Convert from string, if emit is FALSE, do not emit warnings BOOL set(const char *in_string, BOOL emit = TRUE); // Convert from string, if emit is FALSE, do not emit warnings
BOOL set(const std::string_view in_string, BOOL emit = TRUE); // Convert from string, if emit is FALSE, do not emit warnings BOOL set(const std::string_view in_string, BOOL emit = TRUE); // Convert from string, if emit is FALSE, do not emit warnings
void setNull(); // Faster than setting to LLUUID::null. void setNull(); // Faster than setting to LLUUID::null.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment