From 8fc0abd6995c17b3645b6af5ba4f3e2e8de9b74b Mon Sep 17 00:00:00 2001
From: Rye Mutt <rye@alchemyviewer.org>
Date: Sun, 23 Aug 2020 01:20:52 -0400
Subject: [PATCH] Improvements to malloc behavior on 64bit platforms that
 default to a 16 byte alignment

---
 indra/llcommon/llmemory.h | 63 ++++++++++++++++++++++++++++++++-------
 1 file changed, 53 insertions(+), 10 deletions(-)

diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h
index f04ae5f5cbe..770897e7205 100644
--- a/indra/llcommon/llmemory.h
+++ b/indra/llcommon/llmemory.h
@@ -48,12 +48,16 @@ class LLMutex ;
 #define LL_ALIGN_OF __align_of__
 #endif
 
-#if LL_WINDOWS
+#if LL_WINDOWS || LL_LINUX
+#if ADDRESS_SIZE == 64
+#define LL_DEFAULT_HEAP_ALIGN 16
+#else
 #define LL_DEFAULT_HEAP_ALIGN 8
+#endif
 #elif LL_DARWIN
 #define LL_DEFAULT_HEAP_ALIGN 16
 #elif LL_LINUX
-#define LL_DEFAULT_HEAP_ALIGN 8
+#error "UNKNOWN PLATFORM HEAP SIZE"
 #endif
 
 
@@ -146,38 +150,49 @@ template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address)
 
 inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16().
 {
+#if (ADDRESS_SIZE == 64 && (defined(LL_WINDOWS) || defined(LL_DARWIN) || defined(LL_LINUX)))
+	return malloc(size); // default x86_64 malloc alignment on windows, mac, and linux is 16 byte aligned
+#else
 #if defined(LL_WINDOWS)
 	return _aligned_malloc(size, 16);
 #elif defined(LL_DARWIN)
 	return malloc(size); // default osx malloc is 16 byte aligned.
 #else
-	void *rtn;
+	void *rtn = NULL;
 	if (LL_LIKELY(0 == posix_memalign(&rtn, 16, size)))
 		return rtn;
 	else // bad alignment requested, or out of memory
 		return NULL;
 #endif
+#endif
 }
 
 inline void ll_aligned_free_16(void *p)
 {
+#if (ADDRESS_SIZE == 64 && (defined(LL_WINDOWS) || defined(LL_DARWIN) || defined(LL_LINUX)))
+	free(p); // default x86_64 malloc alignment on windows, mac, and linux is 16 byte aligned
+#else
 #if defined(LL_WINDOWS)
 	_aligned_free(p);
 #elif defined(LL_DARWIN)
-	return free(p);
+	free(p);
 #else
 	free(p); // posix_memalign() is compatible with heap deallocator
 #endif
+#endif
 }
 
 inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // returned hunk MUST be freed with ll_aligned_free_16().
 {
+#if (ADDRESS_SIZE == 64 && (defined(LL_WINDOWS) || defined(LL_DARWIN) || defined(LL_LINUX)))
+	return realloc(ptr, size); // default x86_64 malloc alignment on windows, mac, and linux is 16 byte aligned
+#else
 #if defined(LL_WINDOWS)
 	return _aligned_realloc(ptr, size, 16);
 #elif defined(LL_DARWIN)
 	return realloc(ptr,size); // default osx malloc is 16 byte aligned.
 #else
-	//FIXME: memcpy is SLOW
+	//FIXME: memcpy is SLOW but posix lacks aligned realloc
 	void* ret = ll_aligned_malloc_16(size);
 	if (ptr)
 	{
@@ -190,16 +205,15 @@ inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // r
 	}
 	return ret;
 #endif
+#endif
 }
 
 inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed with ll_aligned_free_32().
 {
 #if defined(LL_WINDOWS)
 	return _aligned_malloc(size, 32);
-#elif defined(LL_DARWIN)
-	return ll_aligned_malloc_fallback( size, 32 );
 #else
-	void *rtn;
+	void *rtn = NULL;
 	if (LL_LIKELY(0 == posix_memalign(&rtn, 32, size)))
 		return rtn;
 	else // bad alignment requested, or out of memory
@@ -211,8 +225,28 @@ inline void ll_aligned_free_32(void *p)
 {
 #if defined(LL_WINDOWS)
 	_aligned_free(p);
-#elif defined(LL_DARWIN)
-	ll_aligned_free_fallback( p );
+#else
+	free(p); // posix_memalign() is compatible with heap deallocator
+#endif
+}
+
+inline void* ll_aligned_malloc_64(size_t size) // returned hunk MUST be freed with ll_aligned_free_64().
+{
+#if defined(LL_WINDOWS)
+	return _aligned_malloc(size, 64);
+#else
+	void *rtn = NULL;
+	if (LL_LIKELY(0 == posix_memalign(&rtn, 64, size)))
+		return rtn;
+	else // bad alignment requested, or out of memory
+		return NULL;
+#endif
+}
+
+inline void ll_aligned_free_64(void *p)
+{
+#if defined(LL_WINDOWS)
+	_aligned_free(p);
 #else
 	free(p); // posix_memalign() is compatible with heap deallocator
 #endif
@@ -234,6 +268,10 @@ LL_FORCE_INLINE void* ll_aligned_malloc(size_t size)
 	{
 		return ll_aligned_malloc_32(size);
 	}
+	else if (ALIGNMENT == 64)
+	{
+		return ll_aligned_malloc_64(size);
+	}
 	else
 	{
 		return ll_aligned_malloc_fallback(size, ALIGNMENT);
@@ -255,6 +293,10 @@ LL_FORCE_INLINE void ll_aligned_free(void* ptr)
 	{
 		return ll_aligned_free_32(ptr);
 	}
+	else if (ALIGNMENT == 64)
+	{
+		return ll_aligned_free_64(ptr);
+	}
 	else
 	{
 		return ll_aligned_free_fallback(ptr);
@@ -325,6 +367,7 @@ inline void ll_memcpy_nonaliased_aligned_16(char* __restrict dst, const char* __
 
 	// Copy remainder 16b tail chunks (or ALL 16b chunks for sub-64b copies)
 	//
+	llassert(0 == (((U8*) end - (U8*) dst) % 16));
 	while (dst < end)
 	{
 		_mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
-- 
GitLab
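
Note on the assumption behind the 64-bit fast paths: the patch forwards ll_aligned_malloc_16 / ll_aligned_realloc_16 / ll_aligned_free_16 straight to malloc / realloc / free because the default allocator on 64-bit Windows, macOS, and Linux hands back 16-byte-aligned blocks. A minimal standalone C++ sketch to spot-check that guarantee on a given toolchain (illustrative only, not viewer code):

// alignment_probe.cpp (hypothetical file name) - spot-check that plain malloc()
// returns 16-byte-aligned blocks, which is what the ADDRESS_SIZE == 64 branches
// in the patch rely on.
#include <cstdint>
#include <cstdio>
#include <cstdlib>

int main()
{
    const std::size_t sizes[] = { 1, 8, 24, 100, 4096 };
    for (std::size_t sz : sizes)
    {
        void* p = std::malloc(sz);
        const bool aligned16 = (reinterpret_cast<std::uintptr_t>(p) & 15) == 0;
        std::printf("malloc(%4zu) -> %p : %s\n",
                    sz, p, aligned16 ? "16-byte aligned" : "NOT 16-byte aligned");
        std::free(p);
    }
    return 0;
}

On x86_64 glibc, Apple's system allocator, and the 64-bit MSVC CRT every probe should report 16-byte alignment, matching the comments in the patch. The 32- and 64-byte variants still go through _aligned_malloc or posix_memalign because plain malloc does not guarantee alignments larger than 16 bytes.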