Skip to content
Snippets Groups Projects
llmemory.h 10.7 KiB
Newer Older
 * @file llmemory.h
 * @brief Memory allocation/deallocation header-stuff goes here.
 * $LicenseInfo:firstyear=2002&license=viewerlgpl$
 * Second Life Viewer Source Code
 * Copyright (C) 2010, Linden Research, Inc.
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * version 2.1 of the License only.
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
 * $/LicenseInfo$
#ifndef LLMEMORY_H
#define LLMEMORY_H

#include <stdint.h>

// Heap consistency check: expands to an llassert() on _CrtCheckMemory().
// NOTE(review): no platform/build guard is visible around this definition in
// this view - confirm it is only active where _CrtCheckMemory() is available.
#define LL_CHECK_MEMORY llassert(_CrtCheckMemory());
// Wrapper for the compiler's alignment-query keyword.
// NOTE(review): LL_ALIGN_OF is defined twice back to back with different
// expansions; presumably a compiler-selection conditional belongs between
// the two definitions - verify against the upstream file.
#define LL_ALIGN_OF __alignof
#define LL_ALIGN_OF __align_of__

// NOTE(review): the ADDRESS_SIZE == 64 branch is empty as shown and no
// matching #endif is visible in this view - confirm the intended structure.
#if ADDRESS_SIZE == 64
#elif LL_LINUX
// Out-of-line alignment check that the ll_assert_aligned() macro forwards to.
// Takes the pointer value as an integer (uintptr_t) and the alignment as U32.
// Presumably asserts that ptr is a multiple of alignment; the implementation
// is not part of this header.
LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment);

// This is incredibly expensive - in profiling Windows RWD builds, 30%
// of CPU time was in alignment checks.

// ll_assert_aligned(ptr, alignment):
//  - first definition: forwards to ll_assert_aligned_func() with ptr converted
//    to uintptr_t and alignment cast to U32;
//  - second definition: expands to nothing, so the check costs nothing.
// NOTE(review): both definitions appear unconditionally in this view;
// presumably a conditional selects one of them - verify.
#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(uintptr_t(ptr),((U32)alignment))
#define ll_assert_aligned(ptr,alignment)

#include <xmmintrin.h>

// Round `address` up to the next multiple of 16 bytes: adds 0xF to the pointer
// value and clears the low four bits. An address that is already a multiple
// of 16 is returned unchanged.
// NOTE(review): the braces of the function body are not present in this view.
template <typename T> T* LL_NEXT_ALIGNED_ADDRESS(T* address) 
	return reinterpret_cast<T*>(
		(uintptr_t(address) + 0xF) & ~0xF);

// Round `address` up to the next multiple of 64 bytes: adds 0x3F to the
// pointer value and clears the low six bits. An address that is already a
// multiple of 64 is returned unchanged.
// NOTE(review): the braces of the function body are not present in this view.
template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) 
	return reinterpret_cast<T*>(
		(uintptr_t(address) + 0x3F) & ~0x3F);


// Alignment-attribute helpers. LL_ALIGN_PREFIX(x) is placed before a
// declaration and LL_ALIGN_POSTFIX(x) after it (see LL_ALIGN_16 at the end
// of this group).
// First pair: empty prefix, postfix uses the GCC-style
// __attribute__((aligned(x))) syntax.
#define			LL_ALIGN_PREFIX(x)
#define			LL_ALIGN_POSTFIX(x)		__attribute__((aligned(x)))


// Second pair: prefix uses the MSVC-style __declspec(align(x)) syntax,
// empty postfix.
#define			LL_ALIGN_PREFIX(x)		__declspec(align(x))
#define			LL_ALIGN_POSTFIX(x)

// NOTE(review): this #error and the two macro pairs above appear
// unconditionally in this view; presumably they are the branches of a
// compiler/platform conditional whose directives are not visible - verify.
#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined"

// Wrap a declaration `var` so that it is 16-byte aligned, using whichever
// prefix/postfix form is active.
#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16)

	// To enable buffer overrun detection, predefine LL_DEBUG_BUFFER_OVERRUN in the current library
	// and change the preprocessor condition below to: #if 1 && defined(LL_WINDOWS)

// The leading "0 &&" makes this condition always false as written.
#if 0 && defined(LL_WINDOWS)
	// Declarations of the fallback aligned allocator and its matching free.
	void* ll_aligned_malloc_fallback( size_t size, int align );
	void ll_aligned_free_fallback( void* ptr );
	// Inline fallback allocator: returns a block of `size` bytes aligned to
	// `align`.
	// NOTE(review): braces, #else/#endif, the declaration of `aligned` and the
	// return statement of the manual path are not visible in this view.
	inline void* ll_aligned_malloc_fallback( size_t size, int align )
	#if defined(LL_WINDOWS)
		return _aligned_malloc(size, align);
		// Manual path: over-allocate by (align - 1) bytes of slack plus one
		// pointer-sized slot in front of the aligned block.
		void* mem = malloc( size + (align - 1) + sizeof(void*) );
        if (mem)
            // Skip the pointer-sized slot, then advance to a multiple of align.
            // The mask arithmetic assumes align is a power of two.
            // NOTE(review): if the address after the slot is already a multiple
            // of align, this advances by a full `align`, which is one byte more
            // than the (align - 1) slack allocated above - verify.
            aligned = ((char*)mem) + sizeof(void*);
            aligned += align - ((uintptr_t)aligned & (align - 1));
	// Release a block obtained from ll_aligned_malloc_fallback().
	inline void ll_aligned_free_fallback( void* ptr )
	#if defined(LL_WINDOWS)
		if (ptr)
			// Frees the pointer stored in the void* slot immediately before ptr
			// (presumably the original malloc() result; the store is not
			// visible in this view).
			free( ((void**)ptr)[-1] );
// Allocate `size` bytes with 16-byte alignment. Returns NULL on the
// posix_memalign() path when the allocation fails.
// NOTE(review): braces, the #else/#endif directives and the declaration of
// `rtn` are not visible in this view.
inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16().
#if (ADDRESS_SIZE == 64 && (defined(LL_WINDOWS) || defined(LL_DARWIN) || defined(LL_LINUX)))
    return malloc(size); // default x86_64 malloc alignment on windows, mac, and linux is 16 byte aligned
#elif defined(LL_DARWIN)
	return malloc(size); // default osx malloc is 16 byte aligned.
	// Remaining path: request 16-byte alignment explicitly; posix_memalign()
	// returns 0 on success.
	if (LL_LIKELY(0 == posix_memalign(&rtn, 16, size)))
		return rtn;
	else // bad alignment requested, or out of memory
		return NULL;
// Release a block returned by ll_aligned_malloc_16() or
// ll_aligned_realloc_16(). Both visible paths call plain free().
// NOTE(review): braces and the remaining preprocessor branches are not
// visible in this view.
inline void ll_aligned_free_16(void *p)
#if (ADDRESS_SIZE == 64 && (defined(LL_WINDOWS) || defined(LL_DARWIN) || defined(LL_LINUX)))
    free(p); // default x86_64 malloc alignment on windows, mac, and linux is 16 byte aligned
	free(p); // posix_memalign() is compatible with heap deallocator
// Resize a 16-byte-aligned block to `size` bytes.
//   ptr      - block to resize (the copy path checks it for NULL before use)
//   size     - requested new size in bytes
//   old_size - current size of the block; only read by the allocate-and-copy
//              path at the end
// NOTE(review): braces, some preprocessor directives, and the free/return
// statements of the copy path are not visible in this view.
inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // returned hunk MUST be freed with ll_aligned_free_16().
#if (ADDRESS_SIZE == 64 && (defined(LL_WINDOWS) || defined(LL_DARWIN) || defined(LL_LINUX)))
    return realloc(ptr, size); // default x86_64 malloc alignment on windows, mac, and linux is 16 byte aligned
	return _aligned_realloc(ptr, size, 16);
#elif defined(LL_DARWIN)
	return realloc(ptr,size); // default osx malloc is 16 byte aligned.
	//FIXME: memcpy is SLOW but posix lacks aligned realloc
	// Copy path: allocate a new aligned block, then copy the old contents over.
	void* ret = ll_aligned_malloc_16(size);
	if (ptr)
		if (ret)
			// Only copy the size of the smallest memory block to avoid memory corruption.
			memcpy(ret, ptr, llmin(old_size, size));
// Allocate `size` bytes with 32-byte alignment. The visible path uses
// posix_memalign() and returns NULL when it fails.
// NOTE(review): the body of the LL_WINDOWS branch, the declaration of `rtn`,
// the braces and the #else/#endif are not visible in this view.
inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed with ll_aligned_free_32().
#if defined(LL_WINDOWS)
	if (LL_LIKELY(0 == posix_memalign(&rtn, 32, size)))
		return rtn;
	else // bad alignment requested, or out of memory
		return NULL;

// Release a block returned by ll_aligned_malloc_32().
// NOTE(review): the body of the LL_WINDOWS branch is not visible in this
// view; only the free() path for posix_memalign() blocks is shown.
inline void ll_aligned_free_32(void *p)
#if defined(LL_WINDOWS)
	free(p); // posix_memalign() is compatible with heap deallocator

// Allocate `size` bytes with 64-byte alignment.
// On LL_WINDOWS this calls _aligned_malloc(size, 64); the other visible path
// uses posix_memalign() and returns NULL when it fails.
// NOTE(review): braces and the #else/#endif are not visible in this view.
inline void* ll_aligned_malloc_64(size_t size) // returned hunk MUST be freed with ll_aligned_free_64().
#if defined(LL_WINDOWS)
	return _aligned_malloc(size, 64);
	void *rtn = NULL;
	if (LL_LIKELY(0 == posix_memalign(&rtn, 64, size)))
		return rtn;
	else // bad alignment requested, or out of memory
		return NULL;

// Release a block returned by ll_aligned_malloc_64().
// NOTE(review): the body of the LL_WINDOWS branch is not visible in this
// view; only the free() path for posix_memalign() blocks is shown.
inline void ll_aligned_free_64(void *p)
#if defined(LL_WINDOWS)
	free(p); // posix_memalign() is compatible with heap deallocator
// general purpose dispatch functions that are forced inline so they can compile down to a single call
// Allocate `size` bytes aligned to the compile-time constant ALIGNMENT.
// Visible branches: ALIGNMENT == 64 goes to ll_aligned_malloc_64(); the last
// return hands the request to ll_aligned_malloc_fallback(size, ALIGNMENT).
// NOTE(review): the leading branches of the if/else chain and the braces are
// not visible in this view.
template<size_t ALIGNMENT>
LL_FORCE_INLINE void* ll_aligned_malloc(size_t size)
	else if (ALIGNMENT == 64)
		return ll_aligned_malloc_64(size);
		return ll_aligned_malloc_fallback(size, ALIGNMENT);
// Release a block according to the compile-time constant ALIGNMENT.
// Visible branch: ALIGNMENT == 64 goes to ll_aligned_free_64().
// NOTE(review): the other branches of the if/else chain and the braces are
// not visible in this view.
template<size_t ALIGNMENT>
LL_FORCE_INLINE void ll_aligned_free(void* ptr)
	else if (ALIGNMENT == 64)
		return ll_aligned_free_64(ptr);

// Copy `bytes` bytes from src to dst in 16-byte blocks. Source and destination MUST NOT OVERLAP.
// Source and dest must be 16-byte aligned and size must be a multiple of 16.
// NOTE(review): the entry assert only checks that bytes is a multiple of
// sizeof(F32); the multiple-of-16 requirement is only checked by the llassert
// in front of the tail loop.
// NOTE(review): the braces of the function, the `if` and the loops are not
// present in this view.
inline void ll_memcpy_nonaliased_aligned_16(char* __restrict dst, const char* __restrict src, size_t bytes)
	assert(src != NULL);
	assert(dst != NULL);
	assert(bytes > 0);
	assert((bytes % sizeof(F32))== 0); 

	// The [src, src + bytes) and [dst, dst + bytes) ranges must be disjoint.
	assert((src < dst) ? ((src + bytes) <= dst) : ((dst + bytes) <= src));

	// One past the last destination byte.
	char* end = dst + bytes;

	// Copies larger than 64 bytes go through the 64-bytes-per-iteration loop.
	if (bytes > 64)

		// Find start of 64b aligned area within block
		void* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst);
		//at least 64 bytes before the end of the destination, switch to 16 byte copies
		void* end_64 = end-64;
		// Prefetch the head of the 64b area now
		_mm_prefetch((char*)begin_64, _MM_HINT_NTA);
		_mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA);
		_mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA);
		_mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA);
		// Copy 16b chunks until we're 64b aligned
		while (dst < begin_64)

			// Aligned 16-byte load from src, aligned 16-byte store to dst.
			_mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
			dst += 16;
			src += 16;
		// Copy 64b chunks up to your tail
		// might be good to shmoo the 512b prefetch offset
		// (characterize performance for various values)
		while (dst < end_64)
			// Prefetch 512 bytes ahead on both streams, then move four
			// 16-byte blocks.
			_mm_prefetch((char*)src + 512, _MM_HINT_NTA);
			_mm_prefetch((char*)dst + 512, _MM_HINT_NTA);
			_mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
			_mm_store_ps((F32*)(dst + 16), _mm_load_ps((F32*)(src + 16)));
			_mm_store_ps((F32*)(dst + 32), _mm_load_ps((F32*)(src + 32)));
			_mm_store_ps((F32*)(dst + 48), _mm_load_ps((F32*)(src + 48)));
			dst += 64;
			src += 64;

	// Copy remainder 16b tail chunks (or ALL 16b chunks for sub-64b copies)
	// The remaining byte count must be a whole number of 16-byte blocks.
	llassert(0 == (((U8*) end - (U8*) dst) % 16));
	while (dst < end)
		_mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
		dst += 16;
		src += 16;

// Compile-time switch, set to 0 here; nothing in this view reads it.
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation in C and C++.
#define __DEBUG_PRIVATE_MEM__  0

// Process memory bookkeeping. Every member visible here is static.
// NOTE(review): the class braces and any access specifiers are not present in
// this view.
class LL_COMMON_API LLMemory
	// Return the resident set size of the current process, in bytes.
	// Return value is zero if not known.
	static U64 getCurrentRSS();
	// Presumably probes whether `size` bytes can be allocated at/near
	// `address` - implementation not shown, confirm before relying on it.
	static void* tryToAlloc(void* address, U32 size);
	// Takes a heap-size limit (F32Gigabytes) and a failure-prevention flag;
	// presumably these feed sMaxHeapSizeInKB and
	// sEnableMemoryFailurePrevention below - verify in the implementation.
	static void initMaxHeapSizeGB(F32Gigabytes max_heap_size, BOOL prevent_heap_failure);
	static void updateMemoryInfo() ;
	// `update` defaults to FALSE.
	static void logMemoryInfo(BOOL update = FALSE);
	// Accessors returning U32Kilobytes values.
	static U32Kilobytes getAvailableMemKB() ;
	static U32Kilobytes getMaxMemKB() ;
	static U32Kilobytes getAllocatedMemKB() ;
	// Static state, all typed U32Kilobytes except the BOOL flag.
	static U32Kilobytes sAvailPhysicalMemInKB ;
	static U32Kilobytes sMaxPhysicalMemInKB ;
	static U32Kilobytes sAllocatedMemInKB;
	static U32Kilobytes sAllocatedPageSizeInKB ;
	static U32Kilobytes sMaxHeapSizeInKB;
	static BOOL sEnableMemoryFailurePrevention;

// LLRefCount moved to llrefcount.h

// LLPointer moved to llpointer.h

// LLSafeHandle moved to llsafehandle.h

// LLSingleton moved to llsingleton.h