diff --git a/indra/llaudio/llaudiodecodemgr.cpp b/indra/llaudio/llaudiodecodemgr.cpp
index 6c97a64ed7080d9284f207d7b4931857b4a4ac68..bcff03ceaa094c164c2c0f1ad320fd8fd7a66f62 100644
--- a/indra/llaudio/llaudiodecodemgr.cpp
+++ b/indra/llaudio/llaudiodecodemgr.cpp
@@ -40,6 +40,7 @@
 
 #include "vorbis/codec.h"
 #include "vorbis/vorbisfile.h"
+#include <iterator>
 
 extern LLAudioEngine *gAudiop;
 
diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt
index 5b76703af7d3a53fb01163873b50d0d1013f0fc0..6f5fe1832a774378d4fd8e47dc6e447b94e829d1 100644
--- a/indra/llcommon/CMakeLists.txt
+++ b/indra/llcommon/CMakeLists.txt
@@ -242,6 +242,7 @@ set(llcommon_HEADER_FILES
     llstringtable.h
     llsys.h
     llthread.h
+    llthreadlocalstorage.h
     llthreadsafequeue.h
     lltimer.h
     lltrace.h
diff --git a/indra/llcommon/llapr.cpp b/indra/llcommon/llapr.cpp
index 092c27693621389ab28770a05ef8da0bee5d08bd..d911f258b6338be48de8a142b5b5ed5d4e8b55b6 100644
--- a/indra/llcommon/llapr.cpp
+++ b/indra/llcommon/llapr.cpp
@@ -29,7 +29,7 @@
 #include "linden_common.h"
 #include "llapr.h"
 #include "apr_dso.h"
-#include "llthreadlocalpointer.h"
+#include "llthreadlocalstorage.h"
 
 apr_pool_t *gAPRPoolp = NULL; // Global APR memory pool
 LLVolatileAPRPool *LLAPRFile::sAPRFilePoolp = NULL ; //global volatile APR memory pool.
diff --git a/indra/llcommon/llfasttimer.cpp b/indra/llcommon/llfasttimer.cpp
index 7c90b946af0fb0a83fdc9f0a3377e9595319cf0f..ad8cf7296eabe49cba702f0affa0cb79ff4ff8a3 100644
--- a/indra/llcommon/llfasttimer.cpp
+++ b/indra/llcommon/llfasttimer.cpp
@@ -70,8 +70,6 @@ U64         TimeBlock::sClockResolution = 1000000000; // Nanosecond resolution
 U64         TimeBlock::sClockResolution = 1000000; // Microsecond resolution
 #endif
 
-LLThreadLocalPointer<CurTimerData> TimeBlock::sCurTimerData;
-
 static LLMutex*			sLogLock = NULL;
 static std::queue<LLSD> sLogQueue;
 
@@ -118,7 +116,7 @@ struct SortTimerByName
 	}
 };
 
-TimeBlock& TimeBlock::getRootTimer()
+TimeBlock& TimeBlock::getRootTimeBlock()
 {
 	static TimeBlock root_timer("root", true, NULL);
 	return root_timer;
@@ -185,7 +183,7 @@ void TimeBlock::processTimes()
 {
 	get_clock_count(); // good place to calculate clock frequency
 	U64 cur_time = getCPUClockCount64();
-	CurTimerData* cur_data = sCurTimerData.get();
+	BlockTimerStackRecord* stack_record = ThreadTimerStack::getInstance();
 
 	// set up initial tree
 	for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(), end_it = LLInstanceTracker<TimeBlock>::endInstances(); 
@@ -193,11 +191,11 @@ void TimeBlock::processTimes()
 		++it)
 	{
 		TimeBlock& timer = *it;
-		if (&timer == &TimeBlock::getRootTimer()) continue;
+		if (&timer == &TimeBlock::getRootTimeBlock()) continue;
 
 		// bootstrap tree construction by attaching to last timer to be on stack
 		// when this timer was called
-		if (timer.getParent() == &TimeBlock::getRootTimer())
+		if (timer.getParent() == &TimeBlock::getRootTimeBlock())
 		{
 			TimeBlockAccumulator* accumulator = timer.getPrimaryAccumulator();
 
@@ -214,20 +212,21 @@ void TimeBlock::processTimes()
 	// bump timers up tree if they have been flagged as being in the wrong place
 	// do this in a bottom up order to promote descendants first before promoting ancestors
 	// this preserves partial order derived from current frame's observations
-	for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(TimeBlock::getRootTimer());
+	for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(TimeBlock::getRootTimeBlock());
 		it != end_timer_tree_bottom_up();
 		++it)
 	{
 		TimeBlock* timerp = *it;
 
 		// sort timers by time last called, so call graph makes sense
-		if (timerp->getTreeNode().mNeedsSorting)
+		TimeBlockTreeNode& tree_node = timerp->getTreeNode();
+		if (tree_node.mNeedsSorting)
 		{
-			std::sort(timerp->beginChildren(), timerp->endChildren(), SortTimerByName());
+			std::sort(tree_node.mChildren.begin(), tree_node.mChildren.end(), SortTimerByName());
 		}
 
 		// skip root timer
-		if (timerp != &TimeBlock::getRootTimer())
+		if (timerp != &TimeBlock::getRootTimeBlock())
 		{
 			TimeBlockAccumulator* accumulator = timerp->getPrimaryAccumulator();
 
@@ -250,27 +249,27 @@ void TimeBlock::processTimes()
 	}
 
 	// walk up stack of active timers and accumulate current time while leaving timing structures active
-	BlockTimer* cur_timer = cur_data->mCurTimer;
-	TimeBlockAccumulator* accumulator = cur_data->mTimerData->getPrimaryAccumulator();
+	BlockTimer* cur_timer = stack_record->mActiveTimer;
+	TimeBlockAccumulator* accumulator = stack_record->mTimeBlock->getPrimaryAccumulator();
 	// root defined by parent pointing to self
-	while(cur_timer && cur_timer->mLastTimerData.mCurTimer != cur_timer)
+	while(cur_timer && cur_timer->mLastTimerData.mActiveTimer != cur_timer)
 	{
 		U64 cumulative_time_delta = cur_time - cur_timer->mStartTime;
-		U64 self_time_delta = cumulative_time_delta - cur_data->mChildTime;
-		cur_data->mChildTime = 0;
+		U64 self_time_delta = cumulative_time_delta - stack_record->mChildTime;
+		stack_record->mChildTime = 0;
 		accumulator->mSelfTimeCounter += self_time_delta;
 		accumulator->mTotalTimeCounter += cumulative_time_delta;
 
 		cur_timer->mStartTime = cur_time;
 
-		cur_data = &cur_timer->mLastTimerData;
-		cur_data->mChildTime += cumulative_time_delta;
-		if (cur_data->mTimerData)
+		stack_record = &cur_timer->mLastTimerData;
+		stack_record->mChildTime += cumulative_time_delta;
+		if (stack_record->mTimeBlock)
 		{
-			accumulator = cur_data->mTimerData->getPrimaryAccumulator();
+			accumulator = stack_record->mTimeBlock->getPrimaryAccumulator();
 		}
 
-		cur_timer = cur_timer->mLastTimerData.mCurTimer;
+		cur_timer = cur_timer->mLastTimerData.mActiveTimer;
 	}
 
 
@@ -374,7 +373,7 @@ void TimeBlock::dumpCurTimes()
 	LLTrace::Recording& last_frame_recording = frame_recording.getLastRecordingPeriod();
 
 	// walk over timers in depth order and output timings
-	for(timer_tree_dfs_iterator_t it = begin_timer_tree(TimeBlock::getRootTimer());
+	for(timer_tree_dfs_iterator_t it = begin_timer_tree(TimeBlock::getRootTimeBlock());
 		it != end_timer_tree();
 		++it)
 	{
diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h
index 4d820d0664c633374cdfa0b7eabf94980f36ec6f..995eebd16aa279ec591e818fb82d4ef189b14255 100644
--- a/indra/llcommon/llfasttimer.h
+++ b/indra/llcommon/llfasttimer.h
@@ -38,13 +38,29 @@ class LLMutex;
 namespace LLTrace
 {
 
-struct CurTimerData
+struct BlockTimerStackRecord
 {
-	class BlockTimer*	mCurTimer;
-	class TimeBlock*	mTimerData;
+	class BlockTimer*	mActiveTimer;
+	class TimeBlock*	mTimeBlock;
 	U64					mChildTime;
 };
 
+class ThreadTimerStack	// per-thread BlockTimerStackRecord, created and looked up through LLThreadLocalSingleton
+:	public BlockTimerStackRecord,
+	public LLThreadLocalSingleton<ThreadTimerStack>
+{
+	friend class LLThreadLocalSingleton<ThreadTimerStack>;	// class-key spelled out: "friend T;" without it is ill-formed before C++11
+	ThreadTimerStack()	// private: only the befriended singleton base constructs instances
+	{ mActiveTimer = NULL; mTimeBlock = NULL; mChildTime = 0; }	// explicit zeroing: a user-provided ctor otherwise leaves the POD base indeterminate
+
+public:
+	ThreadTimerStack& operator=(const BlockTimerStackRecord& other)	// lets a saved BlockTimerStackRecord be assigned back onto this thread's record
+	{
+		BlockTimerStackRecord::operator=(other);
+		return *this;
+	}
+};
+
 class BlockTimer
 {
 public:
@@ -58,7 +74,7 @@ class BlockTimer
 private:
 
 	U64				mStartTime;
-	CurTimerData	mLastTimerData;
+	BlockTimerStackRecord	mLastTimerData;
 };
 
 // stores a "named" timer instance to be reused via multiple BlockTimer stack instances
@@ -67,7 +83,7 @@ class TimeBlock
 	public LLInstanceTracker<TimeBlock>
 {
 public:
-	TimeBlock(const char* name, bool open = false, TimeBlock* parent = &getRootTimer());
+	TimeBlock(const char* name, bool open = false, TimeBlock* parent = &getRootTimeBlock());
 
 	TimeBlockTreeNode& getTreeNode() const;
 	TimeBlock* getParent() const { return getTreeNode().getParent(); }
@@ -92,7 +108,7 @@ class TimeBlock
 		return static_cast<TraceType<TimeBlockAccumulator::SelfTimeAspect>&>(*(TraceType<TimeBlockAccumulator>*)this);
 	}
 
-	static TimeBlock& getRootTimer();
+	static TimeBlock& getRootTimeBlock();
 	static void pushLog(LLSD sd);
 	static void setLogLock(LLMutex* mutex);
 	static void writeLog(std::ostream& os);
@@ -252,7 +268,6 @@ class TimeBlock
 	static std::string							sLogName;
 	static bool									sMetricLog,
 												sLog;	
-	static LLThreadLocalPointer<CurTimerData>	sCurTimerData;
 	static U64									sClockResolution;
 };
 
@@ -261,8 +276,8 @@ LL_FORCE_INLINE BlockTimer::BlockTimer(TimeBlock& timer)
 #if FAST_TIMER_ON
 	mStartTime = TimeBlock::getCPUClockCount64();
 
-	CurTimerData* cur_timer_data = TimeBlock::sCurTimerData.get();
-	TimeBlockAccumulator* accumulator = cur_timer_data->mTimerData->getPrimaryAccumulator();
+	BlockTimerStackRecord* cur_timer_data = ThreadTimerStack::getIfExists();
+	TimeBlockAccumulator* accumulator = cur_timer_data->mTimeBlock->getPrimaryAccumulator();
 	accumulator->mActiveCount++;
 	// keep current parent as long as it is active when we are
 	accumulator->mMoveUpTree |= (accumulator->mParent->getPrimaryAccumulator()->mActiveCount == 0);
@@ -270,8 +285,8 @@ LL_FORCE_INLINE BlockTimer::BlockTimer(TimeBlock& timer)
 	// store top of stack
 	mLastTimerData = *cur_timer_data;
 	// push new information
-	cur_timer_data->mCurTimer = this;
-	cur_timer_data->mTimerData = &timer;
+	cur_timer_data->mActiveTimer = this;
+	cur_timer_data->mTimeBlock = &timer;
 	cur_timer_data->mChildTime = 0;
 #endif
 }
@@ -280,8 +295,8 @@ LL_FORCE_INLINE BlockTimer::~BlockTimer()
 {
 #if FAST_TIMER_ON
 	U64 total_time = TimeBlock::getCPUClockCount64() - mStartTime;
-	CurTimerData* cur_timer_data = TimeBlock::sCurTimerData.get();
-	TimeBlockAccumulator* accumulator = cur_timer_data->mTimerData->getPrimaryAccumulator();
+	BlockTimerStackRecord* cur_timer_data = ThreadTimerStack::getIfExists();
+	TimeBlockAccumulator* accumulator = cur_timer_data->mTimeBlock->getPrimaryAccumulator();
 
 	accumulator->mCalls++;
 	accumulator->mSelfTimeCounter += total_time - cur_timer_data->mChildTime;
@@ -290,12 +305,12 @@ LL_FORCE_INLINE BlockTimer::~BlockTimer()
 
 	// store last caller to bootstrap tree creation
 	// do this in the destructor in case of recursion to get topmost caller
-	accumulator->mLastCaller = mLastTimerData.mTimerData;
+	accumulator->mLastCaller = mLastTimerData.mTimeBlock;
 
 	// we are only tracking self time, so subtract our total time delta from parents
 	mLastTimerData.mChildTime += total_time;
 
-	*TimeBlock::sCurTimerData = mLastTimerData;
+	*ThreadTimerStack::getIfExists() = mLastTimerData;
 #endif
 }
 
diff --git a/indra/llcommon/llsingleton.h b/indra/llcommon/llsingleton.h
index 49d99f2cd0fe7f0cf1665115dc83d0d883b98c24..f6b0a7194b3c1f6a2b83439df9d81d89881fa792 100644
--- a/indra/llcommon/llsingleton.h
+++ b/indra/llcommon/llsingleton.h
@@ -90,7 +90,7 @@ template <typename DERIVED_TYPE>
 class LLSingleton : private boost::noncopyable
 {
 	
-private:
+protected:
 	typedef enum e_init_state
 	{
 		UNINITIALIZED,
@@ -124,7 +124,7 @@ class LLSingleton : private boost::noncopyable
 public:
 	virtual ~LLSingleton()
 	{
-		SingletonInstanceData& data = getData();
+		SingletonInstanceData& data = getSingletonData();
 		data.mSingletonInstance = NULL;
 		data.mInitState = DELETED;
 	}
@@ -151,29 +151,15 @@ class LLSingleton : private boost::noncopyable
 	 */
 	static void deleteSingleton()
 	{
-		delete getData().mSingletonInstance;
-		getData().mSingletonInstance = NULL;
-		getData().mInitState = DELETED;
-	}
-
-	static SingletonInstanceData& getData()
-	{
-		// this is static to cache the lookup results
-		static void * & registry = LLSingletonRegistry::get<DERIVED_TYPE>();
-
-		// *TODO - look into making this threadsafe
-		if(NULL == registry)
-		{
-			static SingletonInstanceData data;
-			registry = &data;
-		}
-
-		return *static_cast<SingletonInstanceData *>(registry);
+		SingletonInstanceData& data = getSingletonData();
+		delete data.mSingletonInstance;
+		data.mSingletonInstance = NULL;
+		data.mInitState = DELETED;
 	}
 
 	static DERIVED_TYPE* getInstance()
 	{
-		SingletonInstanceData& data = getData();
+		SingletonInstanceData& data = getSingletonData();
 
 		if (data.mInitState == CONSTRUCTING)
 		{
@@ -197,6 +183,12 @@ class LLSingleton : private boost::noncopyable
 		return data.mSingletonInstance;
 	}
 
+	static DERIVED_TYPE* getIfExists()	// returns whatever instance pointer is currently recorded, without constructing one (NULL after deletion)
+	{
+		SingletonInstanceData& data = getSingletonData();
+		return data.mSingletonInstance;
+	}
+
 	// Reference version of getInstance()
 	// Preferred over getInstance() as it disallows checking for NULL
 	static DERIVED_TYPE& instance()
@@ -208,17 +200,31 @@ class LLSingleton : private boost::noncopyable
 	// Use this to avoid accessing singletons before the can safely be constructed
 	static bool instanceExists()
 	{
-		return getData().mInitState == INITIALIZED;
+		return getSingletonData().mInitState == INITIALIZED;
 	}
 	
 	// Has this singleton already been deleted?
 	// Use this to avoid accessing singletons from a static object's destructor
 	static bool destroyed()
 	{
-		return getData().mInitState == DELETED;
+		return getSingletonData().mInitState == DELETED;
 	}
 
 private:
+	static SingletonInstanceData& getSingletonData()	// returns the SingletonInstanceData registered for DERIVED_TYPE in LLSingletonRegistry
+	{
+		// this is static to cache the lookup results
+		static void * & registry = LLSingletonRegistry::get<DERIVED_TYPE>();
+
+		// *TODO - look into making this threadsafe
+		if(NULL == registry)	// slot still empty: publish this function's own static data block there
+		{
+			static SingletonInstanceData data;
+			registry = &data;
+		}
+
+		return *static_cast<SingletonInstanceData *>(registry);
+	}
 	virtual void initSingleton() {}
 };
 
diff --git a/indra/llcommon/llthreadlocalpointer.h b/indra/llcommon/llthreadlocalpointer.h
deleted file mode 100644
index d40a8b5a2709d9aa6a2c28711556e5042660f9bd..0000000000000000000000000000000000000000
--- a/indra/llcommon/llthreadlocalpointer.h
+++ /dev/null
@@ -1,164 +0,0 @@
-/** 
- * @file llthreadlocalpointer.h
- * @author Richard
- * @brief Pointer class that manages a distinct value per thread
- *
- * $LicenseInfo:firstyear=2004&license=viewerlgpl$
- * Second Life Viewer Source Code
- * Copyright (C) 2010, Linden Research, Inc.
- * 
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License only.
- * 
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- * 
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
- * 
- * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
- * $/LicenseInfo$
- */
-
-#ifndef LL_LLTHREADLOCALPOINTER_H
-#define LL_LLTHREADLOCALPOINTER_H
-
-#include "llinstancetracker.h"
-#include "llapr.h"
-
-class LLThreadLocalPointerBase : public LLInstanceTracker<LLThreadLocalPointerBase>
-{
-public:
-	LLThreadLocalPointerBase()
-	:	mThreadKey(NULL)
-	{
-		if (sInitialized)
-		{
-			initStorage();
-		}
-	}
-
-	LLThreadLocalPointerBase( const LLThreadLocalPointerBase& other)
-		:	mThreadKey(NULL)
-	{
-		if (sInitialized)
-		{
-			initStorage();
-		}
-	}
-
-	~LLThreadLocalPointerBase()
-	{
-		destroyStorage();
-	}
-
-	static void initAllThreadLocalStorage();
-	static void destroyAllThreadLocalStorage();
-
-protected:
-	void set(void* value);
-
-	LL_FORCE_INLINE void* get()
-	{
-		// llassert(sInitialized);
-		void* ptr;
-		apr_status_t result =
-		apr_threadkey_private_get(&ptr, mThreadKey);
-		if (result != APR_SUCCESS)
-		{
-			ll_apr_warn_status(result);
-			llerrs << "Failed to get thread local data" << llendl;
-		}
-		return ptr;
-	}
-
-	LL_FORCE_INLINE const void* get() const
-	{
-		void* ptr;
-		apr_status_t result =
-		apr_threadkey_private_get(&ptr, mThreadKey);
-		if (result != APR_SUCCESS)
-		{
-			ll_apr_warn_status(result);
-			llerrs << "Failed to get thread local data" << llendl;
-		}
-		return ptr;
-	}
-
-	void initStorage();
-	void destroyStorage();
-
-protected:
-	apr_threadkey_t* mThreadKey;
-	static bool		sInitialized;
-};
-
-template <typename T>
-class LLThreadLocalPointer : public LLThreadLocalPointerBase
-{
-public:
-
-	LLThreadLocalPointer()
-	{}
-
-	explicit LLThreadLocalPointer(T* value)
-	{
-		set(value);
-	}
-
-
-	LLThreadLocalPointer(const LLThreadLocalPointer<T>& other)
-	:	LLThreadLocalPointerBase(other)
-	{
-		set(other.get());		
-	}
-
-	LL_FORCE_INLINE T* get()
-	{
-		return (T*)LLThreadLocalPointerBase::get();
-	}
-
-	const T* get() const
-	{
-		return (const T*)LLThreadLocalPointerBase::get();
-	}
-
-	T* operator -> ()
-	{
-		return (T*)get();
-	}
-
-	const T* operator -> () const
-	{
-		return (T*)get();
-	}
-
-	T& operator*()
-	{
-		return *(T*)get();
-	}
-
-	const T& operator*() const
-	{
-		return *(T*)get();
-	}
-
-	LLThreadLocalPointer<T>& operator = (T* value)
-	{
-		set((void*)value);
-		return *this;
-	}
-
-	bool operator ==(T* other)
-	{
-		if (!sInitialized) return false;
-		return get() == other;
-	}
-};
-
-#endif // LL_LLTHREADLOCALPOINTER_H
diff --git a/indra/llcommon/llthreadlocalstorage.h b/indra/llcommon/llthreadlocalstorage.h
new file mode 100644
index 0000000000000000000000000000000000000000..fdf0c1808502796cad9f388bfae58d65685471ab
--- /dev/null
+++ b/indra/llcommon/llthreadlocalstorage.h
@@ -0,0 +1,254 @@
+/** 
+ * @file llthreadlocalstorage.h
+ * @author Richard
+ * @brief Class wrappers for thread local storage
+ *
+ * $LicenseInfo:firstyear=2004&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+#ifndef LL_LLTHREADLOCALSTORAGE_H
+#define LL_LLTHREADLOCALSTORAGE_H
+
+#include "llinstancetracker.h"
+#include "llapr.h"
+
+class LLThreadLocalPointerBase : public LLInstanceTracker<LLThreadLocalPointerBase>	// untyped base: wraps one APR thread key that stores a void*
+{
+public:
+	LLThreadLocalPointerBase()	// key starts NULL; initStorage() runs here only if sInitialized is already set
+	:	mThreadKey(NULL)
+	{
+		if (sInitialized)
+		{
+			initStorage();
+		}
+	}
+
+	LLThreadLocalPointerBase( const LLThreadLocalPointerBase& other)	// does not copy other's key; sets up its own storage exactly like the default ctor
+		:	mThreadKey(NULL)
+	{
+		if (sInitialized)
+		{
+			initStorage();
+		}
+	}
+
+	~LLThreadLocalPointerBase()	// always calls destroyStorage(); NOTE(review): presumably that tolerates a NULL key (defined elsewhere) - confirm
+	{
+		destroyStorage();
+	}
+
+	static void initAllThreadLocalStorage();	// defined elsewhere; NOTE(review): presumably covers instances built before sInitialized was set - confirm
+	static void destroyAllThreadLocalStorage();	// defined elsewhere
+
+protected:
+	void set(void* value);	// defined elsewhere
+
+	LL_FORCE_INLINE void* get() const	// reads the value stored under mThreadKey via apr_threadkey_private_get
+	{
+		// llassert(sInitialized);
+		void* ptr;
+		apr_status_t result =
+		apr_threadkey_private_get(&ptr, mThreadKey);
+		if (result != APR_SUCCESS)	// failure path: warn with the APR status, then report through llerrs
+		{
+			ll_apr_warn_status(result);
+			llerrs << "Failed to get thread local data" << llendl;
+		}
+		return ptr;
+	}
+
+	void initStorage();	// defined elsewhere
+	void destroyStorage();	// defined elsewhere
+
+protected:
+	apr_threadkey_t* mThreadKey;	// NULL until initStorage() has run for this object
+	static bool		sInitialized;	// gates initStorage() in the constructors above
+};
+
+template <typename T>
+class LLThreadLocalPointer : public LLThreadLocalPointerBase	// typed facade over the base; stores a raw T* and nothing in this class deletes the pointee
+{
+public:
+
+	LLThreadLocalPointer()
+	{}
+
+	explicit LLThreadLocalPointer(T* value)	// base ctor runs first, then value is stored through set()
+	{
+		set(value);
+	}
+
+
+	LLThreadLocalPointer(const LLThreadLocalPointer<T>& other)	// gets its own storage via the base copy ctor, then is seeded with other.get()
+	:	LLThreadLocalPointerBase(other)
+	{
+		set(other.get());		
+	}
+
+	LL_FORCE_INLINE T* get() const	// const only protects the wrapper; the returned pointee is mutable
+	{
+		return (T*)LLThreadLocalPointerBase::get();
+	}
+
+	T* operator -> () const
+	{
+		return (T*)get();
+	}
+
+	T& operator*() const	// no NULL check: dereferences whatever get() returns
+	{
+		return *(T*)get();
+	}
+
+	LLThreadLocalPointer<T>& operator = (T* value)	// non-const, so a const reference to the wrapper cannot re-point it
+	{
+		set((void*)value);
+		return *this;
+	}
+
+	bool operator ==(const T* other) const	// always false while sInitialized is unset; otherwise compares the stored pointer
+	{
+		if (!sInitialized) return false;
+		return get() == other;
+	}
+};
+
+template<typename DERIVED_TYPE>
+class LLThreadLocalSingleton	// singleton base whose instance pointer and init state are thread-local statics, one set per DERIVED_TYPE
+{
+	typedef enum e_init_state
+	{
+		UNINITIALIZED = 0,
+		CONSTRUCTING,
+		INITIALIZING,
+		INITIALIZED,
+		DELETED
+	} EInitState;
+
+public:
+	LLThreadLocalSingleton()
+	{}
+	
+	virtual ~LLThreadLocalSingleton()	// clears the pointer/state statics as seen by whichever thread runs the destructor
+	{
+		sInstance = NULL;
+		sInitState = DELETED;
+	}
+
+	static void deleteSingleton()	// deletes the instance recorded in sInstance (no-op delete when NULL) and marks the state DELETED
+	{
+		delete sInstance;
+		sInstance = NULL;
+		sInitState = DELETED;
+	}
+
+	static DERIVED_TYPE* getInstance()	// returns sInstance, first constructing it and calling initSingleton() when it is NULL
+	{
+		if (sInitState == CONSTRUCTING)	// re-entered from DERIVED_TYPE's own constructor
+		{
+			llerrs << "Tried to access singleton " << typeid(DERIVED_TYPE).name() << " from singleton constructor!" << llendl;
+		}
+
+		if (sInitState == DELETED)	// only a warning: the block below still builds a fresh instance
+		{
+			llwarns << "Trying to access deleted singleton " << typeid(DERIVED_TYPE).name() << " creating new instance" << llendl;
+		}
+
+		if (!sInstance) 
+		{
+			sInitState = CONSTRUCTING;
+			sInstance = new DERIVED_TYPE(); 
+			sInitState = INITIALIZING;
+			sInstance->initSingleton(); 
+			sInitState = INITIALIZED;	
+		}
+
+		return sInstance;
+	}
+
+	static DERIVED_TYPE* getIfExists()	// returns sInstance as-is; never constructs, so the result may be NULL
+	{
+		return sInstance;
+	}
+
+	// Reference version of getInstance()
+	// Preferred over getInstance() as it disallows checking for NULL
+	static DERIVED_TYPE& instance()
+	{
+		return *getInstance();
+	}
+
+	// Has this singleton been created yet?
+	// Use this to avoid accessing singletons before they can safely be constructed
+	static bool instanceExists()
+	{
+		return sInitState == INITIALIZED;
+	}
+
+	// Has this singleton already been deleted?
+	// Use this to avoid accessing singletons from a static object's destructor
+	static bool destroyed()
+	{
+		return sInitState == DELETED;
+	}
+private:
+	LLThreadLocalSingleton(const LLThreadLocalSingleton& other);	// declared private with no body here: copying is not available to users
+	virtual void initSingleton() {}	// hook called by getInstance() right after construction; does nothing by default
+
+	static __declspec(thread) DERIVED_TYPE* sInstance;	// NOTE(review): __declspec(thread) is the MSVC spelling of thread-local storage - confirm other toolchains are covered
+	static __declspec(thread) EInitState sInitState;
+};
+
+template<typename DERIVED_TYPE>
+__declspec(thread) DERIVED_TYPE* LLThreadLocalSingleton<DERIVED_TYPE>::sInstance = NULL;
+
+template<typename DERIVED_TYPE>
+__declspec(thread) typename LLThreadLocalSingleton<DERIVED_TYPE>::EInitState LLThreadLocalSingleton<DERIVED_TYPE>::sInitState = LLThreadLocalSingleton<DERIVED_TYPE>::UNINITIALIZED;
+
+template<typename DERIVED_TYPE>
+class LLThreadLocalSingletonPointer	// one thread-local DERIVED_TYPE* per DERIVED_TYPE, kept in a static; nothing here allocates or deletes the pointee
+{
+public:
+	void operator =(DERIVED_TYPE* value)	// writes the shared static, so the effect is the same as setInstance(value)
+	{
+		sInstance = value;
+	}
+
+	LL_FORCE_INLINE static DERIVED_TYPE* getInstance()	// returns the stored pointer unchanged; never constructs anything
+	{
+		return sInstance;
+	}
+
+	LL_FORCE_INLINE static void setInstance(DERIVED_TYPE* instance)	// overwrites the stored pointer; the previous value is not freed
+	{
+		sInstance = instance;
+	}
+
+private:
+	static __declspec(thread) DERIVED_TYPE* sInstance;	// NOTE(review): __declspec(thread) is the MSVC spelling of thread-local storage - confirm other toolchains are covered
+};
+
+template<typename DERIVED_TYPE>
+__declspec(thread) DERIVED_TYPE* LLThreadLocalSingletonPointer<DERIVED_TYPE>::sInstance = NULL;
+
+#endif // LL_LLTHREADLOCALSTORAGE_H
diff --git a/indra/llcommon/lltrace.cpp b/indra/llcommon/lltrace.cpp
index 9d0c93b35284af4bd139f68ce7e1815284793f2e..9cadd70dd8f80c184f5d4fe80d1b1fac9ea640bb 100644
--- a/indra/llcommon/lltrace.cpp
+++ b/indra/llcommon/lltrace.cpp
@@ -60,12 +60,23 @@ MasterThreadRecorder& getMasterThreadRecorder()
 	return *gMasterThreadRecorder;
 }
 
-LLThreadLocalPointer<ThreadRecorder>& get_thread_recorder()
+LLThreadLocalPointer<ThreadRecorder>& get_thread_recorder_ptr()
 {
 	static LLThreadLocalPointer<ThreadRecorder> s_thread_recorder;
 	return s_thread_recorder;
 }
 
+const LLThreadLocalPointer<ThreadRecorder>& get_thread_recorder()	// const view of the function-local static in get_thread_recorder_ptr(): readable, not re-pointable
+{
+	return get_thread_recorder_ptr();
+}
+
+void set_thread_recorder(ThreadRecorder* recorder)	// stores recorder into the pointer that get_thread_recorder() exposes; NULL is accepted
+{
+	get_thread_recorder_ptr() = recorder;
+}
+
+
 TimeBlockTreeNode::TimeBlockTreeNode() 
 :	mBlock(NULL),
 	mParent(NULL),
diff --git a/indra/llcommon/lltrace.h b/indra/llcommon/lltrace.h
index 05191cafaa7dc405ca53385dbee428886e6da19c..285d4389af3465b3065bfe42a46867e428c0242f 100644
--- a/indra/llcommon/lltrace.h
+++ b/indra/llcommon/lltrace.h
@@ -34,7 +34,7 @@
 #include "llrefcount.h"
 #include "llunit.h"
 #include "llapr.h"
-#include "llthreadlocalpointer.h"
+#include "llthreadlocalstorage.h"
 
 #include <list>
 
@@ -70,7 +70,8 @@ namespace LLTrace
 	void cleanup();
 	bool isInitialized();
 
-	LLThreadLocalPointer<class ThreadRecorder>& get_thread_recorder();
+	const LLThreadLocalPointer<class ThreadRecorder>& get_thread_recorder();
+	void set_thread_recorder(class ThreadRecorder*);
 
 	class MasterThreadRecorder& getMasterThreadRecorder();
 
@@ -106,9 +107,9 @@ namespace LLTrace
 
 		~AccumulatorBuffer()
 		{
-			if (sPrimaryStorage == mStorage)
+			if (LLThreadLocalSingletonPointer<ACCUMULATOR>::getInstance() == mStorage)
 			{
-				sPrimaryStorage = getDefaultBuffer()->mStorage;
+				LLThreadLocalSingletonPointer<ACCUMULATOR>::setInstance(getDefaultBuffer()->mStorage);
 			}
 			delete[] mStorage;
 		}
@@ -151,17 +152,17 @@ namespace LLTrace
 
 		void makePrimary()
 		{
-			sPrimaryStorage = mStorage;
+			LLThreadLocalSingletonPointer<ACCUMULATOR>::setInstance(mStorage);
 		}
 
 		bool isPrimary() const
 		{
-			return sPrimaryStorage == mStorage;
+			return LLThreadLocalSingletonPointer<ACCUMULATOR>::getInstance() == mStorage;
 		}
 
 		LL_FORCE_INLINE static ACCUMULATOR* getPrimaryStorage() 
 		{ 
-			return sPrimaryStorage.get(); 
+			return LLThreadLocalSingletonPointer<ACCUMULATOR>::getInstance(); 
 		}
 
 		// NOTE: this is not thread-safe.  We assume that slots are reserved in the main thread before any child threads are spawned
@@ -214,7 +215,6 @@ namespace LLTrace
 		{
 			// this buffer is allowed to leak so that trace calls from global destructors have somewhere to put their data
 			// so as not to trigger an access violation
-			//TODO: make this thread local but need to either demand-init apr or remove apr dependency
 			static self_t* sBuffer = new AccumulatorBuffer(StaticAllocationMarker());
 			static bool sInitialized = false;
 			if (!sInitialized)
@@ -229,9 +229,7 @@ namespace LLTrace
 		ACCUMULATOR*								mStorage;
 		size_t										mStorageSize;
 		size_t										mNextStorageSlot;
-		static LLThreadLocalPointer<ACCUMULATOR>	sPrimaryStorage;
 	};
-	template<typename ACCUMULATOR> LLThreadLocalPointer<ACCUMULATOR> AccumulatorBuffer<ACCUMULATOR>::sPrimaryStorage;
 
 	//TODO: replace with decltype when C++11 is enabled
 	template<typename T>
@@ -250,10 +248,9 @@ namespace LLTrace
 		TraceType(const char* name, const char* description = NULL)
 		:	LLInstanceTracker<TraceType<ACCUMULATOR>, std::string>(name),
 			mName(name),
-			mDescription(description ? description : "")	
-		{
-			mAccumulatorIndex = AccumulatorBuffer<ACCUMULATOR>::getDefaultBuffer()->reserveSlot();
-		}
+			mDescription(description ? description : ""),
+			mAccumulatorIndex(AccumulatorBuffer<ACCUMULATOR>::getDefaultBuffer()->reserveSlot())
+		{}
 
 		LL_FORCE_INLINE ACCUMULATOR* getPrimaryAccumulator() const
 		{
@@ -263,13 +260,12 @@ namespace LLTrace
 
 		size_t getIndex() const { return mAccumulatorIndex; }
 
-		std::string& getName() { return mName; }
 		const std::string& getName() const { return mName; }
 
 	protected:
-		std::string	mName;
-		std::string mDescription;
-		size_t		mAccumulatorIndex;
+		const std::string	mName;
+		const std::string	mDescription;
+		const size_t		mAccumulatorIndex;
 	};
 
 	template<typename T>
diff --git a/indra/llcommon/lltracethreadrecorder.cpp b/indra/llcommon/lltracethreadrecorder.cpp
index 156b0ef26b26555ee7b569f9137b0b5d39d221da..9fb789c62d3a12dfe31edb6db300ce6d38afaf74 100644
--- a/indra/llcommon/lltracethreadrecorder.cpp
+++ b/indra/llcommon/lltracethreadrecorder.cpp
@@ -39,18 +39,17 @@ namespace LLTrace
 ThreadRecorder::ThreadRecorder()
 {
 	//NB: the ordering of initialization in this function is very fragile due to a large number of implicit dependencies
-	get_thread_recorder() = this;
+	set_thread_recorder(this);
+	TimeBlock& root_time_block = TimeBlock::getRootTimeBlock();
 
-	mRootTimerData = new CurTimerData();
-	mRootTimerData->mTimerData = &TimeBlock::getRootTimer();
-	TimeBlock::sCurTimerData = mRootTimerData;
+	ThreadTimerStack* timer_stack = ThreadTimerStack::getInstance();
+	timer_stack->mTimeBlock = &root_time_block;
+	timer_stack->mActiveTimer = NULL;
 
 	mNumTimeBlockTreeNodes = AccumulatorBuffer<TimeBlockAccumulator>::getDefaultBuffer()->size();
 	mTimeBlockTreeNodes = new TimeBlockTreeNode[mNumTimeBlockTreeNodes];
 
-	mFullRecording.start();
-
-	TimeBlock& root_timer = TimeBlock::getRootTimer();
+	mThreadRecording.start();
 
 	// initialize time block parent pointers
 	for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(), end_it = LLInstanceTracker<TimeBlock>::endInstances(); 
@@ -60,15 +59,15 @@ ThreadRecorder::ThreadRecorder()
 		TimeBlock& time_block = *it;
 		TimeBlockTreeNode& tree_node = mTimeBlockTreeNodes[it->getIndex()];
 		tree_node.mBlock = &time_block;
-		tree_node.mParent = &root_timer;
+		tree_node.mParent = &root_time_block;
 
-		it->getPrimaryAccumulator()->mParent = &root_timer;
+		it->getPrimaryAccumulator()->mParent = &root_time_block;
 	}
 
-	mRootTimer = new BlockTimer(root_timer);
-	mRootTimerData->mCurTimer = mRootTimer;
+	mRootTimer = new BlockTimer(root_time_block);
+	timer_stack->mActiveTimer = mRootTimer;
 
-	TimeBlock::getRootTimer().getPrimaryAccumulator()->mActiveCount = 1;
+	TimeBlock::getRootTimeBlock().getPrimaryAccumulator()->mActiveCount = 1;
 }
 
 ThreadRecorder::~ThreadRecorder()
@@ -79,9 +78,7 @@ ThreadRecorder::~ThreadRecorder()
 	{
 		mActiveRecordings.front().mTargetRecording->stop();
 	}
-	get_thread_recorder() = NULL;
-	TimeBlock::sCurTimerData = NULL;
-	delete mRootTimerData;
+	set_thread_recorder(NULL);
 	delete[] mTimeBlockTreeNodes;
 }
 
@@ -196,12 +193,12 @@ SlaveThreadRecorder::~SlaveThreadRecorder()
 
 void SlaveThreadRecorder::pushToMaster()
 {
-	mFullRecording.stop();
+	mThreadRecording.stop();
 	{
 		LLMutexLock(getMasterThreadRecorder().getSlaveListMutex());
-		mSharedData.appendFrom(mFullRecording);
+		mSharedData.appendFrom(mThreadRecording);
 	}
-	mFullRecording.start();
+	mThreadRecording.start();
 }
 
 void SlaveThreadRecorder::SharedData::appendFrom( const Recording& source )
diff --git a/indra/llcommon/lltracethreadrecorder.h b/indra/llcommon/lltracethreadrecorder.h
index d09527eced2303176ba2a0235b2f41f7d367f4e5..337035974ca25405b9c47f5a4cc62388bde26879 100644
--- a/indra/llcommon/lltracethreadrecorder.h
+++ b/indra/llcommon/lltracethreadrecorder.h
@@ -62,13 +62,12 @@ namespace LLTrace
 
 			void moveBaselineToTarget();
 		};
-		Recording					mFullRecording;
+		Recording					mThreadRecording;
 		std::list<ActiveRecording>	mActiveRecordings;
 
-		struct CurTimerData*	mRootTimerData;
-		class BlockTimer*		mRootTimer;
-		TimeBlockTreeNode*		mTimeBlockTreeNodes;
-		size_t					mNumTimeBlockTreeNodes;
+		class BlockTimer*				mRootTimer;
+		TimeBlockTreeNode*				mTimeBlockTreeNodes;
+		size_t							mNumTimeBlockTreeNodes;
 	};
 
 	class LL_COMMON_API MasterThreadRecorder : public ThreadRecorder
diff --git a/indra/llmessage/llbuffer.cpp b/indra/llmessage/llbuffer.cpp
index 01da20f06029efc12e828a201f45e48827528ab7..1722b48f4407eb0dc5b69e5769882933e1e1c333 100644
--- a/indra/llmessage/llbuffer.cpp
+++ b/indra/llmessage/llbuffer.cpp
@@ -32,6 +32,7 @@
 #include "llmath.h"
 #include "llstl.h"
 #include "llthread.h"
+#include <iterator>
 
 #define ASSERT_LLBUFFERARRAY_MUTEX_LOCKED llassert(!mMutexp || mMutexp->isSelfLocked());
 
diff --git a/indra/llmessage/lldispatcher.cpp b/indra/llmessage/lldispatcher.cpp
index b2dc414a68e8eb42986f58bae961f52764b86499..7ac3651a76ba5abfd857c559c85faa09b1126d23 100644
--- a/indra/llmessage/lldispatcher.cpp
+++ b/indra/llmessage/lldispatcher.cpp
@@ -29,6 +29,7 @@
 #include "lldispatcher.h"
 
 #include <algorithm>
+#include <iterator>
 #include "llstl.h"
 #include "message.h"