From 67ec47e6da389661934ed2ddfa55ca58455fa7e5 Mon Sep 17 00:00:00 2001
From: Richard Linden <none@none>
Date: Tue, 13 Nov 2012 17:10:10 -0800
Subject: [PATCH] SH-3406 WIP convert fast timers to lltrace system moving fast
 timers into lltrace namespace and accumulation system

---
 indra/llcommon/llfasttimer.cpp    | 186 ++++++++++++---------
 indra/llcommon/llfasttimer.h      | 268 ++++++++++++++++--------------
 indra/llcommon/lltrace.cpp        |   7 +-
 indra/llcommon/lltrace.h          | 194 ++-------------------
 indra/llcommon/lltracerecording.h |  18 +-
 indra/llcommon/llunit.h           |  84 +++++-----
 indra/llui/llstatbar.h            |   8 +-
 indra/llui/llstatgraph.h          |   8 +-
 8 files changed, 330 insertions(+), 443 deletions(-)

diff --git a/indra/llcommon/llfasttimer.cpp b/indra/llcommon/llfasttimer.cpp
index 66452fd02aa..cced0bdfa97 100644
--- a/indra/llcommon/llfasttimer.cpp
+++ b/indra/llcommon/llfasttimer.cpp
@@ -33,8 +33,10 @@
 #include "lltreeiterators.h"
 #include "llsdserialize.h"
 #include "llunit.h"
+#include "llsd.h"
 
 #include <boost/bind.hpp>
+#include <queue>
 
 
 #if LL_WINDOWS
@@ -50,37 +52,40 @@
 #error "architecture not supported"
 #endif
 
+namespace LLTrace
+{
+
 //////////////////////////////////////////////////////////////////////////////
 // statics
 
-S32 LLFastTimer::sCurFrameIndex = -1;
-S32 LLFastTimer::sLastFrameIndex = -1;
-U64 LLFastTimer::sLastFrameTime = LLFastTimer::getCPUClockCount64();
-bool LLFastTimer::sPauseHistory = 0;
-bool LLFastTimer::sResetHistory = 0;
-LLFastTimer::CurTimerData LLFastTimer::sCurTimerData;
-BOOL LLFastTimer::sLog = FALSE;
-std::string LLFastTimer::sLogName = "";
-BOOL LLFastTimer::sMetricLog = FALSE;
-LLMutex* LLFastTimer::sLogLock = NULL;
-std::queue<LLSD> LLFastTimer::sLogQueue;
+S32 Time::sCurFrameIndex = -1;
+S32 Time::sLastFrameIndex = -1;
+U64 Time::sLastFrameTime = Time::getCPUClockCount64();
+bool Time::sPauseHistory = 0;
+bool Time::sResetHistory = 0;
+LLThreadLocalPointer<Time::CurTimerData> Time::sCurTimerData;
+bool Time::sLog = FALSE;
+std::string Time::sLogName = "";
+bool Time::sMetricLog = FALSE;
+static LLMutex*			sLogLock = NULL;
+static std::queue<LLSD> sLogQueue;
 
 #if LL_LINUX || LL_SOLARIS
-U64 LLFastTimer::sClockResolution = 1000000000; // Nanosecond resolution
+U64 Time::sClockResolution = 1000000000; // Nanosecond resolution
 #else
-U64 LLFastTimer::sClockResolution = 1000000; // Microsecond resolution
+U64 Time::sClockResolution = 1000000; // Microsecond resolution
 #endif
 
 // FIXME: move these declarations to the relevant modules
 
 // helper functions
-typedef LLTreeDFSPostIter<LLFastTimer::DeclareTimer, LLFastTimer::DeclareTimer::child_const_iter> timer_tree_bottom_up_iterator_t;
+typedef LLTreeDFSPostIter<BlockTimer, BlockTimer::child_const_iter> timer_tree_bottom_up_iterator_t;
 
-static timer_tree_bottom_up_iterator_t begin_timer_tree_bottom_up(LLFastTimer::DeclareTimer& id) 
+static timer_tree_bottom_up_iterator_t begin_timer_tree_bottom_up(BlockTimer& id) 
 { 
 	return timer_tree_bottom_up_iterator_t(&id, 
-							boost::bind(boost::mem_fn(&LLFastTimer::DeclareTimer::beginChildren), _1), 
-							boost::bind(boost::mem_fn(&LLFastTimer::DeclareTimer::endChildren), _1));
+							boost::bind(boost::mem_fn(&BlockTimer::beginChildren), _1), 
+							boost::bind(boost::mem_fn(&BlockTimer::endChildren), _1));
 }
 
 static timer_tree_bottom_up_iterator_t end_timer_tree_bottom_up() 
@@ -88,14 +93,14 @@ static timer_tree_bottom_up_iterator_t end_timer_tree_bottom_up()
 	return timer_tree_bottom_up_iterator_t(); 
 }
 
-typedef LLTreeDFSIter<LLFastTimer::DeclareTimer, LLFastTimer::DeclareTimer::child_const_iter> timer_tree_dfs_iterator_t;
+typedef LLTreeDFSIter<BlockTimer, BlockTimer::child_const_iter> timer_tree_dfs_iterator_t;
 
 
-static timer_tree_dfs_iterator_t begin_timer_tree(LLFastTimer::DeclareTimer& id) 
+static timer_tree_dfs_iterator_t begin_timer_tree(BlockTimer& id) 
 { 
 	return timer_tree_dfs_iterator_t(&id, 
-		boost::bind(boost::mem_fn(&LLFastTimer::DeclareTimer::beginChildren), _1), 
-							boost::bind(boost::mem_fn(&LLFastTimer::DeclareTimer::endChildren), _1));
+		boost::bind(boost::mem_fn(&BlockTimer::beginChildren), _1), 
+							boost::bind(boost::mem_fn(&BlockTimer::endChildren), _1));
 }
 
 static timer_tree_dfs_iterator_t end_timer_tree() 
@@ -103,21 +108,21 @@ static timer_tree_dfs_iterator_t end_timer_tree()
 	return timer_tree_dfs_iterator_t(); 
 }
 
-LLFastTimer::DeclareTimer& LLFastTimer::DeclareTimer::getRootTimer()
+BlockTimer& BlockTimer::getRootTimer()
 {
-	static DeclareTimer root_timer("root", true, NULL);
+	static BlockTimer root_timer("root", true, NULL);
 	return root_timer;
 }
 
 
 //static
 #if (LL_DARWIN || LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
-U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer
+U64 Time::countsPerSecond() // counts per second for the *32-bit* timer
 {
 	return sClockResolution >> 8;
 }
 #else // windows or x86-mac or x86-linux or x86-solaris
-U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer
+U64 Time::countsPerSecond() // counts per second for the *32-bit* timer
 {
 #if LL_FASTTIMER_USE_RDTSC || !LL_WINDOWS
 	//getCPUFrequency returns MHz and sCPUClockFrequency wants to be in Hz
@@ -140,19 +145,14 @@ U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer
 }
 #endif
 
-LLFastTimer::DeclareTimer::DeclareTimer(const std::string& name, bool open, DeclareTimer* parent)
-:	mName(name),
+BlockTimer::BlockTimer(const char* name, bool open, BlockTimer* parent)
+:	TraceType(name),
 	mCollapsed(true),
 	mParent(NULL),
 	mTreeTimeCounter(0),
 	mCountAverage(0),
 	mCallAverage(0),
-	mNeedsSorting(false),
-	mActiveCount(0),
-	mCalls(0),
-	mSelfTimeCounter(0),
-	mLastCaller(NULL),
-	mMoveUpTree(false)
+	mNeedsSorting(false)
 {
 	setCollapsed(!open);
 
@@ -171,13 +171,13 @@ LLFastTimer::DeclareTimer::DeclareTimer(const std::string& name, bool open, Decl
 	memset(mCallHistory, 0, sizeof(U32) * HISTORY_NUM);
 }
 
-LLFastTimer::DeclareTimer::~DeclareTimer()
+BlockTimer::~BlockTimer()
 {
 	delete[] mCountHistory;
 	delete[] mCallHistory;
 }
 
-void LLFastTimer::DeclareTimer::setParent(DeclareTimer* parent)
+void BlockTimer::setParent(BlockTimer* parent)
 {
 	llassert_always(parent != this);
 	llassert_always(parent != NULL);
@@ -193,8 +193,8 @@ void LLFastTimer::DeclareTimer::setParent(DeclareTimer* parent)
 		// subtract average timing from previous parent
 		mParent->mCountAverage -= mCountAverage;
 
-		std::vector<DeclareTimer*>& children = mParent->getChildren();
-		std::vector<DeclareTimer*>::iterator found_it = std::find(children.begin(), children.end(), this);
+		std::vector<BlockTimer*>& children = mParent->getChildren();
+		std::vector<BlockTimer*>::iterator found_it = std::find(children.begin(), children.end(), this);
 		if (found_it != children.end())
 		{
 			children.erase(found_it);
@@ -209,10 +209,10 @@ void LLFastTimer::DeclareTimer::setParent(DeclareTimer* parent)
 	}
 }
 
-S32 LLFastTimer::DeclareTimer::getDepth()
+S32 BlockTimer::getDepth()
 {
 	S32 depth = 0;
-	DeclareTimer* timerp = mParent;
+	BlockTimer* timerp = mParent;
 	while(timerp)
 	{
 		depth++;
@@ -223,9 +223,9 @@ S32 LLFastTimer::DeclareTimer::getDepth()
 }
 
 // static
-void LLFastTimer::DeclareTimer::processTimes()
+void BlockTimer::processTimes()
 {
-	if (sCurFrameIndex < 0) return;
+	if (Time::getCurFrameIndex() < 0) return;
 
 	buildHierarchy();
 	accumulateTimings();
@@ -234,27 +234,27 @@ void LLFastTimer::DeclareTimer::processTimes()
 // sort child timers by name
 struct SortTimerByName
 {
-	bool operator()(const LLFastTimer::DeclareTimer* i1, const LLFastTimer::DeclareTimer* i2)
+	bool operator()(const BlockTimer* i1, const BlockTimer* i2)
 	{
 		return i1->getName() < i2->getName();
 	}
 };
 
 //static
-void LLFastTimer::DeclareTimer::buildHierarchy()
+void BlockTimer::buildHierarchy()
 {
-	if (sCurFrameIndex < 0 ) return;
+	if (Time::getCurFrameIndex() < 0 ) return;
 
 	// set up initial tree
 	{
-		for (instance_iter it = beginInstances(), end_it = endInstances(); it != end_it; ++it)
+		for (LLInstanceTracker<BlockTimer>::instance_iter it = LLInstanceTracker<BlockTimer>::beginInstances(), end_it = LLInstanceTracker<BlockTimer>::endInstances(); it != end_it; ++it)
 		{
-			DeclareTimer& timer = *it;
-			if (&timer == &DeclareTimer::getRootTimer()) continue;
+			BlockTimer& timer = *it;
+			if (&timer == &BlockTimer::getRootTimer()) continue;
 			
 			// bootstrap tree construction by attaching to last timer to be on stack
 			// when this timer was called
-			if (timer.mLastCaller && timer.mParent == &DeclareTimer::getRootTimer())
+			if (timer.mLastCaller && timer.mParent == &BlockTimer::getRootTimer())
 			{
 				timer.setParent(timer.mLastCaller);
 				// no need to push up tree on first use, flag can be set spuriously
@@ -266,13 +266,13 @@ void LLFastTimer::DeclareTimer::buildHierarchy()
 	// bump timers up tree if they've been flagged as being in the wrong place
 	// do this in a bottom up order to promote descendants first before promoting ancestors
 	// this preserves partial order derived from current frame's observations
-	for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(DeclareTimer::getRootTimer());
+	for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(BlockTimer::getRootTimer());
 		it != end_timer_tree_bottom_up();
 		++it)
 	{
-		DeclareTimer* timerp = *it;
+		BlockTimer* timerp = *it;
 		// skip root timer
-		if (timerp == &DeclareTimer::getRootTimer()) continue;
+		if (timerp == &BlockTimer::getRootTimer()) continue;
 
 		if (timerp->mMoveUpTree)
 		{
@@ -289,11 +289,11 @@ void LLFastTimer::DeclareTimer::buildHierarchy()
 	}
 
 	// sort timers by time last called, so call graph makes sense
-	for(timer_tree_dfs_iterator_t it = begin_timer_tree(DeclareTimer::getRootTimer());
+	for(timer_tree_dfs_iterator_t it = begin_timer_tree(BlockTimer::getRootTimer());
 		it != end_timer_tree();
 		++it)
 	{
-		DeclareTimer* timerp = (*it);
+		BlockTimer* timerp = (*it);
 		if (timerp->mNeedsSorting)
 		{
 			std::sort(timerp->getChildren().begin(), timerp->getChildren().end(), SortTimerByName());
@@ -303,12 +303,12 @@ void LLFastTimer::DeclareTimer::buildHierarchy()
 }
 
 //static
-void LLFastTimer::DeclareTimer::accumulateTimings()
+void BlockTimer::accumulateTimings()
 {
 	U32 cur_time = getCPUClockCount32();
 
 	// walk up stack of active timers and accumulate current time while leaving timing structures active
-	LLFastTimer* cur_timer = sCurTimerData.mCurTimer;
+	Time* cur_timer = sCurTimerData.mCurTimer;
 	// root defined by parent pointing to self
 	CurTimerData* cur_data = &sCurTimerData;
 	while(cur_timer && cur_timer->mLastTimerData.mCurTimer != cur_timer)
@@ -328,11 +328,11 @@ void LLFastTimer::DeclareTimer::accumulateTimings()
 	}
 
 	// traverse tree in DFS post order, or bottom up
-	for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(DeclareTimer::getRootTimer());
+	for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(BlockTimer::getRootTimer());
 		it != end_timer_tree_bottom_up();
 		++it)
 	{
-		DeclareTimer* timerp = (*it);
+		BlockTimer* timerp = (*it);
 		timerp->mTreeTimeCounter = timerp->mSelfTimeCounter;
 		for (child_const_iter child_it = timerp->beginChildren(); child_it != timerp->endChildren(); ++child_it)
 		{
@@ -354,7 +354,7 @@ void LLFastTimer::DeclareTimer::accumulateTimings()
 }
 
 // static
-void LLFastTimer::DeclareTimer::resetFrame()
+void BlockTimer::resetFrame()
 {
 	if (sLog)
 	{ //output current frame counts to performance log
@@ -379,7 +379,7 @@ void LLFastTimer::DeclareTimer::resetFrame()
 		{
 			for (instance_iter it = beginInstances(), end_it = endInstances(); it != end_it; ++it)
 			{
-				DeclareTimer& timer = *it;
+				BlockTimer& timer = *it;
 				sd[timer.getName()]["Time"] = (LLSD::Real) (timer.mSelfTimeCounter*iclock_freq);	
 				sd[timer.getName()]["Calls"] = (LLSD::Integer) timer.mCalls;
 				
@@ -401,7 +401,7 @@ void LLFastTimer::DeclareTimer::resetFrame()
 	// reset for next frame
 	for (instance_iter it = beginInstances(), end_it = endInstances(); it != end_it; ++it)
 	{
-		DeclareTimer& timer = *it;
+		BlockTimer& timer = *it;
 		timer.mSelfTimeCounter = 0;
 		timer.mCalls = 0;
 		timer.mLastCaller = NULL;
@@ -410,7 +410,7 @@ void LLFastTimer::DeclareTimer::resetFrame()
 }
 
 //static
-void LLFastTimer::DeclareTimer::reset()
+void BlockTimer::reset()
 {
 	resetFrame(); // reset frame data
 
@@ -420,7 +420,7 @@ void LLFastTimer::DeclareTimer::reset()
 
 	// root defined by parent pointing to self
 	CurTimerData* cur_data = &sCurTimerData;
-	LLFastTimer* cur_timer = cur_data->mCurTimer;
+	Time* cur_timer = cur_data->mCurTimer;
 	while(cur_timer && cur_timer->mLastTimerData.mCurTimer != cur_timer)
 	{
 		cur_timer->mStartTime = cur_time;
@@ -434,10 +434,10 @@ void LLFastTimer::DeclareTimer::reset()
 	{
 		for (instance_iter it = beginInstances(), end_it = endInstances(); it != end_it; ++it)
 		{
-			DeclareTimer& timer = *it;
-			if (&timer != &DeclareTimer::getRootTimer()) 
+			BlockTimer& timer = *it;
+			if (&timer != &BlockTimer::getRootTimer()) 
 			{
-				timer.setParent(&DeclareTimer::getRootTimer());
+				timer.setParent(&BlockTimer::getRootTimer());
 			}
 			
 			timer.mCountAverage = 0;
@@ -451,35 +451,35 @@ void LLFastTimer::DeclareTimer::reset()
 	sCurFrameIndex = 0;
 }
 
-U32 LLFastTimer::DeclareTimer::getHistoricalCount(S32 history_index) const
+U32 BlockTimer::getHistoricalCount(S32 history_index) const
 {
-	S32 history_idx = (getLastFrameIndex() + history_index) % LLFastTimer::DeclareTimer::HISTORY_NUM;
+	S32 history_idx = (getLastFrameIndex() + history_index) % BlockTimer::HISTORY_NUM;
 	return mCountHistory[history_idx];
 }
 
-U32 LLFastTimer::DeclareTimer::getHistoricalCalls(S32 history_index ) const
+U32 BlockTimer::getHistoricalCalls(S32 history_index ) const
 {
-	S32 history_idx = (getLastFrameIndex() + history_index) % LLFastTimer::DeclareTimer::HISTORY_NUM;
+	S32 history_idx = (getLastFrameIndex() + history_index) % BlockTimer::HISTORY_NUM;
 	return mCallHistory[history_idx];
 }
 
-std::vector<LLFastTimer::DeclareTimer*>::const_iterator LLFastTimer::DeclareTimer::beginChildren()
+std::vector<BlockTimer*>::const_iterator BlockTimer::beginChildren()
 { 
 	return mChildren.begin(); 
 }
 
-std::vector<LLFastTimer::DeclareTimer*>::const_iterator LLFastTimer::DeclareTimer::endChildren()
+std::vector<BlockTimer*>::const_iterator BlockTimer::endChildren()
 {
 	return mChildren.end();
 }
 
-std::vector<LLFastTimer::DeclareTimer*>& LLFastTimer::DeclareTimer::getChildren()
+std::vector<BlockTimer*>& BlockTimer::getChildren()
 {
 	return mChildren;
 }
 
 //static
-void LLFastTimer::nextFrame()
+void Time::nextFrame()
 {
 	countsPerSecond(); // good place to calculate clock frequency
 	U64 frame_time = getCPUClockCount64();
@@ -490,30 +490,30 @@ void LLFastTimer::nextFrame()
 
 	if (!sPauseHistory)
 	{
-		DeclareTimer::processTimes();
+		BlockTimer::processTimes();
 		sLastFrameIndex = sCurFrameIndex++;
 	}
 	
 	// get ready for next frame
-	DeclareTimer::resetFrame();
+	BlockTimer::resetFrame();
 	sLastFrameTime = frame_time;
 }
 
 //static
-void LLFastTimer::dumpCurTimes()
+void Time::dumpCurTimes()
 {
 	// accumulate timings, etc.
-	DeclareTimer::processTimes();
+	BlockTimer::processTimes();
 	
 	F64 clock_freq = (F64)countsPerSecond();
 	F64 iclock_freq = 1000.0 / clock_freq; // clock_ticks -> milliseconds
 
 	// walk over timers in depth order and output timings
-	for(timer_tree_dfs_iterator_t it = begin_timer_tree(DeclareTimer::getRootTimer());
+	for(timer_tree_dfs_iterator_t it = begin_timer_tree(BlockTimer::getRootTimer());
 		it != end_timer_tree();
 		++it)
 	{
-		DeclareTimer* timerp = (*it);
+		BlockTimer* timerp = (*it);
 		F64 total_time_ms = ((F64)timerp->getHistoricalCount(0) * iclock_freq);
 		// Don't bother with really brief times, keep output concise
 		if (total_time_ms < 0.1) continue;
@@ -534,14 +534,14 @@ void LLFastTimer::dumpCurTimes()
 }
 
 //static 
-void LLFastTimer::reset()
+void Time::reset()
 {
-	DeclareTimer::reset();
+	BlockTimer::reset();
 }
 
 
 //static
-void LLFastTimer::writeLog(std::ostream& os)
+void Time::writeLog(std::ostream& os)
 {
 	while (!sLogQueue.empty())
 	{
@@ -552,3 +552,25 @@ void LLFastTimer::writeLog(std::ostream& os)
 	}
 }
 
+
+void LLTrace::TimerAccumulator::addSamples( const LLTrace::TimerAccumulator& other )
+{
+	mSelfTimeCounter += other.mSelfTimeCounter;
+	mTotalTimeCounter += other.mTotalTimeCounter;
+	mCalls += other.mCalls;
+	if (!mLastCaller)
+	{
+		mLastCaller = other.mLastCaller;
+	}
+
+	//mActiveCount stays the same;
+	mMoveUpTree |= other.mMoveUpTree;
+}
+
+void LLTrace::TimerAccumulator::reset( const LLTrace::TimerAccumulator* other )
+{
+	mTotalTimeCounter = 0;
+	mSelfTimeCounter = 0;
+	mCalls = 0;
+}
+}
diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h
index 40c2af34e81..f5e6d874a2b 100644
--- a/indra/llcommon/llfasttimer.h
+++ b/indra/llcommon/llfasttimer.h
@@ -33,131 +33,26 @@
 
 class LLMutex;
 
-#include <queue>
-#include "llsd.h"
+#include "lltrace.h"
 
 #define LL_FASTTIMER_USE_RDTSC 1
 
-class LL_COMMON_API LLFastTimer
+namespace LLTrace
 {
-public:
-	// stores a "named" timer instance to be reused via multiple LLFastTimer stack instances
-	class LL_COMMON_API DeclareTimer
-	:	public LLInstanceTracker<DeclareTimer>
-	{
-	public:
-		DeclareTimer(const std::string& name, bool open = false, DeclareTimer* parent = &getRootTimer());
-		~DeclareTimer();
-
-		enum { HISTORY_NUM = 300 };
-
-		const std::string& getName() const { return mName; }
-		DeclareTimer* getParent() const { return mParent; }
-		void setParent(DeclareTimer* parent);
-		S32 getDepth();
-
-		typedef std::vector<DeclareTimer*>::const_iterator child_const_iter;
-		child_const_iter beginChildren();
-		child_const_iter endChildren();
-		std::vector<DeclareTimer*>& getChildren();
-
-		void setCollapsed(bool collapsed)	{ mCollapsed = collapsed; }
-		bool getCollapsed() const			{ return mCollapsed; }
-
-		U32 getCountAverage() const { return mCountAverage; }
-		U32 getCallAverage() const	{ return mCallAverage; }
-
-		U32 getHistoricalCount(S32 history_index = 0) const;
-		U32 getHistoricalCalls(S32 history_index = 0) const;
-
-		static DeclareTimer& getRootTimer();
-
-	private:
-		friend class LLFastTimer;
-
-		// recursive call to gather total time from children
-		static void accumulateTimings();
-
-		// updates cumulative times and hierarchy,
-		// can be called multiple times in a frame, at any point
-		static void processTimes();
-
-		static void buildHierarchy();
-		static void resetFrame();
-		static void reset();
-
-		//
-		// members
-		//
-		U32 						mSelfTimeCounter;
-		U32 						mTotalTimeCounter;
-		U32 						mCalls;
-		DeclareTimer*				mLastCaller;	// used to bootstrap tree construction
-		U16							mActiveCount;	// number of timers with this ID active on stack
-		bool						mMoveUpTree;	// needs to be moved up the tree of timers at the end of frame
-
-		std::string					mName;
-
-		// sum of recored self time and tree time of all children timers (might not match actual recorded time of children if topology is incomplete
-		U32 						mTreeTimeCounter; 
-
-		U32 						mCountAverage;
-		U32							mCallAverage;
-
-		U32*						mCountHistory;
-		U32*						mCallHistory;
-
-		// tree structure
-		DeclareTimer*				mParent;				// DeclareTimer of caller(parent)
-		std::vector<DeclareTimer*>	mChildren;
-		bool						mCollapsed;				// don't show children
-		bool						mNeedsSorting;			// sort children whenever child added
-	};
 
+class Time
+{
 public:
-	LL_FORCE_INLINE LLFastTimer(LLFastTimer::DeclareTimer& timer)
-	{
-#if FAST_TIMER_ON
-		mStartTime = getCPUClockCount32();
-
-		timer.mActiveCount++;
-		timer.mCalls++;
-		// keep current parent as long as it is active when we are
-		timer.mMoveUpTree |= (timer.mParent->mActiveCount == 0);
-
-		LLFastTimer::CurTimerData* cur_timer_data = &LLFastTimer::sCurTimerData;
-		mLastTimerData = *cur_timer_data;
-		cur_timer_data->mCurTimer = this;
-		cur_timer_data->mTimerData = &timer;
-		cur_timer_data->mChildTime = 0;
-#endif
-	}
-
-	LL_FORCE_INLINE ~LLFastTimer()
-	{
-#if FAST_TIMER_ON
-		U32 total_time = getCPUClockCount32() - mStartTime;
-		DeclareTimer* timer_data = LLFastTimer::sCurTimerData.mTimerData;
-		timer_data->mSelfTimeCounter += total_time - LLFastTimer::sCurTimerData.mChildTime;
-		timer_data->mTotalTimeCounter += total_time;
-		timer_data->mActiveCount--;
-
-		// store last caller to bootstrap tree creation
-		// do this in the destructor in case of recursion to get topmost caller
-		timer_data->mLastCaller = mLastTimerData.mTimerData;
-
-		// we are only tracking self time, so subtract our total time delta from parents
-		mLastTimerData.mChildTime += total_time;
+	typedef Time self_t;
+	typedef class BlockTimer DeclareTimer;
 
-		LLFastTimer::sCurTimerData = mLastTimerData;
-#endif
-	}
+public:
+	Time(BlockTimer& timer);
+	~Time();
 
 public:
-	static LLMutex*			sLogLock;
-	static std::queue<LLSD> sLogQueue;
-	static BOOL				sLog;
-	static BOOL				sMetricLog;
+	static bool				sLog;
+	static bool				sMetricLog;
 	static std::string		sLogName;
 	static bool 			sPauseHistory;
 	static bool 			sResetHistory;
@@ -180,11 +75,11 @@ class LL_COMMON_API LLFastTimer
 
 	struct CurTimerData
 	{
-		LLFastTimer*	mCurTimer;
-		DeclareTimer*	mTimerData;
-		U32				mChildTime;
+		Time*				mCurTimer;
+		BlockTimer*			mTimerData;
+		U64					mChildTime;
 	};
-	static CurTimerData		sCurTimerData;
+	static LLThreadLocalPointer<CurTimerData>	sCurTimerData;
 
 private:
 
@@ -211,14 +106,14 @@ class LL_COMMON_API LLFastTimer
 	//#undef _interlockedbittestandset
 	//#undef _interlockedbittestandreset
 
-	//inline U32 LLFastTimer::getCPUClockCount32()
+	//inline U32 Time::getCPUClockCount32()
 	//{
 	//	U64 time_stamp = __rdtsc();
 	//	return (U32)(time_stamp >> 8);
 	//}
 	//
 	//// return full timer value, *not* shifted by 8 bits
-	//inline U64 LLFastTimer::getCPUClockCount64()
+	//inline U64 Time::getCPUClockCount64()
 	//{
 	//	return __rdtsc();
 	//}
@@ -258,7 +153,7 @@ class LL_COMMON_API LLFastTimer
 	}
 
 #else
-	//LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp
+	//U64 get_clock_count(); // in lltimer.cpp
 	// These use QueryPerformanceCounter, which is arguably fine and also works on AMD architectures.
 	static U32 getCPUClockCount32()
 	{
@@ -330,10 +225,131 @@ class LL_COMMON_API LLFastTimer
 	static S32	sLastFrameIndex;
 	static U64	sLastFrameTime;
 
-	U32							mStartTime;
-	LLFastTimer::CurTimerData	mLastTimerData;
+	U64							mStartTime;
+	Time::CurTimerData	mLastTimerData;
+};
+
+struct TimerAccumulator
+{
+	void addSamples(const TimerAccumulator& other);
+	void reset(const TimerAccumulator* other);
+
+	//
+	// members
+	//
+	U64 						mSelfTimeCounter,
+								mTotalTimeCounter;
+	U32 						mCalls;
+	BlockTimer*					mLastCaller;	// used to bootstrap tree construction
+	U16							mActiveCount;	// number of timers with this ID active on stack
+	bool						mMoveUpTree;	// needs to be moved up the tree of timers at the end of frame
+};
+
+// stores a "named" timer instance to be reused via multiple Time stack instances
+class BlockTimer 
+:	public TraceType<TimerAccumulator>,
+	public LLInstanceTracker<BlockTimer>
+{
+public:
+	BlockTimer(const char* name, bool open = false, BlockTimer* parent = &getRootTimer());
+	~BlockTimer();
+
+	enum { HISTORY_NUM = 300 };
+
+	BlockTimer* getParent() const { return mParent; }
+	void setParent(BlockTimer* parent);
+	S32 getDepth();
+
+	typedef std::vector<BlockTimer*>::const_iterator child_const_iter;
+	child_const_iter beginChildren();
+	child_const_iter endChildren();
+	std::vector<BlockTimer*>& getChildren();
+
+	void setCollapsed(bool collapsed)	{ mCollapsed = collapsed; }
+	bool getCollapsed() const			{ return mCollapsed; }
+
+	U32 getCountAverage() const { return mCountAverage; }
+	U32 getCallAverage() const	{ return mCallAverage; }
+
+	U32 getHistoricalCount(S32 history_index = 0) const;
+	U32 getHistoricalCalls(S32 history_index = 0) const;
+
+	static BlockTimer& getRootTimer();
+
+private:
+	friend class Time;
+
+	// recursive call to gather total time from children
+	static void accumulateTimings();
+
+	// updates cumulative times and hierarchy,
+	// can be called multiple times in a frame, at any point
+	static void processTimes();
+
+	static void buildHierarchy();
+	static void resetFrame();
+	static void reset();
+
+	// sum of recorded self time and tree time of all children timers (might not match actual recorded time of children if topology is incomplete
+	U32 						mTreeTimeCounter; 
+
+	U32 						mCountAverage;
+	U32							mCallAverage;
+
+	U32*						mCountHistory;
+	U32*						mCallHistory;
+
+	// tree structure
+	BlockTimer*					mParent;				// BlockTimer of caller(parent)
+	std::vector<BlockTimer*>	mChildren;
+	bool						mCollapsed;				// don't show children
+	bool						mNeedsSorting;			// sort children whenever child added
 };
 
-typedef class LLFastTimer LLFastTimer;
+LL_FORCE_INLINE Time::Time(BlockTimer& timer)
+{
+#if FAST_TIMER_ON
+	mStartTime = getCPUClockCount64();
+
+	TimerAccumulator& accumulator = timer.getPrimaryAccumulator();
+	accumulator.mActiveCount++;
+	accumulator.mCalls++;
+	// keep current parent as long as it is active when we are
+	accumulator.mMoveUpTree |= (timer.mParent->getPrimaryAccumulator().mActiveCount == 0);
+
+	CurTimerData* cur_timer_data = Time::sCurTimerData.get();
+	// store top of stack
+	mLastTimerData = *cur_timer_data;
+	// push new information
+	cur_timer_data->mCurTimer = this;
+	cur_timer_data->mTimerData = &timer;
+	cur_timer_data->mChildTime = 0;
+#endif
+}
+
+LL_FORCE_INLINE Time::~Time()
+{
+#if FAST_TIMER_ON
+	U64 total_time = getCPUClockCount64() - mStartTime;
+	CurTimerData* cur_timer_data = Time::sCurTimerData.get();
+	TimerAccumulator& accumulator = cur_timer_data->mTimerData->getPrimaryAccumulator();
+	accumulator.mSelfTimeCounter += total_time - cur_timer_data->mChildTime;
+	accumulator.mTotalTimeCounter += total_time;
+	accumulator.mActiveCount--;
+
+	// store last caller to bootstrap tree creation
+	// do this in the destructor in case of recursion to get topmost caller
+	accumulator.mLastCaller = mLastTimerData.mTimerData;
+
+	// we are only tracking self time, so subtract our total time delta from parents
+	mLastTimerData.mChildTime += total_time;
+
+	*sCurTimerData = mLastTimerData;
+#endif
+}
+
+}
+
+typedef LLTrace::Time LLFastTimer;
 
 #endif // LL_LLFASTTIMER_H
diff --git a/indra/llcommon/lltrace.cpp b/indra/llcommon/lltrace.cpp
index 3f605f2c743..afb175c3984 100644
--- a/indra/llcommon/lltrace.cpp
+++ b/indra/llcommon/lltrace.cpp
@@ -28,6 +28,7 @@
 #include "lltrace.h"
 #include "lltracerecording.h"
 #include "lltracethreadrecorder.h"
+#include "llfasttimer.h"
 
 namespace LLTrace
 {
@@ -37,13 +38,15 @@ static MasterThreadRecorder* gMasterThreadRecorder = NULL;
 void init()
 {
 	gMasterThreadRecorder = new MasterThreadRecorder();
+	Time::sCurTimerData = new Time::CurTimerData();
 }
 
 void cleanup()
 {
 	delete gMasterThreadRecorder;
-	LLUnitStrict<LLUnits::Seconds, F32> seconds;
 	gMasterThreadRecorder = NULL;
+	delete Time::sCurTimerData.get();
+	Time::sCurTimerData = NULL;
 }
 
 MasterThreadRecorder& getMasterThreadRecorder()
@@ -59,6 +62,4 @@ LLThreadLocalPointer<ThreadRecorder>& get_thread_recorder()
 
 }
 
-BlockTimer::Recorder::StackEntry BlockTimer::sCurRecorder;
-
 }
diff --git a/indra/llcommon/lltrace.h b/indra/llcommon/lltrace.h
index 549e4078220..fb9dca5e841 100644
--- a/indra/llcommon/lltrace.h
+++ b/indra/llcommon/lltrace.h
@@ -32,7 +32,6 @@
 
 #include "llmemory.h"
 #include "llrefcount.h"
-//#include "lltracethreadrecorder.h"
 #include "llunit.h"
 #include "llapr.h"
 
@@ -61,8 +60,8 @@ namespace LLTrace
 	typedef LLUnit<LLUnits::Microseconds, F64>	Microseconds;
 	typedef LLUnit<LLUnits::Nanoseconds, F64>	Nanoseconds;
 
-	typedef LLUnit<LLUnits::Meters, F64>			Meters;
-	typedef LLUnit<LLUnits::Kilometers, F64>		Kilometers;
+	typedef LLUnit<LLUnits::Meters, F64>		Meters;
+	typedef LLUnit<LLUnits::Kilometers, F64>	Kilometers;
 	typedef LLUnit<LLUnits::Centimeters, F64>	Centimeters;
 	typedef LLUnit<LLUnits::Millimeters, F64>	Millimeters;
 
@@ -71,11 +70,11 @@ namespace LLTrace
 
 	LLThreadLocalPointer<class ThreadRecorder>& get_thread_recorder();
 
-	class LL_COMMON_API MasterThreadRecorder& getMasterThreadRecorder();
+	class MasterThreadRecorder& getMasterThreadRecorder();
 
 	// one per thread per type
 	template<typename ACCUMULATOR>
-	class LL_COMMON_API AccumulatorBuffer : public LLRefCount
+	class AccumulatorBuffer : public LLRefCount
 	{
 		static const U32 DEFAULT_ACCUMULATOR_BUFFER_SIZE = 64;
 	private:
@@ -199,7 +198,7 @@ namespace LLTrace
 	template<typename ACCUMULATOR> LLThreadLocalPointer<ACCUMULATOR> AccumulatorBuffer<ACCUMULATOR>::sPrimaryStorage;
 
 	template<typename ACCUMULATOR>
-	class LL_COMMON_API TraceType 
+	class TraceType 
 	:	 public LLInstanceTracker<TraceType<ACCUMULATOR>, std::string>
 	{
 	public:
@@ -218,6 +217,9 @@ namespace LLTrace
 
 		size_t getIndex() const { return mAccumulatorIndex; }
 
+		std::string& getName() { return mName; }
+		const std::string& getName() const { return mName; }
+
 	protected:
 		std::string	mName;
 		std::string mDescription;
@@ -225,7 +227,7 @@ namespace LLTrace
 	};
 
 	template<typename T>
-	class LL_COMMON_API MeasurementAccumulator
+	class MeasurementAccumulator
 	{
 	public:
 		typedef T value_t;
@@ -339,7 +341,7 @@ namespace LLTrace
 	};
 
 	template<typename T>
-	class LL_COMMON_API CountAccumulator
+	class CountAccumulator
 	{
 	public:
 		typedef CountAccumulator<T> self_t;
@@ -378,11 +380,8 @@ namespace LLTrace
 		U32	mNumSamples;
 	};
 
-	typedef TraceType<MeasurementAccumulator<F64> > measurement_common_float_t;
-	typedef TraceType<MeasurementAccumulator<S64> > measurement_common_int_t;
-
-	template <typename T = F64, typename IS_UNIT = void>
-	class LL_COMMON_API Measurement
+	template <typename T = F64>
+	class Measurement
 	:	public TraceType<MeasurementAccumulator<typename LLUnits::HighestPrecisionType<T>::type_t> >
 	{
 	public:
@@ -392,37 +391,16 @@ namespace LLTrace
 		:	TraceType(name, description)
 		{}
 
-		void sample(T value)
-		{
-			getPrimaryAccumulator().sample((storage_t)value);
-		}
-	};
-
-	template <typename T>
-	class LL_COMMON_API Measurement <T, typename T::is_unit_tag_t>
-	:	public TraceType<MeasurementAccumulator<typename LLUnits::HighestPrecisionType<typename T::storage_t>::type_t> >
-	{
-	public:
-		typedef typename LLUnits::HighestPrecisionType<typename T::storage_t>::type_t storage_t;
-
-		Measurement(const char* name, const char* description = NULL) 
-		:	TraceType(name, description)
-		{}
-
 		template<typename UNIT_T>
 		void sample(UNIT_T value)
 		{
-			T converted_value;
-			converted_value = value;
-			getPrimaryAccumulator().sample((storage_t)converted_value.value());
+			T converted_value(value);
+			getPrimaryAccumulator().sample((storage_t)converted_value);
 		}
 	};
 
-	typedef TraceType<CountAccumulator<F64> > count_common_float_t;
-	typedef TraceType<CountAccumulator<S64> > count_common_int_t;
-
-	template <typename T = F64, typename IS_UNIT = void>
-	class LL_COMMON_API Count 
+	template <typename T = F64>
+	class Count 
 	:	public TraceType<CountAccumulator<typename LLUnits::HighestPrecisionType<T>::type_t> >
 	{
 	public:
@@ -432,148 +410,12 @@ namespace LLTrace
 		:	TraceType(name)
 		{}
 
-		void add(T value)
-		{
-			getPrimaryAccumulator().add((storage_t)value);
-		}
-	};
-
-	template <typename T>
-	class LL_COMMON_API Count <T, typename T::is_unit_tag_t>
-	:	public TraceType<CountAccumulator<typename LLUnits::HighestPrecisionType<typename T::storage_t>::type_t> >
-	{
-	public:
-		typedef typename LLUnits::HighestPrecisionType<typename T::storage_t>::type_t storage_t;
-
-		Count(const char* name, const char* description = NULL) 
-		:	TraceType(name)
-		{}
-
 		template<typename UNIT_T>
 		void add(UNIT_T value)
 		{
-			T converted_value;
-			converted_value = value;
-			getPrimaryAccumulator().add((storage_t)converted_value.value());
-		}
-	};
-
-	class LL_COMMON_API TimerAccumulator
-	{
-	public:
-		typedef TimerAccumulator self_t;
-
-		U32 							mTotalTimeCounter,
-										mChildTimeCounter,
-										mCalls;
-
-		TimerAccumulator*				mParent;		// info for caller timer
-		TimerAccumulator*				mLastCaller;	// used to bootstrap tree construction
-		const class BlockTimer*			mTimer;			// points to block timer associated with this storage
-		U8								mActiveCount;	// number of timers with this ID active on stack
-		bool							mMoveUpTree;	// needs to be moved up the tree of timers at the end of frame
-		std::vector<TimerAccumulator*>	mChildren;		// currently assumed child timers
-
-		void addSamples(const TimerAccumulator& other)
-		{
-			mTotalTimeCounter += other.mTotalTimeCounter;
-			mChildTimeCounter += other.mChildTimeCounter;
-			mCalls += other.mCalls;
-		}
-
-		void reset(const self_t* other)
-		{
-			mTotalTimeCounter = 0;
-			mChildTimeCounter = 0;
-			mCalls = 0;
+			T converted_value(value);
+			getPrimaryAccumulator().add((storage_t)converted_value);
 		}
-
-	};
-
-	class LL_COMMON_API BlockTimer : public TraceType<TimerAccumulator>
-	{
-	public:
-		BlockTimer(const char* name)
-		:	TraceType(name)
-		{}
-
-		struct Recorder
-		{
-			struct StackEntry
-			{
-				Recorder*			mRecorder;
-				TimerAccumulator*	mAccumulator;
-				U32					mChildTime;
-			};
-
-			LL_FORCE_INLINE Recorder(BlockTimer& block_timer)
-			:	mLastRecorder(sCurRecorder)
-			{
-				mStartTime = getCPUClockCount32();
-				TimerAccumulator* accumulator = &block_timer.getPrimaryAccumulator(); // get per-thread accumulator
-				accumulator->mActiveCount++;
-				accumulator->mCalls++;
-				accumulator->mMoveUpTree |= (accumulator->mParent->mActiveCount == 0);
-
-				// push new timer on stack
-				sCurRecorder.mRecorder = this;
-				sCurRecorder.mAccumulator = accumulator;
-				sCurRecorder.mChildTime = 0;
-			}
-
-			LL_FORCE_INLINE ~Recorder()
-			{
-				U32 total_time = getCPUClockCount32() - mStartTime;
-
-				TimerAccumulator* accumulator = sCurRecorder.mAccumulator;
-				accumulator->mTotalTimeCounter += total_time;
-				accumulator->mChildTimeCounter += sCurRecorder.mChildTime;
-				accumulator->mActiveCount--;
-
-				accumulator->mLastCaller = mLastRecorder.mAccumulator;
-				mLastRecorder.mChildTime += total_time;
-
-				// pop stack
-				sCurRecorder = mLastRecorder;
-			}
-
-			StackEntry mLastRecorder;
-			U32 mStartTime;
-		};
-
-	private:
-		static U32 getCPUClockCount32()
-		{
-			U32 ret_val;
-			__asm
-			{
-				_emit   0x0f
-				_emit   0x31
-				shr eax,8
-				shl edx,24
-				or eax, edx
-				mov dword ptr [ret_val], eax
-			}
-			return ret_val;
-		}
-
-		// return full timer value, *not* shifted by 8 bits
-		static U64 getCPUClockCount64()
-		{
-			U64 ret_val;
-			__asm
-			{
-				_emit   0x0f
-				_emit   0x31
-				mov eax,eax
-				mov edx,edx
-				mov dword ptr [ret_val+4], edx
-				mov dword ptr [ret_val], eax
-			}
-			return ret_val;
-		}
-
-		static Recorder::StackEntry sCurRecorder;
 	};
 }
 
diff --git a/indra/llcommon/lltracerecording.h b/indra/llcommon/lltracerecording.h
index 16b80fd1d82..4dcadfdf296 100644
--- a/indra/llcommon/lltracerecording.h
+++ b/indra/llcommon/lltracerecording.h
@@ -120,7 +120,7 @@ namespace LLTrace
 		F64 getSum(const TraceType<CountAccumulator<F64> >& stat) const;
 		S64 getSum(const TraceType<CountAccumulator<S64> >& stat) const;
 		template <typename T>
-		T getSum(const Count<T, typename T::is_unit_tag_t>& stat) const
+		T getSum(const Count<T>& stat) const
 		{
 			return (T)getSum(static_cast<const TraceType<CountAccumulator<LLUnits::HighestPrecisionType<T>::type_t> >&> (stat));
 		}
@@ -128,7 +128,7 @@ namespace LLTrace
 		F64 getPerSec(const TraceType<CountAccumulator<F64> >& stat) const;
 		F64 getPerSec(const TraceType<CountAccumulator<S64> >& stat) const;
 		template <typename T>
-		T getPerSec(const Count<T, typename T::is_unit_tag_t>& stat) const
+		T getPerSec(const Count<T>& stat) const
 		{
 			return (T)getPerSec(static_cast<const TraceType<CountAccumulator<LLUnits::HighestPrecisionType<T>::type_t> >&> (stat));
 		}
@@ -141,7 +141,7 @@ namespace LLTrace
 		F64 getSum(const TraceType<MeasurementAccumulator<F64> >& stat) const;
 		S64 getSum(const TraceType<MeasurementAccumulator<S64> >& stat) const;
 		template <typename T>
-		T getSum(const Measurement<T, typename T::is_unit_tag_t>& stat) const
+		T getSum(const Measurement<T>& stat) const
 		{
 			return (T)getSum(static_cast<const TraceType<MeasurementAccumulator<LLUnits::HighestPrecisionType<T>::type_t> >&> (stat));
 		}
@@ -149,7 +149,7 @@ namespace LLTrace
 		F64 getPerSec(const TraceType<MeasurementAccumulator<F64> >& stat) const;
 		F64 getPerSec(const TraceType<MeasurementAccumulator<S64> >& stat) const;
 		template <typename T>
-		T getPerSec(const Measurement<T, typename T::is_unit_tag_t>& stat) const
+		T getPerSec(const Measurement<T>& stat) const
 		{
 			return (T)getPerSec(static_cast<const TraceType<MeasurementAccumulator<LLUnits::HighestPrecisionType<T>::type_t> >&> (stat));
 		}
@@ -157,7 +157,7 @@ namespace LLTrace
 		F64 getMin(const TraceType<MeasurementAccumulator<F64> >& stat) const;
 		S64 getMin(const TraceType<MeasurementAccumulator<S64> >& stat) const;
 		template <typename T>
-		T getMin(const Measurement<T, typename T::is_unit_tag_t>& stat) const
+		T getMin(const Measurement<T>& stat) const
 		{
 			return (T)getMin(static_cast<const TraceType<MeasurementAccumulator<LLUnits::HighestPrecisionType<T>::type_t> >&> (stat));
 		}
@@ -165,7 +165,7 @@ namespace LLTrace
 		F64 getMax(const TraceType<MeasurementAccumulator<F64> >& stat) const;
 		S64 getMax(const TraceType<MeasurementAccumulator<S64> >& stat) const;
 		template <typename T>
-		T getMax(const Measurement<T, typename T::is_unit_tag_t>& stat) const
+		T getMax(const Measurement<T>& stat) const
 		{
 			return (T)getMax(static_cast<const TraceType<MeasurementAccumulator<LLUnits::HighestPrecisionType<T>::type_t> >&> (stat));
 		}
@@ -173,7 +173,7 @@ namespace LLTrace
 		F64 getMean(const TraceType<MeasurementAccumulator<F64> >& stat) const;
 		F64 getMean(const TraceType<MeasurementAccumulator<S64> >& stat) const;
 		template <typename T>
-		T getMean(Measurement<T, typename T::is_unit_tag_t>& stat) const
+		T getMean(Measurement<T>& stat) const
 		{
 			return (T)getMean(static_cast<const TraceType<MeasurementAccumulator<LLUnits::HighestPrecisionType<T>::type_t> >&> (stat));
 		}
@@ -181,7 +181,7 @@ namespace LLTrace
 		F64 getStandardDeviation(const TraceType<MeasurementAccumulator<F64> >& stat) const;
 		F64 getStandardDeviation(const TraceType<MeasurementAccumulator<S64> >& stat) const;
 		template <typename T>
-		T getStandardDeviation(const Measurement<T, typename T::is_unit_tag_t>& stat) const
+		T getStandardDeviation(const Measurement<T>& stat) const
 		{
 			return (T)getMean(static_cast<const TraceType<MeasurementAccumulator<LLUnits::HighestPrecisionType<T>::type_t> >&> (stat));
 		}
@@ -189,7 +189,7 @@ namespace LLTrace
 		F64 getLastValue(const TraceType<MeasurementAccumulator<F64> >& stat) const;
 		S64 getLastValue(const TraceType<MeasurementAccumulator<S64> >& stat) const;
 		template <typename T>
-		T getLastValue(const Measurement<T, typename T::is_unit_tag_t>& stat) const
+		T getLastValue(const Measurement<T>& stat) const
 		{
 			return (T)getLastValue(static_cast<const TraceType<MeasurementAccumulator<LLUnits::HighestPrecisionType<T>::type_t> >&> (stat));
 		}
diff --git a/indra/llcommon/llunit.h b/indra/llcommon/llunit.h
index 54ba1d67db1..4ac2197c11d 100644
--- a/indra/llcommon/llunit.h
+++ b/indra/llcommon/llunit.h
@@ -209,26 +209,26 @@ struct LLUnitStrict : public LLUnit<UNIT_TYPE, STORAGE_TYPE>
 //
 // operator +
 //
-template<typename STORAGE_TYPE1, typename UNIT_TYPE1, typename STORAGE_TYPE2, typename UNIT_TYPE2>
-LLUnit<STORAGE_TYPE1, UNIT_TYPE1> operator + (LLUnit<STORAGE_TYPE1, UNIT_TYPE1> first, LLUnit<STORAGE_TYPE2, UNIT_TYPE2> second)
+template<typename UNIT_TYPE1, typename STORAGE_TYPE1, typename UNIT_TYPE2, typename STORAGE_TYPE2>
+LLUnit<STORAGE_TYPE1, UNIT_TYPE1> operator + (LLUnit<UNIT_TYPE1, STORAGE_TYPE1> first, LLUnit<UNIT_TYPE2, STORAGE_TYPE2> second)
 {
-	LLUnit<STORAGE_TYPE1, UNIT_TYPE1> result(first);
+	LLUnit<UNIT_TYPE1, STORAGE_TYPE1> result(first);
 	result += second;
 	return result;
 }
 
-template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
-LLUnit<STORAGE_TYPE, UNIT_TYPE> operator + (LLUnit<STORAGE_TYPE, UNIT_TYPE> first, SCALAR_TYPE second)
+template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
+LLUnit<UNIT_TYPE, STORAGE_TYPE> operator + (LLUnit<UNIT_TYPE, STORAGE_TYPE> first, SCALAR_TYPE second)
 {
-	LLUnit<STORAGE_TYPE, UNIT_TYPE> result(first);
+	LLUnit<UNIT_TYPE, STORAGE_TYPE> result(first);
 	result += second;
 	return result;
 }
 
-template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
-LLUnit<STORAGE_TYPE, UNIT_TYPE> operator + (SCALAR_TYPE first, LLUnit<STORAGE_TYPE, UNIT_TYPE> second)
+template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
+LLUnit<UNIT_TYPE, STORAGE_TYPE> operator + (SCALAR_TYPE first, LLUnit<UNIT_TYPE, STORAGE_TYPE> second)
 {
-	LLUnit<STORAGE_TYPE, UNIT_TYPE> result(first);
+	LLUnit<UNIT_TYPE, STORAGE_TYPE> result(first);
 	result += second;
 	return result;
 }
@@ -236,27 +236,27 @@ LLUnit<STORAGE_TYPE, UNIT_TYPE> operator + (SCALAR_TYPE first, LLUnit<STORAGE_TY
 //
 // operator -
 //
-template<typename STORAGE_TYPE1, typename UNIT_TYPE1, typename STORAGE_TYPE2, typename UNIT_TYPE2>
-LLUnit<STORAGE_TYPE1, UNIT_TYPE1> operator - (LLUnit<STORAGE_TYPE1, UNIT_TYPE1> first, LLUnit<STORAGE_TYPE2, UNIT_TYPE2> second)
+template<typename UNIT_TYPE1, typename STORAGE_TYPE1, typename UNIT_TYPE2, typename STORAGE_TYPE2>
+LLUnit<UNIT_TYPE1, STORAGE_TYPE1> operator - (LLUnit<UNIT_TYPE1, STORAGE_TYPE1> first, LLUnit<UNIT_TYPE2, STORAGE_TYPE2> second)
 {
-	LLUnit<STORAGE_TYPE1, UNIT_TYPE1> result(first);
+	LLUnit<UNIT_TYPE1, STORAGE_TYPE1> result(first);
 	result -= second;
 	return result;
 }
 
 
-template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
-LLUnit<STORAGE_TYPE, UNIT_TYPE> operator - (LLUnit<STORAGE_TYPE, UNIT_TYPE> first, SCALAR_TYPE second)
+template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
+LLUnit<UNIT_TYPE, STORAGE_TYPE> operator - (LLUnit<UNIT_TYPE, STORAGE_TYPE> first, SCALAR_TYPE second)
 {
-	LLUnit<STORAGE_TYPE, UNIT_TYPE> result(first);
+	LLUnit<UNIT_TYPE, STORAGE_TYPE> result(first);
 	result -= second;
 	return result;
 }
 
-template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
-LLUnit<STORAGE_TYPE, UNIT_TYPE> operator - (SCALAR_TYPE first, LLUnit<STORAGE_TYPE, UNIT_TYPE> second)
+template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
+LLUnit<UNIT_TYPE, STORAGE_TYPE> operator - (SCALAR_TYPE first, LLUnit<UNIT_TYPE, STORAGE_TYPE> second)
 {
-	LLUnit<STORAGE_TYPE, UNIT_TYPE> result(first);
+	LLUnit<UNIT_TYPE, STORAGE_TYPE> result(first);
 	result -= second;
 	return result;
 }
@@ -264,20 +264,20 @@ LLUnit<STORAGE_TYPE, UNIT_TYPE> operator - (SCALAR_TYPE first, LLUnit<STORAGE_TY
 //
 // operator *
 //
-template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
-LLUnit<STORAGE_TYPE, UNIT_TYPE> operator * (SCALAR_TYPE first, LLUnit<STORAGE_TYPE, UNIT_TYPE> second)
+template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
+LLUnit<UNIT_TYPE, STORAGE_TYPE> operator * (SCALAR_TYPE first, LLUnit<UNIT_TYPE, STORAGE_TYPE> second)
 {
-	return LLUnit<STORAGE_TYPE, UNIT_TYPE>(first * second.value());
+	return LLUnit<UNIT_TYPE, STORAGE_TYPE>(first * second.value());
 }
 
-template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
-LLUnit<STORAGE_TYPE, UNIT_TYPE> operator * (LLUnit<STORAGE_TYPE, UNIT_TYPE> first, SCALAR_TYPE second)
+template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
+LLUnit<UNIT_TYPE, STORAGE_TYPE> operator * (LLUnit<UNIT_TYPE, STORAGE_TYPE> first, SCALAR_TYPE second)
 {
-	return LLUnit<STORAGE_TYPE, UNIT_TYPE>(first.value() * second);
+	return LLUnit<UNIT_TYPE, STORAGE_TYPE>(first.value() * second);
 }
 
-template<typename STORAGE_TYPE1, typename UNIT_TYPE1, typename STORAGE_TYPE2, typename UNIT_TYPE2>
-void operator * (LLUnit<STORAGE_TYPE1, UNIT_TYPE1>, LLUnit<STORAGE_TYPE2, UNIT_TYPE2>)
+template<typename UNIT_TYPE1, typename STORAGE_TYPE1, typename UNIT_TYPE2, typename STORAGE_TYPE2>
+void operator * (LLUnit<UNIT_TYPE1, STORAGE_TYPE1>, LLUnit<UNIT_TYPE2, STORAGE_TYPE2>)
 {
 	// spurious use of dependent type to stop gcc from triggering the static assertion before instantiating the template
 	llstatic_assert(sizeof(STORAGE_TYPE1) == 0, "Multiplication of unit types results in new unit type - not supported.");
@@ -286,40 +286,40 @@ void operator * (LLUnit<STORAGE_TYPE1, UNIT_TYPE1>, LLUnit<STORAGE_TYPE2, UNIT_T
 //
 // operator /
 //
-template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
-SCALAR_TYPE operator / (SCALAR_TYPE first, LLUnit<STORAGE_TYPE, UNIT_TYPE> second)
+template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
+SCALAR_TYPE operator / (SCALAR_TYPE first, LLUnit<UNIT_TYPE, STORAGE_TYPE> second)
 {
 	return SCALAR_TYPE(first / second.value());
 }
 
-template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
-LLUnit<STORAGE_TYPE, UNIT_TYPE> operator / (LLUnit<STORAGE_TYPE, UNIT_TYPE> first, SCALAR_TYPE second)
+template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
+LLUnit<UNIT_TYPE, STORAGE_TYPE> operator / (LLUnit<UNIT_TYPE, STORAGE_TYPE> first, SCALAR_TYPE second)
 {
-	return LLUnit<STORAGE_TYPE, UNIT_TYPE>(first.value() / second);
+	return LLUnit<UNIT_TYPE, STORAGE_TYPE>(first.value() / second);
 }
 
-template<typename STORAGE_TYPE1, typename UNIT_TYPE1, typename STORAGE_TYPE2, typename UNIT_TYPE2>
-void operator / (LLUnit<STORAGE_TYPE1, UNIT_TYPE1>, LLUnit<STORAGE_TYPE2, UNIT_TYPE2>)
+template<typename UNIT_TYPE1, typename STORAGE_TYPE1, typename UNIT_TYPE2, typename STORAGE_TYPE2>
+void operator / (LLUnit<UNIT_TYPE1, STORAGE_TYPE1>, LLUnit<UNIT_TYPE2, STORAGE_TYPE2>)
 {
 	// spurious use of dependent type to stop gcc from triggering the static assertion before instantiating the template
 	llstatic_assert(sizeof(STORAGE_TYPE1) == 0, "Multiplication of unit types results in new unit type - not supported.");
 }
 
 #define COMPARISON_OPERATORS(op)                                                                     \
-template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>                            \
-bool operator op (SCALAR_TYPE first, LLUnit<STORAGE_TYPE, UNIT_TYPE> second)                         \
+template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>                            \
+bool operator op (SCALAR_TYPE first, LLUnit<UNIT_TYPE, STORAGE_TYPE> second)                         \
 {                                                                                                    \
 	return first op second.value();                                                                  \
 }                                                                                                    \
 	                                                                                                 \
-template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>                            \
-bool operator op (LLUnit<STORAGE_TYPE, UNIT_TYPE> first, SCALAR_TYPE second)                         \
+template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>                            \
+bool operator op (LLUnit<UNIT_TYPE, STORAGE_TYPE> first, SCALAR_TYPE second)                         \
 {                                                                                                    \
 	return first.value() op second;                                                                  \
 }                                                                                                    \
 	                                                                                                 \
-template<typename STORAGE_TYPE1, typename UNIT_TYPE1, typename STORAGE_TYPE2, typename UNIT_TYPE2>   \
-bool operator op (LLUnit<STORAGE_TYPE1, UNIT_TYPE1> first, LLUnit<STORAGE_TYPE2, UNIT_TYPE2> second) \
+template<typename UNIT_TYPE1, typename STORAGE_TYPE1, typename UNIT_TYPE2, typename STORAGE_TYPE2>   \
+bool operator op (LLUnit<UNIT_TYPE1, STORAGE_TYPE1> first, LLUnit<UNIT_TYPE2, STORAGE_TYPE2> second) \
 {                                                                                                    \
 	return first.value() op first.convert(second);                                                   \
 }
@@ -333,6 +333,12 @@ COMPARISON_OPERATORS(!=)
 
 namespace LLUnits
 {
+template<typename UNIT_TYPE, typename STORAGE_TYPE> 
+struct HighestPrecisionType<LLUnit<UNIT_TYPE, STORAGE_TYPE> >
+{
+	typedef typename HighestPrecisionType<STORAGE_TYPE>::type_t type_t;
+};
+
 #define LL_DECLARE_DERIVED_UNIT(base_unit_name, unit_name, conversion_factor)                                                                                   \
 struct unit_name                                                                                                                                                \
 {                                                                                                                                                               \
diff --git a/indra/llui/llstatbar.h b/indra/llui/llstatbar.h
index 083da8444e2..17c9c09cb26 100644
--- a/indra/llui/llstatbar.h
+++ b/indra/llui/llstatbar.h
@@ -100,10 +100,10 @@ class LLStatBar : public LLView
 	BOOL mDisplayHistory;
 	BOOL mDisplayMean;			// If true, display mean, if false, display current value
 
-	LLTrace::count_common_float_t* mCountFloatp;
-	LLTrace::count_common_int_t* mCountIntp;
-	LLTrace::measurement_common_float_t* mMeasurementFloatp;
-	LLTrace::measurement_common_int_t* mMeasurementIntp;
+	LLTrace::TraceType<CountAccumulator<F64> >* mCountFloatp;
+	LLTrace::TraceType<CountAccumulator<S64> >* mCountIntp;
+	LLTrace::TraceType<MeasurementAccumulator<F64> >* mMeasurementFloatp;
+	LLTrace::TraceType<MeasurementAccumulator<S64> >* mMeasurementIntp;
 
 	LLFrameTimer mUpdateTimer;
 	LLUIString mLabel;
diff --git a/indra/llui/llstatgraph.h b/indra/llui/llstatgraph.h
index f33c784262e..09b34c23582 100644
--- a/indra/llui/llstatgraph.h
+++ b/indra/llui/llstatgraph.h
@@ -57,10 +57,10 @@ class LLStatGraph : public LLView
 
 	struct StatParams : public LLInitParam::ChoiceBlock<StatParams>
 	{
-		Alternative<LLTrace::count_common_float_t* >	count_stat_float;
-		Alternative<LLTrace::count_common_int_t* >		count_stat_int;
-		Alternative<LLTrace::measurement_common_float_t* >	measurement_stat_float;
-		Alternative<LLTrace::measurement_common_int_t* >	measurement_stat_int;
+		Alternative<LLTrace::TraceType<CountAccumulator<F64> >* >		count_stat_float;
+		Alternative<LLTrace::TraceType<CountAccumulator<S64> >* >		count_stat_int;
+		Alternative<LLTrace::TraceType<MeasurementAccumulator<F64> >* >	measurement_stat_float;
+		Alternative<LLTrace::TraceType<MeasurementAccumulator<S64> >* >	measurement_stat_int;
 	};
 
 	struct Params : public LLInitParam::Block<Params, LLView::Params>
-- 
GitLab