From 02d503bf8f8890c6d4b57dd09a1fde2973715b75 Mon Sep 17 00:00:00 2001
From: Richard Linden <none@none>
Date: Thu, 29 Nov 2012 00:43:25 -0800
Subject: [PATCH] SH-3406 WIP convert fast timers to lltrace system; moved
 runtime timer tree topology information to a separate array instead of
 recording stack

---
 indra/llcommon/llfasttimer.cpp           | 85 +++++++++++++-----------
 indra/llcommon/llfasttimer.h             | 34 +++++-----
 indra/llcommon/lltrace.h                 | 13 +++-
 indra/llcommon/lltracethreadrecorder.cpp | 20 +++---
 indra/llcommon/lltracethreadrecorder.h   |  4 +-
 5 files changed, 91 insertions(+), 65 deletions(-)

diff --git a/indra/llcommon/llfasttimer.cpp b/indra/llcommon/llfasttimer.cpp
index e88a5a9ed15..1b3498cce3d 100644
--- a/indra/llcommon/llfasttimer.cpp
+++ b/indra/llcommon/llfasttimer.cpp
@@ -58,24 +58,27 @@ namespace LLTrace
 //////////////////////////////////////////////////////////////////////////////
 // statics
 
-S32 BlockTimer::sCurFrameIndex = -1;
-S32 BlockTimer::sLastFrameIndex = -1;
-U64 BlockTimer::sLastFrameTime = BlockTimer::getCPUClockCount64();
-bool BlockTimer::sPauseHistory = 0;
-bool BlockTimer::sResetHistory = 0;
-LLThreadLocalPointer<CurTimerData> BlockTimer::sCurTimerData;
-bool BlockTimer::sLog = false;
-std::string BlockTimer::sLogName = "";
-bool BlockTimer::sMetricLog = false;
-static LLMutex*			sLogLock = NULL;
-static std::queue<LLSD> sLogQueue;
+S32         BlockTimer::sCurFrameIndex   = -1;
+S32         BlockTimer::sLastFrameIndex  = -1;
+U64         BlockTimer::sLastFrameTime   = BlockTimer::getCPUClockCount64();
+bool        BlockTimer::sPauseHistory    = false;
+bool        BlockTimer::sResetHistory    = false;
+bool        BlockTimer::sLog             = false;
+std::string BlockTimer::sLogName         = "";
+bool        BlockTimer::sMetricLog       = false;
 
 #if LL_LINUX || LL_SOLARIS
-U64 BlockTimer::sClockResolution = 1000000000; // Nanosecond resolution
+U64         BlockTimer::sClockResolution = 1000000000; // Nanosecond resolution
 #else
-U64 BlockTimer::sClockResolution = 1000000; // Microsecond resolution
+U64         BlockTimer::sClockResolution = 1000000; // Microsecond resolution
 #endif
 
+LLThreadLocalPointer<CurTimerData> BlockTimer::sCurTimerData;	// assigned by ThreadRecorder's constructor, reset to NULL in its destructor
+
+static LLMutex*			sLogLock = NULL;
+static std::queue<LLSD> sLogQueue;
+
+
 // FIXME: move these declarations to the relevant modules
 
 // helper functions
@@ -108,6 +111,7 @@ static timer_tree_dfs_iterator_t end_timer_tree()
 	return timer_tree_dfs_iterator_t(); 
 }
 
+
 BlockTimer& BlockTimer::getRootTimer()
 {
 	static BlockTimer root_timer("root", true, NULL);
@@ -142,7 +146,7 @@ U64 BlockTimer::countsPerSecond() // counts per second for the *32-bit* timer
 
 	// we drop the low-order byte in our timers, so report a lower frequency
 #else
-	// If we're not using RDTSC, each fasttimer tick is just a performance counter tick.
+	// If we're not using RDTSC, each fast timer tick is just a performance counter tick.
 	// Not redefining the clock frequency itself (in llprocessor.cpp/calculate_cpu_frequency())
 	// since that would change displayed MHz stats for CPUs
 	static bool firstcall = true;
@@ -268,17 +272,19 @@ void BlockTimer::buildHierarchy()
 			// when this timer was called
 			if (timer.mParent == &BlockTimer::getRootTimer())
 			{
-				if (timer.getPrimaryAccumulator().mLastCaller)
-			{
-				timer.setParent(timer.getPrimaryAccumulator().mLastCaller);
+				TimerTreeNode& tree_node = sCurTimerData->mTimerTreeData[timer.getIndex()];
+
+				if (tree_node.mLastCaller)
+				{
+					timer.setParent(tree_node.mLastCaller);
 				}
 				// no need to push up tree on first use, flag can be set spuriously
-				timer.getPrimaryAccumulator().mMoveUpTree = false;
+				tree_node.mMoveUpTree = false;
 			}
 		}
 	}
 
-	// bump timers up tree if they've been flagged as being in the wrong place
+	// bump timers up tree if they have been flagged as being in the wrong place
 	// do this in a bottom up order to promote descendants first before promoting ancestors
 	// this preserves partial order derived from current frame's observations
 	for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(BlockTimer::getRootTimer());
@@ -288,15 +294,16 @@ void BlockTimer::buildHierarchy()
 		BlockTimer* timerp = *it;
 		// skip root timer
 		if (timerp == &BlockTimer::getRootTimer()) continue;
+		TimerTreeNode& tree_node = sCurTimerData->mTimerTreeData[timerp->getIndex()];
 
-		if (timerp->getPrimaryAccumulator().mMoveUpTree)
+		if (tree_node.mMoveUpTree)
 		{
 			// since ancestors have already been visited, re-parenting won't affect tree traversal
 			//step up tree, bringing our descendants with us
 			LL_DEBUGS("FastTimers") << "Moving " << timerp->getName() << " from child of " << timerp->getParent()->getName() <<
 				" to child of " << timerp->getParent()->getParent()->getName() << LL_ENDL;
 			timerp->setParent(timerp->getParent()->getParent());
-			timerp->getPrimaryAccumulator().mMoveUpTree = false;
+			tree_node.mMoveUpTree = false;
 
 			// don't bubble up any ancestors until descendants are done bubbling up
 			it.skipAncestors();
@@ -322,11 +329,11 @@ void BlockTimer::accumulateTimings()
 {
 	U64 cur_time = getCPUClockCount64();
 
-	// walk up stack of active timers and accumulate current time while leaving timing structures active
-	Time* cur_timer = sCurTimerData->mCurTimer;
 	// root defined by parent pointing to self
 	CurTimerData* cur_data = sCurTimerData.get();
-	TimerAccumulator& accumulator = sCurTimerData->mTimerData->getPrimaryAccumulator();
+	// walk up stack of active timers and accumulate current time while leaving timing structures active
+	Time* cur_timer = cur_data->mCurTimer;
+	TimerAccumulator& accumulator = cur_data->mTimerData->getPrimaryAccumulator();
 	while(cur_timer && cur_timer->mLastTimerData.mCurTimer != cur_timer)
 	{
 		U64 cumulative_time_delta = cur_time - cur_timer->mStartTime;
@@ -431,11 +438,13 @@ void BlockTimer::resetFrame()
 	{
 		BlockTimer& timer = *it;
 		TimerAccumulator& accumulator = timer.getPrimaryAccumulator();
+		TimerTreeNode& tree_node = sCurTimerData->mTimerTreeData[timer.getIndex()];
+
 		accumulator.mSelfTimeCounter = 0;
 		accumulator.mTotalTimeCounter = 0;
 		accumulator.mCalls = 0;
-		accumulator.mLastCaller = NULL;
-		accumulator.mMoveUpTree = false;
+		tree_node.mLastCaller = NULL;
+		tree_node.mMoveUpTree = false;
 	}
 }
 
@@ -579,32 +588,28 @@ void Time::writeLog(std::ostream& os)
 }
 
 
-LLTrace::TimerAccumulator::TimerAccumulator() :	mSelfTimeCounter(0),
+TimerAccumulator::TimerAccumulator() :	mSelfTimeCounter(0),	// zero-initializes the self-time, total-time and call counters
 	mTotalTimeCounter(0),
-	mCalls(0),
-	mLastCaller(NULL),
-	mActiveCount(0),
-	mMoveUpTree(false)
+	mCalls(0)	// mLastCaller / mActiveCount / mMoveUpTree are now initialized by TimerTreeNode's constructor
 {}
 
-void LLTrace::TimerAccumulator::addSamples( const LLTrace::TimerAccumulator& other )
+void TimerAccumulator::addSamples( const TimerAccumulator& other )	// adds other's self time, total time and call count into this accumulator
 {
 	mSelfTimeCounter += other.mSelfTimeCounter;
 	mTotalTimeCounter += other.mTotalTimeCounter;
 	mCalls += other.mCalls;
-	if (!mLastCaller)
-	{
-		mLastCaller = other.mLastCaller;
-	}
-
-	//mActiveCount stays the same;
-	mMoveUpTree |= other.mMoveUpTree;
 }
 
-void LLTrace::TimerAccumulator::reset( const LLTrace::TimerAccumulator* other )
+void TimerAccumulator::reset( const TimerAccumulator* other )	// zeroes all three counters; the other argument is not read
 {
 	mTotalTimeCounter = 0;
 	mSelfTimeCounter = 0;
 	mCalls = 0;
 }
+
+TimerTreeNode::TimerTreeNode()	// a fresh node has no recorded caller, an active count of zero, and is not flagged to move up the tree
+:	mLastCaller(NULL),
+	mActiveCount(0),
+	mMoveUpTree(false)
+{}
 }
diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h
index cfe2cf5371a..9f981480f28 100644
--- a/indra/llcommon/llfasttimer.h
+++ b/indra/llcommon/llfasttimer.h
@@ -37,11 +37,13 @@ class LLMutex;
 
 namespace LLTrace
 {
+
 struct CurTimerData
 {
 	class Time*			mCurTimer;
 	class BlockTimer*	mTimerData;
 	U64					mChildTime;
+	TimerTreeNode*		mTimerTreeData;	// array of tree nodes indexed by BlockTimer::getIndex(); allocated with new[] in ThreadRecorder's constructor
 };
 
 class Time
@@ -269,20 +271,20 @@ class BlockTimer
 	// tree structure
 	BlockTimer*					mParent;				// BlockTimer of caller(parent)
 	std::vector<BlockTimer*>	mChildren;
-	bool						mCollapsed;				// don't show children
-	bool						mNeedsSorting;			// sort children whenever child added
+	bool						mCollapsed,				// don't show children
+								mNeedsSorting;			// sort children whenever child added
 
 	// statics
 	static std::string							sLogName;
-	static bool									sMetricLog;
-	static bool									sLog;	
+	static bool									sMetricLog,
+												sLog;	
 	static LLThreadLocalPointer<CurTimerData>	sCurTimerData;
 	static U64									sClockResolution;
-	static S32									sCurFrameIndex;
-	static S32									sLastFrameIndex;
+	static S32									sCurFrameIndex,
+												sLastFrameIndex;
 	static U64									sLastFrameTime;
-	static bool 								sPauseHistory;
-	static bool 								sResetHistory;
+	static bool 								sPauseHistory,
+												sResetHistory;
 
 };
 
@@ -291,13 +293,12 @@ LL_FORCE_INLINE Time::Time(BlockTimer& timer)
 #if FAST_TIMER_ON
 	mStartTime = BlockTimer::getCPUClockCount64();
 
-	TimerAccumulator& accumulator = timer.getPrimaryAccumulator();
-	accumulator.mActiveCount++;
-	accumulator.mCalls++;
+	CurTimerData* cur_timer_data = BlockTimer::sCurTimerData.get();
+	TimerTreeNode& tree_node = cur_timer_data->mTimerTreeData[timer.getIndex()];
+	tree_node.mActiveCount++;
 	// keep current parent as long as it is active when we are
-	accumulator.mMoveUpTree |= (timer.mParent->getPrimaryAccumulator().mActiveCount == 0);
+	tree_node.mMoveUpTree |= (cur_timer_data->mTimerTreeData[timer.mParent->getIndex()].mActiveCount == 0);
 
-	CurTimerData* cur_timer_data = BlockTimer::sCurTimerData.get();
 	// store top of stack
 	mLastTimerData = *cur_timer_data;
 	// push new information
@@ -313,13 +314,16 @@ LL_FORCE_INLINE Time::~Time()
 	U64 total_time = BlockTimer::getCPUClockCount64() - mStartTime;
 	CurTimerData* cur_timer_data = BlockTimer::sCurTimerData.get();
 	TimerAccumulator& accumulator = cur_timer_data->mTimerData->getPrimaryAccumulator();
+	TimerTreeNode& tree_node = cur_timer_data->mTimerTreeData[cur_timer_data->mTimerData->getIndex()];
+
+	accumulator.mCalls++;
 	accumulator.mSelfTimeCounter += total_time - cur_timer_data->mChildTime;
 	accumulator.mTotalTimeCounter += total_time;
-	accumulator.mActiveCount--;
+	tree_node.mActiveCount--;
 
 	// store last caller to bootstrap tree creation
 	// do this in the destructor in case of recursion to get topmost caller
-	accumulator.mLastCaller = mLastTimerData.mTimerData;
+	tree_node.mLastCaller = mLastTimerData.mTimerData;
 
 	// we are only tracking self time, so subtract our total time delta from parents
 	mLastTimerData.mChildTime += total_time;
diff --git a/indra/llcommon/lltrace.h b/indra/llcommon/lltrace.h
index 11651ef9531..ad9f170aaee 100644
--- a/indra/llcommon/lltrace.h
+++ b/indra/llcommon/lltrace.h
@@ -189,6 +189,11 @@ namespace LLTrace
 			delete[] old_storage;
 		}
 
+		size_t size() const	// returns mNextStorageSlot — presumably the number of slots reserved so far; const so it is callable on const buffers
+		{
+			return mNextStorageSlot;
+		}
+
 		static AccumulatorBuffer<ACCUMULATOR>& getDefaultBuffer()
 		{
 			static AccumulatorBuffer sBuffer(STATIC_ALLOC);
@@ -216,7 +221,7 @@ namespace LLTrace
 			mAccumulatorIndex = AccumulatorBuffer<ACCUMULATOR>::getDefaultBuffer().reserveSlot();
 		}
 
-		LL_FORCE_INLINE ACCUMULATOR& getPrimaryAccumulator()
+		LL_FORCE_INLINE ACCUMULATOR& getPrimaryAccumulator() const	// const: only reads mAccumulatorIndex; the returned accumulator reference is still mutable
 		{
 			return AccumulatorBuffer<ACCUMULATOR>::getPrimaryStorage()[mAccumulatorIndex];
 		}
@@ -399,6 +404,12 @@ namespace LLTrace
 		U64 						mSelfTimeCounter,
 									mTotalTimeCounter;
 		U32 						mCalls;
+	};
+
+	class TimerTreeNode
+	{
+	public:
+		TimerTreeNode();
 		class BlockTimer*			mLastCaller;	// used to bootstrap tree construction
 		U16							mActiveCount;	// number of timers with this ID active on stack
 		bool						mMoveUpTree;	// needs to be moved up the tree of timers at the end of frame
diff --git a/indra/llcommon/lltracethreadrecorder.cpp b/indra/llcommon/lltracethreadrecorder.cpp
index faaab4c8e71..0a2d79cf3a9 100644
--- a/indra/llcommon/lltracethreadrecorder.cpp
+++ b/indra/llcommon/lltracethreadrecorder.cpp
@@ -41,25 +41,29 @@ ThreadRecorder::ThreadRecorder()
 	get_thread_recorder() = this;
 	mFullRecording.start();
 
-	BlockTimer::sCurTimerData = new CurTimerData();
-}
+	mRootTimerData = new CurTimerData();
+	mRootTimerData->mTimerData = &BlockTimer::getRootTimer();
+	mRootTimerData->mTimerTreeData = new TimerTreeNode[AccumulatorBuffer<TimerAccumulator>::getDefaultBuffer().size()];
+	BlockTimer::sCurTimerData = mRootTimerData;
 
-ThreadRecorder::ThreadRecorder( const ThreadRecorder& other ) 
-:	mFullRecording(other.mFullRecording)
-{
-	get_thread_recorder() = this;
-	mFullRecording.start();
+	mRootTimer = new Time(BlockTimer::getRootTimer());
+	mRootTimerData->mCurTimer = mRootTimer;
+
+	mRootTimerData->mTimerTreeData[BlockTimer::getRootTimer().getIndex()].mActiveCount = 1;
 }
 
 ThreadRecorder::~ThreadRecorder()
 {
+	delete mRootTimer;	// runs ~Time() first, while sCurTimerData and the tree-node array it reads are still valid
+
 	while(mActiveRecordings.size())
 	{
 		mActiveRecordings.front().mTargetRecording->stop();
 	}
 	get_thread_recorder() = NULL;
-	delete BlockTimer::sCurTimerData.get();
 	BlockTimer::sCurTimerData = NULL;
+	delete [] mRootTimerData->mTimerTreeData;	// array form matches the new TimerTreeNode[...] in the constructor
+	delete mRootTimerData;	// NOTE(review): class now owns raw pointers and no longer declares a copy constructor — confirm instances are never copied
 }
 
 void ThreadRecorder::activate( Recording* recording )
diff --git a/indra/llcommon/lltracethreadrecorder.h b/indra/llcommon/lltracethreadrecorder.h
index 44fe67384be..277a468a40c 100644
--- a/indra/llcommon/lltracethreadrecorder.h
+++ b/indra/llcommon/lltracethreadrecorder.h
@@ -41,7 +41,6 @@ namespace LLTrace
 		struct ActiveRecording;
 	public:
 		ThreadRecorder();
-		ThreadRecorder(const ThreadRecorder& other);
 
 		virtual ~ThreadRecorder();
 
@@ -63,6 +62,9 @@ namespace LLTrace
 		};
 		Recording					mFullRecording;
 		std::list<ActiveRecording>	mActiveRecordings;
+
+		struct CurTimerData*	mRootTimerData;
+		class Time*				mRootTimer;
 	};
 
 	class LL_COMMON_API MasterThreadRecorder : public ThreadRecorder
-- 
GitLab