Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
llfasttimer.cpp 13.31 KiB
/** 
 * @file llfasttimer.cpp
 * @brief Implementation of the fast timer.
 *
 * $LicenseInfo:firstyear=2004&license=viewerlgpl$
 * Second Life Viewer Source Code
 * Copyright (C) 2010, Linden Research, Inc.
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * version 2.1 of the License only.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 * 
 * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
 * $/LicenseInfo$
 */
#include "linden_common.h"

#include "llfasttimer.h"

#include "llmemory.h"
#include "llprocessor.h"
#include "llsingleton.h"
#include "lltreeiterators.h"
#include "llsdserialize.h"
#include "llunit.h"
#include "llsd.h"
#include "lltracerecording.h"
#include "lltracethreadrecorder.h"

#include <boost/bind.hpp>
#include <queue>


#if LL_WINDOWS
#include "lltimer.h"
#elif LL_LINUX || LL_SOLARIS
#include <sys/time.h>
#include <sched.h>
#include "lltimer.h"
#elif LL_DARWIN
#include <sys/time.h>
#include "lltimer.h"	// get_clock_count()
#else 
#error "architecture not supported"
#endif

namespace LLTrace
{

//////////////////////////////////////////////////////////////////////////////
// statics

bool        TimeBlock::sLog		     = false;
std::string TimeBlock::sLogName         = "";
bool        TimeBlock::sMetricLog       = false;

#if LL_LINUX || LL_SOLARIS
U64         TimeBlock::sClockResolution = 1000000000; // Nanosecond resolution
#else
U64         TimeBlock::sClockResolution = 1000000; // Microsecond resolution
#endif

static LLMutex*			sLogLock = NULL;
static std::queue<LLSD> sLogQueue;


// FIXME: move these declarations to the relevant modules

// helper functions
typedef LLTreeDFSPostIter<TimeBlock, TimeBlock::child_const_iter> timer_tree_bottom_up_iterator_t;

static timer_tree_bottom_up_iterator_t begin_timer_tree_bottom_up(TimeBlock& id) 
{ 
	return timer_tree_bottom_up_iterator_t(&id, 
							boost::bind(boost::mem_fn(&TimeBlock::beginChildren), _1), 
							boost::bind(boost::mem_fn(&TimeBlock::endChildren), _1));
}

static timer_tree_bottom_up_iterator_t end_timer_tree_bottom_up() 
{ 
	return timer_tree_bottom_up_iterator_t(); 
}

typedef LLTreeDFSIter<TimeBlock, TimeBlock::child_const_iter> timer_tree_dfs_iterator_t;


static timer_tree_dfs_iterator_t begin_timer_tree(TimeBlock& id) 
{ 
	return timer_tree_dfs_iterator_t(&id, 
		boost::bind(boost::mem_fn(&TimeBlock::beginChildren), _1), 
							boost::bind(boost::mem_fn(&TimeBlock::endChildren), _1));
}

static timer_tree_dfs_iterator_t end_timer_tree() 
{ 
	return timer_tree_dfs_iterator_t(); 
}


// sort child timers by name
struct SortTimerByName
{
	bool operator()(const TimeBlock* i1, const TimeBlock* i2)
	{
		return i1->getName() < i2->getName();
	}
};

TimeBlock& TimeBlock::getRootTimeBlock()
{
	static TimeBlock root_timer("root", true, NULL);
	return root_timer;
}

void TimeBlock::pushLog(LLSD log)
{
	LLMutexLock lock(sLogLock);

	sLogQueue.push(log);
}

void TimeBlock::setLogLock(LLMutex* lock)
{
	sLogLock = lock;
}


//static
#if (LL_DARWIN || LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
U64 TimeBlock::countsPerSecond()
{
	return sClockResolution;
}
#else // windows or x86-mac or x86-linux or x86-solaris
U64 TimeBlock::countsPerSecond()
{
#if LL_FASTTIMER_USE_RDTSC || !LL_WINDOWS
	//getCPUFrequency returns MHz and sCPUClockFrequency wants to be in Hz
	static LLUnit<LLUnits::Hertz, U64> sCPUClockFrequency = LLProcessorInfo().getCPUFrequency();

#else
	// If we're not using RDTSC, each fast timer tick is just a performance counter tick.
	// Not redefining the clock frequency itself (in llprocessor.cpp/calculate_cpu_frequency())
	// since that would change displayed MHz stats for CPUs
	static bool firstcall = true;
	static U64 sCPUClockFrequency;
	if (firstcall)
	{
		QueryPerformanceFrequency((LARGE_INTEGER*)&sCPUClockFrequency);
		firstcall = false;
	}
#endif
	return sCPUClockFrequency.value();
}
#endif

TimeBlock::TimeBlock(const char* name, bool open, TimeBlock* parent)
:	TraceType<TimeBlockAccumulator>(name),
	mCollapsed(true)
{
	setCollapsed(!open);
}

TimeBlockTreeNode& TimeBlock::getTreeNode() const
{
	TimeBlockTreeNode* nodep = LLTrace::get_thread_recorder()->getTimeBlockTreeNode(getIndex());
	llassert(nodep);
	return *nodep;
}

// static
void TimeBlock::processTimes()
{
	get_clock_count(); // good place to calculate clock frequency
	U64 cur_time = getCPUClockCount64();

	// set up initial tree
	for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(), end_it = LLInstanceTracker<TimeBlock>::endInstances(); 
		it != end_it; 
		++it)
	{
		TimeBlock& timer = *it;
		if (&timer == &TimeBlock::getRootTimeBlock()) continue;

		// bootstrap tree construction by attaching to last timer to be on stack
		// when this timer was called
		if (timer.getParent() == &TimeBlock::getRootTimeBlock())
		{
			TimeBlockAccumulator* accumulator = timer.getPrimaryAccumulator();

			if (accumulator->mLastCaller)
			{
				timer.setParent(accumulator->mLastCaller);
				accumulator->mParent = accumulator->mLastCaller;
			}
			// no need to push up tree on first use, flag can be set spuriously
			accumulator->mMoveUpTree = false;
		}
	}
	// bump timers up tree if they have been flagged as being in the wrong place
	// do this in a bottom up order to promote descendants first before promoting ancestors
	// this preserves partial order derived from current frame's observations
	for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(TimeBlock::getRootTimeBlock());
		it != end_timer_tree_bottom_up();
		++it)
	{
		TimeBlock* timerp = *it;

		// sort timers by time last called, so call graph makes sense
		TimeBlockTreeNode& tree_node = timerp->getTreeNode();
		if (tree_node.mNeedsSorting)
		{
			std::sort(tree_node.mChildren.begin(), tree_node.mChildren.end(), SortTimerByName());
		}

		// skip root timer
		if (timerp != &TimeBlock::getRootTimeBlock())
		{
			TimeBlockAccumulator* accumulator = timerp->getPrimaryAccumulator();

			if (accumulator->mMoveUpTree)
			{
				// since ancestors have already been visited, re-parenting won't affect tree traversal
				//step up tree, bringing our descendants with us
				LL_DEBUGS("FastTimers") << "Moving " << timerp->getName() << " from child of " << timerp->getParent()->getName() <<
					" to child of " << timerp->getParent()->getParent()->getName() << LL_ENDL;
				timerp->setParent(timerp->getParent()->getParent());
				accumulator->mParent = timerp->getParent();
				accumulator->mMoveUpTree = false;

				// don't bubble up any ancestors until descendants are done bubbling up
				// as ancestors may call this timer only on certain paths, so we want to resolve
				// child-most block locations before their parents
				it.skipAncestors();
			}
		}
	}

	// walk up stack of active timers and accumulate current time while leaving timing structures active
	BlockTimerStackRecord* stack_record			= ThreadTimerStack::getInstance();
	BlockTimer* cur_timer						= stack_record->mActiveTimer;
	TimeBlockAccumulator* accumulator = stack_record->mTimeBlock->getPrimaryAccumulator();
	
	// root defined by parent pointing to self
	while(cur_timer && cur_timer->mParentTimerData.mActiveTimer != cur_timer)
	{
		U64 cumulative_time_delta = cur_time - cur_timer->mStartTime;
		accumulator->mTotalTimeCounter += cumulative_time_delta - (accumulator->mTotalTimeCounter - cur_timer->mBlockStartTotalTimeCounter);
		accumulator->mSelfTimeCounter += cumulative_time_delta - stack_record->mChildTime;
		stack_record->mChildTime = 0;

		cur_timer->mStartTime = cur_time;
		cur_timer->mBlockStartTotalTimeCounter = accumulator->mTotalTimeCounter;

		stack_record = &cur_timer->mParentTimerData;
		accumulator = stack_record->mTimeBlock->getPrimaryAccumulator();
		cur_timer = stack_record->mActiveTimer;

		stack_record->mChildTime += cumulative_time_delta;
	}

	// reset for next frame
	for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(),
			end_it = LLInstanceTracker<TimeBlock>::endInstances();
		it != end_it;
		++it)
	{
		TimeBlock& timer = *it;
		TimeBlockAccumulator* accumulator = timer.getPrimaryAccumulator();

		accumulator->mLastCaller = NULL;
		accumulator->mMoveUpTree = false;
	}
}


std::vector<TimeBlock*>::iterator TimeBlock::beginChildren()
{ 
	return getTreeNode().mChildren.begin(); 
}

std::vector<TimeBlock*>::iterator TimeBlock::endChildren()
{
	return getTreeNode().mChildren.end();
}

std::vector<TimeBlock*>& TimeBlock::getChildren()
{
	return getTreeNode().mChildren;
}

//static
void TimeBlock::logStats()
{
	// get ready for next frame
	if (sLog)
	{ //output current frame counts to performance log

		static S32 call_count = 0;
		if (call_count % 100 == 0)
		{
			LL_DEBUGS("FastTimers") << "countsPerSecond: " << countsPerSecond() << LL_ENDL;
			LL_DEBUGS("FastTimers") << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << LL_ENDL;
			LL_DEBUGS("FastTimers") << "getCPUClockCount32() " << getCPUClockCount32() << LL_ENDL;
			LL_DEBUGS("FastTimers") << "getCPUClockCount64() " << getCPUClockCount64() << LL_ENDL;
			LL_DEBUGS("FastTimers") << "elapsed sec " << ((F64)getCPUClockCount64()) / (LLUnit<LLUnits::Hertz, F64>(LLProcessorInfo().getCPUFrequency())) << LL_ENDL;
		}
		call_count++;

		LLUnit<LLUnits::Seconds, F64> total_time(0);
		LLSD sd;

		{
			for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(), 
				end_it = LLInstanceTracker<TimeBlock>::endInstances(); 
				it != end_it; 
			++it)
			{
				TimeBlock& timer = *it;
				LLTrace::PeriodicRecording& frame_recording = LLTrace::get_frame_recording();
				sd[timer.getName()]["Time"] = (LLSD::Real) (frame_recording.getLastRecordingPeriod().getSum(timer).value());	
				sd[timer.getName()]["Calls"] = (LLSD::Integer) (frame_recording.getLastRecordingPeriod().getSum(timer.callCount()));

				// computing total time here because getting the root timer's getCountHistory
				// doesn't work correctly on the first frame
				total_time += frame_recording.getLastRecordingPeriod().getSum(timer);
			}
		}

		sd["Total"]["Time"] = (LLSD::Real) total_time.value();
		sd["Total"]["Calls"] = (LLSD::Integer) 1;

		{		
			LLMutexLock lock(sLogLock);
			sLogQueue.push(sd);
		}
	}

}

//static
void TimeBlock::dumpCurTimes()
{
	LLTrace::PeriodicRecording& frame_recording = LLTrace::get_frame_recording();
	LLTrace::Recording& last_frame_recording = frame_recording.getLastRecordingPeriod();

	// walk over timers in depth order and output timings
	for(timer_tree_dfs_iterator_t it = begin_timer_tree(TimeBlock::getRootTimeBlock());
		it != end_timer_tree();
		++it)
	{
		TimeBlock* timerp = (*it);
		LLUnit<LLUnits::Seconds, F64> total_time_ms = last_frame_recording.getSum(*timerp);
		U32 num_calls = last_frame_recording.getSum(timerp->callCount());

		// Don't bother with really brief times, keep output concise
		if (total_time_ms < 0.1) continue;

		std::ostringstream out_str;
		TimeBlock* parent_timerp = timerp;
		while(parent_timerp && parent_timerp != parent_timerp->getParent())
		{
			out_str << "\t";
			parent_timerp = parent_timerp->getParent();
		}

		out_str << timerp->getName() << " " 
			<< std::setprecision(3) << total_time_ms.as<LLUnits::Milliseconds>().value() << " ms, "
			<< num_calls << " calls";

		llinfos << out_str.str() << llendl;
	}
}

//static
void TimeBlock::writeLog(std::ostream& os)
{
	while (!sLogQueue.empty())
	{
		LLSD& sd = sLogQueue.front();
		LLSDSerialize::toXML(sd, os);
		LLMutexLock lock(sLogLock);
		sLogQueue.pop();
	}
}

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// TimeBlockAccumulator
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////

TimeBlockAccumulator::TimeBlockAccumulator() 
:	mTotalTimeCounter(0),
	mSelfTimeCounter(0),
	mStartTotalTimeCounter(0),
	mCalls(0),
	mLastCaller(NULL),
	mActiveCount(0),
	mMoveUpTree(false),
	mParent(NULL)
{}

void TimeBlockAccumulator::addSamples( const TimeBlockAccumulator& other )
{
	mTotalTimeCounter += other.mTotalTimeCounter - other.mStartTotalTimeCounter;
	mSelfTimeCounter += other.mSelfTimeCounter;
	mCalls += other.mCalls;
	mLastCaller = other.mLastCaller;
	mActiveCount = other.mActiveCount;
	mMoveUpTree = other.mMoveUpTree;
	mParent = other.mParent;
}

void TimeBlockAccumulator::reset( const TimeBlockAccumulator* other )
{
	mCalls = 0;
	mSelfTimeCounter = 0;

	if (other)
	{
		mStartTotalTimeCounter = other->mTotalTimeCounter;
		mTotalTimeCounter = mStartTotalTimeCounter;

		mLastCaller = other->mLastCaller;
		mActiveCount = other->mActiveCount;
		mMoveUpTree = other->mMoveUpTree;
		mParent = other->mParent;
	}
	else
	{
		mStartTotalTimeCounter = mTotalTimeCounter;
	}
}

LLUnit<LLUnits::Seconds, F64> BlockTimer::getElapsedTime()
{
	U64 total_time = TimeBlock::getCPUClockCount64() - mStartTime;

	return (F64)total_time / (F64)TimeBlock::countsPerSecond();
}


} // namespace LLTrace