Skip to content
Snippets Groups Projects
Commit d136c4c2 authored by Richard Linden's avatar Richard Linden
Browse files

SH-4246 FIX interesting: fast timers significantly decreases framerate

removed implicit flushes on reads from recorders for better performance
made sure stack timers were updated on recorder deactivate
faster rendering and better ui for fast timer view
parent 1cb0c974
No related branches found
No related tags found
No related merge requests found
......@@ -178,43 +178,38 @@ TimeBlockTreeNode& TimeBlock::getTreeNode() const
return *nodep;
}
static LLFastTimer::DeclareTimer FTM_PROCESS_TIMES("Process FastTimer Times");
// not thread safe, so only call on main thread
//static
void TimeBlock::processTimes()
void TimeBlock::bootstrapTimerTree()
{
LLFastTimer _(FTM_PROCESS_TIMES);
get_clock_count(); // good place to calculate clock frequency
U64 cur_time = getCPUClockCount64();
// set up initial tree
for (LLInstanceTracker<TimeBlock>::instance_iter begin_it = LLInstanceTracker<TimeBlock>::beginInstances(), end_it = LLInstanceTracker<TimeBlock>::endInstances(), it = begin_it;
it != end_it;
++it)
{
TimeBlock& timer = *it;
if (&timer == &TimeBlock::getRootTimeBlock()) continue;
// bootstrap tree construction by attaching to last timer to be on stack
// when this timer was called
if (timer.getParent() == &TimeBlock::getRootTimeBlock())
{
TimeBlockAccumulator* accumulator = timer.getPrimaryAccumulator();
if (accumulator->mLastCaller)
{
timer.setParent(accumulator->mLastCaller);
accumulator->mParent = accumulator->mLastCaller;
}
// no need to push up tree on first use, flag can be set spuriously
// no need to push up tree on first use, flag can be set spuriously
accumulator->mMoveUpTree = false;
}
}
}
// bump timers up tree if they have been flagged as being in the wrong place
// do this in a bottom up order to promote descendants first before promoting ancestors
// this preserves partial order derived from current frame's observations
// bump timers up tree if they have been flagged as being in the wrong place
// do this in a bottom up order to promote descendants first before promoting ancestors
// this preserves partial order derived from current frame's observations
void TimeBlock::incrementalUpdateTimerTree()
{
for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(TimeBlock::getRootTimeBlock());
it != end_timer_tree_bottom_up();
++it)
......@@ -240,27 +235,35 @@ void TimeBlock::processTimes()
LL_DEBUGS("FastTimers") << "Moving " << timerp->getName() << " from child of " << timerp->getParent()->getName() <<
" to child of " << timerp->getParent()->getParent()->getName() << LL_ENDL;
timerp->setParent(timerp->getParent()->getParent());
accumulator->mParent = timerp->getParent();
accumulator->mMoveUpTree = false;
accumulator->mParent = timerp->getParent();
accumulator->mMoveUpTree = false;
// don't bubble up any ancestors until descendants are done bubbling up
// as ancestors may call this timer only on certain paths, so we want to resolve
// child-most block locations before their parents
// as ancestors may call this timer only on certain paths, so we want to resolve
// child-most block locations before their parents
it.skipAncestors();
}
}
}
}
void TimeBlock::updateTimes()
{
U64 cur_time = getCPUClockCount64();
// walk up stack of active timers and accumulate current time while leaving timing structures active
BlockTimerStackRecord* stack_record = ThreadTimerStack::getInstance();
BlockTimer* cur_timer = stack_record->mActiveTimer;
TimeBlockAccumulator* accumulator = stack_record->mTimeBlock->getPrimaryAccumulator();
BlockTimerStackRecord* stack_record = ThreadTimerStack::getInstance();
BlockTimer* cur_timer = stack_record->mActiveTimer;
TimeBlockAccumulator* accumulator = stack_record->mTimeBlock->getPrimaryAccumulator();
while(cur_timer
&& cur_timer->mParentTimerData.mActiveTimer != cur_timer) // root defined by parent pointing to self
{
U64 cumulative_time_delta = cur_time - cur_timer->mStartTime;
accumulator->mTotalTimeCounter += cumulative_time_delta - (accumulator->mTotalTimeCounter - cur_timer->mBlockStartTotalTimeCounter);
accumulator->mTotalTimeCounter += cumulative_time_delta
- (accumulator->mTotalTimeCounter
- cur_timer->mBlockStartTotalTimeCounter);
accumulator->mSelfTimeCounter += cumulative_time_delta - stack_record->mChildTime;
stack_record->mChildTime = 0;
......@@ -268,11 +271,28 @@ void TimeBlock::processTimes()
cur_timer->mBlockStartTotalTimeCounter = accumulator->mTotalTimeCounter;
stack_record = &cur_timer->mParentTimerData;
accumulator = stack_record->mTimeBlock->getPrimaryAccumulator();
cur_timer = stack_record->mActiveTimer;
accumulator = stack_record->mTimeBlock->getPrimaryAccumulator();
cur_timer = stack_record->mActiveTimer;
stack_record->mChildTime += cumulative_time_delta;
}
}
static LLFastTimer::DeclareTimer FTM_PROCESS_TIMES("Process FastTimer Times");
// not thread safe, so only call on main thread
//static
void TimeBlock::processTimes()
{
LLFastTimer _(FTM_PROCESS_TIMES);
get_clock_count(); // good place to calculate clock frequency
// set up initial tree
bootstrapTimerTree();
incrementalUpdateTimerTree();
updateTimes();
// reset for next frame
for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(),
......@@ -288,14 +308,13 @@ void TimeBlock::processTimes()
}
}
std::vector<TimeBlock*>::iterator TimeBlock::beginChildren()
{
{
return getTreeNode().mChildren.begin();
}
}
std::vector<TimeBlock*>::iterator TimeBlock::endChildren()
{
{
return getTreeNode().mChildren.end();
}
......
......@@ -115,6 +115,7 @@ class TimeBlock
static void pushLog(LLSD sd);
static void setLogLock(LLMutex* mutex);
static void writeLog(std::ostream& os);
static void updateTimes();
// dumps current cumulative frame stats to log
// call nextFrame() to reset timers
......@@ -262,6 +263,9 @@ class TimeBlock
// can be called multiple times in a frame, at any point
static void processTimes();
static void bootstrapTimerTree();
static void incrementalUpdateTimerTree();
// call this once a frame to periodically log timers
static void logStats();
......
......@@ -157,12 +157,12 @@ class AccumulatorBuffer : public LLRefCount
}
}
void flush()
void flush(LLUnitImplicit<F64, LLUnits::Seconds> time_stamp)
{
llassert(mStorageSize >= sNextStorageSlot);
for (size_t i = 0; i < sNextStorageSlot; i++)
{
mStorage[i].flush();
mStorage[i].flush(time_stamp);
}
}
......@@ -380,7 +380,7 @@ class EventAccumulator
mLastValue = other ? other->mLastValue : 0;
}
void flush() {}
void flush(LLUnitImplicit<F64, LLUnits::Seconds>) {}
F64 getSum() const { return mSum; }
F64 getMin() const { return mMin; }
......@@ -512,9 +512,8 @@ class SampleAccumulator
mHasValue = other ? other->mHasValue : false;
}
void flush()
void flush(LLUnitImplicit<F64, LLUnits::Seconds> time_stamp)
{
LLUnitImplicit<F64, LLUnits::Seconds> time_stamp = LLTimer::getTotalSeconds();
LLUnitImplicit<F64, LLUnits::Seconds> delta_time = time_stamp - mLastSampleTimeStamp;
if (mHasValue)
......@@ -579,7 +578,7 @@ class CountAccumulator
mSum = 0;
}
void flush() {}
void flush(LLUnitImplicit<F64, LLUnits::Seconds>) {}
F64 getSum() const { return mSum; }
......@@ -614,7 +613,7 @@ class TimeBlockAccumulator
TimeBlockAccumulator();
void addSamples(const self_t& other, bool /*append*/);
void reset(const self_t* other);
void flush() {}
void flush(LLUnitImplicit<F64, LLUnits::Seconds>) {}
//
// members
......@@ -780,10 +779,10 @@ struct MemStatAccumulator
mDeallocatedCount = 0;
}
void flush()
void flush(LLUnitImplicit<F64, LLUnits::Seconds> time_stamp)
{
mSize.flush();
mChildSize.flush();
mSize.flush(time_stamp);
mChildSize.flush(time_stamp);
}
SampleAccumulator mSize,
......
......@@ -107,7 +107,9 @@ void RecordingBuffers::reset(RecordingBuffers* other)
void RecordingBuffers::flush()
{
mSamples.flush();
LLUnitImplicit<F64, LLUnits::Seconds> time_stamp = LLTimer::getTotalSeconds();
mSamples.flush(time_stamp);
}
///////////////////////////////////////////////////////////////////////
......@@ -205,7 +207,6 @@ void Recording::mergeRecording( const Recording& other)
LLUnit<F64, LLUnits::Seconds> Recording::getSum(const TraceType<TimeBlockAccumulator>& stat)
{
const TimeBlockAccumulator& accumulator = mBuffers->mStackTimers[stat.getIndex()];
update();
return (F64)(accumulator.mTotalTimeCounter - accumulator.mStartTotalTimeCounter)
/ (F64)LLTrace::TimeBlock::countsPerSecond();
}
......@@ -213,14 +214,12 @@ LLUnit<F64, LLUnits::Seconds> Recording::getSum(const TraceType<TimeBlockAccumul
LLUnit<F64, LLUnits::Seconds> Recording::getSum(const TraceType<TimeBlockAccumulator::SelfTimeFacet>& stat)
{
const TimeBlockAccumulator& accumulator = mBuffers->mStackTimers[stat.getIndex()];
update();
return (F64)(accumulator.mSelfTimeCounter) / (F64)LLTrace::TimeBlock::countsPerSecond();
}
U32 Recording::getSum(const TraceType<TimeBlockAccumulator::CallCountFacet>& stat)
{
update();
return mBuffers->mStackTimers[stat.getIndex()].mCalls;
}
......@@ -228,7 +227,6 @@ LLUnit<F64, LLUnits::Seconds> Recording::getPerSec(const TraceType<TimeBlockAccu
{
const TimeBlockAccumulator& accumulator = mBuffers->mStackTimers[stat.getIndex()];
update();
return (F64)(accumulator.mTotalTimeCounter - accumulator.mStartTotalTimeCounter)
/ ((F64)LLTrace::TimeBlock::countsPerSecond() * mElapsedSeconds.value());
}
......@@ -237,105 +235,88 @@ LLUnit<F64, LLUnits::Seconds> Recording::getPerSec(const TraceType<TimeBlockAccu
{
const TimeBlockAccumulator& accumulator = mBuffers->mStackTimers[stat.getIndex()];
update();
return (F64)(accumulator.mSelfTimeCounter)
/ ((F64)LLTrace::TimeBlock::countsPerSecond() * mElapsedSeconds.value());
}
F32 Recording::getPerSec(const TraceType<TimeBlockAccumulator::CallCountFacet>& stat)
{
update();
return (F32)mBuffers->mStackTimers[stat.getIndex()].mCalls / mElapsedSeconds.value();
}
LLUnit<F64, LLUnits::Bytes> Recording::getMin(const TraceType<MemStatAccumulator>& stat)
{
update();
return mBuffers->mMemStats[stat.getIndex()].mSize.getMin();
}
LLUnit<F64, LLUnits::Bytes> Recording::getMean(const TraceType<MemStatAccumulator>& stat)
{
update();
return mBuffers->mMemStats[stat.getIndex()].mSize.getMean();
}
LLUnit<F64, LLUnits::Bytes> Recording::getMax(const TraceType<MemStatAccumulator>& stat)
{
update();
return mBuffers->mMemStats[stat.getIndex()].mSize.getMax();
}
LLUnit<F64, LLUnits::Bytes> Recording::getStandardDeviation(const TraceType<MemStatAccumulator>& stat)
{
update();
return mBuffers->mMemStats[stat.getIndex()].mSize.getStandardDeviation();
}
LLUnit<F64, LLUnits::Bytes> Recording::getLastValue(const TraceType<MemStatAccumulator>& stat)
{
update();
return mBuffers->mMemStats[stat.getIndex()].mSize.getLastValue();
}
LLUnit<F64, LLUnits::Bytes> Recording::getMin(const TraceType<MemStatAccumulator::ChildMemFacet>& stat)
{
update();
return mBuffers->mMemStats[stat.getIndex()].mChildSize.getMin();
}
LLUnit<F64, LLUnits::Bytes> Recording::getMean(const TraceType<MemStatAccumulator::ChildMemFacet>& stat)
{
update();
return mBuffers->mMemStats[stat.getIndex()].mChildSize.getMean();
}
LLUnit<F64, LLUnits::Bytes> Recording::getMax(const TraceType<MemStatAccumulator::ChildMemFacet>& stat)
{
update();
return mBuffers->mMemStats[stat.getIndex()].mChildSize.getMax();
}
LLUnit<F64, LLUnits::Bytes> Recording::getStandardDeviation(const TraceType<MemStatAccumulator::ChildMemFacet>& stat)
{
update();
return mBuffers->mMemStats[stat.getIndex()].mChildSize.getStandardDeviation();
}
LLUnit<F64, LLUnits::Bytes> Recording::getLastValue(const TraceType<MemStatAccumulator::ChildMemFacet>& stat)
{
update();
return mBuffers->mMemStats[stat.getIndex()].mChildSize.getLastValue();
}
U32 Recording::getSum(const TraceType<MemStatAccumulator::AllocationCountFacet>& stat)
{
update();
return mBuffers->mMemStats[stat.getIndex()].mAllocatedCount;
}
U32 Recording::getSum(const TraceType<MemStatAccumulator::DeallocationCountFacet>& stat)
{
update();
return mBuffers->mMemStats[stat.getIndex()].mAllocatedCount;
}
F64 Recording::getSum( const TraceType<CountAccumulator>& stat )
{
update();
return mBuffers->mCounts[stat.getIndex()].getSum();
}
F64 Recording::getSum( const TraceType<EventAccumulator>& stat )
{
update();
return (F64)mBuffers->mEvents[stat.getIndex()].getSum();
}
F64 Recording::getPerSec( const TraceType<CountAccumulator>& stat )
{
update();
F64 sum = mBuffers->mCounts[stat.getIndex()].getSum();
return (sum != 0.0)
? (sum / mElapsedSeconds.value())
......@@ -344,79 +325,66 @@ F64 Recording::getPerSec( const TraceType<CountAccumulator>& stat )
U32 Recording::getSampleCount( const TraceType<CountAccumulator>& stat )
{
update();
return mBuffers->mCounts[stat.getIndex()].getSampleCount();
}
F64 Recording::getMin( const TraceType<SampleAccumulator>& stat )
{
update();
return mBuffers->mSamples[stat.getIndex()].getMin();
}
F64 Recording::getMax( const TraceType<SampleAccumulator>& stat )
{
update();
return mBuffers->mSamples[stat.getIndex()].getMax();
}
F64 Recording::getMean( const TraceType<SampleAccumulator>& stat )
{
update();
return mBuffers->mSamples[stat.getIndex()].getMean();
}
F64 Recording::getStandardDeviation( const TraceType<SampleAccumulator>& stat )
{
update();
return mBuffers->mSamples[stat.getIndex()].getStandardDeviation();
}
F64 Recording::getLastValue( const TraceType<SampleAccumulator>& stat )
{
update();
return mBuffers->mSamples[stat.getIndex()].getLastValue();
}
U32 Recording::getSampleCount( const TraceType<SampleAccumulator>& stat )
{
update();
return mBuffers->mSamples[stat.getIndex()].getSampleCount();
}
F64 Recording::getMin( const TraceType<EventAccumulator>& stat )
{
update();
return mBuffers->mEvents[stat.getIndex()].getMin();
}
F64 Recording::getMax( const TraceType<EventAccumulator>& stat )
{
update();
return mBuffers->mEvents[stat.getIndex()].getMax();
}
F64 Recording::getMean( const TraceType<EventAccumulator>& stat )
{
update();
return mBuffers->mEvents[stat.getIndex()].getMean();
}
F64 Recording::getStandardDeviation( const TraceType<EventAccumulator>& stat )
{
update();
return mBuffers->mEvents[stat.getIndex()].getStandardDeviation();
}
F64 Recording::getLastValue( const TraceType<EventAccumulator>& stat )
{
update();
return mBuffers->mEvents[stat.getIndex()].getLastValue();
}
U32 Recording::getSampleCount( const TraceType<EventAccumulator>& stat )
{
update();
return mBuffers->mEvents[stat.getIndex()].getSampleCount();
}
......
......@@ -111,6 +111,7 @@ ThreadRecorder::active_recording_list_t::reverse_iterator ThreadRecorder::bringU
if (mActiveRecordings.empty()) return mActiveRecordings.rend();
mActiveRecordings.back()->mPartialRecording.flush();
TimeBlock::updateTimes();
active_recording_list_t::reverse_iterator it, end_it;
for (it = mActiveRecordings.rbegin(), end_it = mActiveRecordings.rend();
......
......@@ -56,6 +56,9 @@ namespace tut
LLUnit<F32, Quatloos> float_quatloos;
ensure(float_quatloos == 0.f);
LLUnit<F32, Quatloos> float_initialize_quatloos(1);
ensure(float_initialize_quatloos == 1.f);
LLUnit<S32, Quatloos> int_quatloos;
ensure(int_quatloos == 0);
......
This diff is collapsed.
......@@ -68,8 +68,8 @@ class LLFastTimerView : public LLFloater
virtual void onClickCloseBtn();
void drawTicks();
void drawLineGraph();
void drawLegend(S32 y);
S32 drawHelp(S32 y);
void drawLegend();
void drawHelp(S32 y);
void drawBorders( S32 y, const S32 x_start, S32 barh, S32 dy);
void drawBars();
......@@ -82,7 +82,6 @@ class LLFastTimerView : public LLFloater
TimerBar()
: mTotalTime(0),
mSelfTime(0),
mVisible(true),
mStartFraction(0.f),
mEndFraction(1.f),
mFirstChild(false),
......@@ -104,29 +103,26 @@ class LLFastTimerView : public LLFloater
struct TimerBarRow
{
S32 mBottom,
mTop;
std::vector<TimerBar> mBars;
S32 mBottom,
mTop;
TimerBar* mBars;
};
LLUnit<F32, LLUnits::Seconds> updateTimerBarWidths(LLTrace::TimeBlock* time_block, TimerBarRow& row, S32 history_index, bool visible = true);
LLUnit<F32, LLUnits::Seconds> updateTimerBarWidths(LLTrace::TimeBlock* time_block, TimerBarRow& row, S32 history_index, U32& bar_index);
S32 updateTimerBarOffsets(LLTrace::TimeBlock* time_block, TimerBarRow& row, S32 timer_bar_index = 0);
S32 drawBar(LLRect bar_rect, TimerBarRow& row, S32 image_width, S32 image_height, bool hovered = false, S32 bar_index = 0);
S32 drawBar(LLRect bar_rect, TimerBarRow& row, S32 image_width, S32 image_height, bool hovered = false, bool visible = true, S32 bar_index = 0);
void setPauseState(bool pause_state);
std::deque<TimerBarRow> mTimerBarRows;
TimerBarRow mAverageTimerRow;
enum ChildAlignment
enum EDisplayType
{
ALIGN_LEFT,
ALIGN_CENTER,
ALIGN_RIGHT,
ALIGN_COUNT
} mDisplayCenter;
bool mDisplayCalls,
mDisplayHz,
mPauseHistory;
TIME,
CALLS,
HZ
} mDisplayType;
bool mPauseHistory;
LLUnit<F64, LLUnits::Seconds> mAllTimeMax,
mTotalTimeDisplay;
S32 mScrollIndex,
......@@ -137,7 +133,8 @@ class LLFastTimerView : public LLFloater
LLTrace::TimeBlock* mHoverTimer;
LLRect mToolTipRect,
mGraphRect,
mBarRect;
mBarRect,
mLegendRect;
LLFrameTimer mHighlightTimer;
LLTrace::PeriodicRecording mRecording;
};
......
......@@ -16,6 +16,27 @@
width="700">
<string name="pause" >Pause</string>
<string name="run">Run</string>
<combo_box name="time_scale_combo"
follows="left|top"
left="10"
top="5"
width="150"
height="20">
<item label="2x Average"/>
<item label="Max"/>
<item label="Recent Max"/>
<item label="100ms"/>
</combo_box>
<combo_box name="metric_combo"
follows="left|top"
left_pad="10"
top="5"
width="150"
height="20">
<item label="Time"/>
<item label="Number of Calls"/>
<item label="Hz"/>
</combo_box>
<button follows="top|right"
name="pause_btn"
left="-200"
......@@ -24,4 +45,52 @@
height="40"
label="Pause"
font="SansSerifHuge"/>
<layout_stack name="legend_stack"
orientation="horizontal"
left="0"
top="50"
right="695"
bottom="500"
follows="all">
<layout_panel name="legend_panel"
auto_resize="false"
user_resize="true"
width="220"
height="450"
min_width="100">
<panel top="0"
left="0"
width="220"
height="440"
name="legend"
follows="all"/>
</layout_panel>
<layout_panel name="timers_panel"
auto_resize="true"
user_resize="true"
height="450"
width="475"
min_width="100">
<layout_stack name="timer_bars_stack"
orientation="vertical"
left="0"
top="0"
width="475"
height="445"
follows="all">
<layout_panel name="bars_panel"
auto_resize="true"
user_resize="true"
top="0"
width="475"
height="210"/>
<layout_panel name="lines_panel"
auto_resize="false"
user_resize="true"
width="475"
min_height="50"
height="240"/>
</layout_stack>
</layout_panel>
</layout_stack>
</floater>
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment