diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h
index 642c99ccce1f9702e67b0771d7a1bc6e1f92d9cf..ab8612a8ade8b0e963fa1f7d4c47ca6d59eaf8ca 100755
--- a/indra/llcommon/llfasttimer.h
+++ b/indra/llcommon/llfasttimer.h
@@ -38,13 +38,6 @@ class LLMutex;
 namespace LLTrace
 {
 
-struct BlockTimerStackRecord
-{
-	class BlockTimer*	mActiveTimer;
-	class TimeBlock*	mTimeBlock;
-	U64					mChildTime;
-};
-
 class ThreadTimerStack 
 :	public BlockTimerStackRecord, 
 	public LLThreadLocalSingleton<ThreadTimerStack>
diff --git a/indra/llcommon/lltraceaccumulators.cpp b/indra/llcommon/lltraceaccumulators.cpp
index 59486964182fba6dd917b662ba177441d9602554..950c1d97d11addc90e38c45390ef0010ef1cf892 100644
--- a/indra/llcommon/lltraceaccumulators.cpp
+++ b/indra/llcommon/lltraceaccumulators.cpp
@@ -69,6 +69,16 @@ void AccumulatorBufferGroup::makePrimary()
 	}
 }
 
+//static: resets the LLThreadLocalSingletonPointer instance of every accumulator type to NULL (via AccumulatorBuffer<>::clearPrimary)
+void AccumulatorBufferGroup::clearPrimary()
+{
+	AccumulatorBuffer<CountAccumulator>::clearPrimary();	
+	AccumulatorBuffer<SampleAccumulator>::clearPrimary();
+	AccumulatorBuffer<EventAccumulator>::clearPrimary();
+	AccumulatorBuffer<TimeBlockAccumulator>::clearPrimary();
+	AccumulatorBuffer<MemStatAccumulator>::clearPrimary();
+}
+
 bool AccumulatorBufferGroup::isPrimary() const
 {
 	return mCounts.isPrimary();
@@ -102,11 +112,12 @@ void AccumulatorBufferGroup::reset(AccumulatorBufferGroup* other)
 	mMemStats.reset(other ? &other->mMemStats : NULL);
 }
 
-void AccumulatorBufferGroup::flush()
+void AccumulatorBufferGroup::sync()	// passes one LLTimer::getTotalSeconds() timestamp to the sample-based buffers
 {
 	LLUnitImplicit<F64, LLUnits::Seconds> time_stamp = LLTimer::getTotalSeconds();
 
-	mSamples.flush(time_stamp);
+	mSamples.sync(time_stamp);
+	mMemStats.sync(time_stamp);	// MemStatAccumulator holds SampleAccumulators (mSize, mChildSize), so it is synced too
 }
 
 }
diff --git a/indra/llcommon/lltraceaccumulators.h b/indra/llcommon/lltraceaccumulators.h
index 7994dcc217a7c7847ab1dfa0804d66efd544f53e..825cc9e3a8ea370a198d4d85aa17d1e00203e4e1 100644
--- a/indra/llcommon/lltraceaccumulators.h
+++ b/indra/llcommon/lltraceaccumulators.h
@@ -109,12 +109,12 @@ namespace LLTrace
 			}
 		}
 
-		void flush(LLUnitImplicit<F64, LLUnits::Seconds> time_stamp)
+		void sync(LLUnitImplicit<F64, LLUnits::Seconds> time_stamp)	// forwards time_stamp to slots [0, sNextStorageSlot)
 		{
 			llassert(mStorageSize >= sNextStorageSlot);
 			for (size_t i = 0; i < sNextStorageSlot; i++)
 			{
-				mStorage[i].flush(time_stamp);
+				mStorage[i].sync(time_stamp);
 			}
 		}
 
@@ -128,6 +128,11 @@ namespace LLTrace
 			return LLThreadLocalSingletonPointer<ACCUMULATOR>::getInstance() == mStorage;
 		}
 
+		static void clearPrimary()
+		{
+			LLThreadLocalSingletonPointer<ACCUMULATOR>::setInstance(NULL);	// drops the pointer that isPrimary() and getPrimaryStorage() read for this ACCUMULATOR type
+		}
+
 		LL_FORCE_INLINE static ACCUMULATOR* getPrimaryStorage() 
 		{ 
 			ACCUMULATOR* accumulator = LLThreadLocalSingletonPointer<ACCUMULATOR>::getInstance();
@@ -302,7 +307,7 @@ namespace LLTrace
 			mLastValue = other ? other->mLastValue : 0;
 		}
 
-		void flush(LLUnitImplicit<F64, LLUnits::Seconds>) {}
+		void sync(LLUnitImplicit<F64, LLUnits::Seconds>) {}	// no-op: the timestamp is ignored by this accumulator
 
 		F64	getSum() const { return mSum; }
 		F64	getMin() const { return mMin; }
@@ -434,7 +439,7 @@ namespace LLTrace
 			mHasValue = other ? other->mHasValue : false;
 		}
 
-		void flush(LLUnitImplicit<F64, LLUnits::Seconds> time_stamp)
+		void sync(LLUnitImplicit<F64, LLUnits::Seconds> time_stamp)
 		{
 			LLUnitImplicit<F64, LLUnits::Seconds> delta_time = time_stamp - mLastSampleTimeStamp;
 
@@ -500,7 +505,7 @@ namespace LLTrace
 			mSum = 0;
 		}
 
-		void flush(LLUnitImplicit<F64, LLUnits::Seconds>) {}
+		void sync(LLUnitImplicit<F64, LLUnits::Seconds>) {}	// no-op: the timestamp is ignored by this accumulator
 
 		F64	getSum() const { return mSum; }
 
@@ -535,7 +540,7 @@ namespace LLTrace
 		TimeBlockAccumulator();
 		void addSamples(const self_t& other, bool /*append*/);
 		void reset(const self_t* other);
-		void flush(LLUnitImplicit<F64, LLUnits::Seconds>) {}
+		void sync(LLUnitImplicit<F64, LLUnits::Seconds>) {}	// no-op: the timestamp is ignored by TimeBlockAccumulator
 
 		//
 		// members
@@ -566,6 +571,13 @@ namespace LLTrace
 		bool						mCollapsed;
 		bool						mNeedsSorting;
 	};
+	
+	struct BlockTimerStackRecord	// plain three-field record; ThreadTimerStack (llfasttimer.h) derives from it
+	{
+		class BlockTimer*	mActiveTimer;
+		class TimeBlock*	mTimeBlock;
+		U64					mChildTime;
+	};
 
 	struct MemStatAccumulator
 	{
@@ -611,16 +623,16 @@ namespace LLTrace
 			mDeallocatedCount = 0;
 		}
 
-		void flush(LLUnitImplicit<F64, LLUnits::Seconds> time_stamp) 
+		void sync(LLUnitImplicit<F64, LLUnits::Seconds> time_stamp) // syncs both size samples; mAllocatedCount/mDeallocatedCount are not touched
 		{
-			mSize.flush(time_stamp);
-			mChildSize.flush(time_stamp);
+			mSize.sync(time_stamp);
+			mChildSize.sync(time_stamp);
 		}
 
 		SampleAccumulator	mSize,
-			mChildSize;
+							mChildSize;
 		int					mAllocatedCount,
-			mDeallocatedCount;
+							mDeallocatedCount;
 	};
 
 	struct AccumulatorBufferGroup : public LLRefCount
@@ -630,11 +642,12 @@ namespace LLTrace
 		void handOffTo(AccumulatorBufferGroup& other);
 		void makePrimary();
 		bool isPrimary() const;
+		static void clearPrimary();
 
 		void append(const AccumulatorBufferGroup& other);
 		void merge(const AccumulatorBufferGroup& other);
 		void reset(AccumulatorBufferGroup* other = NULL);
-		void flush();
+		void sync();
 
 		AccumulatorBuffer<CountAccumulator>	 			mCounts;
 		AccumulatorBuffer<SampleAccumulator>			mSamples;
diff --git a/indra/llcommon/lltracerecording.cpp b/indra/llcommon/lltracerecording.cpp
index c30f204fa46a266e0b3a42b8675334d9b72d1787..0938317eaaad9961f25602e80b7ec3339cd91f03 100644
--- a/indra/llcommon/lltracerecording.cpp
+++ b/indra/llcommon/lltracerecording.cpp
@@ -59,11 +59,12 @@ Recording& Recording::operator = (const Recording& other)
 
 	mBuffers = other.mBuffers;
 
-	LLStopWatchControlsMixin<Recording>::setPlayState(other_play_state);
-
 	// above call will clear mElapsedSeconds as a side effect, so copy it here
 	mElapsedSeconds = other.mElapsedSeconds;
 	mSamplingTimer = other.mSamplingTimer;
+
+	setPlayState(other_play_state);
+
 	return *this;
 }
 
@@ -82,7 +83,6 @@ void Recording::update()
 	{
 		mElapsedSeconds += mSamplingTimer.getElapsedTimeF64();
 		AccumulatorBufferGroup* buffers = mBuffers.write();
-		buffers->flush();
 		LLTrace::get_thread_recorder()->bringUpToDate(buffers);
 
 		mSamplingTimer.reset();
@@ -107,7 +107,6 @@ void Recording::handleStop()
 {
 	mElapsedSeconds += mSamplingTimer.getElapsedTimeF64();
 	AccumulatorBufferGroup* buffers = mBuffers.write();
-	buffers->flush();
 	LLTrace::get_thread_recorder()->deactivate(buffers);
 }
 
diff --git a/indra/llcommon/lltracerecording.h b/indra/llcommon/lltracerecording.h
index 38eaa47f9f9cdd4c204f8339548f834848269365..355dbabb1c11705c9248aa07e89fad2e2b48e75b 100644
--- a/indra/llcommon/lltracerecording.h
+++ b/indra/llcommon/lltracerecording.h
@@ -81,6 +81,7 @@ class LLStopWatchControlsMixin
 :	public LLStopWatchControlsMixinCommon
 {
 public:
+
 	typedef LLStopWatchControlsMixin<DERIVED> self_t;
 	virtual void splitTo(DERIVED& other)
 	{
@@ -98,6 +99,11 @@ class LLStopWatchControlsMixin
 		static_cast<self_t&>(other).handleSplitTo(*static_cast<DERIVED*>(this));
 	}
 private:
+	self_t& operator = (const self_t& other)
+	{	// private and deliberately copies nothing: derived class must implement the logic
+		return *this;	// a function returning self_t& must return; flowing off the end is undefined behavior
+	}
+
 	// atomically stop this object while starting the other
 	// no data can be missed in between stop and start
 	virtual void handleSplitTo(DERIVED& other) {};
diff --git a/indra/llcommon/lltracethreadrecorder.cpp b/indra/llcommon/lltracethreadrecorder.cpp
index c571e013e19ffd7db5dd45487d23a6c8e1bd5075..7192564c94898bf856ad319517767e2f932c5d04 100644
--- a/indra/llcommon/lltracethreadrecorder.cpp
+++ b/indra/llcommon/lltracethreadrecorder.cpp
@@ -87,7 +87,7 @@ ThreadRecorder::~ThreadRecorder()
 	delete[] mTimeBlockTreeNodes;
 }
 
-TimeBlockTreeNode* ThreadRecorder::getTimeBlockTreeNode(S32 index)
+TimeBlockTreeNode* ThreadRecorder::getTimeBlockTreeNode( S32 index )
 {
 	if (0 <= index && index < mNumTimeBlockTreeNodes)
 	{
@@ -99,10 +99,20 @@ TimeBlockTreeNode* ThreadRecorder::getTimeBlockTreeNode(S32 index)
 
 void ThreadRecorder::activate( AccumulatorBufferGroup* recording )
 {
+	active_recording_list_t::reverse_iterator it, end_it;
+	for (it = mActiveRecordings.rbegin(), end_it = mActiveRecordings.rend();
+		it != end_it;
+		++it)
+	{
+		llassert((*it)->mTargetRecording != recording);
+	}
+
 	ActiveRecording* active_recording = new ActiveRecording(recording);
 	if (!mActiveRecordings.empty())
 	{
-		mActiveRecordings.back()->mPartialRecording.handOffTo(active_recording->mPartialRecording);
+		AccumulatorBufferGroup& prev_active_recording = mActiveRecordings.back()->mPartialRecording;
+		prev_active_recording.sync();
+		prev_active_recording.handOffTo(active_recording->mPartialRecording);
 	}
 	mActiveRecordings.push_back(active_recording);
 
@@ -113,7 +123,7 @@ ThreadRecorder::active_recording_list_t::reverse_iterator ThreadRecorder::bringU
 {
 	if (mActiveRecordings.empty()) return mActiveRecordings.rend();
 
-	mActiveRecordings.back()->mPartialRecording.flush();
+	mActiveRecordings.back()->mPartialRecording.sync();
 	TimeBlock::updateTimes();
 
 	active_recording_list_t::reverse_iterator it, end_it;
@@ -156,18 +166,22 @@ void ThreadRecorder::deactivate( AccumulatorBufferGroup* recording )
 	active_recording_list_t::reverse_iterator it = bringUpToDate(recording);
 	if (it != mActiveRecordings.rend())
 	{
-		// and if we've found the recording we wanted to update
-		active_recording_list_t::reverse_iterator next_it = it;
-		++next_it;
-		if (next_it != mActiveRecordings.rend())
-		{
-			(*next_it)->mPartialRecording.makePrimary();
-		}
-
 		active_recording_list_t::iterator recording_to_remove = (++it).base();
+		bool was_primary = (*recording_to_remove)->mPartialRecording.isPrimary();
 		llassert((*recording_to_remove)->mTargetRecording == recording);
 		delete *recording_to_remove;
 		mActiveRecordings.erase(recording_to_remove);
+		if (was_primary)
+		{
+			if (mActiveRecordings.empty())
+			{
+				AccumulatorBufferGroup::clearPrimary();
+			}
+			else
+			{
+				mActiveRecordings.back()->mPartialRecording.makePrimary();
+			}
+		}
 	}
 }
 
@@ -202,7 +216,6 @@ SlaveThreadRecorder::~SlaveThreadRecorder()
 void SlaveThreadRecorder::pushToMaster()
 { 
 	{ LLMutexLock lock(&mSharedRecordingMutex);	
-		mThreadRecordingBuffers.flush();
 		LLTrace::get_thread_recorder()->bringUpToDate(&mThreadRecordingBuffers);
 		mSharedRecordingBuffers.append(mThreadRecordingBuffers);
 	}
@@ -213,23 +226,25 @@ void SlaveThreadRecorder::pushToMaster()
 ///////////////////////////////////////////////////////////////////////
 
 static LLFastTimer::DeclareTimer FTM_PULL_TRACE_DATA_FROM_SLAVES("Pull slave trace data");
+
 void MasterThreadRecorder::pullFromSlaveThreads()
 {
-	LLFastTimer _(FTM_PULL_TRACE_DATA_FROM_SLAVES);
+	/*LLFastTimer _(FTM_PULL_TRACE_DATA_FROM_SLAVES);
 	if (mActiveRecordings.empty()) return;
 
 	{ LLMutexLock lock(&mSlaveListMutex);
 
-		AccumulatorBufferGroup& target_recording_buffers = mActiveRecordings.back()->mPartialRecording;
-		for (slave_thread_recorder_list_t::iterator it = mSlaveThreadRecorders.begin(), end_it = mSlaveThreadRecorders.end();
-			it != end_it;
-			++it)
-		{ LLMutexLock lock(&(*it)->mSharedRecordingMutex);
+	AccumulatorBufferGroup& target_recording_buffers = mActiveRecordings.back()->mPartialRecording;
+	target_recording_buffers.sync();
+	for (slave_thread_recorder_list_t::iterator it = mSlaveThreadRecorders.begin(), end_it = mSlaveThreadRecorders.end();
+	it != end_it;
+	++it)
+	{ LLMutexLock lock(&(*it)->mSharedRecordingMutex);
 
-			target_recording_buffers.merge((*it)->mSharedRecordingBuffers);
-			(*it)->mSharedRecordingBuffers.reset();
-		}
+	target_recording_buffers.merge((*it)->mSharedRecordingBuffers);
+	(*it)->mSharedRecordingBuffers.reset();
 	}
+	}*/	// NOTE(review): the whole body is commented out, so this function is a no-op and mSharedRecordingBuffers of the slaves are never merged or reset here - TODO confirm this is temporary and restore
 }
 
 // called by slave thread
diff --git a/indra/llcommon/lltracethreadrecorder.h b/indra/llcommon/lltracethreadrecorder.h
index 0680c2c590f4f56099ed282aad792112b19b1c8a..6b7a8e5865e5fb689bbaed262b28639444bf201c 100644
--- a/indra/llcommon/lltracethreadrecorder.h
+++ b/indra/llcommon/lltracethreadrecorder.h
@@ -71,6 +71,7 @@ namespace LLTrace
 		class BlockTimer*			mRootTimer;
 		TimeBlockTreeNode*			mTimeBlockTreeNodes;
 		size_t						mNumTimeBlockTreeNodes;
+		BlockTimerStackRecord		mBlockTimerStackRecord;
 	};
 
 	class LL_COMMON_API MasterThreadRecorder : public ThreadRecorder
@@ -105,7 +106,6 @@ namespace LLTrace
 
 	private:
 		friend class MasterThreadRecorder;
-		MasterThreadRecorder* 	mMaster;
 		LLMutex					mSharedRecordingMutex;
 		AccumulatorBufferGroup	mSharedRecordingBuffers;
 		MasterThreadRecorder&	mMasterRecorder;
diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp
index 733c9cc9df83b6ba770c549e0cfe1a327f348fff..7c5cd520dad187b9dfd41bcb6d4878eeadb722af 100755
--- a/indra/newview/llappviewer.cpp
+++ b/indra/newview/llappviewer.cpp
@@ -1294,6 +1294,8 @@ bool LLAppViewer::mainLoop()
 	{
 		LLFastTimer _(FTM_FRAME);
 		LLTrace::TimeBlock::processTimes();
+		llassert((LLTrace::get_frame_recording().getCurRecording().update(), 
+				LLTrace::get_frame_recording().getCurRecording().getSampleCount(LLStatViewer::FPS) <= 1));
 		LLTrace::get_frame_recording().nextPeriod();
 		LLTrace::TimeBlock::logStats();
 
@@ -5617,6 +5619,6 @@ void LLAppViewer::metricsSend(bool enable_reporting)
 	// Reset even if we can't report.  Rather than gather up a huge chunk of
 	// data, we'll keep to our sampling interval and retain the data
 	// resolution in time.
-	gViewerAssetStats->reset();
+	gViewerAssetStats->restart();
 }
 
diff --git a/indra/newview/llviewerassetstats.cpp b/indra/newview/llviewerassetstats.cpp
index bada565d3d279f5cc7a7444d16cc7a456220fcae..af82b61dc8de4097e5fd178342b19af875a46d7e 100755
--- a/indra/newview/llviewerassetstats.cpp
+++ b/indra/newview/llviewerassetstats.cpp
@@ -314,9 +314,9 @@ void LLViewerAssetStats::handleStop()
 }
 
 void LLViewerAssetStats::handleReset()
-	{
+{
 	reset();
-	}
+}
 
 
 void LLViewerAssetStats::reset()
@@ -328,6 +328,7 @@ void LLViewerAssetStats::reset()
 	if (mRegionHandle)
 	{
 		mCurRecording = &mRegionRecordings[mRegionHandle];
+		mCurRecording->setPlayState(getPlayState());
 	}
 }
 
@@ -346,7 +347,7 @@ void LLViewerAssetStats::setRegion(region_handle_t region_handle)
 	if (region_handle)
 	{
 		mCurRecording = &mRegionRecordings[region_handle];
-		mCurRecording->start();
+		mCurRecording->setPlayState(getPlayState());
 	}
 
 	mRegionHandle = region_handle;
@@ -493,19 +494,19 @@ void LLViewerAssetStats::getStats(AssetStats& stats, bool compact_output)
 }
 
 LLSD LLViewerAssetStats::asLLSD(bool compact_output)
-		{
+{
 	LLParamSDParser parser;
 	LLSD sd;
 	AssetStats stats;
 	getStats(stats, compact_output);
 	LLInitParam::predicate_rule_t rule = LLInitParam::default_parse_rules();
 	if (!compact_output)
-		{
+	{
 		rule.allow(LLInitParam::EMPTY);
-		}
+	}
 	parser.writeSD(sd, stats, rule);
 	return sd;
-	}
+}
 
 // ------------------------------------------------------
 // Global free-function definitions (LLViewerAssetStatsFF namespace)