diff --git a/indra/llcharacter/llkeyframewalkmotion.cpp b/indra/llcharacter/llkeyframewalkmotion.cpp
index e188b06c03850ef9d6c2b6c9fdf835102e1e509a..c6ca0b542ed581e53131484b2862c887dceed166 100755
--- a/indra/llcharacter/llkeyframewalkmotion.cpp
+++ b/indra/llcharacter/llkeyframewalkmotion.cpp
@@ -383,7 +383,7 @@ BOOL LLFlyAdjustMotion::onUpdate(F32 time, U8* joint_mask)
 	F32 target_roll = llclamp(ang_vel.mV[VZ], -4.f, 4.f) * roll_factor;
 
 	// roll is critically damped interpolation between current roll and angular velocity-derived target roll
-	mRoll = LLSmoothInterpolation::lerp(mRoll, target_roll, LLUnit<LLUnits::Milliseconds, F32>(100));
+	mRoll = LLSmoothInterpolation::lerp(mRoll, target_roll, LLUnit<F32, LLUnits::Milliseconds>(100));
 
 	LLQuaternion roll(mRoll, LLVector3(0.f, 0.f, 1.f));
 	mPelvisState->setRotation(roll);
diff --git a/indra/llcommon/llcriticaldamp.cpp b/indra/llcommon/llcriticaldamp.cpp
index 2f013fe255fb5e43456b52867b027802eb45bedb..575fc4149e00d8c651cda2b6952298ff787006af 100755
--- a/indra/llcommon/llcriticaldamp.cpp
+++ b/indra/llcommon/llcriticaldamp.cpp
@@ -81,7 +81,7 @@ void LLSmoothInterpolation::updateInterpolants()
 //-----------------------------------------------------------------------------
 // getInterpolant()
 //-----------------------------------------------------------------------------
-F32 LLSmoothInterpolation::getInterpolant(LLUnit<LLUnits::Seconds, F32> time_constant, bool use_cache)
+F32 LLSmoothInterpolation::getInterpolant(LLUnit<F32, LLUnits::Seconds> time_constant, bool use_cache)
 {
 	if (time_constant == 0.f)
 	{
diff --git a/indra/llcommon/llcriticaldamp.h b/indra/llcommon/llcriticaldamp.h
index ab5d4ba6e206321593edd115b5e761e23c49b9f9..e174643cd0e8ab019e71fe875a2f71aeb0aaf420 100755
--- a/indra/llcommon/llcriticaldamp.h
+++ b/indra/llcommon/llcriticaldamp.h
@@ -42,10 +42,10 @@ class LL_COMMON_API LLSmoothInterpolation
 	static void updateInterpolants();
 
 	// ACCESSORS
-	static F32 getInterpolant(LLUnit<LLUnits::Seconds, F32> time_constant, bool use_cache = true);
+	static F32 getInterpolant(LLUnit<F32, LLUnits::Seconds> time_constant, bool use_cache = true);
 
 	template<typename T> 
-	static T lerp(T a, T b, LLUnit<LLUnits::Seconds, F32> time_constant, bool use_cache = true)
+	static T lerp(T a, T b, LLUnit<F32, LLUnits::Seconds> time_constant, bool use_cache = true)
 	{
 		F32 interpolant = getInterpolant(time_constant, use_cache);
 		return ((a * (1.f - interpolant)) 
diff --git a/indra/llcommon/lldate.cpp b/indra/llcommon/lldate.cpp
index 2efe39e15831f19469609fa67f9685d08462f328..7892269e350f9a4c491fbbeeff545d967e9aef88 100755
--- a/indra/llcommon/lldate.cpp
+++ b/indra/llcommon/lldate.cpp
@@ -55,7 +55,7 @@ LLDate::LLDate(const LLDate& date) :
 	mSecondsSinceEpoch(date.mSecondsSinceEpoch)
 {}
 
-LLDate::LLDate(LLUnit<LLUnits::Seconds, F64> seconds_since_epoch) :
+LLDate::LLDate(LLUnit<F64, LLUnits::Seconds> seconds_since_epoch) :
 	mSecondsSinceEpoch(seconds_since_epoch.value())
 {}
 
diff --git a/indra/llcommon/lldate.h b/indra/llcommon/lldate.h
index b62a8461479e34036882384219a0ac0437ec9da2..1067ac528053de01eacbc56df8d4b694e0f58615 100755
--- a/indra/llcommon/lldate.h
+++ b/indra/llcommon/lldate.h
@@ -59,7 +59,7 @@ class LL_COMMON_API LLDate
 	 *
 	 * @param seconds_since_epoch The number of seconds since UTC epoch.
 	 */
-	LLDate(LLUnit<LLUnits::Seconds, F64> seconds_since_epoch);
+	LLDate(LLUnit<F64, LLUnits::Seconds> seconds_since_epoch);
 
 	/** 
 	 * @brief Construct a date from a string representation
diff --git a/indra/llcommon/llfasttimer.cpp b/indra/llcommon/llfasttimer.cpp
index dfc72bd2cedabf47b2e02bf7a417ee76d7886318..809a0327cabebbb2deb60a219f1fa7bf1c89c6ac 100755
--- a/indra/llcommon/llfasttimer.cpp
+++ b/indra/llcommon/llfasttimer.cpp
@@ -146,8 +146,8 @@ U64 TimeBlock::countsPerSecond()
 {
 #if LL_FASTTIMER_USE_RDTSC || !LL_WINDOWS
 	//getCPUFrequency returns MHz and sCPUClockFrequency wants to be in Hz
-	static LLUnit<LLUnits::Hertz, U64> sCPUClockFrequency = LLProcessorInfo().getCPUFrequency();
-
+	static LLUnit<U64, LLUnits::Hertz> sCPUClockFrequency = LLProcessorInfo().getCPUFrequency();
+	return sCPUClockFrequency.value();
 #else
 	// If we're not using RDTSC, each fasttimer tick is just a performance counter tick.
 	// Not redefining the clock frequency itself (in llprocessor.cpp/calculate_cpu_frequency())
@@ -159,8 +159,8 @@ U64 TimeBlock::countsPerSecond()
 		QueryPerformanceFrequency((LARGE_INTEGER*)&sCPUClockFrequency);
 		firstcall = false;
 	}
-#endif
 	return sCPUClockFrequency.value();
+#endif
 }
 #endif
 
@@ -318,11 +318,11 @@ void TimeBlock::logStats()
 			LL_DEBUGS("FastTimers") << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << LL_ENDL;
 			LL_DEBUGS("FastTimers") << "getCPUClockCount32() " << getCPUClockCount32() << LL_ENDL;
 			LL_DEBUGS("FastTimers") << "getCPUClockCount64() " << getCPUClockCount64() << LL_ENDL;
-			LL_DEBUGS("FastTimers") << "elapsed sec " << ((F64)getCPUClockCount64()) / (LLUnit<LLUnits::Hertz, F64>(LLProcessorInfo().getCPUFrequency())) << LL_ENDL;
+			LL_DEBUGS("FastTimers") << "elapsed sec " << ((F64)getCPUClockCount64()) / (LLUnit<F64, LLUnits::Hertz>(LLProcessorInfo().getCPUFrequency())) << LL_ENDL;
 		}
 		call_count++;
 
-		LLUnit<LLUnits::Seconds, F64> total_time(0);
+		LLUnit<F64, LLUnits::Seconds> total_time(0);
 		LLSD sd;
 
 		{
@@ -365,7 +365,7 @@ void TimeBlock::dumpCurTimes()
 		++it)
 	{
 		TimeBlock* timerp = (*it);
-		LLUnit<LLUnits::Seconds, F64> total_time_ms = last_frame_recording.getSum(*timerp);
+		LLUnit<F64, LLUnits::Seconds> total_time_ms = last_frame_recording.getSum(*timerp);
 		U32 num_calls = last_frame_recording.getSum(timerp->callCount());
 
 		// Don't bother with really brief times, keep output concise
@@ -449,7 +449,7 @@ void TimeBlockAccumulator::reset( const TimeBlockAccumulator* other )
 	}
 }
 
-LLUnit<LLUnits::Seconds, F64> BlockTimer::getElapsedTime()
+LLUnit<F64, LLUnits::Seconds> BlockTimer::getElapsedTime()
 {
 	U64 total_time = TimeBlock::getCPUClockCount64() - mStartTime;
 
diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h
index 20514d1638b005cb062d7eb64b7b8d8c1d8338c6..fdc6997d45089de348adcd1a7ba8ad0e54137ff6 100755
--- a/indra/llcommon/llfasttimer.h
+++ b/indra/llcommon/llfasttimer.h
@@ -71,7 +71,7 @@ class BlockTimer
 	BlockTimer(TimeBlock& timer);
 	~BlockTimer();
 
-	LLUnit<LLUnits::Seconds, F64> getElapsedTime();
+	LLUnit<F64, LLUnits::Seconds> getElapsedTime();
 
 private:
 
diff --git a/indra/llcommon/llprocessor.cpp b/indra/llcommon/llprocessor.cpp
index 5ddfa6fcef4ba99c32e258d1d6bfb96a912ae437..b80e813d84b2e3170c315b0b55d1cc2fc2008e32 100755
--- a/indra/llcommon/llprocessor.cpp
+++ b/indra/llcommon/llprocessor.cpp
@@ -875,7 +875,7 @@ LLProcessorInfo::LLProcessorInfo() : mImpl(NULL)
 
 
 LLProcessorInfo::~LLProcessorInfo() {}
-LLUnitImplicit<LLUnits::Megahertz, F64> LLProcessorInfo::getCPUFrequency() const { return mImpl->getCPUFrequency(); }
+LLUnitImplicit<F64, LLUnits::Megahertz> LLProcessorInfo::getCPUFrequency() const { return mImpl->getCPUFrequency(); }
 bool LLProcessorInfo::hasSSE() const { return mImpl->hasSSE(); }
 bool LLProcessorInfo::hasSSE2() const { return mImpl->hasSSE2(); }
 bool LLProcessorInfo::hasAltivec() const { return mImpl->hasAltivec(); }
diff --git a/indra/llcommon/llprocessor.h b/indra/llcommon/llprocessor.h
index fbd427f48441bfbc9dda99740b43f416ff8438ee..7f220467b0d61b91a64deb7d2b9576f3c23208b3 100755
--- a/indra/llcommon/llprocessor.h
+++ b/indra/llcommon/llprocessor.h
@@ -37,7 +37,7 @@ class LL_COMMON_API LLProcessorInfo
 	LLProcessorInfo(); 
  	~LLProcessorInfo();
 
-	LLUnitImplicit<LLUnits::Megahertz, F64> getCPUFrequency() const;
+	LLUnitImplicit<F64, LLUnits::Megahertz> getCPUFrequency() const;
 	bool hasSSE() const;
 	bool hasSSE2() const;
 	bool hasAltivec() const;
diff --git a/indra/llcommon/lltimer.cpp b/indra/llcommon/lltimer.cpp
index 838155d54dc5e79e916ca670848f276fe395d0c3..693809b622f01dfc3bb03f3214ae3c30cc22109c 100755
--- a/indra/llcommon/lltimer.cpp
+++ b/indra/llcommon/lltimer.cpp
@@ -285,14 +285,14 @@ LLTimer::~LLTimer()
 }
 
 // static
-LLUnitImplicit<LLUnits::Microseconds, U64> LLTimer::getTotalTime()
+LLUnitImplicit<U64, LLUnits::Microseconds> LLTimer::getTotalTime()
 {
 	// simply call into the implementation function.
 	return totalTime();
 }	
 
 // static
-LLUnitImplicit<LLUnits::Seconds, F64> LLTimer::getTotalSeconds()
+LLUnitImplicit<F64, LLUnits::Seconds> LLTimer::getTotalSeconds()
 {
 	return U64_to_F64(getTotalTime()) * USEC_TO_SEC_F64;
 }
@@ -341,23 +341,23 @@ U64 getElapsedTimeAndUpdate(U64& lastClockCount)
 }
 
 
-LLUnitImplicit<LLUnits::Seconds, F64> LLTimer::getElapsedTimeF64() const
+LLUnitImplicit<F64, LLUnits::Seconds> LLTimer::getElapsedTimeF64() const
 {
 	U64 last = mLastClockCount;
 	return (F64)getElapsedTimeAndUpdate(last) * gClockFrequencyInv;
 }
 
-LLUnitImplicit<LLUnits::Seconds, F32> LLTimer::getElapsedTimeF32() const
+LLUnitImplicit<F32, LLUnits::Seconds> LLTimer::getElapsedTimeF32() const
 {
 	return (F32)getElapsedTimeF64();
 }
 
-LLUnitImplicit<LLUnits::Seconds, F64> LLTimer::getElapsedTimeAndResetF64()
+LLUnitImplicit<F64, LLUnits::Seconds> LLTimer::getElapsedTimeAndResetF64()
 {
 	return (F64)getElapsedTimeAndUpdate(mLastClockCount) * gClockFrequencyInv;
 }
 
-LLUnitImplicit<LLUnits::Seconds, F32> LLTimer::getElapsedTimeAndResetF32()
+LLUnitImplicit<F32, LLUnits::Seconds> LLTimer::getElapsedTimeAndResetF32()
 {
 	return (F32)getElapsedTimeAndResetF64();
 }
@@ -370,7 +370,7 @@ void  LLTimer::setTimerExpirySec(F32 expiration)
 		+ (U64)((F32)(expiration * gClockFrequency));
 }
 
-LLUnitImplicit<LLUnits::Seconds, F32> LLTimer::getRemainingTimeF32() const
+LLUnitImplicit<F32, LLUnits::Seconds> LLTimer::getRemainingTimeF32() const
 {
 	U64 cur_ticks = get_clock_count();
 	if (cur_ticks > mExpirationTicks)
diff --git a/indra/llcommon/lltimer.h b/indra/llcommon/lltimer.h
index 0ba87d1e152693a2ceecace5e7ce66257d89f140..9e464c4b1af622335ca88df7d72155441850d654 100755
--- a/indra/llcommon/lltimer.h
+++ b/indra/llcommon/lltimer.h
@@ -67,16 +67,16 @@ class LL_COMMON_API LLTimer
 
 	// Return a high precision number of seconds since the start of
 	// this application instance.
-	static LLUnitImplicit<LLUnits::Seconds, F64> getElapsedSeconds()
+	static LLUnitImplicit<F64, LLUnits::Seconds> getElapsedSeconds()
 	{
 		return sTimer->getElapsedTimeF64();
 	}
 
 	// Return a high precision usec since epoch
-	static LLUnitImplicit<LLUnits::Microseconds, U64> getTotalTime();
+	static LLUnitImplicit<U64, LLUnits::Microseconds> getTotalTime();
 
 	// Return a high precision seconds since epoch
-	static LLUnitImplicit<LLUnits::Seconds, F64> getTotalSeconds();
+	static LLUnitImplicit<F64, LLUnits::Seconds> getTotalSeconds();
 
 
 	// MANIPULATORS
@@ -87,16 +87,16 @@ class LL_COMMON_API LLTimer
 	void setTimerExpirySec(F32 expiration);
 	BOOL checkExpirationAndReset(F32 expiration);
 	BOOL hasExpired() const;
-	LLUnitImplicit<LLUnits::Seconds, F32> getElapsedTimeAndResetF32();	// Returns elapsed time in seconds with reset
-	LLUnitImplicit<LLUnits::Seconds, F64> getElapsedTimeAndResetF64();
+	LLUnitImplicit<F32, LLUnits::Seconds> getElapsedTimeAndResetF32();	// Returns elapsed time in seconds with reset
+	LLUnitImplicit<F64, LLUnits::Seconds> getElapsedTimeAndResetF64();
 
-	LLUnitImplicit<LLUnits::Seconds, F32> getRemainingTimeF32() const;
+	LLUnitImplicit<F32, LLUnits::Seconds> getRemainingTimeF32() const;
 
 	static BOOL knownBadTimer();
 
 	// ACCESSORS
-	LLUnitImplicit<LLUnits::Seconds, F32> getElapsedTimeF32() const;			// Returns elapsed time in seconds
-	LLUnitImplicit<LLUnits::Seconds, F64> getElapsedTimeF64() const;			// Returns elapsed time in seconds
+	LLUnitImplicit<F32, LLUnits::Seconds> getElapsedTimeF32() const;			// Returns elapsed time in seconds
+	LLUnitImplicit<F64, LLUnits::Seconds> getElapsedTimeF64() const;			// Returns elapsed time in seconds
 
 	bool getStarted() const { return mStarted; }
 
diff --git a/indra/llcommon/lltrace.h b/indra/llcommon/lltrace.h
index cfe1273b4b2ac84b93fa0f89faf166768c3b6bb0..1bf853c5c067b4fe3bd0e28dce8f1336c13b9d21 100644
--- a/indra/llcommon/lltrace.h
+++ b/indra/llcommon/lltrace.h
@@ -44,27 +44,27 @@ namespace LLTrace
 {
 class Recording;
 
-typedef LLUnit<LLUnits::Bytes, F64>			Bytes;
-typedef LLUnit<LLUnits::Kibibytes, F64>		Kibibytes;
-typedef LLUnit<LLUnits::Mibibytes, F64>		Mibibytes;
-typedef LLUnit<LLUnits::Gibibytes, F64>		Gibibytes;
-typedef LLUnit<LLUnits::Bits, F64>			Bits;
-typedef LLUnit<LLUnits::Kibibits, F64>		Kibibits;
-typedef LLUnit<LLUnits::Mibibits, F64>		Mibibits;
-typedef LLUnit<LLUnits::Gibibits, F64>		Gibibits;
-
-typedef LLUnit<LLUnits::Seconds, F64>		Seconds;
-typedef LLUnit<LLUnits::Milliseconds, F64>	Milliseconds;
-typedef LLUnit<LLUnits::Minutes, F64>		Minutes;
-typedef LLUnit<LLUnits::Hours, F64>			Hours;
-typedef LLUnit<LLUnits::Milliseconds, F64>	Milliseconds;
-typedef LLUnit<LLUnits::Microseconds, F64>	Microseconds;
-typedef LLUnit<LLUnits::Nanoseconds, F64>	Nanoseconds;
-
-typedef LLUnit<LLUnits::Meters, F64>		Meters;
-typedef LLUnit<LLUnits::Kilometers, F64>	Kilometers;
-typedef LLUnit<LLUnits::Centimeters, F64>	Centimeters;
-typedef LLUnit<LLUnits::Millimeters, F64>	Millimeters;
+typedef LLUnit<F64, LLUnits::Bytes>			Bytes;
+typedef LLUnit<F64, LLUnits::Kibibytes>		Kibibytes;
+typedef LLUnit<F64, LLUnits::Mibibytes>		Mibibytes;
+typedef LLUnit<F64, LLUnits::Gibibytes>		Gibibytes;
+typedef LLUnit<F64, LLUnits::Bits>			Bits;
+typedef LLUnit<F64, LLUnits::Kibibits>		Kibibits;
+typedef LLUnit<F64, LLUnits::Mibibits>		Mibibits;
+typedef LLUnit<F64, LLUnits::Gibibits>		Gibibits;
+
+typedef LLUnit<F64, LLUnits::Seconds>		Seconds;
+typedef LLUnit<F64, LLUnits::Milliseconds>	Milliseconds;
+typedef LLUnit<F64, LLUnits::Minutes>		Minutes;
+typedef LLUnit<F64, LLUnits::Hours>			Hours;
+typedef LLUnit<F64, LLUnits::Milliseconds>	Milliseconds;
+typedef LLUnit<F64, LLUnits::Microseconds>	Microseconds;
+typedef LLUnit<F64, LLUnits::Nanoseconds>	Nanoseconds;
+
+typedef LLUnit<F64, LLUnits::Meters>		Meters;
+typedef LLUnit<F64, LLUnits::Kilometers>	Kilometers;
+typedef LLUnit<F64, LLUnits::Centimeters>	Centimeters;
+typedef LLUnit<F64, LLUnits::Millimeters>	Millimeters;
 
 void init();
 void cleanup();
@@ -216,6 +216,11 @@ class AccumulatorBuffer : public LLRefCount
 	}
 
 	size_t size() const
+	{
+		return getNumIndices();
+	}
+
+	static size_t getNumIndices() 
 	{
 		return sNextStorageSlot;
 	}
@@ -263,6 +268,7 @@ class TraceType
 	}
 
 	size_t getIndex() const { return mAccumulatorIndex; }
+	static size_t getNumIndices() { return AccumulatorBuffer<ACCUMULATOR>::getNumIndices(); }
 
 	virtual const char* getUnitLabel() { return ""; }
 
@@ -408,8 +414,8 @@ class SampleAccumulator
 
 	void sample(F64 value)
 	{
-		LLUnitImplicit<LLUnits::Seconds, F64> time_stamp = LLTimer::getTotalSeconds();
-		LLUnitImplicit<LLUnits::Seconds, F64> delta_time = time_stamp - mLastSampleTimeStamp;
+		LLUnitImplicit<F64, LLUnits::Seconds> time_stamp = LLTimer::getTotalSeconds();
+		LLUnitImplicit<F64, LLUnits::Seconds> delta_time = time_stamp - mLastSampleTimeStamp;
 		mLastSampleTimeStamp = time_stamp;
 
 		if (mHasValue)
@@ -498,8 +504,8 @@ class SampleAccumulator
 
 	void flush()
 	{
-		LLUnitImplicit<LLUnits::Seconds, F64> time_stamp = LLTimer::getTotalSeconds();
-		LLUnitImplicit<LLUnits::Seconds, F64> delta_time = time_stamp - mLastSampleTimeStamp;
+		LLUnitImplicit<F64, LLUnits::Seconds> time_stamp = LLTimer::getTotalSeconds();
+		LLUnitImplicit<F64, LLUnits::Seconds> delta_time = time_stamp - mLastSampleTimeStamp;
 
 		if (mHasValue)
 		{
@@ -528,7 +534,7 @@ class SampleAccumulator
 	F64	mMean,
 		mVarianceSum;
 
-	LLUnitImplicit<LLUnits::Seconds, F64>	mLastSampleTimeStamp,
+	LLUnitImplicit<F64, LLUnits::Seconds>	mLastSampleTimeStamp,
 											mTotalSamplingTime;
 
 	U32	mNumSamples;
@@ -578,8 +584,8 @@ class CountAccumulator
 class TimeBlockAccumulator
 {
 public:
-	typedef LLUnit<LLUnits::Seconds, F64> value_t;
-	typedef LLUnit<LLUnits::Seconds, F64> mean_t;
+	typedef LLUnit<F64, LLUnits::Seconds> value_t;
+	typedef LLUnit<F64, LLUnits::Seconds> mean_t;
 	typedef TimeBlockAccumulator self_t;
 
 	// fake classes that allows us to view different facets of underlying statistic
@@ -591,8 +597,8 @@ class TimeBlockAccumulator
 
 	struct SelfTimeFacet
 	{
-		typedef LLUnit<LLUnits::Seconds, F64> value_t;
-		typedef LLUnit<LLUnits::Seconds, F64> mean_t;
+		typedef LLUnit<F64, LLUnits::Seconds> value_t;
+		typedef LLUnit<F64, LLUnits::Seconds> mean_t;
 	};
 
 	TimeBlockAccumulator();
@@ -672,7 +678,7 @@ template<typename T, typename VALUE_T>
 void record(EventStatHandle<T>& measurement, VALUE_T value)
 {
 	T converted_value(value);
-	measurement.getPrimaryAccumulator()->record(LLUnits::rawValue(converted_value));
+	measurement.getPrimaryAccumulator()->record(LLUnits::storageValue(converted_value));
 }
 
 template <typename T = F64>
@@ -694,7 +700,7 @@ template<typename T, typename VALUE_T>
 void sample(SampleStatHandle<T>& measurement, VALUE_T value)
 {
 	T converted_value(value);
-	measurement.getPrimaryAccumulator()->sample(LLUnits::rawValue(converted_value));
+	measurement.getPrimaryAccumulator()->sample(LLUnits::storageValue(converted_value));
 }
 
 template <typename T = F64>
@@ -716,7 +722,7 @@ template<typename T, typename VALUE_T>
 void add(CountStatHandle<T>& count, VALUE_T value)
 {
 	T converted_value(value);
-	count.getPrimaryAccumulator()->add(LLUnits::rawValue(converted_value));
+	count.getPrimaryAccumulator()->add(LLUnits::storageValue(converted_value));
 }
 
 
@@ -739,8 +745,8 @@ struct MemStatAccumulator
 
 	struct ChildMemFacet
 	{
-		typedef LLUnit<LLUnits::Bytes, F64> value_t;
-		typedef LLUnit<LLUnits::Bytes, F64> mean_t;
+		typedef LLUnit<F64, LLUnits::Bytes> value_t;
+		typedef LLUnit<F64, LLUnits::Bytes> mean_t;
 	};
 
 	MemStatAccumulator()
diff --git a/indra/llcommon/lltracerecording.cpp b/indra/llcommon/lltracerecording.cpp
index d32504b0142ec242dd8a3cefc5cabaf05f7462e0..ff90da3822431d42e32a961487b10585dbed83c5 100644
--- a/indra/llcommon/lltracerecording.cpp
+++ b/indra/llcommon/lltracerecording.cpp
@@ -186,26 +186,18 @@ void Recording::handleSplitTo(Recording& other)
 
 void Recording::appendRecording( const Recording& other )
 {
-	EPlayState play_state = getPlayState();
-	{
-		pause();
-		mBuffers.write()->append(*other.mBuffers);
-		mElapsedSeconds += other.mElapsedSeconds;
-	}
-	setPlayState(play_state);
+	update();
+	mBuffers.write()->append(*other.mBuffers);
+	mElapsedSeconds += other.mElapsedSeconds;
 }
 
 void Recording::mergeRecording( const Recording& other)
 {
-	EPlayState play_state = getPlayState();
-	{
-		pause();
-		mBuffers.write()->merge(*other.mBuffers);
-	}
-	setPlayState(play_state);
+	update();
+	mBuffers.write()->merge(*other.mBuffers);
 }
 
-LLUnit<LLUnits::Seconds, F64> Recording::getSum(const TraceType<TimeBlockAccumulator>& stat)
+LLUnit<F64, LLUnits::Seconds> Recording::getSum(const TraceType<TimeBlockAccumulator>& stat)
 {
 	const TimeBlockAccumulator& accumulator = mBuffers->mStackTimers[stat.getIndex()];
 	update();
@@ -213,7 +205,7 @@ LLUnit<LLUnits::Seconds, F64> Recording::getSum(const TraceType<TimeBlockAccumul
 				/ (F64)LLTrace::TimeBlock::countsPerSecond();
 }
 
-LLUnit<LLUnits::Seconds, F64> Recording::getSum(const TraceType<TimeBlockAccumulator::SelfTimeFacet>& stat)
+LLUnit<F64, LLUnits::Seconds> Recording::getSum(const TraceType<TimeBlockAccumulator::SelfTimeFacet>& stat)
 {
 	const TimeBlockAccumulator& accumulator = mBuffers->mStackTimers[stat.getIndex()];
 	update();
@@ -227,85 +219,85 @@ U32 Recording::getSum(const TraceType<TimeBlockAccumulator::CallCountFacet>& sta
 	return mBuffers->mStackTimers[stat.getIndex()].mCalls;
 }
 
-LLUnit<LLUnits::Seconds, F64> Recording::getPerSec(const TraceType<TimeBlockAccumulator>& stat)
+LLUnit<F64, LLUnits::Seconds> Recording::getPerSec(const TraceType<TimeBlockAccumulator>& stat)
 {
 	const TimeBlockAccumulator& accumulator = mBuffers->mStackTimers[stat.getIndex()];
 
 	update();
 	return (F64)(accumulator.mTotalTimeCounter - accumulator.mStartTotalTimeCounter) 
-				/ ((F64)LLTrace::TimeBlock::countsPerSecond() * mElapsedSeconds);
+				/ ((F64)LLTrace::TimeBlock::countsPerSecond() * mElapsedSeconds.value());
 }
 
-LLUnit<LLUnits::Seconds, F64> Recording::getPerSec(const TraceType<TimeBlockAccumulator::SelfTimeFacet>& stat)
+LLUnit<F64, LLUnits::Seconds> Recording::getPerSec(const TraceType<TimeBlockAccumulator::SelfTimeFacet>& stat)
 {
 	const TimeBlockAccumulator& accumulator = mBuffers->mStackTimers[stat.getIndex()];
 
 	update();
 	return (F64)(accumulator.mSelfTimeCounter) 
-			/ ((F64)LLTrace::TimeBlock::countsPerSecond() * mElapsedSeconds);
+			/ ((F64)LLTrace::TimeBlock::countsPerSecond() * mElapsedSeconds.value());
 }
 
 F32 Recording::getPerSec(const TraceType<TimeBlockAccumulator::CallCountFacet>& stat)
 {
 	update();
-	return (F32)mBuffers->mStackTimers[stat.getIndex()].mCalls / mElapsedSeconds;
+	return (F32)mBuffers->mStackTimers[stat.getIndex()].mCalls / mElapsedSeconds.value();
 }
 
-LLUnit<LLUnits::Bytes, F64> Recording::getMin(const TraceType<MemStatAccumulator>& stat)
+LLUnit<F64, LLUnits::Bytes> Recording::getMin(const TraceType<MemStatAccumulator>& stat)
 {
 	update();
 	return mBuffers->mMemStats[stat.getIndex()].mSize.getMin();
 }
 
-LLUnit<LLUnits::Bytes, F64> Recording::getMean(const TraceType<MemStatAccumulator>& stat)
+LLUnit<F64, LLUnits::Bytes> Recording::getMean(const TraceType<MemStatAccumulator>& stat)
 {
 	update();
 	return mBuffers->mMemStats[stat.getIndex()].mSize.getMean();
 }
 
-LLUnit<LLUnits::Bytes, F64> Recording::getMax(const TraceType<MemStatAccumulator>& stat)
+LLUnit<F64, LLUnits::Bytes> Recording::getMax(const TraceType<MemStatAccumulator>& stat)
 {
 	update();
 	return mBuffers->mMemStats[stat.getIndex()].mSize.getMax();
 }
 
-LLUnit<LLUnits::Bytes, F64> Recording::getStandardDeviation(const TraceType<MemStatAccumulator>& stat)
+LLUnit<F64, LLUnits::Bytes> Recording::getStandardDeviation(const TraceType<MemStatAccumulator>& stat)
 {
 	update();
 	return mBuffers->mMemStats[stat.getIndex()].mSize.getStandardDeviation();
 }
 
-LLUnit<LLUnits::Bytes, F64> Recording::getLastValue(const TraceType<MemStatAccumulator>& stat)
+LLUnit<F64, LLUnits::Bytes> Recording::getLastValue(const TraceType<MemStatAccumulator>& stat)
 {
 	update();
 	return mBuffers->mMemStats[stat.getIndex()].mSize.getLastValue();
 }
 
-LLUnit<LLUnits::Bytes, F64> Recording::getMin(const TraceType<MemStatAccumulator::ChildMemFacet>& stat)
+LLUnit<F64, LLUnits::Bytes> Recording::getMin(const TraceType<MemStatAccumulator::ChildMemFacet>& stat)
 {
 	update();
 	return mBuffers->mMemStats[stat.getIndex()].mChildSize.getMin();
 }
 
-LLUnit<LLUnits::Bytes, F64> Recording::getMean(const TraceType<MemStatAccumulator::ChildMemFacet>& stat)
+LLUnit<F64, LLUnits::Bytes> Recording::getMean(const TraceType<MemStatAccumulator::ChildMemFacet>& stat)
 {
 	update();
 	return mBuffers->mMemStats[stat.getIndex()].mChildSize.getMean();
 }
 
-LLUnit<LLUnits::Bytes, F64> Recording::getMax(const TraceType<MemStatAccumulator::ChildMemFacet>& stat)
+LLUnit<F64, LLUnits::Bytes> Recording::getMax(const TraceType<MemStatAccumulator::ChildMemFacet>& stat)
 {
 	update();
 	return mBuffers->mMemStats[stat.getIndex()].mChildSize.getMax();
 }
 
-LLUnit<LLUnits::Bytes, F64> Recording::getStandardDeviation(const TraceType<MemStatAccumulator::ChildMemFacet>& stat)
+LLUnit<F64, LLUnits::Bytes> Recording::getStandardDeviation(const TraceType<MemStatAccumulator::ChildMemFacet>& stat)
 {
 	update();
 	return mBuffers->mMemStats[stat.getIndex()].mChildSize.getStandardDeviation();
 }
 
-LLUnit<LLUnits::Bytes, F64> Recording::getLastValue(const TraceType<MemStatAccumulator::ChildMemFacet>& stat)
+LLUnit<F64, LLUnits::Bytes> Recording::getLastValue(const TraceType<MemStatAccumulator::ChildMemFacet>& stat)
 {
 	update();
 	return mBuffers->mMemStats[stat.getIndex()].mChildSize.getLastValue();
@@ -341,7 +333,7 @@ F64 Recording::getPerSec( const TraceType<CountAccumulator>& stat )
 	update();
 	F64 sum = mBuffers->mCounts[stat.getIndex()].getSum();
 	return  (sum != 0.0) 
-		? (sum / mElapsedSeconds)
+		? (sum / mElapsedSeconds.value())
 		: 0.0;
 }
 
@@ -430,6 +422,7 @@ U32 Recording::getSampleCount( const TraceType<EventAccumulator>& stat )
 PeriodicRecording::PeriodicRecording( U32 num_periods, EPlayState state) 
 :	mAutoResize(num_periods == 0),
 	mCurPeriod(0),
+	mNumPeriods(0),
 	mRecordingPeriods(num_periods ? num_periods : 1)
 {
 	setPlayState(state);
@@ -443,9 +436,20 @@ void PeriodicRecording::nextPeriod()
 	}
 
 	Recording& old_recording = getCurRecording();
-
 	mCurPeriod = (mCurPeriod + 1) % mRecordingPeriods.size();
 	old_recording.splitTo(getCurRecording());
+
+	mNumPeriods = llmin(mRecordingPeriods.size(), mNumPeriods + 1);
+}
+
+void PeriodicRecording::appendRecording(Recording& recording)
+{
+	// if I have a recording of any length, then close it off and start a fresh one
+	if (getCurRecording().getDuration().value())
+	{
+		nextPeriod();
+	}
+	getCurRecording().appendRecording(recording);
 }
 
 
@@ -453,77 +457,77 @@ void PeriodicRecording::appendPeriodicRecording( PeriodicRecording& other )
 {
 	if (other.mRecordingPeriods.empty()) return;
 
-	EPlayState play_state = getPlayState();
-	pause();
-
-	EPlayState other_play_state = other.getPlayState();
-	other.pause();
-
-	U32 other_recording_count = other.mRecordingPeriods.size();
-
-	Recording& other_oldest_recording = other.mRecordingPeriods[(other.mCurPeriod + 1) % other.mRecordingPeriods.size()];
+	getCurRecording().update();
+	other.getCurRecording().update();
 
 	// if I have a recording of any length, then close it off and start a fresh one
 	if (getCurRecording().getDuration().value())
 	{
 		nextPeriod();
 	}
-	getCurRecording().appendRecording(other_oldest_recording);
 
-	if (other_recording_count > 1)
+	if (mAutoResize)
 	{
-		if (mAutoResize)
+		S32 other_index = (other.mCurPeriod + 1) % other.mRecordingPeriods.size();
+		S32 end_index = (other.mCurPeriod) % other.mRecordingPeriods.size(); 
+
+		do
 		{
-			for (S32 other_index = (other.mCurPeriod + 2) % other_recording_count,
-				end_index = (other.mCurPeriod + 1) % other_recording_count; 
-				other_index != end_index; 
-				other_index = (other_index + 1) % other_recording_count)
+			if (other.mRecordingPeriods[other_index].getDuration().value())
 			{
-				llassert(other.mRecordingPeriods[other_index].getDuration() != 0.f 
-							&& (mRecordingPeriods.empty() 
-								|| other.mRecordingPeriods[other_index].getDuration() != mRecordingPeriods.back().getDuration()));
 				mRecordingPeriods.push_back(other.mRecordingPeriods[other_index]);
 			}
-
-			mCurPeriod = mRecordingPeriods.size() - 1;
+			other_index = (other_index + 1) % other.mRecordingPeriods.size();
 		}
-		else
+		while(other_index != end_index);
+
+		mCurPeriod = mRecordingPeriods.size() - 1;
+		mNumPeriods = mRecordingPeriods.size();
+	}
+	else
+	{
+		//FIXME: get proper number of recordings from other...might not have used all its slots
+		size_t num_to_copy = llmin(	mRecordingPeriods.size(), other.getNumRecordedPeriods());
+		std::vector<Recording>::iterator src_it = other.mRecordingPeriods.begin() 
+													+ (	(other.mCurPeriod + 1									// oldest period
+															+ (other.mRecordingPeriods.size() - num_to_copy))	// minus room for copy
+														% other.mRecordingPeriods.size());
+		std::vector<Recording>::iterator dest_it = mRecordingPeriods.begin() + mCurPeriod;
+
+		for(size_t i = 0; i < num_to_copy; i++)
 		{
-			size_t num_to_copy = llmin(	mRecordingPeriods.size(), other.mRecordingPeriods.size() - 1);
-			std::vector<Recording>::iterator src_it = other.mRecordingPeriods.begin() 
-														+ (	(other.mCurPeriod + 1									// oldest period
-																+ (other.mRecordingPeriods.size() - num_to_copy))	// minus room for copy
-															% other.mRecordingPeriods.size());
-			std::vector<Recording>::iterator dest_it = mRecordingPeriods.begin() + ((mCurPeriod + 1) % mRecordingPeriods.size());
-
-			for(S32 i = 0; i < num_to_copy; i++)
-			{
-				*dest_it = *src_it;
+			*dest_it = *src_it;
 
-				if (++src_it == other.mRecordingPeriods.end())
-				{
-					src_it = other.mRecordingPeriods.begin();
-				}
+			if (++src_it == other.mRecordingPeriods.end())
+			{
+				src_it = other.mRecordingPeriods.begin();
+			}
 
-				if (++dest_it == mRecordingPeriods.end())
-				{
-					dest_it = mRecordingPeriods.begin();
-				}
+			if (++dest_it == mRecordingPeriods.end())
+			{
+				dest_it = mRecordingPeriods.begin();
 			}
-		
-			mCurPeriod = (mCurPeriod + num_to_copy) % mRecordingPeriods.size();
 		}
+		
+		// want argument to % to be positive, otherwise result could be negative and thus out of bounds
+		llassert(num_to_copy >= 1);
+		// advance to last recording period copied, so we can check if the last period had actually carried any data, in which case we'll advance below
+		// using nextPeriod() which retains continuity (mLastValue, etc)
+		mCurPeriod = (mCurPeriod + num_to_copy - 1) % mRecordingPeriods.size();
+		mNumPeriods = llmin(mRecordingPeriods.size(), mNumPeriods + num_to_copy);
 	}
 
-	nextPeriod();
-
-	setPlayState(play_state);
-	other.setPlayState(other_play_state);
+	if (getCurRecording().getDuration().value())
+	{
+		//call this to chain last period copied to new active period
+		nextPeriod();
+	}
+	getCurRecording().setPlayState(getPlayState());
 }
 
-LLUnit<LLUnits::Seconds, F64> PeriodicRecording::getDuration() const
+LLUnit<F64, LLUnits::Seconds> PeriodicRecording::getDuration() const
 {
-	LLUnit<LLUnits::Seconds, F64> duration;
+	LLUnit<F64, LLUnits::Seconds> duration;
 	size_t num_periods = mRecordingPeriods.size();
 	for (size_t i = 1; i <= num_periods; i++)
 	{
@@ -615,7 +619,7 @@ void PeriodicRecording::handleSplitTo(PeriodicRecording& other)
 F64 PeriodicRecording::getPeriodMean( const TraceType<EventAccumulator>& stat, size_t num_periods /*= U32_MAX*/ )
 {
 	size_t total_periods = mRecordingPeriods.size();
-	num_periods = llmin(num_periods, total_periods);
+	num_periods = llmin(num_periods, isStarted() ? total_periods - 1 : total_periods);
 
 	F64 mean = 0;
 	if (num_periods <= 0) { return mean; }
@@ -643,7 +647,7 @@ F64 PeriodicRecording::getPeriodMean( const TraceType<EventAccumulator>& stat, s
 F64 PeriodicRecording::getPeriodMin( const TraceType<EventAccumulator>& stat, size_t num_periods /*= U32_MAX*/ )
 {
 	size_t total_periods = mRecordingPeriods.size();
-	num_periods = llmin(num_periods, total_periods);
+	num_periods = llmin(num_periods, isStarted() ? total_periods - 1 : total_periods);
 
 	F64 min_val = std::numeric_limits<F64>::max();
 	for (S32 i = 1; i <= num_periods; i++)
@@ -657,7 +661,7 @@ F64 PeriodicRecording::getPeriodMin( const TraceType<EventAccumulator>& stat, si
 F64 PeriodicRecording::getPeriodMax( const TraceType<EventAccumulator>& stat, size_t num_periods /*= U32_MAX*/ )
 {
 	size_t total_periods = mRecordingPeriods.size();
-	num_periods = llmin(num_periods, total_periods);
+	num_periods = llmin(num_periods, isStarted() ? total_periods - 1 : total_periods);
 
-	F64 max_val = std::numeric_limits<F64>::min();
+	F64 max_val = -std::numeric_limits<F64>::max();	// most negative finite F64; numeric_limits<F64>::min() is the smallest *positive* value and would beat any negative sample
 	for (S32 i = 1; i <= num_periods; i++)
@@ -671,7 +675,7 @@ F64 PeriodicRecording::getPeriodMax( const TraceType<EventAccumulator>& stat, si
 F64 PeriodicRecording::getPeriodMin( const TraceType<SampleAccumulator>& stat, size_t num_periods /*= U32_MAX*/ )
 {
 	size_t total_periods = mRecordingPeriods.size();
-	num_periods = llmin(num_periods, total_periods);
+	num_periods = llmin(num_periods, isStarted() ? total_periods - 1 : total_periods);
 
 	F64 min_val = std::numeric_limits<F64>::max();
 	for (S32 i = 1; i <= num_periods; i++)
@@ -685,7 +689,7 @@ F64 PeriodicRecording::getPeriodMin( const TraceType<SampleAccumulator>& stat, s
 F64 PeriodicRecording::getPeriodMax(const TraceType<SampleAccumulator>& stat, size_t num_periods /*= U32_MAX*/)
 {
 	size_t total_periods = mRecordingPeriods.size();
-	num_periods = llmin(num_periods, total_periods);
+	num_periods = llmin(num_periods, isStarted() ? total_periods - 1 : total_periods);
 
 	F64 max_val = std::numeric_limits<F64>::min();
 	for (S32 i = 1; i <= num_periods; i++)
@@ -700,9 +704,9 @@ F64 PeriodicRecording::getPeriodMax(const TraceType<SampleAccumulator>& stat, si
 F64 PeriodicRecording::getPeriodMean( const TraceType<SampleAccumulator>& stat, size_t num_periods /*= U32_MAX*/ )
 {
 	size_t total_periods = mRecordingPeriods.size();
-	num_periods = llmin(num_periods, total_periods);
+	num_periods = llmin(num_periods, isStarted() ? total_periods - 1 : total_periods);
 
-	LLUnit<LLUnits::Seconds, F64> total_duration = 0.f;
+	LLUnit<F64, LLUnits::Seconds> total_duration = 0.f;
 
 	F64 mean = 0;
 	if (num_periods <= 0) { return mean; }
@@ -712,7 +716,7 @@ F64 PeriodicRecording::getPeriodMean( const TraceType<SampleAccumulator>& stat,
 		S32 index = (mCurPeriod + total_periods - i) % total_periods;
 		if (mRecordingPeriods[index].getDuration() > 0.f)
 		{
-			LLUnit<LLUnits::Seconds, F64> recording_duration = mRecordingPeriods[index].getDuration();
+			LLUnit<F64, LLUnits::Seconds> recording_duration = mRecordingPeriods[index].getDuration();
 			mean += mRecordingPeriods[index].getMean(stat) * recording_duration.value();
 			total_duration += recording_duration;
 		}
@@ -734,13 +738,11 @@ F64 PeriodicRecording::getPeriodMean( const TraceType<SampleAccumulator>& stat,
 void ExtendableRecording::extend()
 {
 	// stop recording to get latest data
-	mPotentialRecording.stop();
+	mPotentialRecording.update();
 	// push the data back to accepted recording
 	mAcceptedRecording.appendRecording(mPotentialRecording);
 	// flush data, so we can start from scratch
 	mPotentialRecording.reset();
-	// go back to play state we were in initially
-	mPotentialRecording.setPlayState(getPlayState());
 }
 
 void ExtendableRecording::handleStart()
@@ -777,15 +779,10 @@ ExtendablePeriodicRecording::ExtendablePeriodicRecording()
 
 void ExtendablePeriodicRecording::extend()
 {
-	llassert(mPotentialRecording.getPlayState() == getPlayState());
-	// stop recording to get latest data
-	mPotentialRecording.pause();
 	// push the data back to accepted recording
 	mAcceptedRecording.appendPeriodicRecording(mPotentialRecording);
 	// flush data, so we can start from scratch
 	mPotentialRecording.reset();
-	// go back to play state we were in initially
-	mPotentialRecording.setPlayState(getPlayState());
 }
 
 
diff --git a/indra/llcommon/lltracerecording.h b/indra/llcommon/lltracerecording.h
index 4651bfcb61d7c09f58f954ea182cf29ea1c5747a..e3cef77b06251e92ce1044dbb7dfdd6c4b69436b 100644
--- a/indra/llcommon/lltracerecording.h
+++ b/indra/llcommon/lltracerecording.h
@@ -148,26 +148,26 @@ namespace LLTrace
 		void makeUnique() { mBuffers.makeUnique(); }
 
 		// Timer accessors
-		LLUnit<LLUnits::Seconds, F64> getSum(const TraceType<TimeBlockAccumulator>& stat);
-		LLUnit<LLUnits::Seconds, F64> getSum(const TraceType<TimeBlockAccumulator::SelfTimeFacet>& stat);
+		LLUnit<F64, LLUnits::Seconds> getSum(const TraceType<TimeBlockAccumulator>& stat);
+		LLUnit<F64, LLUnits::Seconds> getSum(const TraceType<TimeBlockAccumulator::SelfTimeFacet>& stat);
 		U32 getSum(const TraceType<TimeBlockAccumulator::CallCountFacet>& stat);
 
-		LLUnit<LLUnits::Seconds, F64> getPerSec(const TraceType<TimeBlockAccumulator>& stat);
-		LLUnit<LLUnits::Seconds, F64> getPerSec(const TraceType<TimeBlockAccumulator::SelfTimeFacet>& stat);
+		LLUnit<F64, LLUnits::Seconds> getPerSec(const TraceType<TimeBlockAccumulator>& stat);
+		LLUnit<F64, LLUnits::Seconds> getPerSec(const TraceType<TimeBlockAccumulator::SelfTimeFacet>& stat);
 		F32 getPerSec(const TraceType<TimeBlockAccumulator::CallCountFacet>& stat);
 
 		// Memory accessors
-		LLUnit<LLUnits::Bytes, F64> getMin(const TraceType<MemStatAccumulator>& stat);
-		LLUnit<LLUnits::Bytes, F64> getMean(const TraceType<MemStatAccumulator>& stat);
-		LLUnit<LLUnits::Bytes, F64> getMax(const TraceType<MemStatAccumulator>& stat);
-		LLUnit<LLUnits::Bytes, F64> getStandardDeviation(const TraceType<MemStatAccumulator>& stat);
-		LLUnit<LLUnits::Bytes, F64> getLastValue(const TraceType<MemStatAccumulator>& stat);
-
-		LLUnit<LLUnits::Bytes, F64> getMin(const TraceType<MemStatAccumulator::ChildMemFacet>& stat);
-		LLUnit<LLUnits::Bytes, F64> getMean(const TraceType<MemStatAccumulator::ChildMemFacet>& stat);
-		LLUnit<LLUnits::Bytes, F64> getMax(const TraceType<MemStatAccumulator::ChildMemFacet>& stat);
-		LLUnit<LLUnits::Bytes, F64> getStandardDeviation(const TraceType<MemStatAccumulator::ChildMemFacet>& stat);
-		LLUnit<LLUnits::Bytes, F64> getLastValue(const TraceType<MemStatAccumulator::ChildMemFacet>& stat);
+		LLUnit<F64, LLUnits::Bytes> getMin(const TraceType<MemStatAccumulator>& stat);
+		LLUnit<F64, LLUnits::Bytes> getMean(const TraceType<MemStatAccumulator>& stat);
+		LLUnit<F64, LLUnits::Bytes> getMax(const TraceType<MemStatAccumulator>& stat);
+		LLUnit<F64, LLUnits::Bytes> getStandardDeviation(const TraceType<MemStatAccumulator>& stat);
+		LLUnit<F64, LLUnits::Bytes> getLastValue(const TraceType<MemStatAccumulator>& stat);
+
+		LLUnit<F64, LLUnits::Bytes> getMin(const TraceType<MemStatAccumulator::ChildMemFacet>& stat);
+		LLUnit<F64, LLUnits::Bytes> getMean(const TraceType<MemStatAccumulator::ChildMemFacet>& stat);
+		LLUnit<F64, LLUnits::Bytes> getMax(const TraceType<MemStatAccumulator::ChildMemFacet>& stat);
+		LLUnit<F64, LLUnits::Bytes> getStandardDeviation(const TraceType<MemStatAccumulator::ChildMemFacet>& stat);
+		LLUnit<F64, LLUnits::Bytes> getLastValue(const TraceType<MemStatAccumulator::ChildMemFacet>& stat);
 
 		U32 getSum(const TraceType<MemStatAccumulator::AllocationCountFacet>& stat);
 		U32 getSum(const TraceType<MemStatAccumulator::DeallocationCountFacet>& stat);
@@ -273,7 +273,7 @@ namespace LLTrace
 
 		U32 getSampleCount(const TraceType<EventAccumulator>& stat);
 
-		LLUnit<LLUnits::Seconds, F64> getDuration() const { return LLUnit<LLUnits::Seconds, F64>(mElapsedSeconds); }
+		LLUnit<F64, LLUnits::Seconds> getDuration() const { return mElapsedSeconds; }
 
 	protected:
 		friend class ThreadRecorder;
@@ -288,7 +288,7 @@ namespace LLTrace
 		class ThreadRecorder* getThreadRecorder(); 
 
 		LLTimer				mSamplingTimer;
-		F64					mElapsedSeconds;
+		LLUnit<F64, LLUnits::Seconds>			mElapsedSeconds;
 		LLCopyOnWritePointer<RecordingBuffers>	mBuffers;
 	};
 
@@ -299,11 +299,12 @@ namespace LLTrace
 		PeriodicRecording(U32 num_periods, EPlayState state = STOPPED);
 
 		void nextPeriod();
-		U32 getNumPeriods() { return mRecordingPeriods.size(); }
+		size_t getNumRecordedPeriods() { return mNumPeriods; }
 
-		LLUnit<LLUnits::Seconds, F64> getDuration() const;
+		LLUnit<F64, LLUnits::Seconds> getDuration() const;
 
 		void appendPeriodicRecording(PeriodicRecording& other);
+		void appendRecording(Recording& recording);
 		Recording& getLastRecording();
 		const Recording& getLastRecording() const;
 		Recording& getCurRecording();
@@ -317,7 +318,7 @@ namespace LLTrace
 		typename T::value_t getPeriodMin(const TraceType<T>& stat, size_t num_periods = U32_MAX)
 		{
 			size_t total_periods = mRecordingPeriods.size();
-			num_periods = llmin(num_periods, total_periods);
+			num_periods = llmin(num_periods, isStarted() ? total_periods - 1 : total_periods);
 
 			typename T::value_t min_val = std::numeric_limits<typename T::value_t>::max();
 			for (S32 i = 1; i <= num_periods; i++)
@@ -346,7 +347,7 @@ namespace LLTrace
 		F64 getPeriodMinPerSec(const TraceType<T>& stat, size_t num_periods = U32_MAX)
 		{
 			size_t total_periods = mRecordingPeriods.size();
-			num_periods = llmin(num_periods, total_periods);
+			num_periods = llmin(num_periods, isStarted() ? total_periods - 1 : total_periods);
 
 			F64 min_val = std::numeric_limits<F64>::max();
 			for (S32 i = 1; i <= num_periods; i++)
@@ -362,7 +363,7 @@ namespace LLTrace
 		typename T::value_t getPeriodMax(const TraceType<T>& stat, size_t num_periods = U32_MAX)
 		{
 			size_t total_periods = mRecordingPeriods.size();
-			num_periods = llmin(num_periods, total_periods);
+			num_periods = llmin(num_periods, isStarted() ? total_periods - 1 : total_periods);
 
 			typename T::value_t max_val = std::numeric_limits<typename T::value_t>::min();
 			for (S32 i = 1; i <= num_periods; i++)
@@ -391,7 +392,7 @@ namespace LLTrace
 		F64 getPeriodMaxPerSec(const TraceType<T>& stat, size_t num_periods = U32_MAX)
 		{
 			size_t total_periods = mRecordingPeriods.size();
-			num_periods = llmin(num_periods, total_periods);
+			num_periods = llmin(num_periods, isStarted() ? total_periods - 1 : total_periods);
 
 			F64 max_val = std::numeric_limits<F64>::min();
 			for (S32 i = 1; i <= num_periods; i++)
@@ -407,7 +408,7 @@ namespace LLTrace
 		typename T::mean_t getPeriodMean(const TraceType<T >& stat, size_t num_periods = U32_MAX)
 		{
 			size_t total_periods = mRecordingPeriods.size();
-			num_periods = llmin(num_periods, total_periods);
+			num_periods = llmin(num_periods, isStarted() ? total_periods - 1 : total_periods);
 
 			typename T::mean_t mean = 0;
 			if (num_periods <= 0) { return mean; }
@@ -442,7 +443,7 @@ namespace LLTrace
 		typename T::mean_t getPeriodMeanPerSec(const TraceType<T>& stat, size_t num_periods = U32_MAX)
 		{
 			size_t total_periods = mRecordingPeriods.size();
-			num_periods = llmin(num_periods, total_periods);
+			num_periods = llmin(num_periods, isStarted() ? total_periods - 1 : total_periods);
 
 			typename T::mean_t mean = 0;
 			if (num_periods <= 0) { return mean; }
@@ -468,8 +469,9 @@ namespace LLTrace
 
 	private:
 		std::vector<Recording>	mRecordingPeriods;
-		const bool	mAutoResize;
-		S32			mCurPeriod;
+		const bool				mAutoResize;
+		size_t					mCurPeriod;
+		size_t					mNumPeriods;
 	};
 
 	PeriodicRecording& get_frame_recording();
diff --git a/indra/llcommon/llunit.h b/indra/llcommon/llunit.h
index f48cbe0e11ffeb29edc3c81f011978e0a0a4f4c9..5b961c81f068c3fc3ed49da3bf94b28e2245fa4a 100644
--- a/indra/llcommon/llunit.h
+++ b/indra/llcommon/llunit.h
@@ -35,31 +35,31 @@ namespace LLUnits
 {
 
 template<typename DERIVED_UNITS_TAG, typename BASE_UNITS_TAG, typename VALUE_TYPE>
-struct ConversionFactor
+struct Convert
 {
-	static F64 get()
+	static VALUE_TYPE get(VALUE_TYPE val)
 	{
 		// spurious use of dependent type to stop gcc from triggering the static assertion before instantiating the template
 		llstatic_assert_template(DERIVED_UNITS_TAG, false,  "Cannot convert between types.");
-        return 0;
+        return val;
 	}
 };
 
 template<typename BASE_UNITS_TAG, typename VALUE_TYPE>
-struct ConversionFactor<BASE_UNITS_TAG, BASE_UNITS_TAG, VALUE_TYPE>
+struct Convert<BASE_UNITS_TAG, BASE_UNITS_TAG, VALUE_TYPE>
 {
-	static F64 get() 
+	static VALUE_TYPE get(VALUE_TYPE val)
 	{ 
-		return 1; 
+		return val; 
 	}
 };
 
 }
 
-template<typename UNIT_TYPE, typename STORAGE_TYPE>
+template<typename STORAGE_TYPE, typename UNIT_TYPE>
 struct LLUnit
 {
-	typedef LLUnit<UNIT_TYPE, STORAGE_TYPE> self_t;
+	typedef LLUnit<STORAGE_TYPE, UNIT_TYPE> self_t;
 	typedef STORAGE_TYPE storage_t;
 
 	// value initialization
@@ -68,11 +68,16 @@ struct LLUnit
 	{}
 
 	// unit initialization and conversion
-	template<typename OTHER_UNIT, typename OTHER_STORAGE>
-	LLUnit(LLUnit<OTHER_UNIT, OTHER_STORAGE> other)
+	template<typename OTHER_STORAGE, typename OTHER_UNIT>
+	LLUnit(LLUnit<OTHER_STORAGE, OTHER_UNIT> other)
 	:	mValue(convert(other))
 	{}
 	
+	bool operator == (const self_t& other) const	// equality against the same unit/storage type; does not modify *this
+	{
+		return mValue == other.mValue;	// compare stored values (comparison, not assignment)
+	}
+
 	// value assignment
 	self_t& operator = (storage_t value)
 	{
@@ -81,8 +86,8 @@ struct LLUnit
 	}
 
 	// unit assignment
-	template<typename OTHER_UNIT, typename OTHER_STORAGE>
-	self_t& operator = (LLUnit<OTHER_UNIT, OTHER_STORAGE> other)
+	template<typename OTHER_STORAGE, typename OTHER_UNIT>
+	self_t& operator = (LLUnit<OTHER_STORAGE, OTHER_UNIT> other)
 	{
 		mValue = convert(other);
 		return *this;
@@ -93,9 +98,9 @@ struct LLUnit
 		return mValue;
 	}
 
-	template<typename NEW_UNIT_TYPE> LLUnit<NEW_UNIT_TYPE, STORAGE_TYPE> as()
+	template<typename NEW_UNIT_TYPE> LLUnit<STORAGE_TYPE, NEW_UNIT_TYPE> as()
 	{
-		return LLUnit<NEW_UNIT_TYPE, STORAGE_TYPE>(*this);
+		return LLUnit<STORAGE_TYPE, NEW_UNIT_TYPE>(*this);
 	}
 
 
@@ -104,8 +109,8 @@ struct LLUnit
 		mValue += value;
 	}
 
-	template<typename OTHER_UNIT, typename OTHER_STORAGE>
-	void operator += (LLUnit<OTHER_UNIT, OTHER_STORAGE> other)
+	template<typename OTHER_STORAGE, typename OTHER_UNIT>
+	void operator += (LLUnit<OTHER_STORAGE, OTHER_UNIT> other)
 	{
 		mValue += convert(other);
 	}
@@ -115,8 +120,8 @@ struct LLUnit
 		mValue -= value;
 	}
 
-	template<typename OTHER_UNIT, typename OTHER_STORAGE>
-	void operator -= (LLUnit<OTHER_UNIT, OTHER_STORAGE> other)
+	template<typename OTHER_STORAGE, typename OTHER_UNIT>
+	void operator -= (LLUnit<OTHER_STORAGE, OTHER_UNIT> other)
 	{
 		mValue -= convert(other);
 	}
@@ -127,7 +132,7 @@ struct LLUnit
 	}
 
 	template<typename OTHER_UNIT, typename OTHER_STORAGE>
-	void operator *= (LLUnit<OTHER_UNIT, OTHER_STORAGE> multiplicand)
+	void operator *= (LLUnit<OTHER_STORAGE, OTHER_UNIT> multiplicand)
 	{
 		// spurious use of dependent type to stop gcc from triggering the static assertion before instantiating the template
 		llstatic_assert_template(OTHER_UNIT, false, "Multiplication of unit types not supported.");
@@ -139,37 +144,43 @@ struct LLUnit
 	}
 
 	template<typename OTHER_UNIT, typename OTHER_STORAGE>
-	void operator /= (LLUnit<OTHER_UNIT, OTHER_STORAGE> divisor)
+	void operator /= (LLUnit<OTHER_STORAGE, OTHER_UNIT> divisor)
 	{
 		// spurious use of dependent type to stop gcc from triggering the static assertion before instantiating the template
 		llstatic_assert_template(OTHER_UNIT, false, "Illegal in-place division of unit types.");
 	}
 
-	template<typename SOURCE_UNITS, typename SOURCE_STORAGE>
-	static storage_t convert(LLUnit<SOURCE_UNITS, SOURCE_STORAGE> v) 
+	template<typename SOURCE_STORAGE, typename SOURCE_UNITS>
+	static storage_t convert(LLUnit<SOURCE_STORAGE, SOURCE_UNITS> v) 
 	{ 
-		return (storage_t)(v.value() 
-			* LLUnits::ConversionFactor<SOURCE_UNITS, typename UNIT_TYPE::base_unit_t, SOURCE_STORAGE>::get() 
-			* LLUnits::ConversionFactor<typename UNIT_TYPE::base_unit_t, UNIT_TYPE, STORAGE_TYPE>::get()); 
+		return (storage_t)LLUnits::Convert<typename UNIT_TYPE::base_unit_t, UNIT_TYPE, STORAGE_TYPE>::get((STORAGE_TYPE)
+							LLUnits::Convert<SOURCE_UNITS, typename UNIT_TYPE::base_unit_t, SOURCE_STORAGE>::get(v.value())); 
 	}
 
+	template<typename SOURCE_STORAGE>
+	static storage_t convert(LLUnit<SOURCE_STORAGE, UNIT_TYPE> v) 
+	{ 
+		return (storage_t)(v.value());
+	}
+
+
 protected:
 	storage_t mValue;
 };
 
-template<typename UNIT_TYPE, typename STORAGE_TYPE>
-struct LLUnitImplicit : public LLUnit<UNIT_TYPE, STORAGE_TYPE>
+template<typename STORAGE_TYPE, typename UNIT_TYPE>
+struct LLUnitImplicit : public LLUnit<STORAGE_TYPE, UNIT_TYPE>
 {
-	typedef LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE> self_t;
-	typedef typename LLUnit<UNIT_TYPE, STORAGE_TYPE>::storage_t storage_t;
-	typedef LLUnit<UNIT_TYPE, STORAGE_TYPE> base_t;
+	typedef LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE> self_t;
+	typedef typename LLUnit<STORAGE_TYPE, UNIT_TYPE>::storage_t storage_t;
+	typedef LLUnit<STORAGE_TYPE, UNIT_TYPE> base_t;
 
 	LLUnitImplicit(storage_t value = storage_t())
 	:	base_t(value)
 	{}
 
-	template<typename OTHER_UNIT, typename OTHER_STORAGE>
-	LLUnitImplicit(LLUnit<OTHER_UNIT, OTHER_STORAGE> other)
+	template<typename OTHER_STORAGE, typename OTHER_UNIT>
+	LLUnitImplicit(LLUnit<OTHER_STORAGE, OTHER_UNIT> other)
 	:	base_t(convert(other))
 	{}
 
@@ -184,50 +195,50 @@ struct LLUnitImplicit : public LLUnit<UNIT_TYPE, STORAGE_TYPE>
 //
 // operator +
 //
-template<typename UNIT_TYPE1, typename STORAGE_TYPE1, typename UNIT_TYPE2, typename STORAGE_TYPE2>
-LLUnit<UNIT_TYPE1, STORAGE_TYPE1> operator + (LLUnit<UNIT_TYPE1, STORAGE_TYPE1> first, LLUnit<UNIT_TYPE2, STORAGE_TYPE2> second)
+template<typename STORAGE_TYPE1, typename UNIT_TYPE1, typename STORAGE_TYPE2, typename UNIT_TYPE2>
+LLUnit<STORAGE_TYPE1, UNIT_TYPE1> operator + (LLUnit<STORAGE_TYPE1, UNIT_TYPE1> first, LLUnit<STORAGE_TYPE2, UNIT_TYPE2> second)
 {
-	LLUnit<UNIT_TYPE1, STORAGE_TYPE1> result(first);
+	LLUnit<STORAGE_TYPE1, UNIT_TYPE1> result(first);
 	result += second;
 	return result;
 }
 
-template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
-LLUnit<UNIT_TYPE, STORAGE_TYPE> operator + (LLUnit<UNIT_TYPE, STORAGE_TYPE> first, SCALAR_TYPE second)
+template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
+LLUnit<STORAGE_TYPE, UNIT_TYPE> operator + (LLUnit<STORAGE_TYPE, UNIT_TYPE> first, SCALAR_TYPE second)
 {
-	LLUnit<UNIT_TYPE, STORAGE_TYPE> result(first);
+	LLUnit<STORAGE_TYPE, UNIT_TYPE> result(first);
 	result += second;
 	return result;
 }
 
-template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
-LLUnit<UNIT_TYPE, STORAGE_TYPE> operator + (SCALAR_TYPE first, LLUnit<UNIT_TYPE, STORAGE_TYPE> second)
+template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
+LLUnit<STORAGE_TYPE, UNIT_TYPE> operator + (SCALAR_TYPE first, LLUnit<STORAGE_TYPE, UNIT_TYPE> second)
 {
-	LLUnit<UNIT_TYPE, STORAGE_TYPE> result(first);
+	LLUnit<STORAGE_TYPE, UNIT_TYPE> result(first);
 	result += second;
 	return result;
 }
 
-template<typename UNIT_TYPE1, typename STORAGE_TYPE1, typename UNIT_TYPE2, typename STORAGE_TYPE2>
-LLUnitImplicit<UNIT_TYPE1, STORAGE_TYPE1> operator + (LLUnitImplicit<UNIT_TYPE1, STORAGE_TYPE1> first, LLUnit<UNIT_TYPE2, STORAGE_TYPE2> second)
+template<typename STORAGE_TYPE1, typename UNIT_TYPE1, typename STORAGE_TYPE2, typename UNIT_TYPE2>
+LLUnitImplicit<STORAGE_TYPE1, UNIT_TYPE1> operator + (LLUnitImplicit<STORAGE_TYPE1, UNIT_TYPE1> first, LLUnit<STORAGE_TYPE2, UNIT_TYPE2> second)
 {
-	LLUnitImplicit<UNIT_TYPE1, STORAGE_TYPE1> result(first);
+	LLUnitImplicit<STORAGE_TYPE1, UNIT_TYPE1> result(first);
 	result += second;
 	return result;
 }
 
-template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
-LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE> operator + (LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE> first, SCALAR_TYPE second)
+template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
+LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE> operator + (LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE> first, SCALAR_TYPE second)
 {
-	LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE> result(first);
+	LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE> result(first);
 	result += second;
 	return result;
 }
 
-template<typename UNIT_TYPE1, typename STORAGE_TYPE1, typename UNIT_TYPE2, typename STORAGE_TYPE2>
-LLUnitImplicit<UNIT_TYPE1, STORAGE_TYPE1> operator + (LLUnitImplicit<UNIT_TYPE1, STORAGE_TYPE1> first, LLUnitImplicit<UNIT_TYPE2, STORAGE_TYPE2> second)
+template<typename STORAGE_TYPE1, typename UNIT_TYPE1, typename STORAGE_TYPE2, typename UNIT_TYPE2>
+LLUnitImplicit<STORAGE_TYPE1, UNIT_TYPE1> operator + (LLUnitImplicit<STORAGE_TYPE1, UNIT_TYPE1> first, LLUnitImplicit<STORAGE_TYPE2, UNIT_TYPE2> second)
 {
-	LLUnitImplicit<UNIT_TYPE1, STORAGE_TYPE1> result(first);
+	LLUnitImplicit<STORAGE_TYPE1, UNIT_TYPE1> result(first);
 	result += second;
 	return result;
 }
@@ -235,50 +246,50 @@ LLUnitImplicit<UNIT_TYPE1, STORAGE_TYPE1> operator + (LLUnitImplicit<UNIT_TYPE1,
 //
 // operator -
 //
-template<typename UNIT_TYPE1, typename STORAGE_TYPE1, typename UNIT_TYPE2, typename STORAGE_TYPE2>
-LLUnit<UNIT_TYPE1, STORAGE_TYPE1> operator - (LLUnit<UNIT_TYPE1, STORAGE_TYPE1> first, LLUnit<UNIT_TYPE2, STORAGE_TYPE2> second)
+template<typename STORAGE_TYPE1, typename UNIT_TYPE1, typename STORAGE_TYPE2, typename UNIT_TYPE2>
+LLUnit<STORAGE_TYPE1, UNIT_TYPE1> operator - (LLUnit<STORAGE_TYPE1, UNIT_TYPE1> first, LLUnit<STORAGE_TYPE2, UNIT_TYPE2> second)
 {
-	LLUnit<UNIT_TYPE1, STORAGE_TYPE1> result(first);
+	LLUnit<STORAGE_TYPE1, UNIT_TYPE1> result(first);
 	result -= second;
 	return result;
 }
 
-template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
-LLUnit<UNIT_TYPE, STORAGE_TYPE> operator - (LLUnit<UNIT_TYPE, STORAGE_TYPE> first, SCALAR_TYPE second)
+template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
+LLUnit<STORAGE_TYPE, UNIT_TYPE> operator - (LLUnit<STORAGE_TYPE, UNIT_TYPE> first, SCALAR_TYPE second)
 {
-	LLUnit<UNIT_TYPE, STORAGE_TYPE> result(first);
+	LLUnit<STORAGE_TYPE, UNIT_TYPE> result(first);
 	result -= second;
 	return result;
 }
 
-template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
-LLUnit<UNIT_TYPE, STORAGE_TYPE> operator - (SCALAR_TYPE first, LLUnit<UNIT_TYPE, STORAGE_TYPE> second)
+template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
+LLUnit<STORAGE_TYPE, UNIT_TYPE> operator - (SCALAR_TYPE first, LLUnit<STORAGE_TYPE, UNIT_TYPE> second)
 {
-	LLUnit<UNIT_TYPE, STORAGE_TYPE> result(first);
+	LLUnit<STORAGE_TYPE, UNIT_TYPE> result(first);
 	result -= second;
 	return result;
 }
 
-template<typename UNIT_TYPE1, typename STORAGE_TYPE1, typename UNIT_TYPE2, typename STORAGE_TYPE2>
-LLUnitImplicit<UNIT_TYPE1, STORAGE_TYPE1> operator - (LLUnitImplicit<UNIT_TYPE1, STORAGE_TYPE1> first, LLUnitImplicit<UNIT_TYPE2, STORAGE_TYPE2> second)
+template<typename STORAGE_TYPE1, typename UNIT_TYPE1, typename STORAGE_TYPE2, typename UNIT_TYPE2>
+LLUnitImplicit<STORAGE_TYPE1, UNIT_TYPE1> operator - (LLUnitImplicit<STORAGE_TYPE1, UNIT_TYPE1> first, LLUnitImplicit<STORAGE_TYPE2, UNIT_TYPE2> second)
 {
-	LLUnitImplicit<UNIT_TYPE1, STORAGE_TYPE1> result(first);
+	LLUnitImplicit<STORAGE_TYPE1, UNIT_TYPE1> result(first);
 	result -= second;
 	return result;
 }
 
-template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
-LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE> operator - (LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE> first, SCALAR_TYPE second)
+template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
+LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE> operator - (LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE> first, SCALAR_TYPE second)
 {
-	LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE> result(first);
+	LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE> result(first);
 	result -= second;
 	return result;
 }
 
-template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
-LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE> operator - (SCALAR_TYPE first, LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE> second)
+template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
+LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE> operator - (SCALAR_TYPE first, LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE> second)
 {
-	LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE> result(first);
+	LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE> result(first);
 	result -= second;
 	return result;
 }
@@ -286,102 +297,100 @@ LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE> operator - (SCALAR_TYPE first, LLUnitImp
 //
 // operator *
 //
-template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
-LLUnit<UNIT_TYPE, STORAGE_TYPE> operator * (SCALAR_TYPE first, LLUnit<UNIT_TYPE, STORAGE_TYPE> second)
+template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
+LLUnit<STORAGE_TYPE, UNIT_TYPE> operator * (SCALAR_TYPE first, LLUnit<STORAGE_TYPE, UNIT_TYPE> second)
 {
-	return LLUnit<UNIT_TYPE, STORAGE_TYPE>((STORAGE_TYPE)(first * second.value()));
+	return LLUnit<STORAGE_TYPE, UNIT_TYPE>((STORAGE_TYPE)(first * second.value()));
 }
 
-template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
-LLUnit<UNIT_TYPE, STORAGE_TYPE> operator * (LLUnit<UNIT_TYPE, STORAGE_TYPE> first, SCALAR_TYPE second)
+template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
+LLUnit<STORAGE_TYPE, UNIT_TYPE> operator * (LLUnit<STORAGE_TYPE, UNIT_TYPE> first, SCALAR_TYPE second)
 {
-	return LLUnit<UNIT_TYPE, STORAGE_TYPE>((STORAGE_TYPE)(first.value() * second));
+	return LLUnit<STORAGE_TYPE, UNIT_TYPE>((STORAGE_TYPE)(first.value() * second));
 }
 
-template<typename UNIT_TYPE1, typename STORAGE_TYPE1, typename UNIT_TYPE2, typename STORAGE_TYPE2>
-LLUnit<UNIT_TYPE1, STORAGE_TYPE1> operator * (LLUnit<UNIT_TYPE1, STORAGE_TYPE1>, LLUnit<UNIT_TYPE2, STORAGE_TYPE2>)
+template<typename STORAGE_TYPE1, typename UNIT_TYPE1, typename STORAGE_TYPE2, typename UNIT_TYPE2>
+LLUnit<STORAGE_TYPE1, UNIT_TYPE1> operator * (LLUnit<STORAGE_TYPE1, UNIT_TYPE1>, LLUnit<STORAGE_TYPE2, UNIT_TYPE2>)
 {
 	// spurious use of dependent type to stop gcc from triggering the static assertion before instantiating the template
 	llstatic_assert_template(STORAGE_TYPE1, false, "Multiplication of unit types results in new unit type - not supported.");
-	return LLUnit<UNIT_TYPE1, STORAGE_TYPE1>();
+	return LLUnit<STORAGE_TYPE1, UNIT_TYPE1>();
 }
 
-template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
-LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE> operator * (SCALAR_TYPE first, LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE> second)
+template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
+LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE> operator * (SCALAR_TYPE first, LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE> second)
 {
-	return LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE>(first * second.value());
+	return LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE>(first * second.value());
 }
 
-template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
-LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE> operator * (LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE> first, SCALAR_TYPE second)
+template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
+LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE> operator * (LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE> first, SCALAR_TYPE second)
 {
-	return LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE>(first.value() * second);
+	return LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE>(first.value() * second);
 }
 
-template<typename UNIT_TYPE1, typename STORAGE_TYPE1, typename UNIT_TYPE2, typename STORAGE_TYPE2>
-LLUnitImplicit<UNIT_TYPE1, STORAGE_TYPE1> operator * (LLUnitImplicit<UNIT_TYPE1, STORAGE_TYPE1>, LLUnitImplicit<UNIT_TYPE2, STORAGE_TYPE2>)
+template<typename STORAGE_TYPE1, typename UNIT_TYPE1, typename STORAGE_TYPE2, typename UNIT_TYPE2>
+LLUnitImplicit<STORAGE_TYPE1, UNIT_TYPE1> operator * (LLUnitImplicit<STORAGE_TYPE1, UNIT_TYPE1>, LLUnitImplicit<STORAGE_TYPE2, UNIT_TYPE2>)
 {
 	// spurious use of dependent type to stop gcc from triggering the static assertion before instantiating the template
 	llstatic_assert_template(STORAGE_TYPE1, false, "Multiplication of unit types results in new unit type - not supported.");
-	return LLUnitImplicit<UNIT_TYPE1, STORAGE_TYPE1>();
+	return LLUnitImplicit<STORAGE_TYPE1, UNIT_TYPE1>();
 }
 
 //
 // operator /
 //
-template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
-SCALAR_TYPE operator / (SCALAR_TYPE first, LLUnit<UNIT_TYPE, STORAGE_TYPE> second)
+template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
+SCALAR_TYPE operator / (SCALAR_TYPE first, LLUnit<STORAGE_TYPE, UNIT_TYPE> second)
 {
 	return SCALAR_TYPE(first / second.value());
 }
 
-template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
-LLUnit<UNIT_TYPE, STORAGE_TYPE> operator / (LLUnit<UNIT_TYPE, STORAGE_TYPE> first, SCALAR_TYPE second)
+template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
+LLUnit<STORAGE_TYPE, UNIT_TYPE> operator / (LLUnit<STORAGE_TYPE, UNIT_TYPE> first, SCALAR_TYPE second)
 {
-	return LLUnit<UNIT_TYPE, STORAGE_TYPE>((STORAGE_TYPE)(first.value() / second));
+	return LLUnit<STORAGE_TYPE, UNIT_TYPE>((STORAGE_TYPE)(first.value() / second));
 }
 
-template<typename UNIT_TYPE1, typename STORAGE_TYPE1, typename UNIT_TYPE2, typename STORAGE_TYPE2>
-STORAGE_TYPE1 operator / (LLUnit<UNIT_TYPE1, STORAGE_TYPE1> first, LLUnit<UNIT_TYPE2, STORAGE_TYPE2> second)
+template<typename STORAGE_TYPE1, typename UNIT_TYPE1, typename STORAGE_TYPE2, typename UNIT_TYPE2>
+STORAGE_TYPE1 operator / (LLUnit<STORAGE_TYPE1, UNIT_TYPE1> first, LLUnit<STORAGE_TYPE2, UNIT_TYPE2> second)
 {
-	// spurious use of dependent type to stop gcc from triggering the static assertion before instantiating the template
-	return STORAGE_TYPE1(first.value() / second.value());
+	return STORAGE_TYPE1(first.value() / first.convert(second));
 }
 
-template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>
-LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE> operator / (LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE> first, SCALAR_TYPE second)
+template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>
+LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE> operator / (LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE> first, SCALAR_TYPE second)
 {
-	return LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE>((STORAGE_TYPE)(first.value() / second));
+	return LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE>((STORAGE_TYPE)(first.value() / second));
 }
 
-template<typename UNIT_TYPE1, typename STORAGE_TYPE1, typename UNIT_TYPE2, typename STORAGE_TYPE2>
-STORAGE_TYPE1 operator / (LLUnitImplicit<UNIT_TYPE1, STORAGE_TYPE1> first, LLUnitImplicit<UNIT_TYPE2, STORAGE_TYPE2> second)
+template<typename STORAGE_TYPE1, typename UNIT_TYPE1, typename STORAGE_TYPE2, typename UNIT_TYPE2>
+STORAGE_TYPE1 operator / (LLUnitImplicit<STORAGE_TYPE1, UNIT_TYPE1> first, LLUnitImplicit<STORAGE_TYPE2, UNIT_TYPE2> second)
 {
-	// spurious use of dependent type to stop gcc from triggering the static assertion before instantiating the template
-	return STORAGE_TYPE1(first.value() / second.value());
+	return STORAGE_TYPE1(first.value() / first.convert(second));
 }
 
 #define COMPARISON_OPERATORS(op)                                                                                     \
-template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>                                            \
-bool operator op (SCALAR_TYPE first, LLUnit<UNIT_TYPE, STORAGE_TYPE> second)                                         \
+template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>                                            \
+bool operator op (SCALAR_TYPE first, LLUnit<STORAGE_TYPE, UNIT_TYPE> second)                                         \
 {                                                                                                                    \
 	return first op second.value();                                                                                  \
 }                                                                                                                    \
 	                                                                                                                 \
-template<typename UNIT_TYPE, typename STORAGE_TYPE, typename SCALAR_TYPE>                                            \
-bool operator op (LLUnit<UNIT_TYPE, STORAGE_TYPE> first, SCALAR_TYPE second)                                         \
+template<typename STORAGE_TYPE, typename UNIT_TYPE, typename SCALAR_TYPE>                                            \
+bool operator op (LLUnit<STORAGE_TYPE, UNIT_TYPE> first, SCALAR_TYPE second)                                         \
 {                                                                                                                    \
 	return first.value() op second;                                                                                  \
 }                                                                                                                    \
 	                                                                                                                 \
-template<typename UNIT_TYPE1, typename STORAGE_TYPE1, typename UNIT_TYPE2, typename STORAGE_TYPE2>                   \
-bool operator op (LLUnitImplicit<UNIT_TYPE1, STORAGE_TYPE1> first, LLUnitImplicit<UNIT_TYPE2, STORAGE_TYPE2> second) \
+template<typename STORAGE_TYPE1, typename UNIT_TYPE1, typename STORAGE_TYPE2, typename UNIT_TYPE2>                   \
+bool operator op (LLUnitImplicit<STORAGE_TYPE1, UNIT_TYPE1> first, LLUnitImplicit<STORAGE_TYPE2, UNIT_TYPE2> second) \
 {                                                                                                                    \
 	return first.value() op first.convert(second);                                                                   \
 }                                                                                                                    \
 	                                                                                                                 \
-template<typename UNIT_TYPE1, typename STORAGE_TYPE1, typename UNIT_TYPE2, typename STORAGE_TYPE2>                   \
-	bool operator op (LLUnit<UNIT_TYPE1, STORAGE_TYPE1> first, LLUnit<UNIT_TYPE2, STORAGE_TYPE2> second)             \
+template<typename STORAGE_TYPE1, typename UNIT_TYPE1, typename STORAGE_TYPE2, typename UNIT_TYPE2>                   \
+	bool operator op (LLUnit<STORAGE_TYPE1, UNIT_TYPE1> first, LLUnit<STORAGE_TYPE2, UNIT_TYPE2> second)             \
 {                                                                                                                    \
 	return first.value() op first.convert(second);                                                                   \
 }
@@ -401,7 +410,7 @@ struct LLGetUnitLabel
 };
 
 template<typename T, typename STORAGE_T>
-struct LLGetUnitLabel<LLUnit<T, STORAGE_T> >
+struct LLGetUnitLabel<LLUnit<STORAGE_T, T> >
 {
 	static const char* getUnitLabel() { return T::getUnitLabel(); }
 };
@@ -411,70 +420,147 @@ struct LLGetUnitLabel<LLUnit<T, STORAGE_T> >
 //
 namespace LLUnits
 {
+
+template<typename VALUE_TYPE>
+struct LinearOps // wraps a value so a conversion written as an operator suffix (e.g. "* 1000") is applied to it as written
+{
+	typedef LinearOps<VALUE_TYPE> self_t;
+	LinearOps(VALUE_TYPE val) : mValue (val) {} // non-explicit: lets the operators below return a raw arithmetic result that converts back to self_t
+
+	operator VALUE_TYPE() const { return mValue; }
+	VALUE_TYPE mValue;
+
+	template<typename T>
+	self_t operator * (T other) // returns a wrapped result so further operators can be chained
+	{
+		return mValue * other;
+	}
+
+	template<typename T>
+	self_t operator / (T other)
+	{
+		return mValue / other;
+	}
+
+	template<typename T>
+	self_t operator + (T other)
+	{
+		return mValue + other;
+	}
+
+	template<typename T>
+	self_t operator - (T other)
+	{
+		return mValue - other;
+	}
+};
+
+template<typename VALUE_TYPE>
+struct InverseLinearOps // wraps a value and maps each operator to its opposite, so the same operator suffix yields the reverse conversion
+{
+	typedef InverseLinearOps<VALUE_TYPE> self_t;
+
+	InverseLinearOps(VALUE_TYPE val) : mValue (val) {} // non-explicit: raw results returned below convert back to self_t
+	operator VALUE_TYPE() const { return mValue; }
+	VALUE_TYPE mValue;
+
+	template<typename T>
+	self_t operator * (T other) // '*' divides
+	{
+		return mValue / other;
+	}
+
+	template<typename T>
+	self_t operator / (T other) // '/' multiplies
+	{
+		return mValue * other;
+	}
+
+	template<typename T>
+	self_t operator + (T other) // '+' subtracts
+	{
+		return mValue - other;
+	}
+
+	template<typename T>
+	self_t operator - (T other) // '-' adds
+	{
+		return mValue + other;
+	}
+}; // NOTE(review): operators are still applied left-to-right, so a suffix mixing scale and offset (e.g. "* a + b") would not be inverted in reverse order - confirm before declaring such a unit
+
+
 template<typename T>
-T rawValue(T val) { return val; }
+T storageValue(T val) { return val; }
 
 template<typename UNIT_TYPE, typename STORAGE_TYPE> 
-STORAGE_TYPE rawValue(LLUnit<UNIT_TYPE, STORAGE_TYPE> val) { return val.value(); }
+STORAGE_TYPE storageValue(LLUnit<STORAGE_TYPE, UNIT_TYPE> val) { return val.value(); }
 
 template<typename UNIT_TYPE, typename STORAGE_TYPE> 
-STORAGE_TYPE rawValue(LLUnitImplicit<UNIT_TYPE, STORAGE_TYPE> val) { return val.value(); }
+STORAGE_TYPE storageValue(LLUnitImplicit<STORAGE_TYPE, UNIT_TYPE> val) { return val.value(); }
 
-#define LL_DECLARE_DERIVED_UNIT(conversion_factor, base_unit_name, unit_name, unit_label)		\
+#define LL_DECLARE_BASE_UNIT(base_unit_name, unit_label) \
+struct base_unit_name { typedef base_unit_name base_unit_t; static const char* getUnitLabel() { return unit_label; }}
+
+#define LL_DECLARE_DERIVED_UNIT(unit_name, unit_label, base_unit_name, conversion_operation)	\
 struct unit_name                                                                                \
 {                                                                                               \
 	typedef base_unit_name base_unit_t;                                                         \
 	static const char* getUnitLabel() { return unit_label; }									\
 };                                                                                              \
 template<typename STORAGE_TYPE>                                                                 \
-struct ConversionFactor<unit_name, base_unit_name, STORAGE_TYPE>                                \
+struct Convert<unit_name, base_unit_name, STORAGE_TYPE>                                         \
 {                                                                                               \
-	static F64 get()                                                                            \
+	static STORAGE_TYPE get(STORAGE_TYPE val)                                                   \
 	{                                                                                           \
-		return (F64)conversion_factor;                                                          \
+		return (LinearOps<STORAGE_TYPE>(val) conversion_operation).mValue;                      \
 	}                                                                                           \
 };                                                                                              \
 	                                                                                            \
 template<typename STORAGE_TYPE>                                                                 \
-struct ConversionFactor<base_unit_name, unit_name, STORAGE_TYPE>						        \
+struct Convert<base_unit_name, unit_name, STORAGE_TYPE>						                    \
 {                                                                                               \
-	static F64 get()                                                                            \
+	static STORAGE_TYPE get(STORAGE_TYPE val)                                                   \
 	{                                                                                           \
-		return (F64)(1.0 / (conversion_factor));                                                \
+		return (InverseLinearOps<STORAGE_TYPE>(val) conversion_operation).mValue;               \
 	}                                                                                           \
 }
 
-#define LL_DECLARE_BASE_UNIT(base_unit_name, unit_label) \
-struct base_unit_name { typedef base_unit_name base_unit_t; static const char* getUnitLabel() { return unit_label; }}
-
 LL_DECLARE_BASE_UNIT(Bytes, "B");
-LL_DECLARE_DERIVED_UNIT(1024, Bytes, Kibibytes, "KiB");
-LL_DECLARE_DERIVED_UNIT(1024 * 1024, Bytes, Mibibytes, "MiB");
-LL_DECLARE_DERIVED_UNIT(1024 * 1024 * 1024, Bytes, Gibibytes, "GiB");
-LL_DECLARE_DERIVED_UNIT(1.0 / 8.0, Bytes, Bits, "b");
-LL_DECLARE_DERIVED_UNIT(1024 / 8, Bytes, Kibibits, "Kib");
-LL_DECLARE_DERIVED_UNIT(1024 / 8, Bytes, Mibibits, "Mib");
-LL_DECLARE_DERIVED_UNIT(1024 * 1024 * 1024 / 8, Bytes, Gibibits, "Gib");
+LL_DECLARE_DERIVED_UNIT(Kilobytes, "KB", Bytes, * 1000);
+LL_DECLARE_DERIVED_UNIT(Megabytes, "MB", Bytes, * 1000 * 1000);
+LL_DECLARE_DERIVED_UNIT(Gigabytes, "GB", Bytes, * 1000 * 1000 * 1000);
+LL_DECLARE_DERIVED_UNIT(Kibibytes, "KiB", Bytes, * 1024);
+LL_DECLARE_DERIVED_UNIT(Mibibytes, "MiB", Bytes, * 1024 * 1024);
+LL_DECLARE_DERIVED_UNIT(Gibibytes, "GiB", Bytes, * 1024 * 1024 * 1024);
+
+LL_DECLARE_DERIVED_UNIT(Bits, "b", Bytes, / 8); // the operation converts a value in the derived unit into Bytes
+LL_DECLARE_DERIVED_UNIT(Kilobits, "Kb", Bytes, * (1000 / 8));
+LL_DECLARE_DERIVED_UNIT(Megabits, "Mb", Bytes, * (1000 * 1000 / 8)); // was (1000 / 8), which duplicated the Kilobits factor
+LL_DECLARE_DERIVED_UNIT(Gigabits, "Gb", Bytes, * (1000 * 1000 * 1000 / 8));
+LL_DECLARE_DERIVED_UNIT(Kibibits, "Kib", Bytes, * (1024 / 8));
+LL_DECLARE_DERIVED_UNIT(Mibibits, "Mib", Bytes, * (1024 * 1024 / 8)); // was (1024 / 8), which duplicated the Kibibits factor
+LL_DECLARE_DERIVED_UNIT(Gibibits, "Gib", Bytes, * (1024 * 1024 * 1024 / 8));
 
 LL_DECLARE_BASE_UNIT(Seconds, "s");
-LL_DECLARE_DERIVED_UNIT(60, Seconds, Minutes, "min");
-LL_DECLARE_DERIVED_UNIT(60 * 60, Seconds, Hours, "h");
-LL_DECLARE_DERIVED_UNIT(1.0 / 1000.0, Seconds, Milliseconds, "ms");
-LL_DECLARE_DERIVED_UNIT(1.0 / 1000000.0, Seconds, Microseconds, "\x09\x3cs");
-LL_DECLARE_DERIVED_UNIT(1.0 / 1000000000.0, Seconds, Nanoseconds, "ns");
+LL_DECLARE_DERIVED_UNIT(Minutes, "min", Seconds, * 60);
+LL_DECLARE_DERIVED_UNIT(Hours, "h", Seconds, * 60 * 60);
+LL_DECLARE_DERIVED_UNIT(Milliseconds, "ms", Seconds, / 1000);
+LL_DECLARE_DERIVED_UNIT(Microseconds, "\x09\x3cs", Seconds, / 1000000);
+LL_DECLARE_DERIVED_UNIT(Nanoseconds, "ns", Seconds, / 1000000000);
 
 LL_DECLARE_BASE_UNIT(Meters, "m");
-LL_DECLARE_DERIVED_UNIT(1000, Meters, Kilometers, "km");
-LL_DECLARE_DERIVED_UNIT(1.0 / 100.0, Meters, Centimeters, "cm");
-LL_DECLARE_DERIVED_UNIT(1.0 / 1000.0, Meters, Millimeters, "mm");
+LL_DECLARE_DERIVED_UNIT(Kilometers, "km", Meters, * 1000); // the operation converts a value in the derived unit into Meters
+LL_DECLARE_DERIVED_UNIT(Centimeters, "cm", Meters, / 100); // was "* 100", which made 1 cm equal 100 m; matches the old 1.0 / 100.0 factor
+LL_DECLARE_DERIVED_UNIT(Millimeters, "mm", Meters, / 1000); // was "* 1000", which made 1 mm equal 1000 m; matches the old 1.0 / 1000.0 factor
 
 LL_DECLARE_BASE_UNIT(Hertz, "Hz");
-LL_DECLARE_DERIVED_UNIT(1000, Hertz, Kilohertz, "KHz");
-LL_DECLARE_DERIVED_UNIT(1000 * 1000, Hertz, Megahertz, "MHz");
-LL_DECLARE_DERIVED_UNIT(1000 * 1000 * 1000, Hertz, Gigahertz, "GHz");
+LL_DECLARE_DERIVED_UNIT(Kilohertz, "KHz", Hertz, * 1000);
+LL_DECLARE_DERIVED_UNIT(Megahertz, "MHz", Hertz, * 1000 * 1000);
+LL_DECLARE_DERIVED_UNIT(Gigahertz, "GHz", Hertz, * 1000 * 1000 * 1000);
 
 LL_DECLARE_BASE_UNIT(Radians, "rad");
-LL_DECLARE_DERIVED_UNIT(0.01745329251994, Radians, Degrees, "deg");
+LL_DECLARE_DERIVED_UNIT(Degrees, "deg", Radians, * 0.01745329251994);
 
 
 } // namespace LLUnits
diff --git a/indra/llcommon/tests/llunits_test.cpp b/indra/llcommon/tests/llunits_test.cpp
index 33e30f9688c87f90cd7647cbc76d4ac1d0f20f03..747e8d1827f040f797adb4014f0b67df5045f682 100644
--- a/indra/llcommon/tests/llunits_test.cpp
+++ b/indra/llcommon/tests/llunits_test.cpp
@@ -34,8 +34,8 @@ namespace LLUnits
 {
 	// using powers of 2 to allow strict floating point equality
 	LL_DECLARE_BASE_UNIT(Quatloos, "Quat");
-	LL_DECLARE_DERIVED_UNIT(4, Quatloos, Latinum, "Lat");
-	LL_DECLARE_DERIVED_UNIT((1.0 / 4.0), Quatloos, Solari, "Sol");
+	LL_DECLARE_DERIVED_UNIT(Latinum, "Lat", Quatloos, * 4);
+	LL_DECLARE_DERIVED_UNIT(Solari, "Sol", Quatloos, / 4);
 }
 
 namespace tut
@@ -53,105 +53,107 @@ namespace tut
 	template<> template<>
 	void units_object_t::test<1>()
 	{
-		LLUnit<Quatloos, F32> float_quatloos;
-		ensure(float_quatloos.value() == 0.f);
+		LLUnit<F32, Quatloos> float_quatloos;
+		ensure(float_quatloos == 0.f);
 
-		LLUnit<Quatloos, S32> int_quatloos;
-		ensure(int_quatloos.value() == 0);
+		LLUnit<S32, Quatloos> int_quatloos;
+		ensure(int_quatloos == 0);
 
 		int_quatloos = 42;
-		ensure(int_quatloos.value() == 42);
+		ensure(int_quatloos == 42);
 		float_quatloos = int_quatloos;
-		ensure(float_quatloos.value() == 42.f);
+		ensure(float_quatloos == 42.f);
 
 		int_quatloos = float_quatloos;
-		ensure(int_quatloos.value() == 42);
+		ensure(int_quatloos == 42);
 
 		float_quatloos = 42.1f;
-		ensure(float_quatloos.value() == 42.1f);
+		ensure(float_quatloos == 42.1f);
 		int_quatloos = float_quatloos;
-		ensure(int_quatloos.value() == 42);
-		LLUnit<Quatloos, U32> unsigned_int_quatloos(float_quatloos);
-		ensure(unsigned_int_quatloos.value() == 42);
+		ensure(int_quatloos == 42);
+		LLUnit<U32, Quatloos> unsigned_int_quatloos(float_quatloos);
+		ensure(unsigned_int_quatloos == 42);
 	}
 
 	// conversions to/from base unit
 	template<> template<>
 	void units_object_t::test<2>()
 	{
-		LLUnit<Quatloos, F32> quatloos(1.f);
-		ensure(quatloos.value() == 1.f);
-		LLUnit<Latinum, F32> latinum_bars(quatloos);
-		ensure(latinum_bars.value() == 1.f / 4.f);
+		LLUnit<F32, Quatloos> quatloos(1.f);
+		ensure(quatloos == 1.f);
+		LLUnit<F32, Latinum> latinum_bars(quatloos);
+		ensure(latinum_bars == 1.f / 4.f);
 
 		latinum_bars = 256;
 		quatloos = latinum_bars;
-		ensure(quatloos.value() == 1024);
+		ensure(quatloos == 1024);
 
-		LLUnit<Solari, F32> solari(quatloos);
-		ensure(solari.value() == 4096);
+		LLUnit<F32, Solari> solari(quatloos);
+		ensure(solari == 4096);
 	}
 
 	// conversions across non-base units
 	template<> template<>
 	void units_object_t::test<3>()
 	{
-		LLUnit<Solari, F32> solari = 4.f;
-		LLUnit<Latinum, F32> latinum_bars = solari;
-		ensure(latinum_bars.value() == 0.25f);
+		LLUnit<F32, Solari> solari = 4.f;
+		LLUnit<F32, Latinum> latinum_bars = solari;
+		ensure(latinum_bars == 0.25f);
 	}
 
 	// math operations
 	template<> template<>
 	void units_object_t::test<4>()
 	{
-		LLUnit<Quatloos, F32> quatloos = 1.f;
+		LLUnit<F32, Quatloos> quatloos = 1.f;
 		quatloos *= 4.f;
-		ensure(quatloos.value() == 4);
+		ensure(quatloos == 4);
 		quatloos = quatloos * 2;
-		ensure(quatloos.value() == 8);
+		ensure(quatloos == 8);
 		quatloos = 2.f * quatloos;
-		ensure(quatloos.value() == 16);
+		ensure(quatloos == 16);
 
 		quatloos += 4.f;
-		ensure(quatloos.value() == 20);
+		ensure(quatloos == 20);
 		quatloos += 4;
-		ensure(quatloos.value() == 24);
+		ensure(quatloos == 24);
 		quatloos = quatloos + 4;
-		ensure(quatloos.value() == 28);
+		ensure(quatloos == 28);
 		quatloos = 4 + quatloos;
-		ensure(quatloos.value() == 32);
+		ensure(quatloos == 32);
 		quatloos += quatloos * 3;
-		ensure(quatloos.value() == 128);
+		ensure(quatloos == 128);
 
 		quatloos -= quatloos / 4 * 3;
-		ensure(quatloos.value() == 32);
+		ensure(quatloos == 32);
 		quatloos = quatloos - 8;
-		ensure(quatloos.value() == 24);
+		ensure(quatloos == 24);
 		quatloos -= 4;
-		ensure(quatloos.value() == 20);
+		ensure(quatloos == 20);
 		quatloos -= 4.f;
-		ensure(quatloos.value() == 16);
+		ensure(quatloos == 16);
 
 		quatloos *= 2.f;
-		ensure(quatloos.value() == 32);
+		ensure(quatloos == 32);
 		quatloos = quatloos * 2.f;
-		ensure(quatloos.value() == 64);
+		ensure(quatloos == 64);
 		quatloos = 0.5f * quatloos;
-		ensure(quatloos.value() == 32);
+		ensure(quatloos == 32);
 
 		quatloos /= 2.f;
-		ensure(quatloos.value() == 16);
+		ensure(quatloos == 16);
 		quatloos = quatloos / 4;
-		ensure(quatloos.value() == 4);
+		ensure(quatloos == 4);
 
-		F32 ratio = quatloos / LLUnit<Quatloos, F32>(4.f);
+		F32 ratio = quatloos / LLUnit<F32, Quatloos>(4.f);
+		ensure(ratio == 1);
+		ratio = quatloos / LLUnit<F32, Solari>(16.f);
 		ensure(ratio == 1);
 
-		quatloos += LLUnit<Solari, F32>(4.f);
-		ensure(quatloos.value() == 5);
-		quatloos -= LLUnit<Latinum, F32>(1.f);
-		ensure(quatloos.value() == 1);
+		quatloos += LLUnit<F32, Solari>(4.f);
+		ensure(quatloos == 5);
+		quatloos -= LLUnit<F32, Latinum>(1.f);
+		ensure(quatloos == 1);
 	}
 
 	// implicit units
@@ -159,16 +161,16 @@ namespace tut
 	void units_object_t::test<5>()
 	{
 		// 0-initialized
-		LLUnit<Quatloos, F32> quatloos(0);
+		LLUnit<F32, Quatloos> quatloos(0);
 		// initialize implicit unit from explicit
-		LLUnitImplicit<Quatloos, F32> quatloos_implicit = quatloos + 1;
-		ensure(quatloos_implicit.value() == 1);
+		LLUnitImplicit<F32, Quatloos> quatloos_implicit = quatloos + 1;
+		ensure(quatloos_implicit == 1);
 
 		// assign implicit to explicit, or perform math operations
 		quatloos = quatloos_implicit;
-		ensure(quatloos.value() == 1);
+		ensure(quatloos == 1);
 		quatloos += quatloos_implicit;
-		ensure(quatloos.value() == 2);
+		ensure(quatloos == 2);
 
 		// math operations on implicits
 		quatloos_implicit = 1;
diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp
index 2c3fcfcec11dcc08cca4174239d6bc8de300205e..cb99a651c66fcc944574c4535365a3619373937f 100644
--- a/indra/llrender/llimagegl.cpp
+++ b/indra/llrender/llimagegl.cpp
@@ -51,9 +51,9 @@ U32 wpo2(U32 i);
 
 U32 LLImageGL::sUniqueCount				= 0;
 U32 LLImageGL::sBindCount				= 0;
-LLUnit<LLUnits::Bytes, S32> LLImageGL::sGlobalTextureMemory		= 0;
-LLUnit<LLUnits::Bytes, S32> LLImageGL::sBoundTextureMemory		= 0;
-LLUnit<LLUnits::Bytes, S32> LLImageGL::sCurBoundTextureMemory	= 0;
+LLUnit<S32, LLUnits::Bytes> LLImageGL::sGlobalTextureMemory		= 0;
+LLUnit<S32, LLUnits::Bytes> LLImageGL::sBoundTextureMemory		= 0;
+LLUnit<S32, LLUnits::Bytes> LLImageGL::sCurBoundTextureMemory	= 0;
 S32 LLImageGL::sCount					= 0;
 LLImageGL::dead_texturelist_t LLImageGL::sDeadTextureList[LLTexUnit::TT_NONE];
 U32 LLImageGL::sCurTexName = 1;
diff --git a/indra/llrender/llimagegl.h b/indra/llrender/llimagegl.h
index 2b568e5e0f73fb31137657423bf8cfe3925d9263..227ccc90bd74abfbd6552ca489a80c999c5d909e 100755
--- a/indra/llrender/llimagegl.h
+++ b/indra/llrender/llimagegl.h
@@ -246,9 +246,9 @@ class LLImageGL : public LLRefCount
 	static F32 sLastFrameTime;
 
 	// Global memory statistics
-	static LLUnit<LLUnits::Bytes, S32> sGlobalTextureMemory;	// Tracks main memory texmem
-	static LLUnit<LLUnits::Bytes, S32> sBoundTextureMemory;	// Tracks bound texmem for last completed frame
-	static LLUnit<LLUnits::Bytes, S32> sCurBoundTextureMemory;		// Tracks bound texmem for current frame
+	static LLUnit<S32, LLUnits::Bytes> sGlobalTextureMemory;	// Tracks main memory texmem
+	static LLUnit<S32, LLUnits::Bytes> sBoundTextureMemory;	// Tracks bound texmem for last completed frame
+	static LLUnit<S32, LLUnits::Bytes> sCurBoundTextureMemory;		// Tracks bound texmem for current frame
 	static U32 sBindCount;					// Tracks number of texture binds for current frame
 	static U32 sUniqueCount;				// Tracks number of unique texture binds for current frame
 	static BOOL sGlobalUseAnisotropic;
diff --git a/indra/llui/llstatbar.cpp b/indra/llui/llstatbar.cpp
index 6966df8213fdc63145ef5074cf66fb0650cc98be..d3cc2733e66f196a9127f52d9dae63300c5a3865 100755
--- a/indra/llui/llstatbar.cpp
+++ b/indra/llui/llstatbar.cpp
@@ -284,7 +284,7 @@ void LLStatBar::draw()
 		// draw background bar.
 		gl_rect_2d(bar_left, bar_top, bar_right, bar_bottom, LLColor4(0.f, 0.f, 0.f, 0.25f));
 
-		if (frame_recording.getNumPeriods() == 0)
+		if (frame_recording.getNumRecordedPeriods() == 0)
 		{
 			// No data, don't draw anything...
 			return;
@@ -315,7 +315,7 @@ void LLStatBar::draw()
 
 		if (mDisplayHistory && (mCountFloatp || mEventFloatp || mSampleFloatp))
 		{
-			const S32 num_values = frame_recording.getNumPeriods() - 1;
+			const S32 num_values = frame_recording.getNumRecordedPeriods() - 1;
 			F32 begin = 0;
 			F32 end = 0;
 			S32 i;
diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp
index 42bf9b657bbcffd7ce95d13b7391abbc201b4f90..ef24ba21eeaa82049b12be089606fb2583a00052 100755
--- a/indra/newview/llappviewer.cpp
+++ b/indra/newview/llappviewer.cpp
@@ -296,7 +296,7 @@ LLPumpIO* gServicePump = NULL;
 
 U64 gFrameTime = 0;
 F32 gFrameTimeSeconds = 0.f;
-LLUnit<LLUnits::Seconds, F32> gFrameIntervalSeconds = 0.f;
+LLUnit<F32, LLUnits::Seconds> gFrameIntervalSeconds = 0.f;
 F32 gFPSClamped = 10.f;						// Pretend we start at target rate.
 F32 gFrameDTClamped = 0.f;					// Time between adjacent checks to network for packets
 U64	gStartTime = 0; // gStartTime is "private", used only to calculate gFrameTimeSeconds
diff --git a/indra/newview/llappviewer.h b/indra/newview/llappviewer.h
index 2e75de445fcde368f16b1a1012b24eb02213f6af..ad662d8ea1ae261c0652ddae87ba0e694594bd52 100755
--- a/indra/newview/llappviewer.h
+++ b/indra/newview/llappviewer.h
@@ -337,7 +337,7 @@ extern LLPumpIO* gServicePump;
 
 extern U64      gFrameTime;					// The timestamp of the most-recently-processed frame
 extern F32		gFrameTimeSeconds;			// Loses msec precision after ~4.5 hours...
-extern LLUnit<LLUnits::Seconds, F32>		gFrameIntervalSeconds;		// Elapsed time between current and previous gFrameTimeSeconds
+extern LLUnit<F32, LLUnits::Seconds>		gFrameIntervalSeconds;		// Elapsed time between current and previous gFrameTimeSeconds
 extern F32		gFPSClamped;				// Frames per second, smoothed, weighted toward last frame
 extern F32		gFrameDTClamped;
 extern U64		gStartTime;
diff --git a/indra/newview/llfasttimerview.cpp b/indra/newview/llfasttimerview.cpp
index 40526d3357fa5a7dc0d034e63661b804d587a076..8e061ec87c7ea0f8181f3b5858a72045567ed71a 100755
--- a/indra/newview/llfasttimerview.cpp
+++ b/indra/newview/llfasttimerview.cpp
@@ -101,21 +101,15 @@ LLFastTimerView::LLFastTimerView(const LLSD& key)
 	mScrollIndex(0),
 	mHoverID(NULL),
 	mHoverBarIndex(-1),
-	mPrintStats(-1),
-	mRecording(&get_frame_recording()),
-	mPauseHistory(false)
+	mStatsIndex(-1),
+	mPauseHistory(false),
+	mRecording(512)
 {
-	mTimerBars = new std::vector<TimerBar>[MAX_VISIBLE_HISTORY + 1];
+	mTimerBarRows.resize(MAX_VISIBLE_HISTORY);
 }
 
 LLFastTimerView::~LLFastTimerView()
 {
-	if (mRecording != &get_frame_recording())
-	{
-		delete mRecording;
-	}
-	mRecording = NULL;
-	delete [] mTimerBars;
 }
 
 void LLFastTimerView::onPause()
@@ -130,16 +124,11 @@ void LLFastTimerView::setPauseState(bool pause_state)
 	// reset scroll to bottom when unpausing
 	if (!pause_state)
 	{
-		if (mRecording != &get_frame_recording())
-		{
-			delete mRecording;
-		}
-		mRecording = &get_frame_recording();
+		
 		getChild<LLButton>("pause_btn")->setLabel(getString("pause"));
 	}
 	else
 	{
-		mRecording = new PeriodicRecording(get_frame_recording());
 		mScrollIndex = 0;
 
 		getChild<LLButton>("pause_btn")->setLabel(getString("run"));
@@ -175,7 +164,7 @@ BOOL LLFastTimerView::handleRightMouseDown(S32 x, S32 y, MASK mask)
 	{
 		S32 bar_idx = MAX_VISIBLE_HISTORY - ((y - mBarRect.mBottom) * (MAX_VISIBLE_HISTORY + 2) / mBarRect.getHeight());
 		bar_idx = llclamp(bar_idx, 0, MAX_VISIBLE_HISTORY);
-		mPrintStats = mScrollIndex + bar_idx;
+		mStatsIndex = mScrollIndex + bar_idx;
 		return TRUE;
 	}
 	return LLFloater::handleRightMouseDown(x, y, mask);
@@ -262,8 +251,8 @@ BOOL LLFastTimerView::handleHover(S32 x, S32 y, MASK mask)
 	if (hasMouseCapture())
 	{
 		F32 lerp = llclamp(1.f - (F32) (x - mGraphRect.mLeft) / (F32) mGraphRect.getWidth(), 0.f, 1.f);
-		mScrollIndex = llround( lerp * (F32)(mRecording->getNumPeriods() - MAX_VISIBLE_HISTORY));
-		mScrollIndex = llclamp(	mScrollIndex, 0, (S32)mRecording->getNumPeriods());
+		mScrollIndex = llround( lerp * (F32)(mRecording.getNumRecordedPeriods() - MAX_VISIBLE_HISTORY));
+		mScrollIndex = llclamp(	mScrollIndex, 0, (S32)mRecording.getNumRecordedPeriods());
 		return TRUE;
 	}
 	mHoverTimer = NULL;
@@ -272,7 +261,7 @@ BOOL LLFastTimerView::handleHover(S32 x, S32 y, MASK mask)
 	if(mPauseHistory && mBarRect.pointInRect(x, y))
 	{
 		mHoverBarIndex = llmin((mBarRect.mTop - y) / (mBarRect.getHeight() / (MAX_VISIBLE_HISTORY + 2)) - 1,
-								(S32)mRecording->getNumPeriods() - 1,
+								(S32)mRecording.getNumRecordedPeriods() - 1,
 								MAX_VISIBLE_HISTORY);
 		if (mHoverBarIndex == 0)
 		{
@@ -289,7 +278,8 @@ BOOL LLFastTimerView::handleHover(S32 x, S32 y, MASK mask)
 			++it, ++i)
 		{
 			// is mouse over bar for this timer?
-			if (mTimerBars[mHoverBarIndex][i].mVisibleRect.pointInRect(x, y))
+			TimerBarRow& row = mHoverBarIndex == 0 ? mAverageTimerRow : mTimerBarRows[mHoverBarIndex - 1];
+			if (row.mBars[i].mVisibleRect.pointInRect(x, y - row.mBottom))
 			{
 				mHoverID = (*it);
 				if (mHoverTimer != *it)
@@ -301,7 +291,8 @@ BOOL LLFastTimerView::handleHover(S32 x, S32 y, MASK mask)
 					mHoverTimer = (*it);
 				}
 
-				mToolTipRect = mTimerBars[mHoverBarIndex][i].mVisibleRect;
+				mToolTipRect = row.mBars[i].mVisibleRect;
+				mToolTipRect.translate(0, row.mBottom);
 			}
 
 			if ((*it)->getCollapsed())
@@ -329,11 +320,11 @@ static std::string get_tooltip(TimeBlock& timer, S32 history_index, PeriodicReco
 	if (history_index == 0)
 	{
 		// by default, show average number of call
-		tooltip = llformat("%s (%d ms, %d calls)", timer.getName().c_str(), (S32)LLUnit<LLUnits::Milliseconds, F64>(frame_recording.getPeriodMean(timer)).value(), (S32)frame_recording.getPeriodMean(timer.callCount()));
+		tooltip = llformat("%s (%d ms, %d calls)", timer.getName().c_str(), (S32)LLUnit<F64, LLUnits::Milliseconds>(frame_recording.getPeriodMean(timer)).value(), (S32)frame_recording.getPeriodMean(timer.callCount()));
 	}
 	else
 	{
-		tooltip = llformat("%s (%d ms, %d calls)", timer.getName().c_str(), (S32)LLUnit<LLUnits::Milliseconds, F64>(frame_recording.getPrevRecording(history_index).getSum(timer)).value(), (S32)frame_recording.getPrevRecording(history_index).getSum(timer.callCount()));
+		tooltip = llformat("%s (%d ms, %d calls)", timer.getName().c_str(), (S32)LLUnit<F64, LLUnits::Milliseconds>(frame_recording.getPrevRecording(history_index).getSum(timer)).value(), (S32)frame_recording.getPrevRecording(history_index).getSum(timer.callCount()));
 	}
 	return tooltip;
 }
@@ -348,7 +339,7 @@ BOOL LLFastTimerView::handleToolTip(S32 x, S32 y, MASK mask)
 			LLRect screen_rect;
 			localRectToScreen(mToolTipRect, &screen_rect);
 
-			std::string tooltip = get_tooltip(*mHoverTimer, mHoverBarIndex > 0 ? mScrollIndex + mHoverBarIndex : 0, *mRecording);
+			std::string tooltip = get_tooltip(*mHoverTimer, mHoverBarIndex > 0 ? mScrollIndex + mHoverBarIndex : 0, mRecording);
 
 			LLToolTipMgr::instance().show(LLToolTip::Params()
 				.message(tooltip)
@@ -366,7 +357,7 @@ BOOL LLFastTimerView::handleToolTip(S32 x, S32 y, MASK mask)
 			TimeBlock* idp = getLegendID(y);
 			if (idp)
 			{
-				LLToolTipMgr::instance().show(get_tooltip(*idp, 0, *mRecording));
+				LLToolTipMgr::instance().show(get_tooltip(*idp, 0, mRecording));
 
 				return TRUE;
 			}
@@ -381,7 +372,7 @@ BOOL LLFastTimerView::handleScrollWheel(S32 x, S32 y, S32 clicks)
 	setPauseState(true);
 	mScrollIndex = llclamp(	mScrollIndex + clicks,
 							0,
-							llmin((S32)mRecording->getNumPeriods(), (S32)mRecording->getNumPeriods() - MAX_VISIBLE_HISTORY));
+							llmin((S32)mRecording.getNumRecordedPeriods(), (S32)mRecording.getNumRecordedPeriods() - MAX_VISIBLE_HISTORY));
 	return TRUE;
 }
 
@@ -389,12 +380,19 @@ static TimeBlock FTM_RENDER_TIMER("Timers", true);
 static const S32 MARGIN = 10;
 static const S32 LEGEND_WIDTH = 220;
 
-static std::map<TimeBlock*, LLColor4> sTimerColors;
+static std::vector<LLColor4> sTimerColors;
 
 void LLFastTimerView::draw()
 {
 	LLFastTimer t(FTM_RENDER_TIMER);
 	
+	if (!mPauseHistory)
+	{
+		mRecording.appendRecording(LLTrace::get_frame_recording().getLastRecording());
+		mTimerBarRows.pop_back();
+		mTimerBarRows.push_front(TimerBarRow());
+	}
+
 	generateUniqueColors();
 
 	// Draw the window background
@@ -417,11 +415,20 @@ void LLFastTimerView::draw()
 	printLineStats();
 	LLView::draw();
 		
-	mAllTimeMax = llmax(mAllTimeMax, mRecording->getLastRecording().getSum(FTM_FRAME));
+	mAllTimeMax = llmax(mAllTimeMax, mRecording.getLastRecording().getSum(FTM_FRAME));
 	mHoverID = NULL;
 	mHoverBarIndex = -1;
 }
 
+void LLFastTimerView::onOpen(const LLSD& key) // seeds this view's own recording when it has no history yet; key is unused here
+{
+	if (mRecording.getNumRecordedPeriods() == 0) // only when nothing has been recorded into mRecording
+	{
+		mRecording.appendPeriodicRecording(LLTrace::get_frame_recording()); // append the global frame recording so the view does not start empty
+	}
+}
+
+
 void saveChart(const std::string& label, const char* suffix, LLImageRaw* scratch)
 {
 	//read result back into raw image
@@ -828,7 +835,7 @@ void LLFastTimerView::doAnalysisDefault(std::string baseline, std::string target
 	LLSD current = analyzePerformanceLogDefault(target_is);
 	target_is.close();
 
-	//output comparision
+	//output comparison
 	std::ofstream os(output.c_str());
 
 	LLSD::Real session_time = current["SessionTime"].asReal();
@@ -936,7 +943,7 @@ void	LLFastTimerView::onClickCloseBtn()
 void LLFastTimerView::printLineStats()
 {
 	// Output stats for clicked bar to log
-	if (mPrintStats >= 0)
+	if (mStatsIndex >= 0)
 	{
 		std::string legend_stat;
 		bool first = true;
@@ -974,16 +981,16 @@ void LLFastTimerView::printLineStats()
 			}
 			first = false;
 
-			LLUnit<LLUnits::Seconds, F32> ticks;
-			if (mPrintStats > 0)
+			LLUnit<F32, LLUnits::Seconds> ticks;
+			if (mStatsIndex == 0)
 			{
-				ticks = mRecording->getPrevRecording(mPrintStats).getSum(*idp);
+				ticks = mRecording.getPeriodMean(*idp);
 			}
 			else
 			{
-				ticks = mRecording->getPeriodMean(*idp);
+				ticks = mRecording.getPrevRecording(mStatsIndex).getSum(*idp);
 			}
-			LLUnit<LLUnits::Milliseconds, F32> ms = ticks;
+			LLUnit<F32, LLUnits::Milliseconds> ms = ticks;
 
 			timer_stat += llformat("%.1f",ms.value());
 
@@ -993,7 +1000,7 @@ void LLFastTimerView::printLineStats()
 			}
 		}
 		llinfos << timer_stat << llendl;
-		mPrintStats = -1;
+		mStatsIndex = -1;
 	}
 }
 
@@ -1009,7 +1016,7 @@ void LLFastTimerView::drawLineGraph()
 	LLLocalClipRect clip(mGraphRect);
 
 	//normalize based on last frame's maximum
-	static LLUnit<LLUnits::Seconds, F32> max_time = 0.000001;
+	static LLUnit<F32, LLUnits::Seconds> max_time = 0.000001;
 	static U32 max_calls = 0;
 	static F32 alpha_interp = 0.f;
 
@@ -1020,7 +1027,7 @@ void LLFastTimerView::drawLineGraph()
 	else if (mDisplayHz)
 		axis_label = llformat("%d Hz", (int)(1.f / max_time.value()));
 	else
-		axis_label = llformat("%4.2f ms", LLUnit<LLUnits::Milliseconds, F32>(max_time).value());
+		axis_label = llformat("%4.2f ms", LLUnit<F32, LLUnits::Milliseconds>(max_time).value());
 
 	x = mGraphRect.mRight - LLFontGL::getFontMonospace()->getWidth(axis_label)-5;
 	y = mGraphRect.mTop - LLFontGL::getFontMonospace()->getLineHeight();
@@ -1030,10 +1037,10 @@ void LLFastTimerView::drawLineGraph()
 
 	//highlight visible range
 	{
-		S32 first_frame = mRecording->getNumPeriods() - mScrollIndex;
+		S32 first_frame = mRecording.getNumRecordedPeriods() - mScrollIndex;
 		S32 last_frame = first_frame - MAX_VISIBLE_HISTORY;
 
-		F32 frame_delta = ((F32) (mGraphRect.getWidth()))/(mRecording->getNumPeriods()-1);
+		F32 frame_delta = ((F32) (mGraphRect.getWidth()))/(mRecording.getNumRecordedPeriods()-1);
 
 		F32 right = (F32) mGraphRect.mLeft + frame_delta*first_frame;
 		F32 left = (F32) mGraphRect.mLeft + frame_delta*last_frame;
@@ -1055,7 +1062,7 @@ void LLFastTimerView::drawLineGraph()
 		}
 	}
 
-	LLUnit<LLUnits::Seconds, F32> cur_max = 0;
+	LLUnit<F32, LLUnits::Seconds> cur_max = 0;
 	U32 cur_max_calls = 0;
 	for(timer_tree_iterator_t it = begin_timer_tree(FTM_FRAME);
 		it != end_timer_tree();
@@ -1070,7 +1077,7 @@ void LLFastTimerView::drawLineGraph()
 			glLineWidth(3);
 		}
 
-		const F32 * col = sTimerColors[idp].mV;// ft_display_table[idx].color->mV;
+		const F32 * col = sTimerColors[idp->getIndex()].mV;// ft_display_table[idx].color->mV;
 
 		F32 alpha = 1.f;
 
@@ -1085,12 +1092,13 @@ void LLFastTimerView::drawLineGraph()
 
 		gGL.color4f(col[0], col[1], col[2], alpha);				
 		gGL.begin(LLRender::TRIANGLE_STRIP);
-		for (U32 j = mRecording->getNumPeriods();
+		for (U32 j = mRecording.getNumRecordedPeriods();
 			j > 0;
 			j--)
 		{
-			LLUnit<LLUnits::Seconds, F32> time = llmax(mRecording->getPrevRecording(j).getSum(*idp), LLUnit<LLUnits::Seconds, F64>(0.000001));
-			U32 calls = mRecording->getPrevRecording(j).getSum(idp->callCount());
+			LLTrace::Recording& recording = mRecording.getPrevRecording(j);
+			LLUnit<F32, LLUnits::Seconds> time = llmax(recording.getSum(*idp), LLUnit<F64, LLUnits::Seconds>(0.000001));
+			U32 calls = recording.getSum(idp->callCount());
 
 			if (alpha == 1.f)
 			{ 
@@ -1098,7 +1106,7 @@ void LLFastTimerView::drawLineGraph()
 				cur_max = llmax(cur_max, time);
 				cur_max_calls = llmax(cur_max_calls, calls);
 			}
-			F32 x = mGraphRect.mRight - j * (F32)(mGraphRect.getWidth())/(mRecording->getNumPeriods()-1);
+			F32 x = mGraphRect.mRight - j * (F32)(mGraphRect.getWidth())/(mRecording.getNumRecordedPeriods()-1);
 			F32 y = mDisplayHz 
 				? mGraphRect.mBottom + (1.f / time.value()) * ((F32) mGraphRect.getHeight() / (1.f / max_time.value()))
 				: mGraphRect.mBottom + time / max_time * (F32)mGraphRect.getHeight();
@@ -1124,7 +1132,7 @@ void LLFastTimerView::drawLineGraph()
 	max_time = lerp(max_time.value(), cur_max.value(), LLSmoothInterpolation::getInterpolant(0.1f));
 	if (max_time - cur_max <= 1 ||  cur_max - max_time  <= 1)
 	{
-		max_time = llmax(LLUnit<LLUnits::Microseconds, F32>(1), LLUnit<LLUnits::Microseconds, F32>(cur_max));
+		max_time = llmax(LLUnit<F32, LLUnits::Microseconds>(1), LLUnit<F32, LLUnits::Microseconds>(cur_max));
 	}
 
 	max_calls = llround(lerp((F32)max_calls, (F32) cur_max_calls, LLSmoothInterpolation::getInterpolant(0.1f)));
@@ -1183,20 +1191,20 @@ void LLFastTimerView::drawLegend( S32 y )
 				scale_offset = llfloor(sinf(mHighlightTimer.getElapsedTimeF32() * 6.f) * 2.f);
 			}
 			bar_rect.stretch(scale_offset);
-			gl_rect_2d(bar_rect, sTimerColors[idp]);
+			gl_rect_2d(bar_rect, sTimerColors[idp->getIndex()]);
 
-			LLUnit<LLUnits::Milliseconds, F32> ms = 0;
+			LLUnit<F32, LLUnits::Milliseconds> ms = 0;
 			S32 calls = 0;
 			if (mHoverBarIndex > 0 && mHoverID)
 			{
 				S32 hidx = mScrollIndex + mHoverBarIndex;
-				ms = mRecording->getPrevRecording(hidx).getSum(*idp);
-				calls = mRecording->getPrevRecording(hidx).getSum(idp->callCount());
+				ms = mRecording.getPrevRecording(hidx).getSum(*idp);
+				calls = mRecording.getPrevRecording(hidx).getSum(idp->callCount());
 			}
 			else
 			{
-				ms = LLUnit<LLUnits::Seconds, F64>(mRecording->getPeriodMean(*idp));
-				calls = (S32)mRecording->getPeriodMean(idp->callCount());
+				ms = LLUnit<F64, LLUnits::Seconds>(mRecording.getPeriodMean(*idp));
+				calls = (S32)mRecording.getPeriodMean(idp->callCount());
 			}
 
 			std::string timer_label;
@@ -1254,7 +1262,8 @@ void LLFastTimerView::generateUniqueColors()
 {
 	// generate unique colors
 	{
-		sTimerColors[&FTM_FRAME] = LLColor4::grey;
+		sTimerColors.resize(LLTrace::TimeBlock::getNumIndices()); // resize(), not reserve(): reserve() creates no elements and entries are assigned by index
+		sTimerColors[FTM_FRAME.getIndex()] = LLColor4::grey;
 
 		F32 hue = 0.f;
 
@@ -1274,7 +1283,7 @@ void LLFastTimerView::generateUniqueColors()
 			LLColor4 child_color;
 			child_color.setHSL(hue, saturation, lightness);
 
-			sTimerColors[idp] = child_color;
+			sTimerColors[idp->getIndex()] = child_color;
 		}
 	}
 }
@@ -1315,7 +1324,7 @@ void LLFastTimerView::drawTicks()
 {
 	// Draw MS ticks
 	{
-		LLUnit<LLUnits::Milliseconds, U32> ms = mTotalTimeDisplay;
+		LLUnit<U32, LLUnits::Milliseconds> ms = mTotalTimeDisplay;
 		std::string tick_label;
 		S32 x;
 		S32 barw = mBarRect.getWidth();
@@ -1382,127 +1391,127 @@ void LLFastTimerView::updateTotalTime()
 	switch(mDisplayMode)
 	{
 	case 0:
-		mTotalTimeDisplay = mRecording->getPeriodMean(FTM_FRAME)*2;
+		mTotalTimeDisplay = mRecording.getPeriodMean(FTM_FRAME)*2;
 		break;
 	case 1:
 		mTotalTimeDisplay = mAllTimeMax;
 		break;
 	case 2:
 		// Calculate the max total ticks for the current history
-		mTotalTimeDisplay = mRecording->getPeriodMax(FTM_FRAME);
+		mTotalTimeDisplay = mRecording.getPeriodMax(FTM_FRAME);
 		break;
 	default:
-		mTotalTimeDisplay = LLUnit<LLUnits::Milliseconds, F32>(100);
+		mTotalTimeDisplay = LLUnit<F32, LLUnits::Milliseconds>(100);
 		break;
 	}
 
-	mTotalTimeDisplay = LLUnit<LLUnits::Milliseconds, F32>(llceil(mTotalTimeDisplay.as<LLUnits::Milliseconds>().value() / 20.f) * 20.f);
+	mTotalTimeDisplay = LLUnit<F32, LLUnits::Milliseconds>(llceil(mTotalTimeDisplay.as<LLUnits::Milliseconds>().value() / 20.f) * 20.f);
 }
 
 void LLFastTimerView::drawBars()
 {
-	updateTotalTime();
-	if (mTotalTimeDisplay <= 0.0) return;
-
 	LLLocalClipRect clip(mBarRect);
 
 	S32 bar_height = mBarRect.getHeight() / (MAX_VISIBLE_HISTORY + 2);
-	S32 vpad = llmax(1, bar_height / 4); // spacing between bars
+	const S32 vpad = llmax(1, bar_height / 4); // spacing between bars
 	bar_height -= vpad;
 
+	updateTotalTime();
+	if (mTotalTimeDisplay <= 0.0) return;
+
 	drawTicks();
-	S32 y = mBarRect.mTop - ((S32)LLFontGL::getFontMonospace()->getLineHeight() + 4);
-	drawBorders(y, mBarRect.mLeft, bar_height, vpad);
+	const S32 bars_top = mBarRect.mTop - ((S32)LLFontGL::getFontMonospace()->getLineHeight() + 4);
+	drawBorders(bars_top, mBarRect.mLeft, bar_height, vpad);
 
 	// Draw bars for each history entry
-	// Special: -1 = show running average
+	// Special: 0 = show running average
 	LLPointer<LLUIImage> bar_image = LLUI::getUIImage("Rounded_Square");
+
+	const S32 image_width = bar_image->getTextureWidth();
+	const S32 image_height = bar_image->getTextureHeight();
+
 	gGL.getTexUnit(0)->bind(bar_image->getImage());
-	const S32 histmax = llmin((S32)mRecording->getNumPeriods(), MAX_VISIBLE_HISTORY) + 1;
+	{	
+		const S32 histmax = llmin((S32)mRecording.getNumRecordedPeriods(), MAX_VISIBLE_HISTORY);
 
-	for (S32 bar_index = 0; bar_index < histmax && y > LINE_GRAPH_HEIGHT; bar_index++)
-	{
-		S32 history_index = (bar_index > 0)
-			? bar_index + mScrollIndex
-			: -1;
-		mTimerBars[bar_index].clear();
-		mTimerBars[bar_index].reserve(LLInstanceTracker<LLTrace::TimeBlock>::instanceCount());
-
-		updateTimerBarWidths(&FTM_FRAME, mTimerBars[bar_index], history_index, true);
-		LLRect frame_bar_rect(mBarRect.mLeft, y, mBarRect.mLeft + mTimerBars[bar_index][0].mWidth, y-bar_height);
-		mTimerBars[bar_index][0].mVisibleRect = frame_bar_rect;
-		updateTimerBarFractions(&FTM_FRAME, 0, mTimerBars[bar_index]);
-		drawBar(&FTM_FRAME, frame_bar_rect, mTimerBars[bar_index], 0, bar_image);
-				
-		y -= (bar_height + vpad);
-		if (bar_index == 0)
-			y -= bar_height;
-	}
+		llassert(mTimerBarRows.size() >= histmax);
+
+		// update widths
+		updateTimerBarWidths(&FTM_FRAME, mAverageTimerRow, -1);
+		mAverageTimerRow.mBars[0].mVisibleRect = LLRect(mBarRect.mLeft, 0, mBarRect.mLeft + mAverageTimerRow.mBars[0].mWidth, -bar_height);
+		updateTimerBarFractions(&FTM_FRAME, mAverageTimerRow);
+
+		for (S32 history_index = 0; history_index < histmax; history_index++)
+		{
+			TimerBarRow& row = mTimerBarRows[history_index];
+			if (row.mBars.empty())
+			{
+				row.mBars.reserve(LLInstanceTracker<LLTrace::TimeBlock>::instanceCount());
+				updateTimerBarWidths(&FTM_FRAME, row, history_index);
+				row.mBars[0].mVisibleRect = LLRect(mBarRect.mLeft, 0, mBarRect.mLeft + row.mBars[0].mWidth, -1);
+				updateTimerBarFractions(&FTM_FRAME, row);
+			}
+		}
+
+		// draw bars
+		LLRect frame_bar_rect(	mBarRect.mLeft, 
+								bars_top, 
+								mBarRect.mLeft + mAverageTimerRow.mBars[0].mWidth, 
+								bars_top - bar_height);
+		mAverageTimerRow.mBottom = frame_bar_rect.mBottom;
+		drawBar(&FTM_FRAME, frame_bar_rect, mAverageTimerRow, image_width, image_height, false);
+		frame_bar_rect.translate(0, -(bar_height + vpad + bar_height));
+
+		for(S32 bar_index = mScrollIndex; bar_index < llmin(histmax, mScrollIndex + MAX_VISIBLE_HISTORY); ++bar_index)
+		{
+			TimerBarRow& row = mTimerBarRows[bar_index];
+			row.mBottom = frame_bar_rect.mBottom;
+			drawBar(&FTM_FRAME, frame_bar_rect, row, image_width, image_height, false);
+
+			frame_bar_rect.translate(0, -(bar_height + vpad));
+		}
+
+	}	
 	gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE);
 }
 
 static LLFastTimer::DeclareTimer FTM_UPDATE_TIMER_BAR_WIDTHS("Update timer bar widths");
 
-S32 LLFastTimerView::updateTimerBarWidths(LLTrace::TimeBlock* time_block, std::vector<TimerBar>& bars, S32 history_index, bool visible)
+S32 LLFastTimerView::updateTimerBarWidths(LLTrace::TimeBlock* time_block, TimerBarRow& row, S32 history_index, bool visible)
 {
+	std::vector<TimerBar>& bars = row.mBars;
 	LLFastTimer _(FTM_UPDATE_TIMER_BAR_WIDTHS);
-	F32 self_time_frame_fraction = history_index == -1
-		? (mRecording->getPeriodMean(time_block->selfTime()) / mTotalTimeDisplay) 
-		: (mRecording->getPrevRecording(history_index).getSum(time_block->selfTime()) / mTotalTimeDisplay);
+	const F32 self_time_frame_fraction = history_index == -1
+										? (mRecording.getPeriodMean(time_block->selfTime()) / mTotalTimeDisplay) 
+										: (mRecording.getPrevRecording(history_index).getSum(time_block->selfTime()) / mTotalTimeDisplay);
 
-	S32 self_time_width = llround(self_time_frame_fraction * (F32)mBarRect.getWidth());
+	const S32 self_time_width = llround(self_time_frame_fraction * (F32)mBarRect.getWidth());
 	S32 full_width = self_time_width;
 
-	bool children_visible = visible;
-
 	// reserve a spot for this bar to be rendered before its children
 	// even though we don't know its size yet
-	S32 bar_rect_index = bars.size();
-	if (visible)
-	{
-		bars.push_back(TimerBar());
-	}
+	const S32 bar_rect_index = (S32)bars.size(); // remember the slot by index: the recursion below appends to 'bars' and may reallocate it
+	bars.push_back(TimerBar());
 
-	if (time_block->getCollapsed())
-	{
-		children_visible = false;
-	}
+	const bool children_visible = visible && !time_block->getCollapsed();
 	for (TimeBlock::child_iter it = time_block->beginChildren(), end_it = time_block->endChildren(); it != end_it; ++it)
 	{
-		full_width += updateTimerBarWidths(*it, bars, history_index, children_visible);
+		full_width += updateTimerBarWidths(*it, row, history_index, children_visible);
 	}
 
-	if (visible)
-	{
-		TimerBar& timer_bar = bars[bar_rect_index];
-
-		timer_bar.mWidth = full_width;
-		timer_bar.mSelfWidth = self_time_width;
-		timer_bar.mColor = sTimerColors[time_block];
-
-		BOOL is_child_of_hover_item = (time_block == mHoverID);
-		TimeBlock* next_parent = time_block->getParent();
-		while(!is_child_of_hover_item && next_parent)
-		{
-			is_child_of_hover_item = (mHoverID == next_parent);
-			if (next_parent->getParent() == next_parent) break;
-			next_parent = next_parent->getParent();
-		}
-
-		if (mHoverID != NULL 
-			&& time_block != mHoverID 
-			&& !is_child_of_hover_item)
-		{
-			timer_bar.mColor = lerp(timer_bar.mColor, LLColor4::grey, 0.8f);
-		}
-	}
+	TimerBar& timer_bar = bars[bar_rect_index]; // fetch only after the children were appended; an earlier reference could dangle
+	timer_bar.mWidth     = full_width;
+	timer_bar.mSelfWidth = self_time_width;
+	timer_bar.mTimeBlock = time_block;
+	timer_bar.mVisible   = visible;
 	return full_width;
 }
 
 static LLFastTimer::DeclareTimer FTM_UPDATE_TIMER_BAR_FRACTIONS("Update timer bar fractions");
 
-S32 LLFastTimerView::updateTimerBarFractions(LLTrace::TimeBlock* time_block, S32 timer_bar_index, std::vector<TimerBar>& bars)
+S32 LLFastTimerView::updateTimerBarFractions(LLTrace::TimeBlock* time_block, TimerBarRow& row, S32 timer_bar_index)
 {
+	std::vector<TimerBar>& bars = row.mBars;
 	LLFastTimer _(FTM_UPDATE_TIMER_BAR_FRACTIONS);
 	TimerBar& timer_bar = bars[timer_bar_index];
 	S32 child_time_width = timer_bar.mWidth - timer_bar.mSelfWidth;
@@ -1518,11 +1527,6 @@ S32 LLFastTimerView::updateTimerBarFractions(LLTrace::TimeBlock* time_block, S32
 	}
 	children_rect.mRight = children_rect.mLeft + timer_bar.mWidth - timer_bar.mSelfWidth;
 
-	if (children_rect.getHeight() > MIN_BAR_HEIGHT)
-	{
-		children_rect.mTop -= 1;
-		children_rect.mBottom += 1;
-	}
 	timer_bar.mChildrenRect = children_rect;
 
 	//now loop through children and figure out portion of bar image covered by each bar, now that we know the
@@ -1548,7 +1552,7 @@ S32 LLFastTimerView::updateTimerBarFractions(LLTrace::TimeBlock* time_block, S32
 				children_rect.mLeft + llround(child_timer_bar.mEndFraction * children_rect.getWidth()), 
 				children_rect.mBottom);
 
-			timer_bar_index = updateTimerBarFractions(child_time_block, timer_bar_index, bars);
+			timer_bar_index = updateTimerBarFractions(child_time_block, row, timer_bar_index);
 
 			bar_fraction_start = child_timer_bar.mEndFraction;
 		}
@@ -1556,25 +1560,29 @@ S32 LLFastTimerView::updateTimerBarFractions(LLTrace::TimeBlock* time_block, S32
 	return timer_bar_index;
 }
 
-S32 LLFastTimerView::drawBar(LLTrace::TimeBlock* time_block, LLRect bar_rect, std::vector<TimerBar>& bars, S32 bar_index, LLPointer<LLUIImage>& bar_image)
+S32 LLFastTimerView::drawBar(LLTrace::TimeBlock* time_block, LLRect bar_rect, TimerBarRow& row, S32 image_width, S32 image_height, bool hovered, S32 bar_index)
 {
-	TimerBar& timer_bar = bars[bar_index];
+	TimerBar& timer_bar = row.mBars[bar_index];
+
+	hovered |= mHoverID == time_block;
 
 	// animate scale of bar when hovering over that particular timer
 	if (bar_rect.getWidth() > 0)
 	{
 		LLRect render_rect(bar_rect);
 		S32 scale_offset = 0;
-		if (time_block == mHoverID)
+		if (mHoverID == time_block)
 		{
 			scale_offset = llfloor(sinf(mHighlightTimer.getElapsedTimeF32() * 6.f) * 3.f);
 			render_rect.mTop += scale_offset;
 			render_rect.mBottom -= scale_offset;
 		}
 
-		gGL.color4fv(timer_bar.mColor.mV);
+		LLColor4 color = sTimerColors[time_block->getIndex()];
+		if (!hovered) color = lerp(color, LLColor4::grey, 0.8f);
+		gGL.color4fv(color.mV);
 		gl_segmented_rect_2d_fragment_tex(render_rect,
-			bar_image->getTextureWidth(), bar_image->getTextureHeight(), 
+			image_width, image_height, 
 			16, 
 			timer_bar.mStartFraction, timer_bar.mEndFraction);
 	}
@@ -1584,7 +1592,20 @@ S32 LLFastTimerView::drawBar(LLTrace::TimeBlock* time_block, LLRect bar_rect, st
 		for (TimeBlock::child_iter it = time_block->beginChildren(), end_it = time_block->endChildren(); it != end_it; ++it)
 		{
 			++bar_index;
-			bar_index = drawBar(*it, timer_bar.mChildrenRect, bars, bar_index, bar_image);
+			LLRect children_rect = timer_bar.mChildrenRect;
+			children_rect.translate(0, row.mBottom);
+			if (bar_rect.getHeight() > MIN_BAR_HEIGHT)
+			{
+				// shrink as we go down a level
+				children_rect.mTop = bar_rect.mTop - 1;
+				children_rect.mBottom = bar_rect.mBottom + 1;
+			}
+			else
+			{
+				children_rect.mTop = bar_rect.mTop;
+				children_rect.mBottom = bar_rect.mBottom;
+			}
+			bar_index = drawBar(*it, children_rect, row, image_width, image_height, hovered, bar_index);
 		}
 	}
 
diff --git a/indra/newview/llfasttimerview.h b/indra/newview/llfasttimerview.h
index 341adacd65aba80c9b60b0d55ef53e771d09b4df..c20cadd6d7dbc4fe016f57319c7ec382d5634285 100755
--- a/indra/newview/llfasttimerview.h
+++ b/indra/newview/llfasttimerview.h
@@ -31,6 +31,7 @@
 #include "llfasttimer.h"
 #include "llunit.h"
 #include "lltracerecording.h"
+#include <deque>
 
 class LLFastTimerView : public LLFloater
 {
@@ -60,13 +61,11 @@ class LLFastTimerView : public LLFloater
 	virtual BOOL handleToolTip(S32 x, S32 y, MASK mask);
 	virtual BOOL handleScrollWheel(S32 x, S32 y, S32 clicks);
 	virtual void draw();
-
+	virtual void onOpen(const LLSD& key);
 	LLTrace::TimeBlock* getLegendID(S32 y);
 
-protected:
-	virtual	void	onClickCloseBtn();
-
 private:	
+	virtual	void	onClickCloseBtn();
 	void drawTicks();
 	void drawLineGraph();
 	void drawLegend(S32 y);
@@ -87,47 +86,53 @@ class LLFastTimerView : public LLFloater
 			mStartFraction(0.f),
 			mEndFraction(1.f)
 		{}
-		S32			mWidth;
-		S32			mSelfWidth;
-		LLRect		mVisibleRect,
-					mChildrenRect;
-		LLColor4	mColor;
-		bool		mVisible;
-		F32			mStartFraction,
-					mEndFraction;
+		S32					mWidth;
+		S32					mSelfWidth;
+		LLRect				mVisibleRect,
+							mChildrenRect;
+		LLTrace::TimeBlock* mTimeBlock;
+		bool				mVisible;
+		F32					mStartFraction,
+							mEndFraction;
 	};
-	S32 updateTimerBarWidths(LLTrace::TimeBlock* time_block, std::vector<TimerBar>& bars, S32 history_index, bool visible);
-	S32 updateTimerBarFractions(LLTrace::TimeBlock* time_block, S32 timer_bar_index, std::vector<TimerBar>& bars);
-	S32 drawBar(LLTrace::TimeBlock* time_block, LLRect bar_rect, std::vector<TimerBar>& bars, S32 bar_index, LLPointer<LLUIImage>& bar_image);
+
+	struct TimerBarRow
+	{
+		S32						mBottom;
+		std::vector<TimerBar>	mBars;
+	};
+
+	S32 updateTimerBarWidths(LLTrace::TimeBlock* time_block, TimerBarRow& row, S32 history_index, bool visible = true);
+	S32 updateTimerBarFractions(LLTrace::TimeBlock* time_block, TimerBarRow& row, S32 timer_bar_index = 0);
+	S32 drawBar(LLTrace::TimeBlock* time_block, LLRect bar_rect, TimerBarRow& row, S32 image_width, S32 image_height, bool hovered, S32 bar_index = 0);
 	void setPauseState(bool pause_state);
 
-	std::vector<TimerBar>* mTimerBars;
-	S32 mDisplayMode;
+	std::deque<TimerBarRow> mTimerBarRows;
+	TimerBarRow				mAverageTimerRow;
 
-	typedef enum child_alignment
+	enum ChildAlignment
 	{
 		ALIGN_LEFT,
 		ALIGN_CENTER,
 		ALIGN_RIGHT,
 		ALIGN_COUNT
-	} ChildAlignment;
-
-	ChildAlignment mDisplayCenter;
-	bool                          mDisplayCalls,
-								  mDisplayHz;
-	LLUnit<LLUnits::Seconds, F64> mAllTimeMax,
-								  mTotalTimeDisplay;
-	LLRect mBarRect;
-	S32	mScrollIndex;
-	LLTrace::TimeBlock*           mHoverID;
-	LLTrace::TimeBlock*           mHoverTimer;
-	LLRect					mToolTipRect;
-	S32 mHoverBarIndex;
-	LLFrameTimer mHighlightTimer;
-	S32 mPrintStats;
-	LLRect mGraphRect;
-	LLTrace::PeriodicRecording*	  mRecording;
-	bool						  mPauseHistory;
+	}								mDisplayCenter;
+	bool							mDisplayCalls,
+									mDisplayHz,
+									mPauseHistory;
+	LLUnit<F64, LLUnits::Seconds>	mAllTimeMax,
+									mTotalTimeDisplay;
+	S32								mScrollIndex,
+									mHoverBarIndex,
+									mStatsIndex;
+	S32								mDisplayMode;
+	LLTrace::TimeBlock*				mHoverID;
+	LLTrace::TimeBlock*				mHoverTimer;
+	LLRect							mToolTipRect,
+									mGraphRect,
+									mBarRect;
+	LLFrameTimer					mHighlightTimer;
+	LLTrace::PeriodicRecording		mRecording;
 };
 
 #endif
diff --git a/indra/newview/llscenemonitor.cpp b/indra/newview/llscenemonitor.cpp
index dccf8a2a17c2add661602d7b7c1da056c01a6eb2..3d9e0ab4c3a482a75c4a9fd9df425217f78ca20e 100644
--- a/indra/newview/llscenemonitor.cpp
+++ b/indra/newview/llscenemonitor.cpp
@@ -503,9 +503,9 @@ void LLSceneMonitor::dumpToFile(std::string file_name)
 	os << std::setprecision(3);
 
 	PeriodicRecording& scene_load_recording = mSceneLoadRecording.getAcceptedRecording();
-	const U32 frame_count = scene_load_recording.getNumPeriods();
+	const U32 frame_count = scene_load_recording.getNumRecordedPeriods();
 
-	LLUnit<LLUnits::Seconds, F64> frame_time;
+	LLUnit<F64, LLUnits::Seconds> frame_time;
 
 	os << "Stat";
 	for (S32 frame = 0; frame < frame_count; frame++)
diff --git a/indra/newview/lltexturefetch.h b/indra/newview/lltexturefetch.h
index 2530beb722ba410c18928a2c45f5bf0dde3534ac..7fc58e230cdbb6c42e504cb62c2190ef2bf91c33 100755
--- a/indra/newview/lltexturefetch.h
+++ b/indra/newview/lltexturefetch.h
@@ -330,7 +330,7 @@ class LLTextureFetch : public LLWorkerThread
 	LLTextureInfo mTextureInfo;
 
 	// XXX possible delete
-	LLUnit<LLUnits::Bits, U32> mHTTPTextureBits;												// Mfnq
+	LLUnit<U32, LLUnits::Bits> mHTTPTextureBits;												// Mfnq
 
 	// XXX possible delete
 	//debug use
diff --git a/indra/newview/lltextureview.cpp b/indra/newview/lltextureview.cpp
index c9ec5d9bf69a5fba126b97cae7c9c0d4e48bd224..766b66efa0ef7973766c3051f65ced0b056de5eb 100755
--- a/indra/newview/lltextureview.cpp
+++ b/indra/newview/lltextureview.cpp
@@ -507,17 +507,17 @@ class LLGLTexMemBar : public LLView
 
 void LLGLTexMemBar::draw()
 {
-	LLUnit<LLUnits::Mibibytes, S32> bound_mem = LLViewerTexture::sBoundTextureMemory;
- 	LLUnit<LLUnits::Mibibytes, S32> max_bound_mem = LLViewerTexture::sMaxBoundTextureMem;
-	LLUnit<LLUnits::Mibibytes, S32> total_mem = LLViewerTexture::sTotalTextureMemory;
-	LLUnit<LLUnits::Mibibytes, S32> max_total_mem = LLViewerTexture::sMaxTotalTextureMem;
+	LLUnit<S32, LLUnits::Mibibytes> bound_mem = LLViewerTexture::sBoundTextureMemory;
+ 	LLUnit<S32, LLUnits::Mibibytes> max_bound_mem = LLViewerTexture::sMaxBoundTextureMem;
+	LLUnit<S32, LLUnits::Mibibytes> total_mem = LLViewerTexture::sTotalTextureMemory;
+	LLUnit<S32, LLUnits::Mibibytes> max_total_mem = LLViewerTexture::sMaxTotalTextureMem;
 	F32 discard_bias = LLViewerTexture::sDesiredDiscardBias;
 	F32 cache_usage = (F32)LLTrace::Mibibytes(LLAppViewer::getTextureCache()->getUsage()).value() ;
 	F32 cache_max_usage = (F32)LLTrace::Mibibytes(LLAppViewer::getTextureCache()->getMaxUsage()).value() ;
 	S32 line_height = LLFontGL::getFontMonospace()->getLineHeight();
 	S32 v_offset = 0;//(S32)((texture_bar_height + 2.2f) * mTextureView->mNumTextureBars + 2.0f);
-	LLUnit<LLUnits::Bytes, F32> total_texture_downloaded = gTotalTextureData;
-	LLUnit<LLUnits::Bytes, F32> total_object_downloaded = gTotalObjectData;
+	LLUnit<F32, LLUnits::Bytes> total_texture_downloaded = gTotalTextureData;
+	LLUnit<F32, LLUnits::Bytes> total_object_downloaded = gTotalObjectData;
 	U32 total_http_requests = LLAppViewer::getTextureFetch()->getTotalNumHTTPRequests() ;
 	U32 total_active_cached_objects = LLWorld::getInstance()->getNumOfActiveCachedObjects();
 	U32 total_objects = gObjectList.getNumObjects();
diff --git a/indra/newview/llviewerassetstats.cpp b/indra/newview/llviewerassetstats.cpp
index 3a6ee636d48c8012f1c312adbd6e9b302273f625..dc4c9fe4ad9fc5975ea3f9b906cd20dc7d59a9d9 100755
--- a/indra/newview/llviewerassetstats.cpp
+++ b/indra/newview/llviewerassetstats.cpp
@@ -486,10 +486,10 @@ void LLViewerAssetStats::getStats(AssetStats& stats, bool compact_output)
 		grid_from_region_handle(it->first, &grid_x, &grid_y);
 		r.grid_x(grid_x);
 		r.grid_y(grid_y);
-		r.duration(LLUnit<LLUnits::Microseconds, F64>(rec.getDuration()).value());
+		r.duration(LLUnit<F64, LLUnits::Microseconds>(rec.getDuration()).value());
 	}
 
-	stats.duration(mCurRecording ? LLUnit<LLUnits::Microseconds, F64>(mCurRecording->getDuration()).value() : 0.0);
+	stats.duration(mCurRecording ? LLUnit<F64, LLUnits::Microseconds>(mCurRecording->getDuration()).value() : 0.0);
 }
 
 LLSD LLViewerAssetStats::asLLSD(bool compact_output)
diff --git a/indra/newview/llviewerassetstats.h b/indra/newview/llviewerassetstats.h
index 1a8770f8a7f56ef75682504d3ccb6dc9b53b4ef1..e03b7c53a6a9deb33140f2f7d86dcc731f59b7be 100755
--- a/indra/newview/llviewerassetstats.h
+++ b/indra/newview/llviewerassetstats.h
@@ -83,7 +83,7 @@ class LLViewerAssetStats : public LLStopWatchControlsMixin<LLViewerAssetStats>
 	 * for compatibility with the pre-existing timestamp on the texture
 	 * fetcher class, LLTextureFetch.
 	 */
-	typedef LLUnit<LLUnits::Microseconds, U64> duration_t;
+	typedef LLUnit<U64, LLUnits::Microseconds> duration_t;
 
 	/**
 	 * Type for the region identifier used in stats.  Currently uses
diff --git a/indra/newview/llviewermessage.cpp b/indra/newview/llviewermessage.cpp
index f2a3ffc3dc7bd2659fee7d479d11c9ed1fb00190..0309acdad2410988e59408f0a45a4a3b0481bf32 100755
--- a/indra/newview/llviewermessage.cpp
+++ b/indra/newview/llviewermessage.cpp
@@ -4480,18 +4480,18 @@ void send_agent_update(BOOL force_send, BOOL send_reliable)
 
 // *TODO: Remove this dependency, or figure out a better way to handle
 // this hack.
-extern LLUnit<LLUnits::Bits, U32> gObjectData;
+extern LLUnit<U32, LLUnits::Bits> gObjectData;
 
 void process_object_update(LLMessageSystem *mesgsys, void **user_data)
 {	
 	// Update the data counters
 	if (mesgsys->getReceiveCompressedSize())
 	{
-		gObjectData += (LLUnit<LLUnits::Bytes, U32>)mesgsys->getReceiveCompressedSize();
+		gObjectData += (LLUnit<U32, LLUnits::Bytes>)mesgsys->getReceiveCompressedSize();
 	}
 	else
 	{
-		gObjectData += (LLUnit<LLUnits::Bytes, U32>)mesgsys->getReceiveSize();
+		gObjectData += (LLUnit<U32, LLUnits::Bytes>)mesgsys->getReceiveSize();
 	}
 
 	// Update the object...
@@ -4503,11 +4503,11 @@ void process_compressed_object_update(LLMessageSystem *mesgsys, void **user_data
 	// Update the data counters
 	if (mesgsys->getReceiveCompressedSize())
 	{
-		gObjectData += (LLUnit<LLUnits::Bytes, U32>)mesgsys->getReceiveCompressedSize();
+		gObjectData += (LLUnit<U32, LLUnits::Bytes>)mesgsys->getReceiveCompressedSize();
 	}
 	else
 	{
-		gObjectData += (LLUnit<LLUnits::Bytes, U32>)mesgsys->getReceiveSize();
+		gObjectData += (LLUnit<U32, LLUnits::Bytes>)mesgsys->getReceiveSize();
 	}
 
 	// Update the object...
@@ -4519,11 +4519,11 @@ void process_cached_object_update(LLMessageSystem *mesgsys, void **user_data)
 	// Update the data counters
 	if (mesgsys->getReceiveCompressedSize())
 	{
-		gObjectData += (LLUnit<LLUnits::Bytes, U32>)mesgsys->getReceiveCompressedSize();
+		gObjectData += (LLUnit<U32, LLUnits::Bytes>)mesgsys->getReceiveCompressedSize();
 	}
 	else
 	{
-		gObjectData += (LLUnit<LLUnits::Bytes, U32>)mesgsys->getReceiveSize();
+		gObjectData += (LLUnit<U32, LLUnits::Bytes>)mesgsys->getReceiveSize();
 	}
 
 	// Update the object...
@@ -4535,11 +4535,11 @@ void process_terse_object_update_improved(LLMessageSystem *mesgsys, void **user_
 {
 	if (mesgsys->getReceiveCompressedSize())
 	{
-		gObjectData += (LLUnit<LLUnits::Bytes, U32>)mesgsys->getReceiveCompressedSize();
+		gObjectData += (LLUnit<U32, LLUnits::Bytes>)mesgsys->getReceiveCompressedSize();
 	}
 	else
 	{
-		gObjectData += (LLUnit<LLUnits::Bytes, U32>)mesgsys->getReceiveSize();
+		gObjectData += (LLUnit<U32, LLUnits::Bytes>)mesgsys->getReceiveSize();
 	}
 
 	gObjectList.processCompressedObjectUpdate(mesgsys, user_data, OUT_TERSE_IMPROVED);
diff --git a/indra/newview/llviewerstats.cpp b/indra/newview/llviewerstats.cpp
index be477f7f9aa0f368eaa2af07d422aa80c1d5e7e9..7ddee48b3834103f7690a512187ffce5bba647cc 100755
--- a/indra/newview/llviewerstats.cpp
+++ b/indra/newview/llviewerstats.cpp
@@ -290,13 +290,13 @@ F32		gAveLandCompression = 0.f,
 		gWorstLandCompression = 0.f, 
 		gWorstWaterCompression = 0.f;
 
-LLUnit<LLUnits::Bytes, U32>		gTotalWorldData = 0, 
+LLUnit<U32, LLUnits::Bytes>		gTotalWorldData = 0, 
 						gTotalObjectData = 0, 
 						gTotalTextureData = 0;
 U32						gSimPingCount = 0;
-LLUnit<LLUnits::Bits, U32>		gObjectData = 0;
+LLUnit<U32, LLUnits::Bits>		gObjectData = 0;
 F32		gAvgSimPing = 0.f;
-LLUnit<LLUnits::Bytes, U32>		gTotalTextureBytesPerBoostLevel[LLViewerTexture::MAX_GL_IMAGE_CATEGORY] = {0};
+LLUnit<U32, LLUnits::Bytes>		gTotalTextureBytesPerBoostLevel[LLViewerTexture::MAX_GL_IMAGE_CATEGORY] = {0};
 
 extern U32  gVisCompared;
 extern U32  gVisTested;
@@ -334,8 +334,8 @@ void update_statistics()
 
 	typedef LLInstanceTracker<LLTrace::TraceType<LLTrace::TimeBlockAccumulator>, std::string> trace_type_t;
 
-	LLUnit<LLUnits::Seconds, F64> idle_secs = last_frame_recording.getSum(*trace_type_t::getInstance("Idle"));
-	LLUnit<LLUnits::Seconds, F64> network_secs = last_frame_recording.getSum(*trace_type_t::getInstance("Network"));
+	LLUnit<F64, LLUnits::Seconds> idle_secs = last_frame_recording.getSum(*trace_type_t::getInstance("Idle"));
+	LLUnit<F64, LLUnits::Seconds> network_secs = last_frame_recording.getSum(*trace_type_t::getInstance("Network"));
 
 	record(LLStatViewer::FRAME_STACKTIME, last_frame_recording.getSum(*trace_type_t::getInstance("Frame")));
 	record(LLStatViewer::UPDATE_STACKTIME, idle_secs - network_secs);
diff --git a/indra/newview/llviewerstats.h b/indra/newview/llviewerstats.h
index bfba7bca9a4e5bc3408a36963d98fd270aa8e0db..4e48a61264dbdb7e06aeb97896debd40cd1c00f2 100755
--- a/indra/newview/llviewerstats.h
+++ b/indra/newview/llviewerstats.h
@@ -331,7 +331,7 @@ void update_statistics();
 void send_stats();
 
 extern LLFrameTimer gTextureTimer;
-extern LLUnit<LLUnits::Bytes, U32>	gTotalTextureData;
-extern LLUnit<LLUnits::Bytes, U32>  gTotalObjectData;
-extern LLUnit<LLUnits::Bytes, U32>  gTotalTextureBytesPerBoostLevel[] ;
+extern LLUnit<U32, LLUnits::Bytes>	gTotalTextureData;
+extern LLUnit<U32, LLUnits::Bytes>  gTotalObjectData;
+extern LLUnit<U32, LLUnits::Bytes>  gTotalTextureBytesPerBoostLevel[] ;
 #endif // LL_LLVIEWERSTATS_H
diff --git a/indra/newview/llviewertexture.cpp b/indra/newview/llviewertexture.cpp
index e0a88bfad62a7b1ee2cc2dbf5ef7bc9655610434..f468df0674ea4802a93002e87cfb78e694ecfb38 100755
--- a/indra/newview/llviewertexture.cpp
+++ b/indra/newview/llviewertexture.cpp
@@ -66,8 +66,8 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 // extern
-const LLUnit<LLUnits::Mibibytes, S32> gMinVideoRam = 32;
-const LLUnit<LLUnits::Mibibytes, S32> gMaxVideoRam = 512;
+const LLUnit<S32, LLUnits::Mibibytes> gMinVideoRam = 32;
+const LLUnit<S32, LLUnits::Mibibytes> gMaxVideoRam = 512;
 
 
 // statics
@@ -88,11 +88,11 @@ S32 LLViewerTexture::sAuxCount = 0;
 LLFrameTimer LLViewerTexture::sEvaluationTimer;
 F32 LLViewerTexture::sDesiredDiscardBias = 0.f;
 F32 LLViewerTexture::sDesiredDiscardScale = 1.1f;
-LLUnit<LLUnits::Bytes, S32> LLViewerTexture::sBoundTextureMemory = 0;
-LLUnit<LLUnits::Bytes, S32> LLViewerTexture::sTotalTextureMemory = 0;
-LLUnit<LLUnits::Mibibytes, S32> LLViewerTexture::sMaxBoundTextureMem = 0;
-LLUnit<LLUnits::Mibibytes, S32> LLViewerTexture::sMaxTotalTextureMem = 0;
-LLUnit<LLUnits::Bytes, S32> LLViewerTexture::sMaxDesiredTextureMem = 0 ;
+LLUnit<S32, LLUnits::Bytes> LLViewerTexture::sBoundTextureMemory = 0;
+LLUnit<S32, LLUnits::Bytes> LLViewerTexture::sTotalTextureMemory = 0;
+LLUnit<S32, LLUnits::Mibibytes> LLViewerTexture::sMaxBoundTextureMem = 0;
+LLUnit<S32, LLUnits::Mibibytes> LLViewerTexture::sMaxTotalTextureMem = 0;
+LLUnit<S32, LLUnits::Bytes> LLViewerTexture::sMaxDesiredTextureMem = 0 ;
 S8  LLViewerTexture::sCameraMovingDiscardBias = 0 ;
 F32 LLViewerTexture::sCameraMovingBias = 0.0f ;
 S32 LLViewerTexture::sMaxSculptRez = 128 ; //max sculpt image size
@@ -542,7 +542,7 @@ void LLViewerTexture::updateClass(const F32 velocity, const F32 angular_velocity
 		sTotalTextureMemory >= sMaxTotalTextureMem)
 	{
 		//when texture memory overflows, lower down the threshold to release the textures more aggressively.
-		sMaxDesiredTextureMem = llmin(sMaxDesiredTextureMem * 0.75f, LLUnit<LLUnits::Bytes, S32>(gMaxVideoRam));
+		sMaxDesiredTextureMem = llmin(sMaxDesiredTextureMem * 0.75f, LLUnit<S32, LLUnits::Bytes>(gMaxVideoRam));
 	
 		// If we are using more texture memory than we should,
 		// scale up the desired discard level
diff --git a/indra/newview/llviewertexture.h b/indra/newview/llviewertexture.h
index e939731cf20d8a28927963ad269ecfffe577c2dd..529b812f41de7069c09017d4bc5c7362b9edc460 100755
--- a/indra/newview/llviewertexture.h
+++ b/indra/newview/llviewertexture.h
@@ -39,8 +39,8 @@
 #include <map>
 #include <list>
 
-extern const LLUnit<LLUnits::Mibibytes, S32> gMinVideoRam;
-extern const LLUnit<LLUnits::Mibibytes, S32> gMaxVideoRam;
+extern const LLUnit<S32, LLUnits::Mibibytes> gMinVideoRam;
+extern const LLUnit<S32, LLUnits::Mibibytes> gMaxVideoRam;
 
 class LLFace;
 class LLImageGL ;
@@ -205,11 +205,11 @@ class LLViewerTexture : public LLGLTexture
 	static LLFrameTimer sEvaluationTimer;
 	static F32 sDesiredDiscardBias;
 	static F32 sDesiredDiscardScale;
-	static LLUnit<LLUnits::Bytes, S32> sBoundTextureMemory;
-	static LLUnit<LLUnits::Bytes, S32> sTotalTextureMemory;
-	static LLUnit<LLUnits::Mibibytes, S32> sMaxBoundTextureMem;
-	static LLUnit<LLUnits::Mibibytes, S32> sMaxTotalTextureMem;
-	static LLUnit<LLUnits::Bytes, S32> sMaxDesiredTextureMem ;
+	static LLUnit<S32, LLUnits::Bytes> sBoundTextureMemory;
+	static LLUnit<S32, LLUnits::Bytes> sTotalTextureMemory;
+	static LLUnit<S32, LLUnits::Mibibytes> sMaxBoundTextureMem;
+	static LLUnit<S32, LLUnits::Mibibytes> sMaxTotalTextureMem;
+	static LLUnit<S32, LLUnits::Bytes> sMaxDesiredTextureMem ;
 	static S8  sCameraMovingDiscardBias;
 	static F32 sCameraMovingBias;
 	static S32 sMaxSculptRez ;
diff --git a/indra/newview/llviewerwindow.cpp b/indra/newview/llviewerwindow.cpp
index 349849a26745463212de7478900e966f2eb5b836..97f7baa98d69999ed0051148219b5b180b2584cb 100755
--- a/indra/newview/llviewerwindow.cpp
+++ b/indra/newview/llviewerwindow.cpp
@@ -738,7 +738,7 @@ class LLDebugText
 			{
 				if(gTotalTextureBytesPerBoostLevel[i] > 0)
 				{
-					addText(xpos, ypos, llformat("Boost_Level %d:  %.3f MB", i, LLUnit<LLUnits::Mibibytes, F32>(gTotalTextureBytesPerBoostLevel[i]).value()));
+					addText(xpos, ypos, llformat("Boost_Level %d:  %.3f MB", i, LLUnit<F32, LLUnits::Mibibytes>(gTotalTextureBytesPerBoostLevel[i]).value()));
 					ypos += y_inc;
 				}
 			}
diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp
index dd5c153d55b2b05536a0446ba94ffd307c431c49..7cf30e1661f261b38ace33737c4cd89d52123f1a 100755
--- a/indra/newview/pipeline.cpp
+++ b/indra/newview/pipeline.cpp
@@ -2792,7 +2792,7 @@ void LLPipeline::updateGeom(F32 max_dtime)
 		
 	S32 count = 0;
 	
-	max_dtime = llmax(update_timer.getElapsedTimeF32()+0.001f, LLUnitImplicit<LLUnits::Seconds, F32>(max_dtime));
+	max_dtime = llmax(update_timer.getElapsedTimeF32()+0.001f, LLUnitImplicit<F32, LLUnits::Seconds>(max_dtime));
 	LLSpatialGroup* last_group = NULL;
 	LLSpatialBridge* last_bridge = NULL;