diff --git a/indra/cmake/00-Common.cmake b/indra/cmake/00-Common.cmake
index 92291e26763661ae76dc4349809919c25ba4f7ba..85e1f0115e1d40342ad80105c4b7c62473abbf92 100644
--- a/indra/cmake/00-Common.cmake
+++ b/indra/cmake/00-Common.cmake
@@ -331,4 +331,10 @@ else()
   add_definitions(-URELEASE_SHOW_ASSERT)
 endif()
 
+option(DISABLE_TIMING "Disable all but core fast timers" OFF) # opt-in switch; OFF by default
+if(DISABLE_TIMING)
+  add_definitions(-DAL_DISABLE_TIMERS=1) # passes AL_DISABLE_TIMERS=1 to the compiler as a preprocessor definition
+endif()
+
+
 endif(NOT DEFINED ${CMAKE_CURRENT_LIST_FILE}_INCLUDED)
diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h
index 5628a05b00d1d94f407192a39644d5d61aee6c98..fd160b9df9369afacf3bb7ab54343d00dcfaab20 100644
--- a/indra/llcommon/llfasttimer.h
+++ b/indra/llcommon/llfasttimer.h
@@ -38,7 +38,13 @@
 #define LL_FAST_TIMER_ON 1
 #define LL_FASTTIMER_USE_RDTSC 1
 
+#if !defined(AL_DISABLE_TIMERS) || !AL_DISABLE_TIMERS // build defines AL_DISABLE_TIMERS=1 when the DISABLE_TIMING CMake option is ON; otherwise every timer records
 #define LL_RECORD_BLOCK_TIME(timer_stat) const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(timer_stat)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__);
+#else // timers disabled: ordinary block timers expand to nothing
+#define LL_RECORD_BLOCK_TIME(timer_stat)
+#endif // AL_DISABLE_TIMERS
+// Same expansion as the enabled LL_RECORD_BLOCK_TIME, but defined unconditionally so core timers keep recording when the macro above is compiled out.
+#define LL_ALWAYS_RECORD_BLOCK_TIME(timer_stat) const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(timer_stat)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__);
 
 namespace LLTrace
 {
@@ -65,46 +71,20 @@ class BlockTimer
 	//
 	// Windows implementation of CPU clock
 	//
-
-	//
-	// NOTE: put back in when we aren't using platform sdk anymore
-	//
-	// because MS has different signatures for these functions in winnt.h
-	// need to rename them to avoid conflicts
-	//#define _interlockedbittestandset _renamed_interlockedbittestandset
-	//#define _interlockedbittestandreset _renamed_interlockedbittestandreset
-	//#include <intrin.h>
-	//#undef _interlockedbittestandset
-	//#undef _interlockedbittestandreset
-
-	//inline U32 getCPUClockCount32()
-	//{
-	//	U64 time_stamp = __rdtsc();
-	//	return (U32)(time_stamp >> 8);
-	//}
-	//
-	//// return full timer value, *not* shifted by 8 bits
-	//inline U64 getCPUClockCount64()
-	//{
-	//	return __rdtsc();
-	//}
-
-	
+#if LL_FASTTIMER_USE_RDTSC
 
 	// shift off lower 8 bits for lower resolution but longer term timing
 	// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
-#if LL_FASTTIMER_USE_RDTSC
 	static U32 getCPUClockCount32()
 	{
-		unsigned __int64 val = __rdtsc();
-		val = val >> 8;
-		return static_cast<U32>(val);
+		U64 time_stamp = __rdtsc() >> 8U; // read the counter and discard its low 8 bits
+		return static_cast<U32>(time_stamp); // narrow the shifted count to the U32 return type
 	}
 
 	// return full timer value, *not* shifted by 8 bits
 	static U64 getCPUClockCount64()
 	{
-		return static_cast<U64>( __rdtsc() );
+		return static_cast<U64>(__rdtsc()); // unshifted counter value, unlike the 32-bit variant
 	}
 
 #else
@@ -112,7 +92,7 @@ class BlockTimer
 	// These use QueryPerformanceCounter, which is arguably fine and also works on AMD architectures.
 	static U32 getCPUClockCount32()
 	{
-		return (U32)(get_clock_count()>>8);
+		return static_cast<U32>(get_clock_count() >> 8); // discard the low 8 bits, then narrow to U32
 	}
 
 	static U64 getCPUClockCount64()
diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp
index eb81dfe510404245c20a58d8dc2d5e2ad6896395..03cc4af6b50edd0c3c90c90e22d87c84a8eacdb1 100644
--- a/indra/newview/llappviewer.cpp
+++ b/indra/newview/llappviewer.cpp
@@ -1396,7 +1396,7 @@ bool LLAppViewer::doFrame()
 	LLEventPump& mainloop(LLEventPumps::instance().obtain("mainloop"));
 	LLSD newFrame;
 
-	LL_RECORD_BLOCK_TIME(FTM_FRAME);
+	LL_ALWAYS_RECORD_BLOCK_TIME(FTM_FRAME);
 	LLTrace::BlockTimer::processTimes();
 	LLTrace::get_frame_recording().nextPeriod();
 	LLTrace::BlockTimer::logStats();
@@ -1470,7 +1470,7 @@ bool LLAppViewer::doFrame()
 			{
 				pauseMainloopTimeout(); // *TODO: Remove. Messages shouldn't be stalling for 20+ seconds!
 
-				LL_RECORD_BLOCK_TIME(FTM_IDLE);
+				LL_ALWAYS_RECORD_BLOCK_TIME(FTM_IDLE);
 				idle();
 
 				resumeMainloopTimeout();
@@ -1506,7 +1506,7 @@ bool LLAppViewer::doFrame()
 
 		// Sleep and run background threads
 		{
-			LL_RECORD_BLOCK_TIME(FTM_SLEEP);
+			LL_ALWAYS_RECORD_BLOCK_TIME(FTM_SLEEP);
 
 			// yield some time to the os based on command line option
 			static LLCachedControl<S32> yield_time(gSavedSettings, "YieldTime", -1);
diff --git a/indra/newview/llviewerdisplay.cpp b/indra/newview/llviewerdisplay.cpp
index 74bc476d2dea80d07310405bf3043dd61ac6b7a8..8df373dd5e1439c863667a8f1ae9acd6962217b9 100644
--- a/indra/newview/llviewerdisplay.cpp
+++ b/indra/newview/llviewerdisplay.cpp
@@ -282,7 +282,7 @@ static std::string STR_DISPLAY_DONE("Display:Done");
 // Paint the display!
 void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot)
 {
-	LL_RECORD_BLOCK_TIME(FTM_RENDER);
+	LL_ALWAYS_RECORD_BLOCK_TIME(FTM_RENDER);
 
 	if (gWindowResized)
 	{ //skip render on frames where window has been resized