Skip to content
Snippets Groups Projects
Commit 1e49e9ae authored by Brad Payne (Vir Linden)'s avatar Brad Payne (Vir Linden)
Browse files

Moving timer code for AMD issue diagnosis

parent ed64630a
No related branches found
No related tags found
No related merge requests found
......@@ -27,155 +27,10 @@
#ifndef LL_FASTTIMER_H
#define LL_FASTTIMER_H
// Temporarily(?) de-inlined these functions to simplify diagnosis of problems.
// Implementation of getCPUClockCount32() and getCPUClockCount64 are now in llfastertimer_class.cpp.
// pull in the actual class definition
#include "llfasttimer_class.h"
//
// Important note: These implementations must be FAST!
//
#if LL_WINDOWS
//
// Windows implementation of CPU clock
//
//
// NOTE: put back in when we aren't using platform sdk anymore
//
// because MS has different signatures for these functions in winnt.h
// need to rename them to avoid conflicts
//#define _interlockedbittestandset _renamed_interlockedbittestandset
//#define _interlockedbittestandreset _renamed_interlockedbittestandreset
//#include <intrin.h>
//#undef _interlockedbittestandset
//#undef _interlockedbittestandreset
//inline U32 LLFastTimer::getCPUClockCount32()
//{
// U64 time_stamp = __rdtsc();
// return (U32)(time_stamp >> 8);
//}
//
//// return full timer value, *not* shifted by 8 bits
//inline U64 LLFastTimer::getCPUClockCount64()
//{
// return __rdtsc();
//}
// shift off lower 8 bits for lower resolution but longer term timing
// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
#ifdef USE_RDTSC
inline U32 LLFastTimer::getCPUClockCount32()
{
U32 ret_val;
__asm
{
_emit 0x0f
_emit 0x31
shr eax,8
shl edx,24
or eax, edx
mov dword ptr [ret_val], eax
}
return ret_val;
}
// return full timer value, *not* shifted by 8 bits
inline U64 LLFastTimer::getCPUClockCount64()
{
U64 ret_val;
__asm
{
_emit 0x0f
_emit 0x31
mov eax,eax
mov edx,edx
mov dword ptr [ret_val+4], edx
mov dword ptr [ret_val], eax
}
return ret_val;
}
#else
LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp
// These use QueryPerformanceCounter, which is arguably fine and also works on amd architectures.
inline U32 LLFastTimer::getCPUClockCount32()
{
return (U32)(get_clock_count()>>8);
}
inline U64 LLFastTimer::getCPUClockCount64()
{
return get_clock_count();
}
#endif
#endif
#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
//
// Linux and Solaris implementation of CPU clock - non-x86.
// This is accurate but SLOW! Only use out of desperation.
//
// Try to use the MONOTONIC clock if available, this is a constant time counter
// with nanosecond resolution (but not necessarily accuracy) and attempts are
// made to synchronize this value between cores at kernel start. It should not
// be affected by CPU frequency. If not available use the REALTIME clock, but
// this may be affected by NTP adjustments or other user activity affecting
// the system time.
inline U64 LLFastTimer::getCPUClockCount64()
{
struct timespec tp;
#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time?
if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME
#endif
clock_gettime(CLOCK_REALTIME,&tp);
return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec;
}
inline U32 LLFastTimer::getCPUClockCount32()
{
return (U32)(LLFastTimer::getCPUClockCount64() >> 8);
}
#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
//
// Mac+Linux+Solaris FAST x86 implementation of CPU clock
inline U32 LLFastTimer::getCPUClockCount32()
{
U64 x;
__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
return (U32)(x >> 8);
}
inline U64 LLFastTimer::getCPUClockCount64()
{
U64 x;
__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
return x;
}
#endif
#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__)))
//
// Mac PPC (deprecated) implementation of CPU clock
//
// Just use gettimeofday implementation for now
inline U32 LLFastTimer::getCPUClockCount32()
{
return (U32)(get_clock_count()>>8);
}
inline U64 LLFastTimer::getCPUClockCount64()
{
return get_clock_count();
}
#endif
#endif // LL_LLFASTTIMER_H
......@@ -35,7 +35,9 @@
#include <boost/bind.hpp>
#if LL_WINDOWS
#include "lltimer.h"
#elif LL_LINUX || LL_SOLARIS
#include <sys/time.h>
#include <sched.h>
......@@ -481,6 +483,19 @@ void LLFastTimer::NamedTimer::resetFrame()
{
if (sLog)
{ //output current frame counts to performance log
static S32 call_count = 0;
if (call_count % 100 == 0)
{
llinfos << "countsPerSecond (32 bit): " << countsPerSecond() << llendl;
llinfos << "get_clock_count (64 bit): " << get_clock_count() << llendl;
llinfos << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << llendl;
llinfos << "getCPUClockCount32() " << getCPUClockCount32() << llendl;
llinfos << "getCPUClockCount64() " << getCPUClockCount64() << llendl;
llinfos << "elapsed sec " << ((F64)getCPUClockCount64())/((F64)LLProcessorInfo().getCPUFrequency()*1000000.0) << llendl;
}
call_count++;
F64 iclock_freq = 1000.0 / countsPerSecond(); // good place to calculate clock frequency
F64 total_time = 0;
......@@ -762,3 +777,152 @@ LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state)
//////////////////////////////////////////////////////////////////////////////
//
// Important note: These implementations must be FAST!
//
#if LL_WINDOWS
//
// Windows implementation of CPU clock
//
//
// NOTE: put back in when we aren't using platform sdk anymore
//
// because MS has different signatures for these functions in winnt.h
// need to rename them to avoid conflicts
//#define _interlockedbittestandset _renamed_interlockedbittestandset
//#define _interlockedbittestandreset _renamed_interlockedbittestandreset
//#include <intrin.h>
//#undef _interlockedbittestandset
//#undef _interlockedbittestandreset
//inline U32 LLFastTimer::getCPUClockCount32()
//{
// U64 time_stamp = __rdtsc();
// return (U32)(time_stamp >> 8);
//}
//
//// return full timer value, *not* shifted by 8 bits
//inline U64 LLFastTimer::getCPUClockCount64()
//{
// return __rdtsc();
//}
// shift off lower 8 bits for lower resolution but longer term timing
// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
#ifdef USE_RDTSC
inline U32 LLFastTimer::getCPUClockCount32()
{
U32 ret_val;
__asm
{
_emit 0x0f
_emit 0x31
shr eax,8
shl edx,24
or eax, edx
mov dword ptr [ret_val], eax
}
return ret_val;
}
// return full timer value, *not* shifted by 8 bits
inline U64 LLFastTimer::getCPUClockCount64()
{
U64 ret_val;
__asm
{
_emit 0x0f
_emit 0x31
mov eax,eax
mov edx,edx
mov dword ptr [ret_val+4], edx
mov dword ptr [ret_val], eax
}
return ret_val;
}
#else
//LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp
// These use QueryPerformanceCounter, which is arguably fine and also works on amd architectures.
inline U32 LLFastTimer::getCPUClockCount32()
{
return (U32)(get_clock_count()>>8);
}
inline U64 LLFastTimer::getCPUClockCount64()
{
return get_clock_count();
}
#endif
#endif
#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
//
// Linux and Solaris implementation of CPU clock - non-x86.
// This is accurate but SLOW! Only use out of desperation.
//
// Try to use the MONOTONIC clock if available, this is a constant time counter
// with nanosecond resolution (but not necessarily accuracy) and attempts are
// made to synchronize this value between cores at kernel start. It should not
// be affected by CPU frequency. If not available use the REALTIME clock, but
// this may be affected by NTP adjustments or other user activity affecting
// the system time.
inline U64 LLFastTimer::getCPUClockCount64()
{
struct timespec tp;
#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time?
if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME
#endif
clock_gettime(CLOCK_REALTIME,&tp);
return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec;
}
inline U32 LLFastTimer::getCPUClockCount32()
{
return (U32)(LLFastTimer::getCPUClockCount64() >> 8);
}
#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
//
// Mac+Linux+Solaris FAST x86 implementation of CPU clock
inline U32 LLFastTimer::getCPUClockCount32()
{
U64 x;
__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
return (U32)(x >> 8);
}
inline U64 LLFastTimer::getCPUClockCount64()
{
U64 x;
__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
return x;
}
#endif
#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__)))
//
// Mac PPC (deprecated) implementation of CPU clock
//
// Just use gettimeofday implementation for now
inline U32 LLFastTimer::getCPUClockCount32()
{
return (U32)(get_clock_count()>>8);
}
inline U64 LLFastTimer::getCPUClockCount64()
{
return get_clock_count();
}
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment