Newer
Older
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
}
// Binds the entry of `source` at position `index` to the captured texture unit
// as a LLTexUnit::TT_TEXTURE.
// Returns the result of LLTexUnit::bindManual(), or false when no texture unit
// was captured or when `index` is outside the range of `source`.
bool bind(U32 index)
{
    // should always be there with dummy (-1), but just in case
    if (!texUnit)
    {
        return false;
    }
    // std::vector::operator[] is unchecked; refuse indices past the end
    // instead of reading out of bounds
    if (index >= source.size())
    {
        return false;
    }
    return texUnit->bindManual(LLTexUnit::TT_TEXTURE, source[index]);
}
private:
// capture which LLTexUnit we're going to use
LLTexUnit* texUnit;
// use std::vector for implicit resource management
std::vector<U32> source;
};
// Scope guard for a shader: calls bind() on the shader when constructed and
// unbind() when destroyed, so the two calls stay paired on every exit path
// of the enclosing scope.
class ShaderBinder
{
public:
    // explicit: a shader must never be implicitly converted into a binder
    explicit ShaderBinder(LLGLSLShader& shader) :
        mShader(shader)
    {
        mShader.bind();
    }

    ~ShaderBinder()
    {
        mShader.unbind();
    }

private:
    // Not copyable: a copy would call unbind() a second time on the same
    // shader when it goes out of scope. Declared but intentionally undefined.
    ShaderBinder(const ShaderBinder&);
    ShaderBinder& operator=(const ShaderBinder&);

    // shader kept bound for the lifetime of this object
    LLGLSLShader& mShader;
};
//-----------------------------------------------------------------------------
// gpu_benchmark()
//-----------------------------------------------------------------------------
F32 gpu_benchmark()
if (!gGLManager.mHasShaderObjects || !gGLManager.mHasTimerQuery)
{ // don't bother benchmarking the fixed function
// or venerable drivers which don't support accurate timing anyway
// and are likely to be correctly identified by the GPU table already.
return -1.f;
if (gBenchmarkProgram.mProgramObject == 0)
{
LLViewerShaderMgr::instance()->initAttribsAndUniforms();
gBenchmarkProgram.mName = "Benchmark Shader";
gBenchmarkProgram.mFeatures.attachNothing = true;
gBenchmarkProgram.mShaderFiles.clear();
gBenchmarkProgram.mShaderFiles.push_back(std::make_pair("interface/benchmarkV.glsl", GL_VERTEX_SHADER));
gBenchmarkProgram.mShaderFiles.push_back(std::make_pair("interface/benchmarkF.glsl", GL_FRAGMENT_SHADER));
gBenchmarkProgram.mShaderLevel = 1;
if (!gBenchmarkProgram.createShader(NULL, NULL))
{
return -1.f;
}
LLGLDisable blend(GL_BLEND);
//measure memory bandwidth by:
// - allocating a batch of textures and render targets
// - rendering those textures to those render targets
// - recording time taken
// - taking the median time for a given number of samples
//resolution of textures/render targets
const U32 res = 1024;
//number of textures
const U32 count = 32;
//number of samples to take
const S32 samples = 64;
//time limit, allocation operations shouldn't take longer than 30 seconds, same for actual benchmark.
const F32 time_limit = 30;
ShaderProfileHelper initProfile;
std::vector<LLRenderTarget> dest(count);
TextureHolder texHolder(0, count);
std::vector<F32> results;
//build a random texture
davep
committed
U8* pixels = new U8[res*res*4];
for (U32 i = 0; i < res*res*4; ++i)
{
pixels[i] = (U8) ll_rand(255);
}
gGL.setColorMask(true, true);
LLGLDepthTest depth(GL_FALSE);
LLTimer alloc_timer;
alloc_timer.start();
for (U32 i = 0; i < count; ++i)
{
//allocate render targets and textures
if (!dest[i].allocate(res, res, GL_RGBA, false, false, LLTexUnit::TT_TEXTURE, true))
{
LL_WARNS("Benchmark") << "Failed to allocate render target." << LL_ENDL;
// abandon the benchmark test
delete[] pixels;
return -1.f;
}
dest[i].bindTarget();
dest[i].clear();
dest[i].flush();
if (!texHolder.bind(i))
{
// can use a dummy value mDummyTexUnit = new LLTexUnit(-1);
LL_WARNS("Benchmark") << "Failed to bind tex unit." << LL_ENDL;
// abandon the benchmark test
delete[] pixels;
return -1.f;
}
LLImageGL::setManualImage(GL_TEXTURE_2D, 0, GL_RGBA, res,res,GL_RGBA, GL_UNSIGNED_BYTE, pixels);
if (alloc_timer.getElapsedTimeF32() > time_limit)
{
// abandon the benchmark test
LL_WARNS("Benchmark") << "Allocation operation took longer then 30 seconds, stopping." << LL_ENDL;
delete[] pixels;
return -1.f;
}
davep
committed
delete [] pixels;
//make a dummy triangle to draw with
LLPointer<LLVertexBuffer> buff = new LLVertexBuffer(LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD0, GL_STREAM_DRAW);
if (!buff->allocateBuffer(3, 0, true))
{
LL_WARNS("Benchmark") << "Failed to allocate buffer during benchmark." << LL_ENDL;
// abandon the benchmark test
return -1.f;
}
LLStrider<LLVector3> v;
LLStrider<LLVector2> tc;
if (! buff->getVertexStrider(v))
LL_WARNS("Benchmark") << "GL LLVertexBuffer::getVertexStrider() returned false, "
<< "buff->getMappedData() is"
<< (buff->getMappedData()? " not" : "")
<< " NULL" << LL_ENDL;
// abandon the benchmark test
return -1.f;
Graham Linden
committed
// generate dummy triangle
v[0].set(-1, 1, 0);
v[1].set(-1, -3, 0);
v[2].set(3, 1, 0);
Graham Linden
committed
// ensure matched pair of bind() and unbind() calls
ShaderBinder binder(gBenchmarkProgram);
David Parks
committed
buff->setBuffer(LLVertexBuffer::MAP_VERTEX);
glFinish();
F32 time_passed = 0; // seconds
for (S32 c = -1; c < samples && time_passed < time_limit; ++c)
{
LLTimer timer;
timer.start();
for (U32 i = 0; i < count; ++i)
{
dest[i].bindTarget();
texHolder.bind(i);
buff->drawArrays(LLRender::TRIANGLES, 0, 3);
dest[i].flush();
}
//wait for current batch of copies to finish
F32 time = timer.getElapsedTimeF32();
if (c >= 0) // <-- ignore the first sample as it tends to be artificially slow
{
//store result in gigabytes per second
F32 gb = (F32) ((F64) (res*res*8*count))/(1000000000);
F32 gbps = gb/time;
results.push_back(gbps);
}
}
std::sort(results.begin(), results.end());
F32 gbps = results[results.size()/2];
LL_INFOS("Benchmark") << "Memory bandwidth is " << llformat("%.3f", gbps) << "GB/sec according to CPU timers, " << (F32)results.size() << " tests took " << time_passed << " seconds" << LL_ENDL;
davep
committed
#if LL_DARWIN
if (gbps > 512.f)
{
LL_WARNS("Benchmark") << "Memory bandwidth is improbably high and likely incorrect; discarding result." << LL_ENDL;
davep
committed
//OSX is probably lying, discard result
return -1.f;
davep
committed
}
#endif
F32 ms = gBenchmarkProgram.mTimeElapsed/1000000.f;
F32 seconds = ms/1000.f;
David Parks
committed
F64 samples_drawn = res*res*count*results.size();
F32 samples_sec = (samples_drawn/1000000000.0)/seconds;
gbps = samples_sec*8;
David Parks
committed
LL_INFOS("Benchmark") << "Memory bandwidth is " << llformat("%.3f", gbps) << "GB/sec according to ARB_timer_query, total time " << seconds << " seconds" << LL_ENDL;
return gbps;