llglsandbox.cpp

	}

	// Bind the texture name stored at position 'index' of 'source' on the
	// captured texture unit as an LLTexUnit::TT_TEXTURE.
	//
	// Returns bindManual()'s result, or false when no texture unit was captured
	// or when 'index' does not refer to an element of 'source'.
	bool bind(U32 index)
	{
		if (!texUnit) // should always be there with dummy (-1), but just in case
		{
			return false;
		}
		if (index >= source.size())
		{
			// refuse to read past the end of the vector
			return false;
		}
		return texUnit->bindManual(LLTexUnit::TT_TEXTURE, source[index]);
	}

private:
	// capture which LLTexUnit we're going to use
	LLTexUnit* texUnit;

	// use std::vector for implicit resource management
	std::vector<U32> source;
};

// Scope guard for a shader: calls bind() on the given LLGLSLShader when
// constructed and unbind() on it when destroyed, so the two calls stay paired
// on every exit path of the enclosing scope.
//
// The guard only holds a reference; the shader must outlive it.
class ShaderBinder
{
public:
	// explicit: an LLGLSLShader should never silently convert into a guard
	explicit ShaderBinder(LLGLSLShader& shader) :
		mShader(shader)
	{
		mShader.bind();
	}
	~ShaderBinder()
	{
		mShader.unbind();
	}

private:
	// Not copyable: a copy would call unbind() a second time for a single
	// bind(). Declared but intentionally left undefined.
	ShaderBinder(const ShaderBinder&);
	ShaderBinder& operator=(const ShaderBinder&);

	LLGLSLShader& mShader;
};


//-----------------------------------------------------------------------------
// gpu_benchmark()
//
// Estimates GPU memory bandwidth by repeatedly drawing a batch of textures
// into a matching batch of render targets and timing the draws.
//
// Two figures are logged: one from CPU timers (median of the collected
// samples) and one derived from gBenchmarkProgram.mTimeElapsed. The latter is
// the value returned, in GB/sec.
//
// Returns -1.f when the benchmark is skipped or abandoned: required GL
// features missing, benchmark shader could not be created, an allocation
// failed or exceeded the time limit, no timed sample could be collected, or
// (LL_DARWIN only) the CPU-timed result is implausibly high.
//-----------------------------------------------------------------------------
F32 gpu_benchmark()
{
	if (!gGLManager.mHasShaderObjects || !gGLManager.mHasTimerQuery)
	{ // don't bother benchmarking the fixed function
	  // or venerable drivers which don't support accurate timing anyway
	  // and are likely to be correctly identified by the GPU table already.
		return -1.f;
	}

	if (gBenchmarkProgram.mProgramObject == 0)
	{
		// benchmark shader has not been built yet: set it up now
		LLViewerShaderMgr::instance()->initAttribsAndUniforms();

		gBenchmarkProgram.mName = "Benchmark Shader";
		gBenchmarkProgram.mFeatures.attachNothing = true;
		gBenchmarkProgram.mShaderFiles.clear();
		gBenchmarkProgram.mShaderFiles.push_back(std::make_pair("interface/benchmarkV.glsl", GL_VERTEX_SHADER));
		gBenchmarkProgram.mShaderFiles.push_back(std::make_pair("interface/benchmarkF.glsl", GL_FRAGMENT_SHADER));
		gBenchmarkProgram.mShaderLevel = 1;
		if (!gBenchmarkProgram.createShader(NULL, NULL))
		{
			return -1.f;
		}
	}

	LLGLDisable blend(GL_BLEND);

	//measure memory bandwidth by:
	// - allocating a batch of textures and render targets
	// - rendering those textures to those render targets
	// - recording time taken
	// - taking the median time for a given number of samples

	//resolution of textures/render targets
	const U32 res = 1024;

	//number of textures
	const U32 count = 32;

	//number of samples to take
	const S32 samples = 64;

	//time limit, allocation operations shouldn't take longer than 30 seconds, same for actual benchmark.
	const F32 time_limit = 30;

	ShaderProfileHelper initProfile;

	std::vector<LLRenderTarget> dest(count);
	TextureHolder texHolder(0, count);
	std::vector<F32> results;

	//build a random texture
	// (held in a std::vector so every early return below frees it automatically)
	std::vector<U8> pixels(res*res*4);

	for (U32 i = 0; i < res*res*4; ++i)
	{
		pixels[i] = static_cast<U8>(ll_rand(255));
	}

	gGL.setColorMask(true, true);
	LLGLDepthTest depth(GL_FALSE);

	LLTimer alloc_timer;
	alloc_timer.start();
	for (U32 i = 0; i < count; ++i)
	{
		//allocate render targets and textures
		if (!dest[i].allocate(res, res, GL_RGBA, false, false, LLTexUnit::TT_TEXTURE, true))
		{
			LL_WARNS("Benchmark") << "Failed to allocate render target." << LL_ENDL;
			// abandon the benchmark test
			return -1.f;
		}
		dest[i].bindTarget();
		dest[i].clear();
		dest[i].flush();

		if (!texHolder.bind(i))
		{
			// can use a dummy value mDummyTexUnit = new LLTexUnit(-1);
			LL_WARNS("Benchmark") << "Failed to bind tex unit." << LL_ENDL;
			// abandon the benchmark test
			return -1.f;
		}
		LLImageGL::setManualImage(GL_TEXTURE_2D, 0, GL_RGBA, res,res,GL_RGBA, GL_UNSIGNED_BYTE, &pixels[0]);

		if (alloc_timer.getElapsedTimeF32() > time_limit)
		{
			// abandon the benchmark test
			LL_WARNS("Benchmark") << "Allocation operation took longer then 30 seconds, stopping." << LL_ENDL;
			return -1.f;
		}
	}

	// the CPU-side copy is no longer needed once every texture is uploaded;
	// swapping with an empty temporary releases the storage right away
	std::vector<U8>().swap(pixels);

	//make a dummy triangle to draw with
	LLPointer<LLVertexBuffer> buff = new LLVertexBuffer(LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD0, GL_STREAM_DRAW);

	if (!buff->allocateBuffer(3, 0, true))
	{
		LL_WARNS("Benchmark") << "Failed to allocate buffer during benchmark." << LL_ENDL;
		// abandon the benchmark test
		return -1.f;
	}

	LLStrider<LLVector3> v;
	LLStrider<LLVector2> tc;

	if (! buff->getVertexStrider(v))
	{
		LL_WARNS("Benchmark") << "GL LLVertexBuffer::getVertexStrider() returned false, "
				   << "buff->getMappedData() is"
				   << (buff->getMappedData()? " not" : "")
				   << " NULL" << LL_ENDL;
		// abandon the benchmark test
		return -1.f;
	}

	// generate dummy triangle
	v[0].set(-1, 1, 0);
	v[1].set(-1, -3, 0);
	v[2].set(3, 1, 0);

	buff->flush();

	// ensure matched pair of bind() and unbind() calls
	ShaderBinder binder(gBenchmarkProgram);

	buff->setBuffer(LLVertexBuffer::MAP_VERTEX);
	glFinish();

	// gigabytes accounted for per pass over all render targets: 8 bytes per
	// texel (presumably 4 read + 4 written for RGBA8 - confirm)
	const F32 gb = (F32) ((F64) (res*res*8*count))/(1000000000);

	F32 time_passed = 0; // seconds
	// c starts at -1 so that the first pass acts as an untimed warm-up
	for (S32 c = -1; c < samples && time_passed < time_limit; ++c)
	{
		LLTimer timer;
		timer.start();

		for (U32 i = 0; i < count; ++i)
		{
			dest[i].bindTarget();
			texHolder.bind(i);
			buff->drawArrays(LLRender::TRIANGLES, 0, 3);
			dest[i].flush();
		}

		//wait for current batch of copies to finish
		glFinish();

		F32 time = timer.getElapsedTimeF32();
		time_passed += time;

		if (c >= 0) // <-- ignore the first sample as it tends to be artificially slow
		{
			//store result in gigabytes per second
			F32 gbps = gb/time;
			results.push_back(gbps);
		}
	}

	if (results.empty())
	{
		// the warm-up pass alone used up the time limit, so there is nothing
		// to take a median of; indexing the empty vector would be undefined
		LL_WARNS("Benchmark") << "No benchmark samples were collected within the time limit, discarding result." << LL_ENDL;
		return -1.f;
	}

	std::sort(results.begin(), results.end());

	// median of the collected samples
	F32 gbps = results[results.size()/2];

	LL_INFOS("Benchmark") << "Memory bandwidth is " << llformat("%.3f", gbps) << "GB/sec according to CPU timers, " << (F32)results.size() << " tests took " << time_passed << " seconds" << LL_ENDL;

#if LL_DARWIN
	if (gbps > 512.f)
	{
		LL_WARNS("Benchmark") << "Memory bandwidth is improbably high and likely incorrect; discarding result." << LL_ENDL;
		//OSX is probably lying, discard result
		return -1.f;
	}
#endif

	// NOTE(review): mTimeElapsed looks like nanoseconds given the two-step
	// conversion below - confirm against LLGLSLShader
	F32 ms = gBenchmarkProgram.mTimeElapsed/1000000.f;
	F32 seconds = ms/1000.f;

	F64 samples_drawn = res*res*count*results.size();
	F32 samples_sec = (samples_drawn/1000000000.0)/seconds;
	gbps = samples_sec*8;

	LL_INFOS("Benchmark") << "Memory bandwidth is " << llformat("%.3f", gbps) << "GB/sec according to ARB_timer_query, total time " << seconds << " seconds" << LL_ENDL;

	return gbps;
}