GLProfiler: GPU time

This commit is contained in:
yvt 2017-01-08 05:25:49 +09:00
parent 844bf5e587
commit 0eecce02a8
8 changed files with 218 additions and 27 deletions

View File

@ -152,7 +152,6 @@ namespace spades {
dev->ActiveTexture(0);
for (int i = 0; i < levels; i++) {
GLProfiler::Context measure(renderer->GetGLProfiler(), "Apply [%d / %d]", i + 1, levels);
GLColorBuffer output = input.GetManager()->CreateBufferHandle();
programShutterTimeScale.SetValue(shutterTimeScale);
dev->BindTexture(IGLDevice::Texture2D, buf.GetTexture());

View File

@ -60,7 +60,7 @@ namespace spades {
};
struct GLProfiler::Phase {
std::string name;
const std::string name;
std::string description;
// can't use vector here; a reference to a vector's element can be invalidated
@ -70,11 +70,14 @@ namespace spades {
std::unordered_map<std::string, std::reference_wrapper<Phase>> subphaseMap;
double startWallClockTime;
double startCPUTime;
stmp::optional<std::pair<std::size_t, std::size_t>> queryObjectIndices;
Measurement measurementLatest;
bool measured = false;
stmp::optional<Measurement> measurementSaved;
Phase(const std::string &name) : name{name} {}
};
GLProfiler::GLProfiler(GLRenderer &renderer)
@ -83,14 +86,17 @@ namespace spades {
m_device{*renderer.GetGLDevice()},
m_active{false},
m_lastSaveTime{0.0},
m_shouldSaveThisFrame{false} {
m_shouldSaveThisFrame{false},
m_waitingTimerQueryResult{false} {
SPADES_MARK_FUNCTION();
m_root.reset(new Phase());
m_root->name = "Frame";
}
GLProfiler::~GLProfiler() {}
GLProfiler::~GLProfiler() {
SPADES_MARK_FUNCTION();
for (IGLDevice::UInteger timerQueryObject: m_timerQueryObjects) {
m_device.DeleteQuery(timerQueryObject);
}
}
void GLProfiler::BeginFrame() {
SPADES_MARK_FUNCTION();
@ -98,9 +104,19 @@ namespace spades {
if (!m_settings.r_debugTiming) {
// Clear history
m_root.reset();
m_waitingTimerQueryResult = false;
return;
}
if (m_waitingTimerQueryResult) {
FinalizeMeasurement();
// Still waiting?
if (m_waitingTimerQueryResult) {
return;
}
}
SPAssert(m_stack.empty());
SPAssert(!m_active);
@ -117,9 +133,31 @@ namespace spades {
ResetTimes();
if (m_settings.r_debugTimingGPUTime) {
m_currentTimerQueryObjectIndex = 0;
if (m_timerQueryObjects.empty()) {
m_timerQueryObjects.push_back(m_device.GenQuery());
}
m_device.BeginQuery(IGLDevice::TimeElapsed, m_timerQueryObjects[0]);
}
if (m_root) {
struct Traverser {
GLProfiler &self;
Traverser(GLProfiler &self) : self{self} {}
void Traverse(Phase &phase) {
phase.measured = false;
phase.queryObjectIndices.reset();
for (Phase &subphase : phase.subphases) {
Traverse(subphase);
}
}
};
Traverser{*this}.Traverse(*m_root);
}
if (!m_root) {
m_root.reset(new Phase());
m_root->name = "Frame";
m_root.reset(new Phase{"Frame"});
m_root->description = "Frame";
}
@ -143,15 +181,93 @@ namespace spades {
EndPhaseInner();
SPAssert(m_stack.empty());
if (m_settings.r_debugTimingOutputLog) {
LogResult(root);
m_active = false;
FinalizeMeasurement();
}
void GLProfiler::FinalizeMeasurement() {
SPADES_MARK_FUNCTION();
Phase &root = *m_root;
// Collect GPU time information
if (m_settings.r_debugTimingGPUTime) {
if (!m_waitingTimerQueryResult) {
m_device.EndQuery(IGLDevice::TimeElapsed);
}
m_waitingTimerQueryResult = false;
// are results available?
for (std::size_t i = 0; i <= m_currentTimerQueryObjectIndex; ++i) {
if (!m_device.GetQueryObjectUInteger(m_timerQueryObjects[i], IGLDevice::QueryResultAvailable)) {
m_waitingTimerQueryResult = true;
return;
}
}
double t = 0;
m_timerQueryTimes.resize(m_currentTimerQueryObjectIndex + 2);
m_timerQueryTimes[0] = 0.0;
for (std::size_t i = 0; i <= m_currentTimerQueryObjectIndex; ++i) {
auto nanoseconds = m_device.GetQueryObjectUInteger64(m_timerQueryObjects[i], IGLDevice::QueryResult);
t += static_cast<double>(nanoseconds) / 1.0e+9;
m_timerQueryTimes[i + 1] = t;
}
struct Traverser {
GLProfiler &self;
Traverser(GLProfiler &self) : self{self} {}
void Traverse(Phase &phase) {
if (phase.queryObjectIndices) {
auto indices = *phase.queryObjectIndices;
double time1 = self.m_timerQueryTimes[indices.first];
double time2 = self.m_timerQueryTimes[indices.second];
phase.measurementLatest.totalGPUTime += time2 - time1;
}
for (Phase &subphase : phase.subphases) {
Traverse(subphase);
}
}
};
Traverser{*this}.Traverse(root);
} else {
m_waitingTimerQueryResult = false;
}
if (m_shouldSaveThisFrame) {
m_lastSaveTime = m_stopwatch.GetTime();
}
struct Traverser {
GLProfiler &self;
Traverser(GLProfiler &self) : self{self} {}
void Traverse(Phase &phase) {
phase.measurementSaved = phase.measurementLatest;
phase.measurementLatest = Measurement{};
for (Phase &subphase : phase.subphases) {
Traverse(subphase);
}
}
};
Traverser{*this}.Traverse(root);
m_active = false;
m_lastSaveTime = m_stopwatch.GetTime();
// Output the result to the system log
if (m_settings.r_debugTimingOutputLog) {
LogResult(root);
}
}
}
void GLProfiler::NewTimerQuery() {
SPADES_MARK_FUNCTION_DEBUG();
m_device.EndQuery(IGLDevice::TimeElapsed);
++m_currentTimerQueryObjectIndex;
if (m_currentTimerQueryObjectIndex >= m_timerQueryObjects.size()) {
m_timerQueryObjects.push_back(m_device.GenQuery());
}
m_device.BeginQuery(IGLDevice::TimeElapsed, m_timerQueryObjects[m_currentTimerQueryObjectIndex]);
}
void GLProfiler::BeginPhase(const std::string &name, const std::string &description) {
@ -166,26 +282,35 @@ namespace spades {
if (it == current.subphaseMap.end()) {
// Create a new subphase
current.subphases.emplace_back();
auto subphasesIt = current.subphases.emplace(current.subphases.end());
auto subphasesIt = current.subphases.emplace(current.subphases.end(), name);
auto insertResult = current.subphaseMap.emplace(name, std::ref(*subphasesIt));
SPAssert(insertResult.second);
it = insertResult.first;
it->second.get().name = name;
}
Phase &next = it->second;
m_stack.emplace_back(next);
if (next.measured) {
SPRaise("Cannot measure the timing of phase '%s' twice", name.c_str());
}
next.measured = true;
next.description = description;
BeginPhaseInner(next);
}
void GLProfiler::BeginPhaseInner(Phase &phase) {
SPADES_MARK_FUNCTION_DEBUG();
phase.startWallClockTime = GetWallClockTime();
if (m_settings.r_debugTimingGPUTime) {
NewTimerQuery();
phase.queryObjectIndices = std::pair<std::size_t, std::size_t>{};
(*phase.queryObjectIndices).first = m_currentTimerQueryObjectIndex;
}
}
void GLProfiler::EndPhase() {
@ -209,9 +334,10 @@ namespace spades {
current.measurementLatest.totalWallClockTime += wallClockTime - current.startWallClockTime;
current.measurementLatest.totalNumFrames += 1;
if (m_shouldSaveThisFrame) {
current.measurementSaved = current.measurementLatest;
current.measurementLatest = Measurement{};
if (m_settings.r_debugTimingGPUTime) {
SPAssert(current.queryObjectIndices);
NewTimerQuery();
(*current.queryObjectIndices).second = m_currentTimerQueryObjectIndex;
}
m_stack.pop_back();
@ -227,7 +353,12 @@ namespace spades {
SPLog("---- Start of GLProfiler Result ----");
SPLog("(%d sampled frame(s). Showing the per-frame timing)",
(*root.measurementSaved).totalNumFrames);
SPLog("(wall clock time)");
if (m_settings.r_debugTimingGPUTime) {
SPLog("(GPU time / wall clock time)");
} else {
SPLog("(wall clock time)");
}
struct Traverser {
GLProfiler &self;
@ -246,8 +377,14 @@ namespace spades {
for (int i = 0; i < indent; i++)
buf[i] = ' ';
buf[511] = 0;
snprintf(buf + indent, 511 - indent, "%s - %.3fms", phase.description.c_str(),
result.totalWallClockTime * 1000. * factor);
if (self.m_settings.r_debugTimingGPUTime) {
snprintf(buf + indent, 511 - indent, "%s - %.3fms / %.3fms", phase.description.c_str(),
result.totalGPUTime * 1000. * factor,
result.totalWallClockTime * 1000. * factor);
} else {
snprintf(buf + indent, 511 - indent, "%s - %.3fms", phase.description.c_str(),
result.totalWallClockTime * 1000. * factor);
}
SPLog("%s", buf);
for (Phase &subphase : phase.subphases) {

View File

@ -26,12 +26,12 @@
#include <vector>
#include <Core/Stopwatch.h>
#include "IGLDevice.h"
namespace spades {
namespace draw {
class GLRenderer;
class GLSettings;
class IGLDevice;
class GLProfiler {
struct Phase;
@ -43,12 +43,17 @@ namespace spades {
bool m_active;
double m_lastSaveTime;
bool m_shouldSaveThisFrame;
bool m_waitingTimerQueryResult;
Stopwatch m_stopwatch;
std::unique_ptr<Phase> m_root;
std::vector<std::reference_wrapper<Phase>> m_stack;
std::vector<IGLDevice::UInteger> m_timerQueryObjects;
std::vector<double> m_timerQueryTimes;
std::size_t m_currentTimerQueryObjectIndex;
Phase &GetCurrentPhase() { return m_stack.back(); }
void BeginPhase(const std::string &name, const std::string &description);
@ -58,9 +63,13 @@ namespace spades {
void EndPhaseInner();
void NewTimerQuery();
void LogResult(Phase &root);
void DrawResult(Phase &root);
void FinalizeMeasurement();
public:
GLProfiler(GLRenderer &);
~GLProfiler();

View File

@ -28,6 +28,7 @@ DEFINE_SPADES_SETTING(r_debugTiming, "0");
DEFINE_SPADES_SETTING(r_debugTimingOutputScreen, "1");
DEFINE_SPADES_SETTING(r_debugTimingOutputLog, "0");
DEFINE_SPADES_SETTING(r_debugTimingAverage, "1");
DEFINE_SPADES_SETTING(r_debugTimingGPUTime, "1");
DEFINE_SPADES_SETTING(r_depthOfField, "0");
DEFINE_SPADES_SETTING(r_depthOfFieldMaxCoc, "0.01");
DEFINE_SPADES_SETTING(r_depthPrepass, "1");

View File

@ -35,6 +35,7 @@ namespace spades {
TypedItemHandle<bool> r_debugTimingOutputScreen { *this, "r_debugTimingOutputScreen" };
TypedItemHandle<bool> r_debugTimingOutputLog { *this, "r_debugTimingOutputLog" };
TypedItemHandle<bool> r_debugTimingAverage { *this, "r_debugTimingAverage" };
TypedItemHandle<bool> r_debugTimingGPUTime { *this, "r_debugTimingGPUTime" };
TypedItemHandle<int> r_depthOfField { *this, "r_depthOfField" };
TypedItemHandle<float> r_depthOfFieldMaxCoc { *this, "r_depthOfFieldMaxCoc" };
TypedItemHandle<bool> r_depthPrepass { *this, "r_depthPrepass" };

View File

@ -20,6 +20,8 @@
#pragma once
#include <cstdlib> // for integer types
#include <Core/Math.h>
#include <Core/RefCountedObject.h>
@ -243,6 +245,9 @@ namespace spades {
FramebufferIncompleteMultisample,
FramebufferIncompleteLayerTargets,
// EXT_timer_query
TimeElapsed,
ColorBufferBit = 1,
DepthBufferBit = 2,
StencilBufferBit = 4
@ -252,6 +257,7 @@ namespace spades {
typedef int Integer;
typedef float Float;
typedef unsigned int Sizei;
typedef std::uint64_t UInteger64;
virtual void DepthRange(Float near, Float far) = 0;
virtual void Viewport(Integer x, Integer y, Sizei width, Sizei height) = 0;
@ -294,6 +300,7 @@ namespace spades {
virtual void BeginQuery(Enum target, UInteger query) = 0;
virtual void EndQuery(Enum target) = 0;
virtual UInteger GetQueryObjectUInteger(UInteger query, Enum pname) = 0;
virtual UInteger64 GetQueryObjectUInteger64(UInteger query, Enum pname) = 0;
virtual void BeginConditionalRender(UInteger query, Enum mode) = 0;
virtual void EndConditionalRender() = 0;

View File

@ -602,6 +602,7 @@ namespace spades {
switch (target) {
case IGLDevice::SamplesPassed: return GL_SAMPLES_PASSED;
case IGLDevice::AnySamplesPassed: return GL_ANY_SAMPLES_PASSED;
case IGLDevice::TimeElapsed: return GL_TIME_ELAPSED;
default: SPInvalidEnum("target", target);
}
}
@ -682,6 +683,41 @@ namespace spades {
return val;
}
IGLDevice::UInteger64 SDLGLDevice::GetQueryObjectUInteger64(UInteger query, Enum pname) {
GLuint64 val;
SPADES_MARK_FUNCTION_DEBUG();
#if GLEW
if (glGetQueryObjectui64v) {
switch (pname) {
case draw::IGLDevice::QueryResult:
glGetQueryObjectui64v(query, GL_QUERY_RESULT, &val);
break;
default: SPInvalidEnum("pname", pname);
}
} else if (glGetQueryObjectui64vEXT) {
switch (pname) {
case draw::IGLDevice::QueryResult:
glGetQueryObjectui64vEXT(query, GL_QUERY_RESULT, &val);
break;
default: SPInvalidEnum("pname", pname);
}
} else {
ReportMissingFunc("glGetQueryObjectui64v");
}
#else
CheckExistence(glGetQueryObjectui64v);
switch (pname) {
case draw::IGLDevice::QueryResult:
glGetQueryObjectui64v(query, GL_QUERY_RESULT, &val);
break;
default: SPInvalidEnum("pname", pname);
}
#endif
CheckError();
return val;
}
void SDLGLDevice::BeginConditionalRender(UInteger query, Enum mode) {
SPADES_MARK_FUNCTION_DEBUG();
GLenum md;

View File

@ -82,6 +82,7 @@ namespace spades {
void BeginQuery(Enum target, UInteger query) override;
void EndQuery(Enum target) override;
UInteger GetQueryObjectUInteger(UInteger query, Enum pname) override;
UInteger64 GetQueryObjectUInteger64(UInteger query, Enum pname) override;
void BeginConditionalRender(UInteger query, Enum) override;
void EndConditionalRender() override;