From 0eecce02a8d40c368448be700f032502f0887f24 Mon Sep 17 00:00:00 2001 From: yvt Date: Sun, 8 Jan 2017 05:25:49 +0900 Subject: [PATCH] GLProfiler: GPU time --- Sources/Draw/GLCameraBlurFilter.cpp | 1 - Sources/Draw/GLProfiler.cpp | 187 ++++++++++++++++++++++++---- Sources/Draw/GLProfiler.h | 11 +- Sources/Draw/GLSettings.cpp | 1 + Sources/Draw/GLSettings.h | 1 + Sources/Draw/IGLDevice.h | 7 ++ Sources/Gui/SDLGLDevice.cpp | 36 ++++++ Sources/Gui/SDLGLDevice.h | 1 + 8 files changed, 218 insertions(+), 27 deletions(-) diff --git a/Sources/Draw/GLCameraBlurFilter.cpp b/Sources/Draw/GLCameraBlurFilter.cpp index b744f0d4..64976654 100644 --- a/Sources/Draw/GLCameraBlurFilter.cpp +++ b/Sources/Draw/GLCameraBlurFilter.cpp @@ -152,7 +152,6 @@ namespace spades { dev->ActiveTexture(0); for (int i = 0; i < levels; i++) { - GLProfiler::Context measure(renderer->GetGLProfiler(), "Apply [%d / %d]", i + 1, levels); GLColorBuffer output = input.GetManager()->CreateBufferHandle(); programShutterTimeScale.SetValue(shutterTimeScale); dev->BindTexture(IGLDevice::Texture2D, buf.GetTexture()); diff --git a/Sources/Draw/GLProfiler.cpp b/Sources/Draw/GLProfiler.cpp index b626781c..3180da58 100644 --- a/Sources/Draw/GLProfiler.cpp +++ b/Sources/Draw/GLProfiler.cpp @@ -60,7 +60,7 @@ namespace spades { }; struct GLProfiler::Phase { - std::string name; + const std::string name; std::string description; // can't use vector here; a reference to a vector's element can be invalidated @@ -70,11 +70,14 @@ namespace spades { std::unordered_map> subphaseMap; double startWallClockTime; - double startCPUTime; + stmp::optional> queryObjectIndices; Measurement measurementLatest; + bool measured = false; stmp::optional measurementSaved; + + Phase(const std::string &name) : name{name} {} }; GLProfiler::GLProfiler(GLRenderer &renderer) @@ -83,14 +86,17 @@ namespace spades { m_device{*renderer.GetGLDevice()}, m_active{false}, m_lastSaveTime{0.0}, - m_shouldSaveThisFrame{false} { + m_shouldSaveThisFrame{false}, + m_waitingTimerQueryResult{false} { SPADES_MARK_FUNCTION(); - - m_root.reset(new Phase()); - m_root->name = "Frame"; } - GLProfiler::~GLProfiler() {} + GLProfiler::~GLProfiler() { + SPADES_MARK_FUNCTION(); + for (IGLDevice::UInteger timerQueryObject: m_timerQueryObjects) { + m_device.DeleteQuery(timerQueryObject); + } + } void GLProfiler::BeginFrame() { SPADES_MARK_FUNCTION(); @@ -98,9 +104,19 @@ namespace spades { if (!m_settings.r_debugTiming) { // Clear history m_root.reset(); + m_waitingTimerQueryResult = false; return; } + if (m_waitingTimerQueryResult) { + FinalizeMeasurement(); + + // Still waiting? + if (m_waitingTimerQueryResult) { + return; + } + } + SPAssert(m_stack.empty()); SPAssert(!m_active); @@ -117,9 +133,31 @@ namespace spades { ResetTimes(); + if (m_settings.r_debugTimingGPUTime) { + m_currentTimerQueryObjectIndex = 0; + if (m_timerQueryObjects.empty()) { + m_timerQueryObjects.push_back(m_device.GenQuery()); + } + m_device.BeginQuery(IGLDevice::TimeElapsed, m_timerQueryObjects[0]); + } + + if (m_root) { + struct Traverser { + GLProfiler &self; + Traverser(GLProfiler &self) : self{self} {} + void Traverse(Phase &phase) { + phase.measured = false; + phase.queryObjectIndices.reset(); + for (Phase &subphase : phase.subphases) { + Traverse(subphase); + } + } + }; + Traverser{*this}.Traverse(*m_root); + } + if (!m_root) { - m_root.reset(new Phase()); - m_root->name = "Frame"; + m_root.reset(new Phase{"Frame"}); m_root->description = "Frame"; } @@ -143,15 +181,93 @@ namespace spades { EndPhaseInner(); SPAssert(m_stack.empty()); - if (m_settings.r_debugTimingOutputLog) { - LogResult(root); + m_active = false; + + FinalizeMeasurement(); + } + + void GLProfiler::FinalizeMeasurement() { + SPADES_MARK_FUNCTION(); + Phase &root = *m_root; + + // Collect GPU time information + if (m_settings.r_debugTimingGPUTime) { + if (!m_waitingTimerQueryResult) { + m_device.EndQuery(IGLDevice::TimeElapsed); + } + + m_waitingTimerQueryResult = false; + + // are results available? + for (std::size_t i = 0; i <= m_currentTimerQueryObjectIndex; ++i) { + if (!m_device.GetQueryObjectUInteger(m_timerQueryObjects[i], IGLDevice::QueryResultAvailable)) { + m_waitingTimerQueryResult = true; + return; + } + } + + double t = 0; + m_timerQueryTimes.resize(m_currentTimerQueryObjectIndex + 2); + m_timerQueryTimes[0] = 0.0; + for (std::size_t i = 0; i <= m_currentTimerQueryObjectIndex; ++i) { + auto nanoseconds = m_device.GetQueryObjectUInteger64(m_timerQueryObjects[i], IGLDevice::QueryResult); + t += static_cast(nanoseconds) / 1.0e+9; + m_timerQueryTimes[i + 1] = t; + } + struct Traverser { + GLProfiler &self; + Traverser(GLProfiler &self) : self{self} {} + void Traverse(Phase &phase) { + if (phase.queryObjectIndices) { + auto indices = *phase.queryObjectIndices; + double time1 = self.m_timerQueryTimes[indices.first]; + double time2 = self.m_timerQueryTimes[indices.second]; + phase.measurementLatest.totalGPUTime += time2 - time1; + } + + for (Phase &subphase : phase.subphases) { + Traverse(subphase); + } + } + }; + Traverser{*this}.Traverse(root); + } else { + m_waitingTimerQueryResult = false; } if (m_shouldSaveThisFrame) { - m_lastSaveTime = m_stopwatch.GetTime(); - } + struct Traverser { + GLProfiler &self; + Traverser(GLProfiler &self) : self{self} {} + void Traverse(Phase &phase) { + phase.measurementSaved = phase.measurementLatest; + phase.measurementLatest = Measurement{}; + for (Phase &subphase : phase.subphases) { + Traverse(subphase); + } + } + }; + Traverser{*this}.Traverse(root); - m_active = false; + m_lastSaveTime = m_stopwatch.GetTime(); + + // Output the result to the system log + if (m_settings.r_debugTimingOutputLog) { + LogResult(root); + } + } + } + + void GLProfiler::NewTimerQuery() { + SPADES_MARK_FUNCTION_DEBUG(); + + m_device.EndQuery(IGLDevice::TimeElapsed); + ++m_currentTimerQueryObjectIndex; + + if (m_currentTimerQueryObjectIndex >= m_timerQueryObjects.size()) { + m_timerQueryObjects.push_back(m_device.GenQuery()); + } + m_device.BeginQuery(IGLDevice::TimeElapsed, m_timerQueryObjects[m_currentTimerQueryObjectIndex]); } void GLProfiler::BeginPhase(const std::string &name, const std::string &description) { @@ -166,26 +282,35 @@ namespace spades { if (it == current.subphaseMap.end()) { // Create a new subphase - current.subphases.emplace_back(); - - auto subphasesIt = current.subphases.emplace(current.subphases.end()); + auto subphasesIt = current.subphases.emplace(current.subphases.end(), name); auto insertResult = current.subphaseMap.emplace(name, std::ref(*subphasesIt)); SPAssert(insertResult.second); it = insertResult.first; - - it->second.get().name = name; } Phase &next = it->second; m_stack.emplace_back(next); + if (next.measured) { + SPRaise("Cannot measure the timing of phase '%s' twice", name.c_str()); + } + + next.measured = true; next.description = description; BeginPhaseInner(next); } void GLProfiler::BeginPhaseInner(Phase &phase) { + SPADES_MARK_FUNCTION_DEBUG(); + phase.startWallClockTime = GetWallClockTime(); + + if (m_settings.r_debugTimingGPUTime) { + NewTimerQuery(); + phase.queryObjectIndices = std::pair{}; + (*phase.queryObjectIndices).first = m_currentTimerQueryObjectIndex; + } } void GLProfiler::EndPhase() { @@ -209,9 +334,10 @@ namespace spades { current.measurementLatest.totalWallClockTime += wallClockTime - current.startWallClockTime; current.measurementLatest.totalNumFrames += 1; - if (m_shouldSaveThisFrame) { - current.measurementSaved = current.measurementLatest; - current.measurementLatest = Measurement{}; + if (m_settings.r_debugTimingGPUTime) { + SPAssert(current.queryObjectIndices); + NewTimerQuery(); + (*current.queryObjectIndices).second = m_currentTimerQueryObjectIndex; } m_stack.pop_back(); @@ -227,7 +353,12 @@ namespace spades { SPLog("---- Start of GLProfiler Result ----"); SPLog("(%d sampled frame(s). Showing the per-frame timing)", (*root.measurementSaved).totalNumFrames); - SPLog("(wall clock time)"); + + if (m_settings.r_debugTimingGPUTime) { + SPLog("(GPU time / wall clock time)"); + } else { + SPLog("(wall clock time)"); + } struct Traverser { GLProfiler &self; @@ -246,8 +377,14 @@ namespace spades { for (int i = 0; i < indent; i++) buf[i] = ' '; buf[511] = 0; - snprintf(buf + indent, 511 - indent, "%s - %.3fms", phase.description.c_str(), - result.totalWallClockTime * 1000. * factor); + if (self.m_settings.r_debugTimingGPUTime) { + snprintf(buf + indent, 511 - indent, "%s - %.3fms / %.3fms", phase.description.c_str(), + result.totalGPUTime * 1000. * factor, + result.totalWallClockTime * 1000. * factor); + } else { + snprintf(buf + indent, 511 - indent, "%s - %.3fms", phase.description.c_str(), + result.totalWallClockTime * 1000. * factor); + } SPLog("%s", buf); for (Phase &subphase : phase.subphases) { diff --git a/Sources/Draw/GLProfiler.h b/Sources/Draw/GLProfiler.h index 27bcbc79..560beb43 100644 --- a/Sources/Draw/GLProfiler.h +++ b/Sources/Draw/GLProfiler.h @@ -26,12 +26,12 @@ #include #include +#include "IGLDevice.h" namespace spades { namespace draw { class GLRenderer; class GLSettings; - class IGLDevice; class GLProfiler { struct Phase; @@ -43,12 +43,17 @@ namespace spades { bool m_active; double m_lastSaveTime; bool m_shouldSaveThisFrame; + bool m_waitingTimerQueryResult; Stopwatch m_stopwatch; std::unique_ptr m_root; std::vector> m_stack; + std::vector m_timerQueryObjects; + std::vector m_timerQueryTimes; + std::size_t m_currentTimerQueryObjectIndex; + Phase &GetCurrentPhase() { return m_stack.back(); } void BeginPhase(const std::string &name, const std::string &description); @@ -58,9 +63,13 @@ namespace spades { void EndPhaseInner(); + void NewTimerQuery(); + void LogResult(Phase &root); void DrawResult(Phase &root); + void FinalizeMeasurement(); + public: GLProfiler(GLRenderer &); ~GLProfiler(); diff --git a/Sources/Draw/GLSettings.cpp b/Sources/Draw/GLSettings.cpp index 8a8347cf..5f3a6c24 100644 --- a/Sources/Draw/GLSettings.cpp +++ b/Sources/Draw/GLSettings.cpp @@ -28,6 +28,7 @@ DEFINE_SPADES_SETTING(r_debugTiming, "0"); DEFINE_SPADES_SETTING(r_debugTimingOutputScreen, "1"); DEFINE_SPADES_SETTING(r_debugTimingOutputLog, "0"); DEFINE_SPADES_SETTING(r_debugTimingAverage, "1"); +DEFINE_SPADES_SETTING(r_debugTimingGPUTime, "1"); DEFINE_SPADES_SETTING(r_depthOfField, "0"); DEFINE_SPADES_SETTING(r_depthOfFieldMaxCoc, "0.01"); DEFINE_SPADES_SETTING(r_depthPrepass, "1"); diff --git a/Sources/Draw/GLSettings.h b/Sources/Draw/GLSettings.h index a3e004fd..5751bb9b 100644 --- a/Sources/Draw/GLSettings.h +++ b/Sources/Draw/GLSettings.h @@ -35,6 +35,7 @@ namespace spades { TypedItemHandle r_debugTimingOutputScreen { *this, "r_debugTimingOutputScreen" }; TypedItemHandle r_debugTimingOutputLog { *this, "r_debugTimingOutputLog" }; TypedItemHandle r_debugTimingAverage { *this, "r_debugTimingAverage" }; + TypedItemHandle r_debugTimingGPUTime { *this, "r_debugTimingGPUTime" }; TypedItemHandle r_depthOfField { *this, "r_depthOfField" }; TypedItemHandle r_depthOfFieldMaxCoc { *this, "r_depthOfFieldMaxCoc" }; TypedItemHandle r_depthPrepass { *this, "r_depthPrepass" }; diff --git a/Sources/Draw/IGLDevice.h b/Sources/Draw/IGLDevice.h index a8ddfa0d..3044f143 100644 --- a/Sources/Draw/IGLDevice.h +++ b/Sources/Draw/IGLDevice.h @@ -20,6 +20,8 @@ #pragma once +#include // for integer types + #include #include @@ -243,6 +245,9 @@ namespace spades { FramebufferIncompleteMultisample, FramebufferIncompleteLayerTargets, + // EXT_timer_query + TimeElapsed, + ColorBufferBit = 1, DepthBufferBit = 2, StencilBufferBit = 4 @@ -252,6 +257,7 @@ namespace spades { typedef int Integer; typedef float Float; typedef unsigned int Sizei; + typedef std::uint64_t UInteger64; virtual void DepthRange(Float near, Float far) = 0; virtual void Viewport(Integer x, Integer y, Sizei width, Sizei height) = 0; @@ -294,6 +300,7 @@ namespace spades { virtual void BeginQuery(Enum target, UInteger query) = 0; virtual void EndQuery(Enum target) = 0; virtual UInteger GetQueryObjectUInteger(UInteger query, Enum pname) = 0; + virtual UInteger64 GetQueryObjectUInteger64(UInteger query, Enum pname) = 0; virtual void BeginConditionalRender(UInteger query, Enum mode) = 0; virtual void EndConditionalRender() = 0; diff --git a/Sources/Gui/SDLGLDevice.cpp b/Sources/Gui/SDLGLDevice.cpp index 4a6780e6..2acc890a 100644 --- a/Sources/Gui/SDLGLDevice.cpp +++ b/Sources/Gui/SDLGLDevice.cpp @@ -602,6 +602,7 @@ namespace spades { switch (target) { case IGLDevice::SamplesPassed: return GL_SAMPLES_PASSED; case IGLDevice::AnySamplesPassed: return GL_ANY_SAMPLES_PASSED; + case IGLDevice::TimeElapsed: return GL_TIME_ELAPSED; default: SPInvalidEnum("target", target); } } @@ -682,6 +683,41 @@ namespace spades { return val; } + IGLDevice::UInteger64 SDLGLDevice::GetQueryObjectUInteger64(UInteger query, Enum pname) { + GLuint64 val; + SPADES_MARK_FUNCTION_DEBUG(); + +#if GLEW + if (glGetQueryObjectui64v) { + switch (pname) { + case draw::IGLDevice::QueryResult: + glGetQueryObjectui64v(query, GL_QUERY_RESULT, &val); + break; + default: SPInvalidEnum("pname", pname); + } + } else if (glGetQueryObjectui64vEXT) { + switch (pname) { + case draw::IGLDevice::QueryResult: + glGetQueryObjectui64vEXT(query, GL_QUERY_RESULT, &val); + break; + default: SPInvalidEnum("pname", pname); + } + } else { + ReportMissingFunc("glGetQueryObjectui64v"); + } +#else + CheckExistence(glGetQueryObjectui64v); + switch (pname) { + case draw::IGLDevice::QueryResult: + glGetQueryObjectui64v(query, GL_QUERY_RESULT, &val); + break; + default: SPInvalidEnum("pname", pname); + } +#endif + CheckError(); + return val; + } + void SDLGLDevice::BeginConditionalRender(UInteger query, Enum mode) { SPADES_MARK_FUNCTION_DEBUG(); GLenum md; diff --git a/Sources/Gui/SDLGLDevice.h b/Sources/Gui/SDLGLDevice.h index 90e26f2d..4c0723af 100644 --- a/Sources/Gui/SDLGLDevice.h +++ b/Sources/Gui/SDLGLDevice.h @@ -82,6 +82,7 @@ namespace spades { void BeginQuery(Enum target, UInteger query) override; void EndQuery(Enum target) override; UInteger GetQueryObjectUInteger(UInteger query, Enum pname) override; + UInteger64 GetQueryObjectUInteger64(UInteger query, Enum pname) override; void BeginConditionalRender(UInteger query, Enum) override; void EndConditionalRender() override;