From eb97679eb0b910954e31cf93ac9f7599c8205f9a Mon Sep 17 00:00:00 2001 From: Manuel Moos Date: Sat, 10 Feb 2018 18:23:12 +0100 Subject: [PATCH] Collecting frame end time with glQueryCounter. Collection is triggered in rasterizer_gles3.cpp in RasterizerGLES3::end_frame(). The new virtual function Rasterizer::sync_end_frame() can be used to later fetch the result. The virtual function VisualServer::sync() and its implementations have been adapted to pass the result out to the main loop. Side effect: Rasterizer::sync_end_frame() and therefore VisualServer::sync() will now actively wait for the next-to-last frame to finish rendering. The last frame can still be fully pending in the GPU/Driver pipeline, so this should have no impact on throughput in normal situations. It does increase the risk of frame drops if the CPU has an unusually large amount of work to do for a frame or two and additional buffered frames could otherwise compensate, so the precise frame to wait for (or whether there should be any waiting at all) should be made configurable later. 
--- drivers/gles3/rasterizer_gles3.cpp | 65 +++++++++++++++++++++++- drivers/gles3/rasterizer_gles3.h | 1 + servers/visual/rasterizer.h | 1 + servers/visual/visual_server_raster.cpp | 3 +- servers/visual/visual_server_raster.h | 2 +- servers/visual/visual_server_wrap_mt.cpp | 11 +++- servers/visual/visual_server_wrap_mt.h | 4 +- servers/visual_server.h | 2 +- 8 files changed, 83 insertions(+), 6 deletions(-) diff --git a/drivers/gles3/rasterizer_gles3.cpp b/drivers/gles3/rasterizer_gles3.cpp index b43deab58f61..5f5b9a54a8ef 100644 --- a/drivers/gles3/rasterizer_gles3.cpp +++ b/drivers/gles3/rasterizer_gles3.cpp @@ -33,6 +33,7 @@ #include "gl_context/context_gl.h" #include "os/os.h" #include "project_settings.h" +#include "vector.h" #include RasterizerStorage *RasterizerGLES3::get_storage() { @@ -363,12 +364,74 @@ void RasterizerGLES3::blit_render_target_to_screen(RID p_render_target, const Re #endif } -void RasterizerGLES3::end_frame(bool p_swap_buffers) { +struct _PendingFrame { + GLuint query_handler; + GLsync fence; + + void free() { + glDeleteSync(fence); + glDeleteQueries(1, &query_handler); + } +}; + +class _PendingFrameWaiter { + Vector<_PendingFrame> pending_frames; + int max_pending_frames; +public: + _PendingFrameWaiter() : max_pending_frames(4){} + ~_PendingFrameWaiter() { + for (int i = pending_frames.size() - 1; i >= 0; --i) { + pending_frames[i].free(); + } + } + void push() { + if(pending_frames.size() > max_pending_frames + 4) + return;// overflow protection: if pop is not called often enough, do not fill up buffer. + + _PendingFrame frame; + glGenQueries(1, &frame.query_handler); + glQueryCounter(frame.query_handler, GL_TIMESTAMP); + frame.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + pending_frames.push_back(frame); + } + GLint64 pop(int p_max_pending_frames) { + max_pending_frames = p_max_pending_frames > 0 ? 
p_max_pending_frames : 0; + if (pending_frames.empty()) + return -1; + + GLuint64 time = -1; + while (pending_frames.size() > max_pending_frames) { // bound by the clamped member, not the raw argument: a negative argument would otherwise keep indexing the emptied vector + _PendingFrame &frame = pending_frames[0]; + + glClientWaitSync(frame.fence, 0, 1000 * 1000 * 20); + + GLint available = 0; + glGetQueryObjectiv(frame.query_handler, GL_QUERY_RESULT_AVAILABLE, &available); + if (!available) + break; // timer not ready, try again next frame (should never happen, we already synced the fence) + glGetQueryObjectui64v(frame.query_handler, GL_QUERY_RESULT, &time); + frame.free(); + pending_frames.remove(0); + } + + return time; + } +}; + +static _PendingFrameWaiter _pending_frame_waiter; + +void RasterizerGLES3::end_frame(bool p_swap_buffers) { if (p_swap_buffers) OS::get_singleton()->swap_buffers(); else glFinish(); + + _pending_frame_waiter.push(); +} + +int64_t RasterizerGLES3::sync_end_frame(int p_max_pending_frames) { + return _pending_frame_waiter.pop(p_max_pending_frames); } void RasterizerGLES3::finalize() { diff --git a/drivers/gles3/rasterizer_gles3.h b/drivers/gles3/rasterizer_gles3.h index 5213101778c3..9c5edf41eb72 100644 --- a/drivers/gles3/rasterizer_gles3.h +++ b/drivers/gles3/rasterizer_gles3.h @@ -61,6 +61,7 @@ class RasterizerGLES3 : public Rasterizer { virtual void clear_render_target(const Color &p_color); virtual void blit_render_target_to_screen(RID p_render_target, const Rect2 &p_screen_rect, int p_screen = 0); virtual void end_frame(bool p_swap_buffers); + virtual int64_t sync_end_frame(int p_max_pending_frames); virtual void finalize(); static void make_current(); diff --git a/servers/visual/rasterizer.h b/servers/visual/rasterizer.h index f2bb853a3b3b..23fc0dc8185f 100644 --- a/servers/visual/rasterizer.h +++ b/servers/visual/rasterizer.h @@ -1066,6 +1066,7 @@ class Rasterizer { virtual void clear_render_target(const Color &p_color) = 0; virtual void blit_render_target_to_screen(RID p_render_target, const Rect2 &p_screen_rect, int p_screen = 0) = 0; virtual void 
end_frame(bool p_swap_buffers) = 0; + virtual int64_t sync_end_frame(int p_max_pending_frames) = 0; virtual void finalize() = 0; virtual ~Rasterizer() {} diff --git a/servers/visual/visual_server_raster.cpp b/servers/visual/visual_server_raster.cpp index 64a3502e4081..bf6784f719c6 100644 --- a/servers/visual/visual_server_raster.cpp +++ b/servers/visual/visual_server_raster.cpp @@ -124,7 +124,8 @@ void VisualServerRaster::draw(bool p_swap_buffers) { emit_signal("frame_drawn_in_thread"); } -void VisualServerRaster::sync() { +int64_t VisualServerRaster::sync() { + return VSG::rasterizer->sync_end_frame(1); } bool VisualServerRaster::has_changed() const { diff --git a/servers/visual/visual_server_raster.h b/servers/visual/visual_server_raster.h index 106b84a6ffc7..e98528d2daf4 100644 --- a/servers/visual/visual_server_raster.h +++ b/servers/visual/visual_server_raster.h @@ -649,7 +649,7 @@ class VisualServerRaster : public VisualServer { virtual void request_frame_drawn_callback(Object *p_where, const StringName &p_method, const Variant &p_userdata); virtual void draw(bool p_swap_buffers); - virtual void sync(); + virtual int64_t sync(); virtual bool has_changed() const; virtual void init(); virtual void finish(); diff --git a/servers/visual/visual_server_wrap_mt.cpp b/servers/visual/visual_server_wrap_mt.cpp index 094e2794ed31..e3da4f6daaa1 100644 --- a/servers/visual/visual_server_wrap_mt.cpp +++ b/servers/visual/visual_server_wrap_mt.cpp @@ -50,6 +50,11 @@ void VisualServerWrapMT::thread_flush() { atomic_decrement(&draw_pending); } +void VisualServerWrapMT::thread_sync() { + // the only write access to gpu_frame_timer when create_thread is set (sync() assigns it directly otherwise); must be called in push_and_sync context + gpu_frame_timer = visual_server->sync(); +} + void VisualServerWrapMT::_thread_callback(void *_instance) { VisualServerWrapMT *vsmt = reinterpret_cast(_instance); @@ -79,15 +84,18 @@ void VisualServerWrapMT::thread_loop() { /* EVENT QUEUING */ -void VisualServerWrapMT::sync() { +int64_t 
VisualServerWrapMT::sync() { if (create_thread) { + command_queue.push_and_sync(this, &VisualServerWrapMT::thread_sync); atomic_increment(&draw_pending); command_queue.push_and_sync(this, &VisualServerWrapMT::thread_flush); + return gpu_frame_timer; // only place this is read } else { command_queue.flush_all(); //flush all pending from other threads + return gpu_frame_timer = visual_server->sync(); } } @@ -177,6 +185,7 @@ VisualServerWrapMT::VisualServerWrapMT(VisualServer *p_contained, bool p_create_ create_thread = p_create_thread; thread = NULL; draw_pending = 0; + gpu_frame_timer = 0; draw_thread_up = false; alloc_mutex = Mutex::create(); pool_max_size = GLOBAL_GET("memory/limits/multithreaded_server/rid_pool_prealloc"); diff --git a/servers/visual/visual_server_wrap_mt.h b/servers/visual/visual_server_wrap_mt.h index c86a8164cec0..ded6c3640fee 100644 --- a/servers/visual/visual_server_wrap_mt.h +++ b/servers/visual/visual_server_wrap_mt.h @@ -55,8 +55,10 @@ class VisualServerWrapMT : public VisualServer { bool create_thread; uint64_t draw_pending; + int64_t gpu_frame_timer; void thread_draw(); void thread_flush(); + void thread_sync(); void thread_exit(); @@ -569,7 +571,7 @@ class VisualServerWrapMT : public VisualServer { virtual void init(); virtual void finish(); virtual void draw(bool p_swap_buffers); - virtual void sync(); + virtual int64_t sync(); FUNC0RC(bool, has_changed) /* RENDER INFO */ diff --git a/servers/visual_server.h b/servers/visual_server.h index 16ba135c3009..0881a865d4c1 100644 --- a/servers/visual_server.h +++ b/servers/visual_server.h @@ -931,7 +931,7 @@ class VisualServer : public Object { /* EVENT QUEUING */ virtual void draw(bool p_swap_buffers = true) = 0; - virtual void sync() = 0; + virtual int64_t sync() = 0; virtual bool has_changed() const = 0; virtual void init() = 0; virtual void finish() = 0;