Skip to content

Commit

Permalink
Merge pull request #67032 from clayjohn/GLES3-gpu-profiling
Browse files Browse the repository at this point in the history
Add OpenGL timer queries to OpenGL3 backend
  • Loading branch information
akien-mga committed Oct 31, 2022
2 parents 40258bb + aca964d commit 5a00568
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 139 deletions.
10 changes: 8 additions & 2 deletions drivers/gles3/rasterizer_gles3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,7 @@ void RasterizerGLES3::begin_frame(double frame_step) {
scene->set_time(time_total, frame_step);

GLES3::Utilities *utils = GLES3::Utilities::get_singleton();
utils->info.render_final = utils->info.render;
utils->info.render.reset();
utils->_capture_timestamps_begin();

//scene->iteration();
}
Expand Down Expand Up @@ -272,6 +271,13 @@ RasterizerGLES3::~RasterizerGLES3() {
}

void RasterizerGLES3::prepare_for_blitting_render_targets() {
	// Hack: this function is called exactly once, after every viewport has been
	// updated, so it doubles as the end-of-frame marker for all viewports.
	// The OpenGL renderer has to call end_frame once per viewport so that each
	// window's buffers are swapped before moving on to the next one; ending the
	// timestamp capture here instead lets the frame be incremented only after
	// all viewports are done.
	GLES3::Utilities::get_singleton()->capture_timestamps_end();
}

void RasterizerGLES3::_blit_render_target_to_screen(RID p_render_target, DisplayServer::WindowID p_screen, const Rect2 &p_screen_rect, uint32_t p_layer) {
Expand Down
163 changes: 82 additions & 81 deletions drivers/gles3/storage/utilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,35 @@
#include "particles_storage.h"
#include "texture_storage.h"

#include "servers/rendering/rendering_server_globals.h"

using namespace GLES3;

Utilities *Utilities::singleton = nullptr;

// Registers the singleton and prepares the timestamp capture storage of every
// frame slot.
Utilities::Utilities() {
	singleton = this;
	frame = 0;

	for (auto &slot : frames) {
		slot.index = 0;
		// One GL query object per timestamp that can be recorded in this slot.
		glGenQueries(max_timestamp_query_elements, slot.queries);

		// Buffers written while timestamps are being recorded.
		slot.timestamp_names.resize(max_timestamp_query_elements);
		slot.timestamp_cpu_values.resize(max_timestamp_query_elements);
		slot.timestamp_count = 0;

		// Buffers exposed through the get_captured_timestamp_* getters.
		slot.timestamp_result_names.resize(max_timestamp_query_elements);
		slot.timestamp_cpu_result_values.resize(max_timestamp_query_elements);
		slot.timestamp_result_values.resize(max_timestamp_query_elements);
		slot.timestamp_result_count = 0;
	}
}

// Unregisters the singleton and deletes the GL query objects of every frame slot.
Utilities::~Utilities() {
	singleton = nullptr;

	for (auto &slot : frames) {
		glDeleteQueries(max_timestamp_query_elements, slot.queries);
	}
}

Vector<uint8_t> Utilities::buffer_get_data(GLenum p_target, GLuint p_buffer, uint32_t p_buffer_size) {
Expand Down Expand Up @@ -213,87 +232,69 @@ void Utilities::visibility_notifier_call(RID p_notifier, bool p_enter, bool p_de

/* TIMING */

//void Utilities::render_info_begin_capture() {
// info.snap = info.render;
//}

//void Utilities::render_info_end_capture() {
// info.snap.object_count = info.render.object_count - info.snap.object_count;
// info.snap.draw_call_count = info.render.draw_call_count - info.snap.draw_call_count;
// info.snap.material_switch_count = info.render.material_switch_count - info.snap.material_switch_count;
// info.snap.surface_switch_count = info.render.surface_switch_count - info.snap.surface_switch_count;
// info.snap.shader_rebind_count = info.render.shader_rebind_count - info.snap.shader_rebind_count;
// info.snap.vertices_count = info.render.vertices_count - info.snap.vertices_count;
// info.snap._2d_item_count = info.render._2d_item_count - info.snap._2d_item_count;
// info.snap._2d_draw_call_count = info.render._2d_draw_call_count - info.snap._2d_draw_call_count;
//}

//int Utilities::get_captured_render_info(RS::RenderInfo p_info) {
// switch (p_info) {
// case RS::INFO_OBJECTS_IN_FRAME: {
// return info.snap.object_count;
// } break;
// case RS::INFO_VERTICES_IN_FRAME: {
// return info.snap.vertices_count;
// } break;
// case RS::INFO_MATERIAL_CHANGES_IN_FRAME: {
// return info.snap.material_switch_count;
// } break;
// case RS::INFO_SHADER_CHANGES_IN_FRAME: {
// return info.snap.shader_rebind_count;
// } break;
// case RS::INFO_SURFACE_CHANGES_IN_FRAME: {
// return info.snap.surface_switch_count;
// } break;
// case RS::INFO_DRAW_CALLS_IN_FRAME: {
// return info.snap.draw_call_count;
// } break;
// /*
// case RS::INFO_2D_ITEMS_IN_FRAME: {
// return info.snap._2d_item_count;
// } break;
// case RS::INFO_2D_DRAW_CALLS_IN_FRAME: {
// return info.snap._2d_draw_call_count;
// } break;
// */
// default: {
// return get_render_info(p_info);
// }
// }
//}

//int Utilities::get_render_info(RS::RenderInfo p_info) {
// switch (p_info) {
// case RS::INFO_OBJECTS_IN_FRAME:
// return info.render_final.object_count;
// case RS::INFO_VERTICES_IN_FRAME:
// return info.render_final.vertices_count;
// case RS::INFO_MATERIAL_CHANGES_IN_FRAME:
// return info.render_final.material_switch_count;
// case RS::INFO_SHADER_CHANGES_IN_FRAME:
// return info.render_final.shader_rebind_count;
// case RS::INFO_SURFACE_CHANGES_IN_FRAME:
// return info.render_final.surface_switch_count;
// case RS::INFO_DRAW_CALLS_IN_FRAME:
// return info.render_final.draw_call_count;
// /*
// case RS::INFO_2D_ITEMS_IN_FRAME:
// return info.render_final._2d_item_count;
// case RS::INFO_2D_DRAW_CALLS_IN_FRAME:
// return info.render_final._2d_draw_call_count;
//*/
// case RS::INFO_USAGE_VIDEO_MEM_TOTAL:
// return 0; //no idea
// case RS::INFO_VIDEO_MEM_USED:
// return info.vertex_mem + info.texture_mem;
// case RS::INFO_TEXTURE_MEM_USED:
// return info.texture_mem;
// case RS::INFO_VERTEX_MEM_USED:
// return info.vertex_mem;
// default:
// return 0; //no idea either
// }
//}
// Records a timestamp named "Frame Begin" in the current frame slot.
// The per-frame bookkeeping (reading back results and resetting the slot) is
// done by _capture_timestamps_begin(), not here.
void Utilities::capture_timestamps_begin() {
capture_timestamp("Frame Begin");
}

void Utilities::capture_timestamp(const String &p_name) {
ERR_FAIL_COND(frames[frame].timestamp_count >= max_timestamp_query_elements);

#ifdef GLES_OVER_GL
glQueryCounter(frames[frame].queries[frames[frame].timestamp_count], GL_TIMESTAMP);
#endif

frames[frame].timestamp_names[frames[frame].timestamp_count] = p_name;
frames[frame].timestamp_cpu_values[frames[frame].timestamp_count] = OS::get_singleton()->get_ticks_usec();
frames[frame].timestamp_count++;
}

void Utilities::_capture_timestamps_begin() {
// frame is incremented at the end of the frame so this gives us the queries for frame - 2. By then they should be ready.
if (frames[frame].timestamp_count) {
#ifdef GLES_OVER_GL
for (uint32_t i = 0; i < frames[frame].timestamp_count; i++) {
uint64_t temp = 0;
glGetQueryObjectui64v(frames[frame].queries[i], GL_QUERY_RESULT, &temp);
frames[frame].timestamp_result_values[i] = temp;
}
#endif
SWAP(frames[frame].timestamp_names, frames[frame].timestamp_result_names);
SWAP(frames[frame].timestamp_cpu_values, frames[frame].timestamp_cpu_result_values);
}

frames[frame].timestamp_result_count = frames[frame].timestamp_count;
frames[frame].timestamp_count = 0;
frames[frame].index = Engine::get_singleton()->get_frames_drawn();
capture_timestamp("Internal Begin");
}

void Utilities::capture_timestamps_end() {
capture_timestamp("Internal End");
frame = (frame + 1) % FRAME_COUNT;
}

// Returns how many timestamp results the current frame slot holds.
uint32_t Utilities::get_captured_timestamps_count() const {
	const auto &current = frames[frame];
	return current.timestamp_result_count;
}

// Returns the index stored in the current frame slot, i.e. the engine's
// frames-drawn counter at the time the slot was last reset.
uint64_t Utilities::get_captured_timestamps_frame() const {
	const auto &current = frames[frame];
	return current.index;
}

// Returns the GPU query result of captured timestamp `p_index` in the current
// frame slot, or 0 (with an error) when `p_index` is out of range.
uint64_t Utilities::get_captured_timestamp_gpu_time(uint32_t p_index) const {
	ERR_FAIL_UNSIGNED_INDEX_V(p_index, frames[frame].timestamp_result_count, 0);

	const auto &current = frames[frame];
	return current.timestamp_result_values[p_index];
}

// Returns the CPU time (microseconds, as recorded by capture_timestamp) of
// captured timestamp `p_index` in the current frame slot, or 0 (with an error)
// when `p_index` is out of range.
uint64_t Utilities::get_captured_timestamp_cpu_time(uint32_t p_index) const {
	ERR_FAIL_UNSIGNED_INDEX_V(p_index, frames[frame].timestamp_result_count, 0);

	const auto &current = frames[frame];
	return current.timestamp_cpu_result_values[p_index];
}

// Returns the name of captured timestamp `p_index` in the current frame slot,
// or an empty String (with an error) when `p_index` is out of range.
String Utilities::get_captured_timestamp_name(uint32_t p_index) const {
	ERR_FAIL_UNSIGNED_INDEX_V(p_index, frames[frame].timestamp_result_count, String());

	const auto &current = frames[frame];
	return current.timestamp_result_names[p_index];
}

/* MISC */

Expand Down
85 changes: 29 additions & 56 deletions drivers/gles3/storage/utilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,62 +79,35 @@ class Utilities : public RendererUtilities {

/* TIMING */

// Renderer statistics: memory usage plus per-frame render counters.
struct Info {
	uint64_t texture_mem = 0;
	uint64_t vertex_mem = 0;

	// Set of render counters. Every counter starts at zero through its default
	// member initializer, so no instance is ever left indeterminate.
	struct Render {
		uint32_t object_count = 0;
		uint32_t draw_call_count = 0;
		uint32_t material_switch_count = 0;
		uint32_t surface_switch_count = 0;
		uint32_t shader_rebind_count = 0;
		uint32_t vertices_count = 0;
		uint32_t _2d_item_count = 0;
		uint32_t _2d_draw_call_count = 0;

		// Sets every counter back to zero.
		void reset() {
			*this = Render();
		}
	} render, render_final, snap;
	// Previously the constructor reset `render` and `render_final` only, which
	// left `snap` uninitialized; the member initializers above now cover all
	// three, so no user-provided constructor is needed.
} info;

// Placeholder implementations of the timestamp capture interface: the capture
// calls do nothing and every getter reports that nothing was captured.

// No-op.
virtual void capture_timestamps_begin() override {}
// No-op; `p_name` is ignored.
virtual void capture_timestamp(const String &p_name) override {}
// Always reports zero captured timestamps.
virtual uint32_t get_captured_timestamps_count() const override {
return 0;
}
// Always returns 0.
virtual uint64_t get_captured_timestamps_frame() const override {
return 0;
}
// Always returns 0; `p_index` is ignored.
virtual uint64_t get_captured_timestamp_gpu_time(uint32_t p_index) const override {
return 0;
}
// Always returns 0; `p_index` is ignored.
virtual uint64_t get_captured_timestamp_cpu_time(uint32_t p_index) const override {
return 0;
}
// Always returns an empty String; `p_index` is ignored.
virtual String get_captured_timestamp_name(uint32_t p_index) const override {
return String();
}

// void render_info_begin_capture() override;
// void render_info_end_capture() override;
// int get_captured_render_info(RS::RenderInfo p_info) override;

// int get_render_info(RS::RenderInfo p_info) override;
// Size of each Frame's GL query array, i.e. the maximum number of timestamps
// that can be recorded in one frame slot.
#define MAX_QUERIES 256
// Number of Frame slots that `frame` cycles through.
#define FRAME_COUNT 3

// Timestamp capture state for one frame slot.
struct Frame {
// GL query objects, generated in the Utilities constructor and deleted in its destructor.
GLuint queries[MAX_QUERIES];
// Names of the timestamps being recorded (written by capture_timestamp()).
TightLocalVector<String> timestamp_names;
// CPU times in microseconds of the timestamps being recorded.
TightLocalVector<uint64_t> timestamp_cpu_values;
// Number of timestamps recorded so far in this slot.
uint32_t timestamp_count = 0;
// Names of the published results (swapped in by _capture_timestamps_begin()).
TightLocalVector<String> timestamp_result_names;
// CPU times of the published results (swapped in by _capture_timestamps_begin()).
TightLocalVector<uint64_t> timestamp_cpu_result_values;
// GPU query results of the published results; only filled in when built with GLES_OVER_GL.
TightLocalVector<uint64_t> timestamp_result_values;
// Number of valid entries in the result vectors.
uint32_t timestamp_result_count = 0;
// Engine frames-drawn counter at the time the slot was last reset.
uint64_t index = 0;
};

// Runtime copy of MAX_QUERIES used for sizing buffers and bounds checks.
const uint32_t max_timestamp_query_elements = MAX_QUERIES;

Frame frames[FRAME_COUNT]; // Frames for capturing timestamps. We use 3 so we don't need to wait for commands to complete.
// Index into `frames` of the slot currently in use; advanced by capture_timestamps_end().
uint32_t frame = 0;

// Records a "Frame Begin" timestamp.
virtual void capture_timestamps_begin() override;
// Records a timestamp named `p_name` in the current frame slot.
virtual void capture_timestamp(const String &p_name) override;
// Number of timestamp results available in the current frame slot.
virtual uint32_t get_captured_timestamps_count() const override;
// Frame index stored in the current frame slot.
virtual uint64_t get_captured_timestamps_frame() const override;
// GPU time of result `p_index`; 0 if the index is out of range.
virtual uint64_t get_captured_timestamp_gpu_time(uint32_t p_index) const override;
// CPU time of result `p_index`; 0 if the index is out of range.
virtual uint64_t get_captured_timestamp_cpu_time(uint32_t p_index) const override;
// Name of result `p_index`; empty String if the index is out of range.
virtual String get_captured_timestamp_name(uint32_t p_index) const override;
// Called from RasterizerGLES3::begin_frame(): publishes the slot's previous
// timestamps as results, resets the slot and records "Internal Begin".
void _capture_timestamps_begin();
// Called from RasterizerGLES3::prepare_for_blitting_render_targets(): records
// "Internal End" and advances to the next frame slot.
void capture_timestamps_end();

/* MISC */

Expand Down

0 comments on commit 5a00568

Please sign in to comment.