Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.x] Shader goodies: async. compilation + caching #46330

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions core/hash_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ class HashMap {
TKey key;
TData data;

Pair() {}
Pair(const TKey &p_key) :
key(p_key),
data() {}
Pair(const TKey &p_key, const TData &p_data) :
key(p_key),
data(p_data) {
Expand Down Expand Up @@ -90,6 +92,11 @@ class HashMap {
const TData &value() const {
return pair.value();
}

// Builds an element whose pair holds p_key together with a value-initialized
// TData (see Pair(const TKey &)). `hash` and `next` are not set here;
// create_element() assigns both right after construction.
Element(const TKey &p_key) :
pair(p_key) {}
Element(const Element &p_other) :
pair(p_other.pair.key, p_other.pair.data) {}
};

private:
Expand Down Expand Up @@ -192,14 +199,12 @@ class HashMap {

Element *create_element(const TKey &p_key) {
/* if element doesn't exist, create it */
Element *e = memnew(Element);
Element *e = memnew(Element(p_key));
ERR_FAIL_COND_V_MSG(!e, nullptr, "Out of memory.");
uint32_t hash = Hasher::hash(p_key);
uint32_t index = hash & ((1 << hash_table_power) - 1);
e->next = hash_table[index];
e->hash = hash;
e->pair.key = p_key;
e->pair.data = TData();

hash_table[index] = e;
elements++;
Expand Down Expand Up @@ -228,9 +233,7 @@ class HashMap {
const Element *e = p_t.hash_table[i];

while (e) {
Element *le = memnew(Element); /* local element */

*le = *e; /* copy data */
Element *le = memnew(Element(*e)); /* local element */

/* add to list and reassign pointers */
le->next = hash_table[i];
Expand Down
6 changes: 6 additions & 0 deletions core/os/os.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,12 @@ const char *OS::get_video_driver_name(int p_driver) const {
}
}

// Base implementation: reports that no offscreen GL context is available.
// Declared virtual in os.h, so a platform-specific OS class can override it.
bool OS::is_offscreen_gl_available() const {
return false;
}

// Base implementation: does nothing with p_current. Declared virtual in os.h,
// so a platform-specific OS class can override it.
void OS::set_offscreen_gl_current(bool p_current) {}

// Returns the driver count reported by AudioDriverManager::get_driver_count().
int OS::get_audio_driver_count() const {
return AudioDriverManager::get_driver_count();
}
Expand Down
3 changes: 3 additions & 0 deletions core/os/os.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ class OS {
virtual const char *get_video_driver_name(int p_driver) const;
virtual int get_current_video_driver() const = 0;

virtual bool is_offscreen_gl_available() const;
virtual void set_offscreen_gl_current(bool p_current);

virtual int get_audio_driver_count() const;
virtual const char *get_audio_driver_name(int p_driver) const;

Expand Down
133 changes: 133 additions & 0 deletions core/threaded_callable_queue.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*************************************************************************/
/* threaded_callable_queue.h */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/

#ifndef THREADED_CALLABLE_QUEUE_H
#define THREADED_CALLABLE_QUEUE_H

#include "core/local_vector.h"
#include "core/ordered_hash_map.h"
#include "core/os/mutex.h"
#include "core/os/semaphore.h"
#include "core/os/thread.h"

#include <functional>

// Queue of keyed jobs that are executed on a worker thread owned by this object.
// The thread is started in the constructor and joined in the destructor.
// K is the key type used to identify a pending job (for rejecting duplicates
// in enqueue() and for cancel()).
template <class K>
class ThreadedCallableQueue {
public:
// A job is any callable taking no arguments and returning nothing.
using Job = std::function<void()>;

private:
// Stop flag: set in the destructor, checked by the worker thread and by
// enqueue()/cancel().
bool exit;
// Worker thread running _thread_func().
Thread thread;
// Taken around accesses to `queue` (and reads of `exit`) in the member functions.
BinaryMutex mutex;
// Posted by enqueue() and by the destructor to wake the worker thread.
Semaphore sem;
// Pending jobs by key; the worker always takes the front element.
OrderedHashMap<K, Job> queue;

// Worker thread entry point; p_user_data is the owning ThreadedCallableQueue.
static void _thread_func(void *p_user_data);

public:
// Adds p_job under p_key and wakes the worker. Fails (error macro) if the
// queue is shutting down or p_key is already pending.
void enqueue(K p_key, Job p_job);
// Removes the pending job stored under p_key, if there is one.
void cancel(K p_key);

ThreadedCallableQueue();
~ThreadedCallableQueue();
};

// Worker thread entry point. p_user_data is the owning ThreadedCallableQueue.
//
// Each semaphore post wakes the loop once. On wake-up the stop flag is checked
// first; otherwise the front job (if any) is removed from the queue and run
// with the mutex released, so enqueue()/cancel() are not blocked by a running
// job. A wake-up that finds the queue empty is normal: cancel() removes jobs
// without consuming their semaphore post (see cancel() for why).
// Once the stop flag is seen, every job still queued is run before returning.
template <class K>
void ThreadedCallableQueue<K>::_thread_func(void *p_user_data) {
	ThreadedCallableQueue *self = static_cast<ThreadedCallableQueue *>(p_user_data);

	while (true) {
		self->sem.wait();
		self->mutex.lock();
		if (self->exit) {
			self->mutex.unlock();
			break;
		}

		typename OrderedHashMap<K, Job>::Element E = self->queue.front();
		if (!E) {
			// Stale post left behind by a cancelled job; nothing to do.
			self->mutex.unlock();
			continue;
		}

		// Take a copy of the job and drop it from the queue while locked,
		// then run it unlocked.
		Job job = E.value();
		self->queue.erase(E);
		self->mutex.unlock();

		job();
	}

	// Shutting down: run whatever is still pending.
	self->mutex.lock();
	for (typename OrderedHashMap<K, Job>::Element E = self->queue.front(); E; E = E.next()) {
		Job job = E.value();
		job();
	}
	self->mutex.unlock();
}

// Queues p_job under p_key and posts the semaphore so the worker picks it up.
// Fails (error macro, job not queued) if the queue is shutting down or a job
// with the same key is already pending.
template <class K>
void ThreadedCallableQueue<K>::enqueue(K p_key, Job p_job) {
	MutexLock lock(mutex);
	ERR_FAIL_COND(exit);
	ERR_FAIL_COND(queue.has(p_key));
	queue.insert(p_key, p_job);
	sem.post();
}

// Removes the pending job stored under p_key, if any. A job the worker has
// already taken out of the queue is not affected.
// Fails (error macro) if the queue is shutting down.
template <class K>
void ThreadedCallableQueue<K>::cancel(K p_key) {
	MutexLock lock(mutex);
	ERR_FAIL_COND(exit);
	// Do NOT wait on the semaphore here to "take back" the job's post: the
	// worker may already have consumed it and be blocked on the mutex we are
	// holding, in which case waiting would deadlock both threads. The worker
	// simply ignores a wake-up that finds the queue empty.
	queue.erase(p_key);
}

// Clears the stop flag and launches the worker thread, handing it this
// object as its user data.
template <class K>
ThreadedCallableQueue<K>::ThreadedCallableQueue() :
		exit(false) {
	thread.start(&ThreadedCallableQueue<K>::_thread_func, this);
}

// Asks the worker thread to stop, wakes it up and waits for it to finish.
template <class K>
ThreadedCallableQueue<K>::~ThreadedCallableQueue() {
	// The worker (and enqueue()/cancel()) read `exit` with the mutex held, so
	// write it under the same mutex to avoid a data race. The lock must be
	// released before joining: the worker needs it to observe the flag.
	mutex.lock();
	exit = true;
	mutex.unlock();

	sem.post();
	thread.wait_to_finish();
}

#endif // THREADED_CALLABLE_QUEUE_H
37 changes: 37 additions & 0 deletions doc/classes/ProjectSettings.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1213,6 +1213,43 @@
If [code]true[/code] and available on the target device, enables high floating point precision for all shader computations in GLES2.
[b]Warning:[/b] High floating point precision can be extremely slow on older devices and is often not available at all. Use with caution.
</member>
<member name="rendering/gles3/shaders/async_compile_enabled" type="bool" setter="" getter="" default="false">
If [code]true[/code] and available on the target device, enables asynchronous compilation of shaders.
That means that when a shader is first used under some new rendering situation, if it's configured to have a fallback, the game won't stall while it is being compiled. Instead, the fallback will be used and the real shader will be compiled in the background. Once the actual shader is compiled, it will be used from the next time a frame is drawn with it.
This setting also enables asynchronous reconstruction of shaders from cache. This means that if [code]rendering/gles3/shaders/cache_enabled[/code] is enabled as well, the reconstruction of a cached shader will also be done in the background.
[b]Warning:[/b] Async. compilation is currently only supported for spatial shaders.
</member>
<member name="rendering/gles3/shaders/cache_enabled" type="bool" setter="" getter="" default="false">
If [code]true[/code] and available on the target device, a binary representation of shaders under the rendering situations they are used will be written to a cache directory.
This helps, at the cost of some storage space, to get shaders ready the next time they are used after having been unloaded from memory (namely, next startup of the game or next switch to a specific game scene).
</member>
<member name="rendering/gles3/shaders/debug_force_use_fallbacks" type="bool" setter="" getter="" default="false">
Only meaningful if [code]rendering/gles3/shaders/async_compile_enabled[/code] is [code]true[/code].
If [code]true[/code], for every shader that has a fallback, such fallback is used to render instead of the real shader, even if it's already compiled.
This is useful to see how well the fallbacks look in a scene, because normally they may be switching to the real ones too fast to be assessed.
</member>
<member name="rendering/gles3/shaders/force_no_render_fallback" type="bool" setter="" getter="" default="false">
Only meaningful if [code]rendering/gles3/shaders/async_compile_enabled[/code] is [code]true[/code].
If [code]true[/code], shaders of every kind for which asynchronous compilation is supported will be forced to use a 'no render' fallback.
On one hand, this is a quick way to enable async. shader compilation for a project, instead of deciding the proper fallback mode for each shader.
On the other hand, this helps inform that decision process by letting you run the project and notice where you can't aesthetically afford to have certain shaders totally absent while they are getting ready.
</member>
<member name="rendering/gles3/shaders/max_concurrent_compiles" type="int" setter="" getter="" default="4">
Only meaningful if [code]rendering/gles3/shaders/async_compile_enabled[/code] is [code]true[/code].
This is the maximum number of shaders that can be compiled (or reconstructed from cache) at the same time.
At runtime, while that count is reached, other shaders that can be asynchronously compiled will just use their fallback, without their setup being started until the count gets lower.
This is a way to balance the CPU work between running the game and compiling the shaders. The goal is to have as many asynchronous compiles in flight as possible without impacting the responsiveness of the game, which beyond some point would destroy the benefits of asynchronous compilation. In other words, you may be able to afford that the FPS lowers a bit, and that will already be better than the stalling that synchronous compilation could cause.
The default value is meant to be a reasonable one for desktop platforms, but you are advised to tweak it according to the hardware you are targeting.
</member>
<member name="rendering/gles3/shaders/max_concurrent_compiles.mobile" type="int" setter="" getter="" default="1">
A very conservative override for [code]rendering/gles3/shaders/max_concurrent_compiles[/code] on mobile.
Depending on the specific devices you are targeting, you may want to raise it.
</member>
<member name="rendering/gles3/shaders/simple_fallback_modulate" type="Color" setter="" getter="" default="Color( 1, 1, 1, 1 )">
Only meaningful if [code]rendering/gles3/shaders/async_compile_enabled[/code] is [code]true[/code].
This is for shaders whose fallback mode is set to 'simple'. While they are being set up, the simple shader used to replace them will be modulated by these RGB values.
This is useful to tweak their appearance to the overall lighting of a game. For instance, the albedo-only look of the simple shader may be too bright so you can use this setting to make the simple fallbacks darker so they don't stand out too much.
</member>
<member name="rendering/limits/buffers/blend_shape_max_buffer_size_kb" type="int" setter="" getter="" default="4096">
Max buffer size for blend shapes. Any blend shape bigger than this will not work.
</member>
Expand Down
14 changes: 14 additions & 0 deletions doc/classes/SpatialMaterial.xml
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,11 @@
<member name="emission_texture" type="Texture" setter="set_texture" getter="get_texture">
Texture that specifies how much surface emits light at a given point.
</member>
<member name="fallback_mode" type="int" setter="set_fallback_mode" getter="get_fallback_mode" enum="SpatialMaterial.FallbackMode" default="2">
When the project setting [code]rendering/gles3/shaders/async_compile_enabled[/code] is [code]true[/code], this determines how this material must behave with regard to asynchronous shader compilation.
This behaves like a hint, which means that, depending on the features this material uses, using the simple fallback may not be possible.
The default is [constant FALLBACK_MODE_SIMPLE], which is a good starting point for most projects. Feel free to check the other fallback modes.
</member>
<member name="flags_albedo_tex_force_srgb" type="bool" setter="set_flag" getter="get_flag" default="false">
Forces a conversion of the [member albedo_texture] from sRGB space to linear space.
</member>
Expand Down Expand Up @@ -638,5 +643,14 @@
<constant name="DISTANCE_FADE_OBJECT_DITHER" value="3" enum="DistanceFadeMode">
Smoothly fades the object out based on the object's distance from the camera using a dither approach. Dithering discards pixels based on a set pattern to smoothly fade without enabling transparency. On certain hardware this can be faster than [constant DISTANCE_FADE_PIXEL_ALPHA].
</constant>
<constant name="FALLBACK_MODE_NONE" value="0" enum="FallbackMode">
This material won't have a fallback. The application will stop to compile its full-blown shader when it's used for the first time.
</constant>
<constant name="FALLBACK_MODE_NO_RENDER" value="1" enum="FallbackMode">
Anything with this material applied won't be rendered while this material's shader is being compiled.
</constant>
<constant name="FALLBACK_MODE_SIMPLE" value="2" enum="FallbackMode">
Anything with this material applied will be rendered with a simple shader while this material's shader is being compiled.
</constant>
</constants>
</class>
10 changes: 10 additions & 0 deletions doc/classes/VisualServer.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2523,6 +2523,16 @@
Sets the default clear color which is used when a specific clear color has not been selected.
</description>
</method>
<method name="set_forced_sync_shader_compile_enabled">
<return type="void">
</return>
<argument index="0" name="enabled" type="bool">
</argument>
<description>
In case asynchronous shader compilation is enabled, this disables or re-enables it.
A project may want to disable it temporarily, for instance, if it wants to ensure shaders are "warmed up" by just rendering a frame of the whole view of a level beneath a loading screen.
</description>
</method>
<method name="set_shader_time_scale">
<return type="void" />
<argument index="0" name="scale" type="float" />
Expand Down
3 changes: 3 additions & 0 deletions drivers/dummy/rasterizer_dummy.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,9 @@ class RasterizerStorageDummy : public RasterizerStorage {
void shader_get_custom_defines(RID p_shader, Vector<String> *p_defines) const {}
void shader_remove_custom_define(RID p_shader, const String &p_define) {}

// No-op in the dummy rasterizer: p_enabled is ignored.
void set_forced_sync_shader_compile_enabled(bool p_enabled) {}
// The dummy rasterizer always reports false.
bool is_forced_sync_shader_compile_enabled() { return false; }

/* COMMON MATERIAL API */

RID material_create() { return RID(); }
Expand Down
3 changes: 3 additions & 0 deletions drivers/gles2/rasterizer_storage_gles2.h
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,9 @@ class RasterizerStorageGLES2 : public RasterizerStorage {
virtual void shader_get_custom_defines(RID p_shader, Vector<String> *p_defines) const;
virtual void shader_remove_custom_define(RID p_shader, const String &p_define);

// No-op in the GLES2 storage: p_enabled is ignored.
void set_forced_sync_shader_compile_enabled(bool p_enabled) {}
// The GLES2 storage always reports false.
bool is_forced_sync_shader_compile_enabled() { return false; }

void _update_shader(Shader *p_shader) const;
void update_dirty_shaders();

Expand Down
15 changes: 14 additions & 1 deletion drivers/gles3/rasterizer_gles3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,13 +207,16 @@ void RasterizerGLES3::begin_frame(double frame_step) {
storage->frame.time[2] = Math::fmod(time_total, 900);
storage->frame.time[3] = Math::fmod(time_total, 60);
storage->frame.count++;
storage->frame.shade_compiles_started = 0;
storage->frame.delta = frame_step;

storage->update_dirty_resources();

storage->info.render_final = storage->info.render;
storage->info.render.reset();

ShaderGLES3::current_frame = storage->frame.count;

scene->iteration();
}

Expand Down Expand Up @@ -410,6 +413,8 @@ void RasterizerGLES3::end_frame(bool p_swap_buffers) {
}
}

ShaderGLES3::advance_async_shaders_compilation();

if (p_swap_buffers) {
OS::get_singleton()->swap_buffers();
} else {
Expand All @@ -431,6 +436,13 @@ void RasterizerGLES3::make_current() {
}

void RasterizerGLES3::register_config() {
GLOBAL_DEF("rendering/gles3/shaders/cache_enabled", false);
GLOBAL_DEF("rendering/gles3/shaders/async_compile_enabled", false);
GLOBAL_DEF("rendering/gles3/shaders/max_concurrent_compiles", 4);
GLOBAL_DEF("rendering/gles3/shaders/max_concurrent_compiles.mobile", 1);
GLOBAL_DEF("rendering/gles3/shaders/simple_fallback_modulate", Color(1, 1, 1));
GLOBAL_DEF("rendering/gles3/shaders/force_no_render_fallback", false);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We define all project settings in visual_server.cpp now. This way users can still see the gles3 settings when running in GLES2 mode.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. I'll fix it.

GLOBAL_DEF("rendering/gles3/shaders/debug_force_use_fallbacks", false);
}

bool RasterizerGLES3::gl_check_errors() {
Expand Down Expand Up @@ -484,13 +496,14 @@ RasterizerGLES3::RasterizerGLES3() {
storage->canvas = canvas;
scene->storage = storage;
storage->scene = scene;
ShaderGLES3::shader_compiles_started_this_frame = &storage->frame.shade_compiles_started;

time_total = 0;
time_scale = 1;
}

RasterizerGLES3::~RasterizerGLES3() {
memdelete(storage);
memdelete(canvas);
memdelete(scene);
memdelete(storage);
}
Loading