Skip to content

Commit

Permalink
Restructure shadowmap rendering in mobile renderer
Browse files Browse the repository at this point in the history
  • Loading branch information
BastiaanOlij committed May 9, 2023
1 parent bd1bc68 commit 92f2545
Show file tree
Hide file tree
Showing 13 changed files with 333 additions and 329 deletions.
58 changes: 41 additions & 17 deletions drivers/vulkan/rendering_device_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6721,18 +6721,27 @@ Error RenderingDeviceVulkan::_draw_list_render_pass_begin(Framebuffer *framebuff
render_pass_begin.pNext = nullptr;
render_pass_begin.renderPass = render_pass;
render_pass_begin.framebuffer = vkframebuffer;
/*
* Given how API works, it makes sense to always fully operate on the whole framebuffer.
* This allows better continue operations for operations like shadowmapping.
render_pass_begin.renderArea.extent.width = viewport_size.width;
render_pass_begin.renderArea.extent.height = viewport_size.height;
render_pass_begin.renderArea.offset.x = viewport_offset.x;
render_pass_begin.renderArea.offset.y = viewport_offset.y;
*/
render_pass_begin.renderArea.extent.width = framebuffer->size.width;
render_pass_begin.renderArea.extent.height = framebuffer->size.height;
render_pass_begin.renderArea.offset.x = 0;
render_pass_begin.renderArea.offset.y = 0;

if (use_render_area) {
/*
* Regardless of the original remark here, copied below,
* feedback from GPU vendors is that if the full texture is specified here,
* the full texture is processed by the tiled system wasting bandwidth.
*/
render_pass_begin.renderArea.extent.width = viewport_size.width;
render_pass_begin.renderArea.extent.height = viewport_size.height;
render_pass_begin.renderArea.offset.x = viewport_offset.x;
render_pass_begin.renderArea.offset.y = viewport_offset.y;
} else {
/*
* Given how API works, it makes sense to always fully operate on the whole framebuffer.
* This allows better continue operations for operations like shadowmapping.
*/
render_pass_begin.renderArea.extent.width = framebuffer->size.width;
render_pass_begin.renderArea.extent.height = framebuffer->size.height;
render_pass_begin.renderArea.offset.x = 0;
render_pass_begin.renderArea.offset.y = 0;
}

Vector<VkClearValue> clear_values;
clear_values.resize(framebuffer->texture_ids.size());
Expand Down Expand Up @@ -8199,10 +8208,16 @@ void RenderingDeviceVulkan::compute_list_end(BitField<BarrierMask> p_post_barrie
barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (p_post_barrier.has_flag(BARRIER_MASK_RASTER)) {
barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
if (p_post_barrier.has_flag(BARRIER_MASK_VERTEX)) {
// TODO check if VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT/VK_ACCESS_INDIRECT_COMMAND_READ_BIT makes sense.
barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
}
if (p_post_barrier.has_flag(BARRIER_MASK_FRAGMENT)) {
// TODO check if VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT/VK_ACCESS_INDIRECT_COMMAND_READ_BIT makes sense.
barrier_flags |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
}
if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) {
barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT;
Expand All @@ -8227,7 +8242,10 @@ void RenderingDeviceVulkan::barrier(BitField<BarrierMask> p_from, BitField<Barri
src_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
src_access_flags |= VK_ACCESS_SHADER_WRITE_BIT;
}
if (p_from.has_flag(BARRIER_MASK_RASTER)) {
if (p_from.has_flag(BARRIER_MASK_VERTEX)) {
// don't seem to have barrier here that make sense (logical because we're inside of a pass)
}
if (p_from.has_flag(BARRIER_MASK_FRAGMENT)) {
src_barrier_flags |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
src_access_flags |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
}
Expand All @@ -8247,10 +8265,16 @@ void RenderingDeviceVulkan::barrier(BitField<BarrierMask> p_from, BitField<Barri
dst_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (p_to.has_flag(BARRIER_MASK_RASTER)) {
dst_barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
if (p_to.has_flag(BARRIER_MASK_VERTEX)) {
// TODO check if VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT/VK_ACCESS_INDIRECT_COMMAND_READ_BIT makes sense.
dst_barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
}
if (p_to.has_flag(BARRIER_MASK_FRAGMENT)) {
// TODO check if VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT/VK_ACCESS_INDIRECT_COMMAND_READ_BIT makes sense.
dst_barrier_flags |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
}
if (p_to.has_flag(BARRIER_MASK_TRANSFER)) {
dst_barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
dst_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT;
Expand Down
1 change: 1 addition & 0 deletions drivers/vulkan/rendering_device_vulkan.h
Original file line number Diff line number Diff line change
Expand Up @@ -895,6 +895,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
#endif
uint32_t draw_list_current_subpass = 0;

bool use_render_area = true; // Use render area to constraint render target (faster on mobile, slower on desktop?? Or incorrect assumption?)
bool draw_list_split = false;
Vector<RID> draw_list_bound_textures;
Vector<RID> draw_list_storage_textures;
Expand Down
4 changes: 2 additions & 2 deletions servers/rendering/renderer_rd/effects/copy_effects.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -962,7 +962,7 @@ void CopyEffects::set_color_raster(RID p_dest_texture, const Color &p_color, con
RD::get_singleton()->draw_list_end();
}

void CopyEffects::copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dst_framebuffer, const Rect2 &p_rect, const Vector2 &p_dst_size, float p_z_near, float p_z_far, bool p_dp_flip) {
void CopyEffects::copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dst_framebuffer, const Rect2 &p_rect, const Vector2 &p_dst_size, float p_z_near, float p_z_far, bool p_dp_flip, BitField<RD::BarrierMask> p_post_barrier) {
UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton();
ERR_FAIL_NULL(uniform_set_cache);
MaterialStorage *material_storage = MaterialStorage::get_singleton();
Expand Down Expand Up @@ -994,7 +994,7 @@ void CopyEffects::copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dst_framebuf

RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(CopyToDPPushConstant));
RD::get_singleton()->draw_list_draw(draw_list, true);
RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_TRANSFER);
RD::get_singleton()->draw_list_end(p_post_barrier);
}

void CopyEffects::cubemap_downsample(RID p_source_cubemap, RID p_dest_cubemap, const Size2i &p_size) {
Expand Down
2 changes: 1 addition & 1 deletion servers/rendering/renderer_rd/effects/copy_effects.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ class CopyEffects {
void set_color(RID p_dest_texture, const Color &p_color, const Rect2i &p_region, bool p_8bit_dst = false);
void set_color_raster(RID p_dest_texture, const Color &p_color, const Rect2i &p_region);

void copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dst_framebuffer, const Rect2 &p_rect, const Vector2 &p_dst_size, float p_z_near, float p_z_far, bool p_dp_flip);
void copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dst_framebuffer, const Rect2 &p_rect, const Vector2 &p_dst_size, float p_z_near, float p_z_far, bool p_dp_flip, BitField<RD::BarrierMask> p_post_barrier = RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_TRANSFER);
void cubemap_downsample(RID p_source_cubemap, RID p_dest_cubemap, const Size2i &p_size);
void cubemap_downsample_raster(RID p_source_cubemap, RID p_dest_framebuffer, uint32_t p_face_id, const Size2i &p_size);
void cubemap_filter(RID p_source_cubemap, Vector<RID> p_dest_cubemap, bool p_use_array);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1416,7 +1416,7 @@ void RenderForwardClustered::_pre_opaque_render(RenderDataRD *p_render_data, boo

//Do shadow rendering (in parallel with GI)
if (render_shadows) {
_render_shadow_end(RD::BARRIER_MASK_NO_BARRIER);
_render_shadow_end();
}

if (render_gi) {
Expand Down Expand Up @@ -2293,13 +2293,17 @@ void RenderForwardClustered::_render_shadow_pass(RID p_light, RID p_shadow_atlas
if (finalize_cubemap) {
_render_shadow_process();
_render_shadow_end();

//barrier, don't start our next fragment shader until previous fragment shaders are done...
RD::get_singleton()->barrier(RD::BARRIER_MASK_FRAGMENT, RD::BARRIER_MASK_FRAGMENT);

//reblit
Rect2 atlas_rect_norm = atlas_rect;
atlas_rect_norm.position /= float(atlas_size);
atlas_rect_norm.size /= float(atlas_size);
copy_effects->copy_cubemap_to_dp(render_texture, atlas_fb, atlas_rect_norm, atlas_rect.size, light_projection.get_z_near(), light_projection.get_z_far(), false);
copy_effects->copy_cubemap_to_dp(render_texture, atlas_fb, atlas_rect_norm, atlas_rect.size, light_projection.get_z_near(), light_projection.get_z_far(), false, RD::BARRIER_MASK_NO_BARRIER);
atlas_rect_norm.position += Vector2(dual_paraboloid_offset) * atlas_rect_norm.size;
copy_effects->copy_cubemap_to_dp(render_texture, atlas_fb, atlas_rect_norm, atlas_rect.size, light_projection.get_z_near(), light_projection.get_z_far(), true);
copy_effects->copy_cubemap_to_dp(render_texture, atlas_fb, atlas_rect_norm, atlas_rect.size, light_projection.get_z_near(), light_projection.get_z_far(), true, RD::BARRIER_MASK_NO_BARRIER);

//restore transform so it can be properly used
light_storage->light_instance_set_shadow_transform(p_light, Projection(), light_storage->light_instance_get_base_transform(p_light), zfar, 0, 0, 0);
Expand Down Expand Up @@ -2398,17 +2402,14 @@ void RenderForwardClustered::_render_shadow_process() {

RD::get_singleton()->draw_command_end_label();
}
void RenderForwardClustered::_render_shadow_end(uint32_t p_barrier) {
void RenderForwardClustered::_render_shadow_end() {
RD::get_singleton()->draw_command_begin_label("Shadow Render");

for (SceneState::ShadowPass &shadow_pass : scene_state.shadow_passes) {
RenderListParameters render_list_parameters(render_list[RENDER_LIST_SECONDARY].elements.ptr() + shadow_pass.element_from, render_list[RENDER_LIST_SECONDARY].element_info.ptr() + shadow_pass.element_from, shadow_pass.element_count, shadow_pass.flip_cull, shadow_pass.pass_mode, 0, true, false, shadow_pass.rp_uniform_set, false, Vector2(), shadow_pass.lod_distance_multiplier, shadow_pass.screen_mesh_lod_threshold, 1, shadow_pass.element_from, RD::BARRIER_MASK_NO_BARRIER);
_render_list_with_threads(&render_list_parameters, shadow_pass.framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, shadow_pass.initial_depth_action, shadow_pass.final_depth_action, Vector<Color>(), 1.0, 0, shadow_pass.rect);
}

if (p_barrier != RD::BARRIER_MASK_NO_BARRIER) {
RD::get_singleton()->barrier(RD::BARRIER_MASK_RASTER, p_barrier);
}
RD::get_singleton()->draw_command_end_label();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@ class RenderForwardClustered : public RendererSceneRenderRD {
void _render_shadow_begin();
void _render_shadow_append(RID p_framebuffer, const PagedArray<RenderGeometryInstance *> &p_instances, const Projection &p_projection, const Transform3D &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_mesh_lod_threshold = 0.0, const Rect2i &p_rect = Rect2i(), bool p_flip_y = false, bool p_clear_region = true, bool p_begin = true, bool p_end = true, RenderingMethod::RenderInfo *p_render_info = nullptr);
void _render_shadow_process();
void _render_shadow_end(uint32_t p_barrier = RD::BARRIER_MASK_ALL_BARRIERS);
void _render_shadow_end();

/* Render Scene */
void _process_ssao(Ref<RenderSceneBuffersRD> p_render_buffers, RID p_environment, const RID *p_normal_buffers, const Projection *p_projections);
Expand Down
Loading

0 comments on commit 92f2545

Please sign in to comment.