From 4577dfdb6747600438ac4b86c29a4d6c12ec9b0a Mon Sep 17 00:00:00 2001 From: lawnjelly Date: Fri, 10 Nov 2023 08:17:47 +0000 Subject: [PATCH] Shadow volume culling and tighter shadow caster culling Existing shadow caster culling takes no account of the camera. This PR adds the highly encapsulated class RenderingLightCuller which can cut down the casters in the shadow volume to only those which can cast shadows on the camera frustum. --- core/error/error_macros.h | 10 + core/templates/paged_array.h | 6 + doc/classes/ProjectSettings.xml | 4 + servers/rendering/renderer_scene_cull.cpp | 82 +- servers/rendering/renderer_scene_cull.h | 53 +- servers/rendering/rendering_light_culler.cpp | 1125 ++++++++++++++++++ servers/rendering/rendering_light_culler.h | 248 ++++ 7 files changed, 1515 insertions(+), 13 deletions(-) create mode 100644 servers/rendering/rendering_light_culler.cpp create mode 100644 servers/rendering/rendering_light_culler.h diff --git a/core/error/error_macros.h b/core/error/error_macros.h index c8182975d57b..016c963e0420 100644 --- a/core/error/error_macros.h +++ b/core/error/error_macros.h @@ -812,4 +812,14 @@ void _err_flush_stdout(); #define DEV_ASSERT(m_cond) #endif +#ifdef DEV_ENABLED +#define DEV_CHECK_ONCE(m_cond) \ + if (unlikely(!(m_cond))) { \ + ERR_PRINT_ONCE("DEV_CHECK_ONCE failed \"" _STR(m_cond) "\" is false."); \ + } else \ + ((void)0) +#else +#define DEV_CHECK_ONCE(m_cond) +#endif + #endif // ERROR_MACROS_H diff --git a/core/templates/paged_array.h b/core/templates/paged_array.h index 69a792958a69..6b7e0cee1683 100644 --- a/core/templates/paged_array.h +++ b/core/templates/paged_array.h @@ -229,6 +229,12 @@ class PagedArray { count--; } + void remove_at_unordered(uint64_t p_index) { + ERR_FAIL_UNSIGNED_INDEX(p_index, count); + (*this)[p_index] = (*this)[count - 1]; + pop_back(); + } + void clear() { //destruct if needed if (!std::is_trivially_destructible::value) { diff --git a/doc/classes/ProjectSettings.xml 
b/doc/classes/ProjectSettings.xml index 0876261a3153..e87c5da5d0df 100644 --- a/doc/classes/ProjectSettings.xml +++ b/doc/classes/ProjectSettings.xml @@ -2530,6 +2530,10 @@ Lower-end override for [member rendering/lights_and_shadows/positional_shadow/soft_shadow_filter_quality] on mobile devices, due to performance concerns or driver support. + + If [code]true[/code], items that cannot cast shadows into the view frustum will not be rendered into shadow maps. + This can increase performance. + Enables the use of physically based units for light sources. Physically based units tend to be much larger than the arbitrary units used by Godot, but they can be used to match lighting within Godot to real-world lighting. Due to the large dynamic range of lighting conditions present in nature, Godot bakes exposure into the various lighting quantities before rendering. Most light sources bake exposure automatically at run time based on the active [CameraAttributes] resource, but [LightmapGI] and [VoxelGI] require a [CameraAttributes] resource to be set at bake time to reduce the dynamic range. At run time, Godot will automatically reconcile the baked exposure with the active exposure to ensure lighting remains consistent. 
diff --git a/servers/rendering/renderer_scene_cull.cpp b/servers/rendering/renderer_scene_cull.cpp index 2f7e4fef06b6..b8f14bb6112c 100644 --- a/servers/rendering/renderer_scene_cull.cpp +++ b/servers/rendering/renderer_scene_cull.cpp @@ -33,6 +33,7 @@ #include "core/config/project_settings.h" #include "core/object/worker_thread_pool.h" #include "core/os/os.h" +#include "rendering_light_culler.h" #include "rendering_server_default.h" #include @@ -158,7 +159,7 @@ void RendererSceneCull::_instance_pair(Instance *p_A, Instance *p_B) { light->geometries.insert(A); if (geom->can_cast_shadows) { - light->shadow_dirty = true; + light->make_shadow_dirty(); } if (A->scenario && A->array_index >= 0) { @@ -265,7 +266,7 @@ void RendererSceneCull::_instance_unpair(Instance *p_A, Instance *p_B) { light->geometries.erase(A); if (geom->can_cast_shadows) { - light->shadow_dirty = true; + light->make_shadow_dirty(); } if (A->scenario && A->array_index >= 0) { @@ -871,7 +872,7 @@ void RendererSceneCull::instance_set_layer_mask(RID p_instance, uint32_t p_mask) if (geom->can_cast_shadows) { for (HashSet::Iterator I = geom->lights.begin(); I != geom->lights.end(); ++I) { InstanceLightData *light = static_cast((*I)->base_data); - light->shadow_dirty = true; + light->make_shadow_dirty(); } } } @@ -1565,7 +1566,7 @@ void RendererSceneCull::_update_instance(Instance *p_instance) { RSG::light_storage->light_instance_set_transform(light->instance, p_instance->transform); RSG::light_storage->light_instance_set_aabb(light->instance, p_instance->transform.xform(p_instance->aabb)); - light->shadow_dirty = true; + light->make_shadow_dirty(); RS::LightBakeMode bake_mode = RSG::light_storage->light_get_bake_mode(p_instance->base); if (RSG::light_storage->light_get_type(p_instance->base) != RS::LIGHT_DIRECTIONAL && bake_mode != light->bake_mode) { @@ -1650,7 +1651,7 @@ void RendererSceneCull::_update_instance(Instance *p_instance) { if (geom->can_cast_shadows) { for (const Instance *E : geom->lights) 
{ InstanceLightData *light = static_cast(E->base_data); - light->shadow_dirty = true; + light->make_shadow_dirty(); } } @@ -2075,6 +2076,9 @@ void RendererSceneCull::_update_instance_lightmap_captures(Instance *p_instance) } void RendererSceneCull::_light_instance_setup_directional_shadow(int p_shadow_index, Instance *p_instance, const Transform3D p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect) { + // For later tight culling, the light culler needs to know the details of the directional light. + light_culler->prepare_directional_light(p_instance, p_shadow_index); + InstanceLightData *light = static_cast(p_instance->base_data); Transform3D light_transform = p_instance->transform; @@ -2345,6 +2349,10 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons RendererSceneRender::RenderShadowData &shadow_data = render_shadow_data[max_shadows_used++]; + if (!light->is_shadow_update_full()) { + light_culler->cull_regular_light(instance_shadow_cull_result); + } + for (int j = 0; j < (int)instance_shadow_cull_result.size(); j++) { Instance *instance = instance_shadow_cull_result[j]; if (!instance->visible || !((1 << instance->base_type) & RS::INSTANCE_GEOMETRY_MASK) || !static_cast(instance->base_data)->can_cast_shadows || !(p_visible_layers & instance->layer_mask)) { @@ -2423,6 +2431,10 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons RendererSceneRender::RenderShadowData &shadow_data = render_shadow_data[max_shadows_used++]; + if (!light->is_shadow_update_full()) { + light_culler->cull_regular_light(instance_shadow_cull_result); + } + for (int j = 0; j < (int)instance_shadow_cull_result.size(); j++) { Instance *instance = instance_shadow_cull_result[j]; if (!instance->visible || !((1 << instance->base_type) & RS::INSTANCE_GEOMETRY_MASK) || !static_cast(instance->base_data)->can_cast_shadows || !(p_visible_layers & instance->layer_mask)) { @@ -2486,6 +2498,10 @@ 
bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons RendererSceneRender::RenderShadowData &shadow_data = render_shadow_data[max_shadows_used++]; + if (!light->is_shadow_update_full()) { + light_culler->cull_regular_light(instance_shadow_cull_result); + } + for (int j = 0; j < (int)instance_shadow_cull_result.size(); j++) { Instance *instance = instance_shadow_cull_result[j]; if (!instance->visible || !((1 << instance->base_type) & RS::INSTANCE_GEOMETRY_MASK) || !static_cast(instance->base_data)->can_cast_shadows || !(p_visible_layers & instance->layer_mask)) { @@ -2940,6 +2956,9 @@ void RendererSceneCull::_scene_cull(CullData &cull_data, InstanceCullResult &cul } for (uint32_t j = 0; j < cull_data.cull->shadow_count; j++) { + if (!light_culler->cull_directional_light(cull_data.scenario->instance_aabbs[i], j)) { + continue; + } for (uint32_t k = 0; k < cull_data.cull->shadows[j].cascade_count; k++) { if (IN_FRUSTUM(cull_data.cull->shadows[j].cascades[k].frustum) && VIS_CHECK) { uint32_t base_type = idata.flags & InstanceData::FLAG_BASE_TYPE_MASK; @@ -2992,6 +3011,9 @@ void RendererSceneCull::_scene_cull(CullData &cull_data, InstanceCullResult &cul void RendererSceneCull::_render_scene(const RendererSceneRender::CameraData *p_camera_data, const Ref &p_render_buffers, RID p_environment, RID p_force_camera_attributes, uint32_t p_visible_layers, RID p_scenario, RID p_viewport, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, bool p_using_shadows, RenderingMethod::RenderInfo *r_render_info) { Instance *render_reflection_probe = instance_owner.get_or_null(p_reflection_probe); //if null, not rendering to it + // Prepare the light - camera volume culling system. 
+ light_culler->prepare_camera(p_camera_data->main_transform, p_camera_data->main_projection); + Scenario *scenario = scenario_owner.get_or_null(p_scenario); ERR_FAIL_COND(p_render_buffers.is_null()); @@ -3126,6 +3148,7 @@ void RendererSceneCull::_render_scene(const RendererSceneRender::CameraData *p_c #ifdef DEBUG_CULL_TIME uint64_t time_from = OS::get_singleton()->get_ticks_usec(); #endif + if (cull_to > thread_cull_threshold) { //multiple threads for (InstanceCullResult &thread : scene_cull_result_threads) { @@ -3263,9 +3286,31 @@ void RendererSceneCull::_render_scene(const RendererSceneRender::CameraData *p_c } } - if (light->shadow_dirty) { - light->last_version++; - light->shadow_dirty = false; + // We can detect whether multiple cameras are hitting this light, whether or not the shadow is dirty, + // so that we can turn off tighter caster culling. + light->detect_light_intersects_multiple_cameras(Engine::get_singleton()->get_frames_drawn()); + + if (light->is_shadow_dirty()) { + // Dirty shadows have no need to be drawn if + // the light volume doesn't intersect the camera frustum. + + // Returns false if the entire light can be culled. + bool allow_redraw = light_culler->prepare_regular_light(*ins); + + // Directional lights aren't handled here, _light_instance_update_shadow is called from elsewhere. + // Checking for this in case this changes, as this is assumed. + DEV_CHECK_ONCE(RSG::light_storage->light_get_type(ins->base) != RS::LIGHT_DIRECTIONAL); + + // Tighter caster culling to the camera frustum should work correctly with multiple viewports + cameras. + // The first camera will cull tightly, but if the light is present on more than 1 camera, the second will + // do a full render, and mark the light as non-dirty. + // There is however a cost to tighter shadow culling in this situation (2 shadow updates in 1 frame), + // so we should detect this and switch off tighter caster culling automatically. 
+ // This is done in the logic for `decrement_shadow_dirty()`. + if (allow_redraw) { + light->last_version++; + light->decrement_shadow_dirty(); + } } bool redraw = RSG::light_storage->shadow_atlas_update_light(p_shadow_atlas, light->instance, coverage, light->last_version); @@ -3273,10 +3318,14 @@ void RendererSceneCull::_render_scene(const RendererSceneRender::CameraData *p_c if (redraw && max_shadows_used < MAX_UPDATE_SHADOWS) { //must redraw! RENDER_TIMESTAMP("> Render Light3D " + itos(i)); - light->shadow_dirty = _light_instance_update_shadow(ins, p_camera_data->main_transform, p_camera_data->main_projection, p_camera_data->is_orthogonal, p_camera_data->vaspect, p_shadow_atlas, scenario, p_screen_mesh_lod_threshold, p_visible_layers); + if (_light_instance_update_shadow(ins, p_camera_data->main_transform, p_camera_data->main_projection, p_camera_data->is_orthogonal, p_camera_data->vaspect, p_shadow_atlas, scenario, p_screen_mesh_lod_threshold, p_visible_layers)) { + light->make_shadow_dirty(); + } RENDER_TIMESTAMP("< Render Light3D " + itos(i)); } else { - light->shadow_dirty = redraw; + if (redraw) { + light->make_shadow_dirty(); + } } } } @@ -3953,7 +4002,7 @@ void RendererSceneCull::_update_dirty_instance(Instance *p_instance) { //ability to cast shadows change, let lights now for (const Instance *E : geom->lights) { InstanceLightData *light = static_cast(E->base_data); - light->shadow_dirty = true; + light->make_shadow_dirty(); } geom->can_cast_shadows = can_cast_shadows; @@ -4165,6 +4214,12 @@ RendererSceneCull::RendererSceneCull() { thread_cull_threshold = MAX(thread_cull_threshold, (uint32_t)WorkerThreadPool::get_singleton()->get_thread_count()); //make sure there is at least one thread per CPU dummy_occlusion_culling = memnew(RendererSceneOcclusionCull); + + light_culler = memnew(RenderingLightCuller); + + bool tighter_caster_culling = GLOBAL_DEF("rendering/lights_and_shadows/tighter_shadow_caster_culling", true); + 
light_culler->set_caster_culling_active(tighter_caster_culling); + light_culler->set_light_culling_active(tighter_caster_culling); } RendererSceneCull::~RendererSceneCull() { @@ -4187,4 +4242,9 @@ RendererSceneCull::~RendererSceneCull() { if (dummy_occlusion_culling) { memdelete(dummy_occlusion_culling); } + + if (light_culler) { + memdelete(light_culler); + light_culler = nullptr; + } } diff --git a/servers/rendering/renderer_scene_cull.h b/servers/rendering/renderer_scene_cull.h index e3e20b85029c..a09823b00869 100644 --- a/servers/rendering/renderer_scene_cull.h +++ b/servers/rendering/renderer_scene_cull.h @@ -46,6 +46,8 @@ #include "servers/rendering/storage/utilities.h" #include "servers/xr/xr_interface.h" +class RenderingLightCuller; + class RendererSceneCull : public RenderingMethod { public: RendererSceneRender *scene_render = nullptr; @@ -679,7 +681,6 @@ class RendererSceneCull : public RenderingMethod { uint64_t last_version; List::Element *D; // directional light in scenario - bool shadow_dirty; bool uses_projector = false; bool uses_softshadow = false; @@ -690,12 +691,59 @@ class RendererSceneCull : public RenderingMethod { RS::LightBakeMode bake_mode; uint32_t max_sdfgi_cascade = 2; + private: + // Instead of a single dirty flag, we maintain a count + // so that we can detect lights that are being made dirty + // each frame, and switch on tighter caster culling. + int32_t shadow_dirty_count; + + uint32_t light_update_frame_id; + bool light_intersects_multiple_cameras; + uint32_t light_intersects_multiple_cameras_timeout_frame_id; + + public: + bool is_shadow_dirty() const { return shadow_dirty_count != 0; } + void make_shadow_dirty() { shadow_dirty_count = light_intersects_multiple_cameras ? 1 : 2; } + void detect_light_intersects_multiple_cameras(uint32_t p_frame_id) { + // We need to detect the case where shadow updates are occurring + // more than once per frame. 
In this case, we need to turn off + // tighter caster culling, so the situation reverts to one full shadow update + // per frame (light_intersects_multiple_cameras is set). + if (p_frame_id == light_update_frame_id) { + light_intersects_multiple_cameras = true; + light_intersects_multiple_cameras_timeout_frame_id = p_frame_id + 60; + } else { + // When light_intersects_multiple_cameras is set, we + // want to detect when this is no longer the case, via a timeout. + // The system can go back to tighter caster culling in this situation. + // Having a long-ish timeout prevents rapid cycling. + if (light_intersects_multiple_cameras && (p_frame_id >= light_intersects_multiple_cameras_timeout_frame_id)) { + light_intersects_multiple_cameras = false; + light_intersects_multiple_cameras_timeout_frame_id = UINT32_MAX; + } + } + light_update_frame_id = p_frame_id; + } + + void decrement_shadow_dirty() { + shadow_dirty_count--; + DEV_ASSERT(shadow_dirty_count >= 0); + } + + // Shadow updates can either be full (everything in the shadow volume) + // or closely culled to the camera frustum. 
+ bool is_shadow_update_full() const { return shadow_dirty_count == 0; } + InstanceLightData() { bake_mode = RS::LIGHT_BAKE_DISABLED; - shadow_dirty = true; D = nullptr; last_version = 0; baked_light = nullptr; + + shadow_dirty_count = 1; + light_update_frame_id = UINT32_MAX; + light_intersects_multiple_cameras_timeout_frame_id = UINT32_MAX; + light_intersects_multiple_cameras = false; } }; @@ -955,6 +1003,7 @@ class RendererSceneCull : public RenderingMethod { uint32_t geometry_instance_pair_mask = 0; // used in traditional forward, unnecessary on clustered LocalVector camera_jitter_array; + RenderingLightCuller *light_culler = nullptr; virtual RID instance_allocate(); virtual void instance_initialize(RID p_rid); diff --git a/servers/rendering/rendering_light_culler.cpp b/servers/rendering/rendering_light_culler.cpp new file mode 100644 index 000000000000..0d704c85de25 --- /dev/null +++ b/servers/rendering/rendering_light_culler.cpp @@ -0,0 +1,1125 @@ +/**************************************************************************/ +/* rendering_light_culler.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#include "rendering_light_culler.h" + +#include "core/math/plane.h" +#include "core/math/projection.h" +#include "rendering_server_globals.h" +#include "scene/3d/camera_3d.h" + +#ifdef RENDERING_LIGHT_CULLER_DEBUG_STRINGS +const char *RenderingLightCuller::Data::string_planes[] = { + "NEAR", + "FAR", + "LEFT", + "TOP", + "RIGHT", + "BOTTOM", +}; +const char *RenderingLightCuller::Data::string_points[] = { + "FAR_LEFT_TOP", + "FAR_LEFT_BOTTOM", + "FAR_RIGHT_TOP", + "FAR_RIGHT_BOTTOM", + "NEAR_LEFT_TOP", + "NEAR_LEFT_BOTTOM", + "NEAR_RIGHT_TOP", + "NEAR_RIGHT_BOTTOM", +}; + +String RenderingLightCuller::Data::plane_bitfield_to_string(unsigned int BF) { + String sz; + + for (int n = 0; n < 6; n++) { + unsigned int bit = 1 << n; + if (BF & bit) { + sz += String(string_planes[n]) + ", "; + } + } + + return sz; +} +#endif + +void RenderingLightCuller::prepare_directional_light(const RendererSceneCull::Instance *p_instance, int32_t p_directional_light_id) { + //data.directional_light = p_instance; + // Something is probably going wrong, we shouldn't have this many directional lights... + ERR_FAIL_COND(p_directional_light_id > 512); + DEV_ASSERT(p_directional_light_id >= 0); + + // First make sure we have enough directional lights to hold this one. 
+ if (p_directional_light_id >= (int32_t)data.directional_cull_planes.size()) { + data.directional_cull_planes.resize(p_directional_light_id + 1); + } + + _prepare_light(*p_instance, p_directional_light_id); +} + +bool RenderingLightCuller::_prepare_light(const RendererSceneCull::Instance &p_instance, int32_t p_directional_light_id) { + if (!data.is_active()) { + return true; + } + + LightSource lsource; + switch (RSG::light_storage->light_get_type(p_instance.base)) { + case RS::LIGHT_SPOT: + lsource.type = LightSource::ST_SPOTLIGHT; + lsource.angle = RSG::light_storage->light_get_param(p_instance.base, RS::LIGHT_PARAM_SPOT_ANGLE); + lsource.range = RSG::light_storage->light_get_param(p_instance.base, RS::LIGHT_PARAM_RANGE); + break; + case RS::LIGHT_OMNI: + lsource.type = LightSource::ST_OMNI; + lsource.range = RSG::light_storage->light_get_param(p_instance.base, RS::LIGHT_PARAM_RANGE); + break; + case RS::LIGHT_DIRECTIONAL: + lsource.type = LightSource::ST_DIRECTIONAL; + // Could deal with a max directional shadow range here? 
NYI + // LIGHT_PARAM_SHADOW_MAX_DISTANCE + break; + } + + lsource.pos = p_instance.transform.origin; + lsource.dir = -p_instance.transform.basis.get_column(2); + lsource.dir.normalize(); + + bool visible; + if (p_directional_light_id == -1) { + visible = _add_light_camera_planes(data.regular_cull_planes, lsource); + } else { + visible = _add_light_camera_planes(data.directional_cull_planes[p_directional_light_id], lsource); + } + + if (data.light_culling_active) { + return visible; + } + return true; +} + +bool RenderingLightCuller::cull_directional_light(const RendererSceneCull::InstanceBounds &p_bound, int32_t p_directional_light_id) { + if (!data.is_active() || !is_caster_culling_active()) { + return true; + } + + ERR_FAIL_INDEX_V(p_directional_light_id, (int32_t)data.directional_cull_planes.size(), true); + + LightCullPlanes &cull_planes = data.directional_cull_planes[p_directional_light_id]; + + Vector3 mins = Vector3(p_bound.bounds[0], p_bound.bounds[1], p_bound.bounds[2]); + Vector3 maxs = Vector3(p_bound.bounds[3], p_bound.bounds[4], p_bound.bounds[5]); + AABB bb(mins, maxs - mins); + + real_t r_min, r_max; + for (int p = 0; p < cull_planes.num_cull_planes; p++) { + bb.project_range_in_plane(cull_planes.cull_planes[p], r_min, r_max); + if (r_min > 0.0f) { +#ifdef LIGHT_CULLER_DEBUG_DIRECTIONAL_LIGHT + cull_planes.rejected_count++; +#endif + + return false; + } + } + + return true; +} + +void RenderingLightCuller::cull_regular_light(PagedArray &r_instance_shadow_cull_result) { + if (!data.is_active() || !is_caster_culling_active()) { + return; + } + + // If the light is out of range, no need to check anything, just return 0 casters. + // Ideally an out of range light should not even be drawn AT ALL (no shadow map, no PCF etc). + if (data.out_of_range) { + return; + } + + // Shorter local alias. 
+ PagedArray &list = r_instance_shadow_cull_result; + +#ifdef LIGHT_CULLER_DEBUG_LOGGING + uint32_t count_before = r_instance_shadow_cull_result.size(); +#endif + + // Go through all the casters in the list (the list will hopefully shrink as we go). + for (int n = 0; n < (int)list.size(); n++) { + // World space aabb. + const AABB &bb = list[n]->transformed_aabb; + +#ifdef LIGHT_CULLER_DEBUG_LOGGING + if (is_logging()) { + print_line("bb : " + String(bb)); + } +#endif + + real_t r_min, r_max; + bool show = true; + + for (int p = 0; p < data.regular_cull_planes.num_cull_planes; p++) { + // As we only need r_min, could this be optimized? + bb.project_range_in_plane(data.regular_cull_planes.cull_planes[p], r_min, r_max); + +#ifdef LIGHT_CULLER_DEBUG_LOGGING + if (is_logging()) { + print_line("\tplane " + itos(p) + " : " + String(data.regular_cull_planes.cull_planes[p]) + " r_min " + String(Variant(r_min)) + " r_max " + String(Variant(r_max))); + } +#endif + + if (r_min > 0.0f) { + show = false; + break; + } + } + + // Remove. + if (!show) { + list.remove_at_unordered(n); + + // Repeat this element next iteration of the loop as it has been removed and replaced by the last. + n--; + +#ifdef LIGHT_CULLER_DEBUG_REGULAR_LIGHT + data.regular_rejected_count++; +#endif + } + } + +#ifdef LIGHT_CULLER_DEBUG_LOGGING + uint32_t removed = r_instance_shadow_cull_result.size() - count_before; + if (removed) { + if (((data.debug_count) % 60) == 0) { + print_line("[" + itos(data.debug_count) + "] linear cull before " + itos(count_before) + " after " + itos(r_instance_shadow_cull_result.size())); + } + } +#endif +} + +void RenderingLightCuller::LightCullPlanes::add_cull_plane(const Plane &p) { + ERR_FAIL_COND(num_cull_planes >= MAX_CULL_PLANES); + cull_planes[num_cull_planes++] = p; +} + +// Directional lights are different to points, as the origin is infinitely in the distance, so the plane third +// points are derived differently. 
+bool RenderingLightCuller::add_light_camera_planes_directional(LightCullPlanes &r_cull_planes, const LightSource &p_light_source) { + uint32_t lookup = 0; + r_cull_planes.num_cull_planes = 0; + + // Directional light, we will use dot against the light direction to determine back facing planes. + for (int n = 0; n < 6; n++) { + float dot = data.frustum_planes[n].normal.dot(p_light_source.dir); + if (dot > 0.0f) { + lookup |= 1 << n; + + // Add backfacing camera frustum planes. + r_cull_planes.add_cull_plane(data.frustum_planes[n]); + } + } + + ERR_FAIL_COND_V(lookup >= LUT_SIZE, true); + + // Deal with special case... if the light is INSIDE the view frustum (i.e. all planes face away) + // then we will add the camera frustum planes to clip the light volume .. there is no need to + // render shadow casters outside the frustum as shadows can never re-enter the frustum. + + // Should never happen with directional light?? This may be able to be removed. + if (lookup == 63) { + r_cull_planes.num_cull_planes = 0; + for (int n = 0; n < data.frustum_planes.size(); n++) { + r_cull_planes.add_cull_plane(data.frustum_planes[n]); + } + + return true; + } + +// Each edge forms a plane. +#ifdef RENDERING_LIGHT_CULLER_CALCULATE_LUT + const LocalVector &entry = _calculated_LUT[lookup]; + + // each edge forms a plane + int n_edges = entry.size() - 1; +#else + uint8_t *entry = &data.LUT_entries[lookup][0]; + int n_edges = data.LUT_entry_sizes[lookup] - 1; +#endif + + for (int e = 0; e < n_edges; e++) { + int i0 = entry[e]; + int i1 = entry[e + 1]; + const Vector3 &pt0 = data.frustum_points[i0]; + const Vector3 &pt1 = data.frustum_points[i1]; + + // Create a third point from the light direction. + Vector3 pt2 = pt0 - p_light_source.dir; + + // Create plane from 3 points. + Plane p(pt0, pt1, pt2); + r_cull_planes.add_cull_plane(p); + } + + // Last to 0 edge. + if (n_edges) { + int i0 = entry[n_edges]; // Last. + int i1 = entry[0]; // First. 
+ + const Vector3 &pt0 = data.frustum_points[i0]; + const Vector3 &pt1 = data.frustum_points[i1]; + + // Create a third point from the light direction. + Vector3 pt2 = pt0 - p_light_source.dir; + + // Create plane from 3 points. + Plane p(pt0, pt1, pt2); + r_cull_planes.add_cull_plane(p); + } + +#ifdef LIGHT_CULLER_DEBUG_LOGGING + if (is_logging()) { + print_line("lcam.pos is " + String(p_light_source.pos)); + } +#endif + + return true; +} + +bool RenderingLightCuller::_add_light_camera_planes(LightCullPlanes &r_cull_planes, const LightSource &p_light_source) { + if (!data.is_active()) { + return true; + } + + // We should have called prepare_camera before this. + ERR_FAIL_COND_V(data.frustum_planes.size() != 6, true); + + switch (p_light_source.type) { + case LightSource::ST_SPOTLIGHT: + case LightSource::ST_OMNI: + break; + case LightSource::ST_DIRECTIONAL: + return add_light_camera_planes_directional(r_cull_planes, p_light_source); + break; + default: + return false; // not yet supported + break; + } + + // Start with 0 cull planes. + r_cull_planes.num_cull_planes = 0; + data.out_of_range = false; + uint32_t lookup = 0; + + // Find which of the camera planes are facing away from the light. + // We can also test for the situation where the light max range means it cannot + // affect the camera frustum. This is absolutely worth doing because it is relatively + // cheap, and if the entire light can be culled this can vastly improve performance + // (much more than just culling casters). + + // POINT LIGHT (spotlight, omni) + // Instead of using dot product to compare light direction to plane, we can simply + // find out which side of the plane the camera is on. By definition this marks the point at which the plane + // becomes invisible. 
+ + // OMNIS + if (p_light_source.type == LightSource::ST_OMNI) { + for (int n = 0; n < 6; n++) { + float dist = data.frustum_planes[n].distance_to(p_light_source.pos); + if (dist < 0.0f) { + lookup |= 1 << n; + + // Add backfacing camera frustum planes. + r_cull_planes.add_cull_plane(data.frustum_planes[n]); + } else { + // Is the light out of range? + // This is one of the tests. If the point source is more than range distance from a frustum plane, it can't + // be seen. + if (dist >= p_light_source.range) { + // If the light is out of range, no need to do anything else, everything will be culled. + data.out_of_range = true; + return false; + } + } + } + } else { + // SPOTLIGHTs, more complex to cull. + Vector3 pos_end = p_light_source.pos + (p_light_source.dir * p_light_source.range); + + // This is the radius of the cone at distance 1. + float radius_at_dist_one = Math::tan(Math::deg_to_rad(p_light_source.angle)); + + // The worst case radius of the cone at the end point can be calculated + // (the radius will scale linearly with length along the cone). + float end_cone_radius = radius_at_dist_one * p_light_source.range; + + for (int n = 0; n < 6; n++) { + float dist = data.frustum_planes[n].distance_to(p_light_source.pos); + if (dist < 0.0f) { + // Either the plane is backfacing or we are inside the frustum. + lookup |= 1 << n; + + // Add backfacing camera frustum planes. + r_cull_planes.add_cull_plane(data.frustum_planes[n]); + } else { + // The light is in front of the plane. + + // Is the light out of range? + if (dist >= p_light_source.range) { + data.out_of_range = true; + return false; + } + + // For a spotlight, we can use an extra test + // at this point the cone start is in front of the plane... + // If the cone end point is further than the maximum possible distance to the plane + // we can guarantee that the cone does not cross the plane, and hence the cone + // is outside the frustum. 
+ float dist_end = data.frustum_planes[n].distance_to(pos_end); + + if (dist_end >= end_cone_radius) { + data.out_of_range = true; + return false; + } + } + } + } + + // The lookup should be within the LUT, logic should prevent this. + ERR_FAIL_COND_V(lookup >= LUT_SIZE, true); + + // Deal with special case... if the light is INSIDE the view frustum (i.e. all planes face away) + // then we will add the camera frustum planes to clip the light volume .. there is no need to + // render shadow casters outside the frustum as shadows can never re-enter the frustum. + if (lookup == 63) { + r_cull_planes.num_cull_planes = 0; + for (int n = 0; n < data.frustum_planes.size(); n++) { + r_cull_planes.add_cull_plane(data.frustum_planes[n]); + } + + return true; + } + + // Each edge forms a plane. + uint8_t *entry = &data.LUT_entries[lookup][0]; + int n_edges = data.LUT_entry_sizes[lookup] - 1; + + for (int e = 0; e < n_edges; e++) { + int i0 = entry[e]; + int i1 = entry[e + 1]; + const Vector3 &pt0 = data.frustum_points[i0]; + const Vector3 &pt1 = data.frustum_points[i1]; + + // Create plane from 3 points. + Plane p(pt0, pt1, p_light_source.pos); + r_cull_planes.add_cull_plane(p); + } + + // Last to 0 edge. + if (n_edges) { + int i0 = entry[n_edges]; // Last. + int i1 = entry[0]; // First. + + const Vector3 &pt0 = data.frustum_points[i0]; + const Vector3 &pt1 = data.frustum_points[i1]; + + // Create plane from 3 points. + Plane p(pt0, pt1, p_light_source.pos); + r_cull_planes.add_cull_plane(p); + } + +#ifdef LIGHT_CULLER_DEBUG_LOGGING + if (is_logging()) { + print_line("lsource.pos is " + String(p_light_source.pos)); + } +#endif + + return true; +} + +bool RenderingLightCuller::prepare_camera(const Transform3D &p_cam_transform, const Projection &p_cam_matrix) { + data.debug_count++; + if (data.debug_count >= 120) { + data.debug_count = 0; + } + + // For debug flash off and on. 
+#ifdef LIGHT_CULLER_DEBUG_FLASH + if (!Engine::get_singleton()->is_editor_hint()) { + int dc = Engine::get_singleton()->get_process_frames() / LIGHT_CULLER_DEBUG_FLASH_FREQUENCY; + bool bnew_active; + bnew_active = (dc % 2) == 0; + + if (bnew_active != data.light_culling_active) { + data.light_culling_active = bnew_active; + print_line("switching light culler " + String(Variant(data.light_culling_active))); + } + } +#endif + + if (!data.is_active()) { + return false; + } + + // Get the camera frustum planes in world space. + data.frustum_planes = p_cam_matrix.get_projection_planes(p_cam_transform); + DEV_CHECK_ONCE(data.frustum_planes.size() == 6); + + data.regular_cull_planes.num_cull_planes = 0; + +#ifdef LIGHT_CULLER_DEBUG_DIRECTIONAL_LIGHT + if (is_logging()) { + for (uint32_t n = 0; n < data.directional_cull_planes.size(); n++) { + print_line("LightCuller directional light " + itos(n) + " rejected " + itos(data.directional_cull_planes[n].rejected_count) + " instances."); + } + } +#endif +#ifdef LIGHT_CULLER_DEBUG_REGULAR_LIGHT + if (data.regular_rejected_count) { + print_line("LightCuller regular lights rejected " + itos(data.regular_rejected_count) + " instances."); + } + data.regular_rejected_count = 0; +#endif + + data.directional_cull_planes.resize(0); + +#ifdef LIGHT_CULLER_DEBUG_LOGGING + if (is_logging()) { + for (int p = 0; p < 6; p++) { + print_line("plane " + itos(p) + " : " + String(data.frustum_planes[p])); + } + } +#endif + + // We want to calculate the frustum corners in a specific order. 
+ const Projection::Planes intersections[8][3] = { + { Projection::PLANE_FAR, Projection::PLANE_LEFT, Projection::PLANE_TOP }, + { Projection::PLANE_FAR, Projection::PLANE_LEFT, Projection::PLANE_BOTTOM }, + { Projection::PLANE_FAR, Projection::PLANE_RIGHT, Projection::PLANE_TOP }, + { Projection::PLANE_FAR, Projection::PLANE_RIGHT, Projection::PLANE_BOTTOM }, + { Projection::PLANE_NEAR, Projection::PLANE_LEFT, Projection::PLANE_TOP }, + { Projection::PLANE_NEAR, Projection::PLANE_LEFT, Projection::PLANE_BOTTOM }, + { Projection::PLANE_NEAR, Projection::PLANE_RIGHT, Projection::PLANE_TOP }, + { Projection::PLANE_NEAR, Projection::PLANE_RIGHT, Projection::PLANE_BOTTOM }, + }; + + for (int i = 0; i < 8; i++) { + // 3 plane intersection, gives us a point. + bool res = data.frustum_planes[intersections[i][0]].intersect_3(data.frustum_planes[intersections[i][1]], data.frustum_planes[intersections[i][2]], &data.frustum_points[i]); + + // What happens with a zero frustum? NYI - deal with this. + ERR_FAIL_COND_V(!res, false); + +#ifdef LIGHT_CULLER_DEBUG_LOGGING + if (is_logging()) { + print_line("point " + itos(i) + " -> " + String(data.frustum_points[i])); + } +#endif + } + + return true; +} + +RenderingLightCuller::RenderingLightCuller() { + // Used to determine which frame to give debug output. + data.debug_count = -1; + + // Uncomment below to switch off light culler in the editor. 
+ // data.caster_culling_active = Engine::get_singleton()->is_editor_hint() == false; + +#ifdef RENDERING_LIGHT_CULLER_CALCULATE_LUT + create_LUT(); +#endif +} + +/* clang-format off */ +uint8_t RenderingLightCuller::Data::LUT_entry_sizes[LUT_SIZE] = {0, 4, 4, 0, 4, 6, 6, 8, 4, 6, 6, 8, 6, 6, 6, 6, 4, 6, 6, 8, 0, 8, 8, 0, 6, 6, 6, 6, 8, 6, 6, 4, 4, 6, 6, 8, 6, 6, 6, 6, 0, 8, 8, 0, 8, 6, 6, 4, 6, 6, 6, 6, 8, 6, 6, 4, 8, 6, 6, 4, 0, 4, 4, 0, }; + +// The lookup table used to determine which edges form the silhouette of the camera frustum, +// depending on the viewing angle (defined by which camera planes are backward facing). +uint8_t RenderingLightCuller::Data::LUT_entries[LUT_SIZE][8] = { +{0, 0, 0, 0, 0, 0, 0, 0, }, +{7, 6, 4, 5, 0, 0, 0, 0, }, +{1, 0, 2, 3, 0, 0, 0, 0, }, +{0, 0, 0, 0, 0, 0, 0, 0, }, +{1, 5, 4, 0, 0, 0, 0, 0, }, +{1, 5, 7, 6, 4, 0, 0, 0, }, +{4, 0, 2, 3, 1, 5, 0, 0, }, +{5, 7, 6, 4, 0, 2, 3, 1, }, +{0, 4, 6, 2, 0, 0, 0, 0, }, +{0, 4, 5, 7, 6, 2, 0, 0, }, +{6, 2, 3, 1, 0, 4, 0, 0, }, +{2, 3, 1, 0, 4, 5, 7, 6, }, +{0, 1, 5, 4, 6, 2, 0, 0, }, +{0, 1, 5, 7, 6, 2, 0, 0, }, +{6, 2, 3, 1, 5, 4, 0, 0, }, +{2, 3, 1, 5, 7, 6, 0, 0, }, +{2, 6, 7, 3, 0, 0, 0, 0, }, +{2, 6, 4, 5, 7, 3, 0, 0, }, +{7, 3, 1, 0, 2, 6, 0, 0, }, +{3, 1, 0, 2, 6, 4, 5, 7, }, +{0, 0, 0, 0, 0, 0, 0, 0, }, +{2, 6, 4, 0, 1, 5, 7, 3, }, +{7, 3, 1, 5, 4, 0, 2, 6, }, +{0, 0, 0, 0, 0, 0, 0, 0, }, +{2, 0, 4, 6, 7, 3, 0, 0, }, +{2, 0, 4, 5, 7, 3, 0, 0, }, +{7, 3, 1, 0, 4, 6, 0, 0, }, +{3, 1, 0, 4, 5, 7, 0, 0, }, +{2, 0, 1, 5, 4, 6, 7, 3, }, +{2, 0, 1, 5, 7, 3, 0, 0, }, +{7, 3, 1, 5, 4, 6, 0, 0, }, +{3, 1, 5, 7, 0, 0, 0, 0, }, +{3, 7, 5, 1, 0, 0, 0, 0, }, +{3, 7, 6, 4, 5, 1, 0, 0, }, +{5, 1, 0, 2, 3, 7, 0, 0, }, +{7, 6, 4, 5, 1, 0, 2, 3, }, +{3, 7, 5, 4, 0, 1, 0, 0, }, +{3, 7, 6, 4, 0, 1, 0, 0, }, +{5, 4, 0, 2, 3, 7, 0, 0, }, +{7, 6, 4, 0, 2, 3, 0, 0, }, +{0, 0, 0, 0, 0, 0, 0, 0, }, +{3, 7, 6, 2, 0, 4, 5, 1, }, +{5, 1, 0, 4, 6, 2, 3, 7, }, +{0, 0, 0, 0, 0, 0, 0, 0, }, +{3, 7, 5, 4, 6, 2, 0, 1, 
}, +{3, 7, 6, 2, 0, 1, 0, 0, }, +{5, 4, 6, 2, 3, 7, 0, 0, }, +{7, 6, 2, 3, 0, 0, 0, 0, }, +{3, 2, 6, 7, 5, 1, 0, 0, }, +{3, 2, 6, 4, 5, 1, 0, 0, }, +{5, 1, 0, 2, 6, 7, 0, 0, }, +{1, 0, 2, 6, 4, 5, 0, 0, }, +{3, 2, 6, 7, 5, 4, 0, 1, }, +{3, 2, 6, 4, 0, 1, 0, 0, }, +{5, 4, 0, 2, 6, 7, 0, 0, }, +{6, 4, 0, 2, 0, 0, 0, 0, }, +{3, 2, 0, 4, 6, 7, 5, 1, }, +{3, 2, 0, 4, 5, 1, 0, 0, }, +{5, 1, 0, 4, 6, 7, 0, 0, }, +{1, 0, 4, 5, 0, 0, 0, 0, }, +{0, 0, 0, 0, 0, 0, 0, 0, }, +{3, 2, 0, 1, 0, 0, 0, 0, }, +{5, 4, 6, 7, 0, 0, 0, 0, }, +{0, 0, 0, 0, 0, 0, 0, 0, }, +}; + +/* clang-format on */ + +#ifdef RENDERING_LIGHT_CULLER_CALCULATE_LUT + +// See e.g. http://lspiroengine.com/?p=153 for reference. +// Principles are the same, but differences to the article: +// * Order of planes / points is different in Godot. +// * We use a lookup table at runtime. +void RenderingLightCuller::create_LUT() { + // Each pair of planes that are opposite can have an edge. + for (int plane_0 = 0; plane_0 < PLANE_TOTAL; plane_0++) { + // For each neighbour of the plane. + PlaneOrder neighs[4]; + get_neighbouring_planes((PlaneOrder)plane_0, neighs); + + for (int n = 0; n < 4; n++) { + int plane_1 = neighs[n]; + + // If these are opposite we need to add the 2 points they share. + PointOrder pts[2]; + get_corners_of_planes((PlaneOrder)plane_0, (PlaneOrder)plane_1, pts); + + add_LUT(plane_0, plane_1, pts); + } + } + + for (uint32_t n = 0; n < LUT_SIZE; n++) { + compact_LUT_entry(n); + } + + debug_print_LUT(); + debug_print_LUT_as_table(); +} + +// we can pre-create the entire LUT and store it hard coded as a static inside the executable! 
+// it is only small in size, 64 entries with max 8 bytes per entry +void RenderingLightCuller::debug_print_LUT_as_table() { + print_line("\nLIGHT VOLUME TABLE BEGIN\n"); + + print_line("Copy this to LUT_entry_sizes:\n"); + String sz = "{"; + for (int n = 0; n < LUT_SIZE; n++) { + const LocalVector &entry = _calculated_LUT[n]; + + sz += itos(entry.size()) + ", "; + } + sz += "}"; + print_line(sz); + print_line("\nCopy this to LUT_entries:\n"); + + for (int n = 0; n < LUT_SIZE; n++) { + const LocalVector &entry = _calculated_LUT[n]; + + String sz = "{"; + + // First is the number of points in the entry. + int s = entry.size(); + + for (int p = 0; p < 8; p++) { + if (p < s) + sz += itos(entry[p]); + else + sz += "0"; // just a spacer + + sz += ", "; + } + + sz += "},"; + print_line(sz); + } + + print_line("\nLIGHT VOLUME TABLE END\n"); +} + +void RenderingLightCuller::debug_print_LUT() { + for (int n = 0; n < LUT_SIZE; n++) { + String sz; + sz = "LUT" + itos(n) + ":\t"; + + sz += Data::plane_bitfield_to_string(n); + print_line(sz); + + const LocalVector &entry = _calculated_LUT[n]; + + sz = "\t" + string_LUT_entry(entry); + + print_line(sz); + } +} + +String RenderingLightCuller::string_LUT_entry(const LocalVector &p_entry) { + String string; + + for (uint32_t n = 0; n < p_entry.size(); n++) { + uint8_t val = p_entry[n]; + DEV_ASSERT(val < 8); + const char *sz_point = Data::string_points[val]; + string += sz_point; + string += ", "; + } + + return string; +} + +String RenderingLightCuller::debug_string_LUT_entry(const LocalVector &p_entry, bool p_pair) { + String string; + + for (uint32_t i = 0; i < p_entry.size(); i++) { + int pt_order = p_entry[i]; + if (p_pair && ((i % 2) == 0)) { + string += itos(pt_order) + "-"; + } else { + string += itos(pt_order) + ", "; + } + } + + return string; +} + +void RenderingLightCuller::add_LUT(int p_plane_0, int p_plane_1, PointOrder p_pts[2]) { + // Note that some entries to the LUT will be "impossible" situations, + // because it 
contains all combinations of plane flips. + uint32_t bit0 = 1 << p_plane_0; + uint32_t bit1 = 1 << p_plane_1; + + // All entries of the LUT that have plane 0 set and plane 1 not set. + for (uint32_t n = 0; n < 64; n++) { + // If bit0 not set... + if (!(n & bit0)) + continue; + + // If bit1 set... + if (n & bit1) + continue; + + // Meets criteria. + add_LUT_entry(n, p_pts); + } +} + +void RenderingLightCuller::add_LUT_entry(uint32_t p_entry_id, PointOrder p_pts[2]) { + DEV_ASSERT(p_entry_id < LUT_SIZE); + LocalVector &entry = _calculated_LUT[p_entry_id]; + + entry.push_back(p_pts[0]); + entry.push_back(p_pts[1]); +} + +void RenderingLightCuller::compact_LUT_entry(uint32_t p_entry_id) { + DEV_ASSERT(p_entry_id < LUT_SIZE); + LocalVector &entry = _calculated_LUT[p_entry_id]; + + int num_pairs = entry.size() / 2; + + if (num_pairs == 0) + return; + + LocalVector temp; + + String string; + string = "Compact LUT" + itos(p_entry_id) + ":\t"; + string += debug_string_LUT_entry(entry, true); + print_line(string); + + // Add first pair. + temp.push_back(entry[0]); + temp.push_back(entry[1]); + unsigned int BFpairs = 1; + + string = debug_string_LUT_entry(temp) + " -> "; + print_line(string); + + // Attempt to add a pair each time. + for (int done = 1; done < num_pairs; done++) { + string = "done " + itos(done) + ": "; + // Find a free pair. + for (int p = 1; p < num_pairs; p++) { + unsigned int bit = 1 << p; + // Is it done already? + if (BFpairs & bit) + continue; + + // There must be at least 1 free pair. + // Attempt to add. + int a = entry[p * 2]; + int b = entry[(p * 2) + 1]; + + string += "[" + itos(a) + "-" + itos(b) + "], "; + + int found_a = temp.find(a); + int found_b = temp.find(b); + + // Special case, if they are both already in the list, no need to add + // as this is a link from the tail to the head of the list. 
+ if ((found_a != -1) && (found_b != -1)) { + string += "foundAB link " + itos(found_a) + ", " + itos(found_b) + " "; + BFpairs |= bit; + goto found; + } + + // Find a. + if (found_a != -1) { + string += "foundA " + itos(found_a) + " "; + temp.insert(found_a + 1, b); + BFpairs |= bit; + goto found; + } + + // Find b. + if (found_b != -1) { + string += "foundB " + itos(found_b) + " "; + temp.insert(found_b, a); + BFpairs |= bit; + goto found; + } + + } // Check each pair for adding. + + // If we got here before finding a link, the whole set of planes is INVALID + // e.g. far and near plane only, does not create continuous sillouhette of edges. + print_line("\tINVALID"); + entry.clear(); + return; + + found:; + print_line(string); + string = "\ttemp now : " + debug_string_LUT_entry(temp); + print_line(string); + } + + // temp should now be the sorted entry .. delete the old one and replace by temp. + entry.clear(); + entry = temp; +} + +void RenderingLightCuller::get_neighbouring_planes(PlaneOrder p_plane, PlaneOrder r_neigh_planes[4]) const { + // Table of neighbouring planes to each. + static const PlaneOrder neigh_table[PLANE_TOTAL][4] = { + { // LSM_FP_NEAR + PLANE_LEFT, + PLANE_RIGHT, + PLANE_TOP, + PLANE_BOTTOM }, + { // LSM_FP_FAR + PLANE_LEFT, + PLANE_RIGHT, + PLANE_TOP, + PLANE_BOTTOM }, + { // LSM_FP_LEFT + PLANE_TOP, + PLANE_BOTTOM, + PLANE_NEAR, + PLANE_FAR }, + { // LSM_FP_TOP + PLANE_LEFT, + PLANE_RIGHT, + PLANE_NEAR, + PLANE_FAR }, + { // LSM_FP_RIGHT + PLANE_TOP, + PLANE_BOTTOM, + PLANE_NEAR, + PLANE_FAR }, + { // LSM_FP_BOTTOM + PLANE_LEFT, + PLANE_RIGHT, + PLANE_NEAR, + PLANE_FAR }, + }; + + for (int n = 0; n < 4; n++) { + r_neigh_planes[n] = neigh_table[p_plane][n]; + } +} + +// Given two planes, returns the two points shared by those planes. The points are always +// returned in counter-clockwise order, assuming the first input plane is facing towards +// the viewer. + +// param p_plane_a The plane facing towards the viewer. 
+// param p_plane_b A plane neighboring p_plane_a. +// param r_points An array of exactly two elements to be filled with the indices of the points +// on return. + +void RenderingLightCuller::get_corners_of_planes(PlaneOrder p_plane_a, PlaneOrder p_plane_b, PointOrder r_points[2]) const { + static const PointOrder fp_table[PLANE_TOTAL][PLANE_TOTAL][2] = { + { + // LSM_FP_NEAR + { + // LSM_FP_NEAR + PT_NEAR_LEFT_TOP, PT_NEAR_RIGHT_TOP, // Invalid combination. + }, + { + // LSM_FP_FAR + PT_FAR_RIGHT_TOP, PT_FAR_LEFT_TOP, // Invalid combination. + }, + { + // LSM_FP_LEFT + PT_NEAR_LEFT_TOP, + PT_NEAR_LEFT_BOTTOM, + }, + { + // LSM_FP_TOP + PT_NEAR_RIGHT_TOP, + PT_NEAR_LEFT_TOP, + }, + { + // LSM_FP_RIGHT + PT_NEAR_RIGHT_BOTTOM, + PT_NEAR_RIGHT_TOP, + }, + { + // LSM_FP_BOTTOM + PT_NEAR_LEFT_BOTTOM, + PT_NEAR_RIGHT_BOTTOM, + }, + }, + + { + // LSM_FP_FAR + { + // LSM_FP_NEAR + PT_FAR_LEFT_TOP, PT_FAR_RIGHT_TOP, // Invalid combination. + }, + { + // LSM_FP_FAR + PT_FAR_RIGHT_TOP, PT_FAR_LEFT_TOP, // Invalid combination. + }, + { + // LSM_FP_LEFT + PT_FAR_LEFT_BOTTOM, + PT_FAR_LEFT_TOP, + }, + { + // LSM_FP_TOP + PT_FAR_LEFT_TOP, + PT_FAR_RIGHT_TOP, + }, + { + // LSM_FP_RIGHT + PT_FAR_RIGHT_TOP, + PT_FAR_RIGHT_BOTTOM, + }, + { + // LSM_FP_BOTTOM + PT_FAR_RIGHT_BOTTOM, + PT_FAR_LEFT_BOTTOM, + }, + }, + + { + // LSM_FP_LEFT + { + // LSM_FP_NEAR + PT_NEAR_LEFT_BOTTOM, + PT_NEAR_LEFT_TOP, + }, + { + // LSM_FP_FAR + PT_FAR_LEFT_TOP, + PT_FAR_LEFT_BOTTOM, + }, + { + // LSM_FP_LEFT + PT_FAR_LEFT_BOTTOM, PT_FAR_LEFT_BOTTOM, // Invalid combination. + }, + { + // LSM_FP_TOP + PT_NEAR_LEFT_TOP, + PT_FAR_LEFT_TOP, + }, + { + // LSM_FP_RIGHT + PT_FAR_LEFT_BOTTOM, PT_FAR_LEFT_BOTTOM, // Invalid combination. 
+ }, + { + // LSM_FP_BOTTOM + PT_FAR_LEFT_BOTTOM, + PT_NEAR_LEFT_BOTTOM, + }, + }, + + { + // LSM_FP_TOP + { + // LSM_FP_NEAR + PT_NEAR_LEFT_TOP, + PT_NEAR_RIGHT_TOP, + }, + { + // LSM_FP_FAR + PT_FAR_RIGHT_TOP, + PT_FAR_LEFT_TOP, + }, + { + // LSM_FP_LEFT + PT_FAR_LEFT_TOP, + PT_NEAR_LEFT_TOP, + }, + { + // LSM_FP_TOP + PT_NEAR_LEFT_TOP, PT_FAR_LEFT_TOP, // Invalid combination. + }, + { + // LSM_FP_RIGHT + PT_NEAR_RIGHT_TOP, + PT_FAR_RIGHT_TOP, + }, + { + // LSM_FP_BOTTOM + PT_FAR_LEFT_BOTTOM, PT_NEAR_LEFT_BOTTOM, // Invalid combination. + }, + }, + + { + // LSM_FP_RIGHT + { + // LSM_FP_NEAR + PT_NEAR_RIGHT_TOP, + PT_NEAR_RIGHT_BOTTOM, + }, + { + // LSM_FP_FAR + PT_FAR_RIGHT_BOTTOM, + PT_FAR_RIGHT_TOP, + }, + { + // LSM_FP_LEFT + PT_FAR_RIGHT_BOTTOM, PT_FAR_RIGHT_BOTTOM, // Invalid combination. + }, + { + // LSM_FP_TOP + PT_FAR_RIGHT_TOP, + PT_NEAR_RIGHT_TOP, + }, + { + // LSM_FP_RIGHT + PT_FAR_RIGHT_BOTTOM, PT_FAR_RIGHT_BOTTOM, // Invalid combination. + }, + { + // LSM_FP_BOTTOM + PT_NEAR_RIGHT_BOTTOM, + PT_FAR_RIGHT_BOTTOM, + }, + }, + + // == + + // P_NEAR, + // P_FAR, + // P_LEFT, + // P_TOP, + // P_RIGHT, + // P_BOTTOM, + + { + // LSM_FP_BOTTOM + { + // LSM_FP_NEAR + PT_NEAR_RIGHT_BOTTOM, + PT_NEAR_LEFT_BOTTOM, + }, + { + // LSM_FP_FAR + PT_FAR_LEFT_BOTTOM, + PT_FAR_RIGHT_BOTTOM, + }, + { + // LSM_FP_LEFT + PT_NEAR_LEFT_BOTTOM, + PT_FAR_LEFT_BOTTOM, + }, + { + // LSM_FP_TOP + PT_NEAR_LEFT_BOTTOM, PT_FAR_LEFT_BOTTOM, // Invalid combination. + }, + { + // LSM_FP_RIGHT + PT_FAR_RIGHT_BOTTOM, + PT_NEAR_RIGHT_BOTTOM, + }, + { + // LSM_FP_BOTTOM + PT_FAR_LEFT_BOTTOM, PT_NEAR_LEFT_BOTTOM, // Invalid combination. 
+ }, + }, + + // == + + }; + r_points[0] = fp_table[p_plane_a][p_plane_b][0]; + r_points[1] = fp_table[p_plane_a][p_plane_b][1]; +} + +#endif diff --git a/servers/rendering/rendering_light_culler.h b/servers/rendering/rendering_light_culler.h new file mode 100644 index 000000000000..602543850aa9 --- /dev/null +++ b/servers/rendering/rendering_light_culler.h @@ -0,0 +1,248 @@ +/**************************************************************************/ +/* rendering_light_culler.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifndef RENDERING_LIGHT_CULLER_H +#define RENDERING_LIGHT_CULLER_H + +#include "core/math/plane.h" +#include "core/math/vector3.h" +#include "renderer_scene_cull.h" + +struct Projection; +struct Transform3D; + +// For testing performance improvements from the LightCuller: +// Uncomment LIGHT_CULLER_DEBUG_FLASH and it will turn the culler +// on and off every LIGHT_CULLER_DEBUG_FLASH_FREQUENCY process frames. +// Uncomment LIGHT_CULLER_DEBUG_LOGGING to get periodic print of the number of casters culled before / after. +// Uncomment LIGHT_CULLER_DEBUG_DIRECTIONAL_LIGHT to get periodic print of the number of casters culled for the directional light. + +// #define LIGHT_CULLER_DEBUG_LOGGING +// #define LIGHT_CULLER_DEBUG_DIRECTIONAL_LIGHT +// #define LIGHT_CULLER_DEBUG_REGULAR_LIGHT +// #define LIGHT_CULLER_DEBUG_FLASH +#define LIGHT_CULLER_DEBUG_FLASH_FREQUENCY 1024 +//////////////////////////////////////////////////////////////////////////////////////////////// + +// The code to generate the lookup table is included but compiled out by default. +// This may be useful for debugging / regenerating the LUT in the future, +// especially if the order of planes changes. +// When RENDERING_LIGHT_CULLER_CALCULATE_LUT is defined, the generated lookup table will be printed to debug output. +// The generated lookup table can be copy pasted +// straight to LUT_entry_sizes and LUT_entries. +// See the referenced article for explanation.
+// #define RENDERING_LIGHT_CULLER_CALCULATE_LUT + +//////////////////////////////////////////////////////////////////////////////////////////////// +// This define will be set automatically depending on earlier defines, you can leave this as is. +#if defined(LIGHT_CULLER_DEBUG_LOGGING) || defined(RENDERING_LIGHT_CULLER_CALCULATE_LUT) +#define RENDERING_LIGHT_CULLER_DEBUG_STRINGS +#endif + +// Culls shadow casters that can't cast shadows into the camera frustum. +class RenderingLightCuller { +public: + RenderingLightCuller(); + +private: + class LightSource { + public: + enum SourceType { + ST_UNKNOWN, + ST_DIRECTIONAL, + ST_SPOTLIGHT, + ST_OMNI, + }; + + LightSource() { + type = ST_UNKNOWN; + angle = 0.0f; + range = FLT_MAX; + } + + // All in world space, culling done in world space. + Vector3 pos; + Vector3 dir; + SourceType type; + + float angle; // For spotlight. + float range; + }; + + // Same order as godot. + enum PlaneOrder { + PLANE_NEAR, + PLANE_FAR, + PLANE_LEFT, + PLANE_TOP, + PLANE_RIGHT, + PLANE_BOTTOM, + PLANE_TOTAL, + }; + + // Same order as godot. + enum PointOrder { + PT_FAR_LEFT_TOP, + PT_FAR_LEFT_BOTTOM, + PT_FAR_RIGHT_TOP, + PT_FAR_RIGHT_BOTTOM, + PT_NEAR_LEFT_TOP, + PT_NEAR_LEFT_BOTTOM, + PT_NEAR_RIGHT_TOP, + PT_NEAR_RIGHT_BOTTOM, + }; + + // 6 bits, 6 planes. + enum { + NUM_CAM_PLANES = 6, + NUM_CAM_POINTS = 8, + MAX_CULL_PLANES = 17, + LUT_SIZE = 64, + }; + +public: + // Before each pass with a different camera, you must call this so the culler can pre-create + // the camera frustum planes and corner points in world space which are used for the culling. + bool prepare_camera(const Transform3D &p_cam_transform, const Projection &p_cam_matrix); + + // REGULAR LIGHTS (SPOT, OMNI). + // These are prepared then used for culling one by one, single threaded. + // prepare_regular_light() returns false if the entire light is culled (i.e. there is no intersection between the light and the view frustum). 
+ bool prepare_regular_light(const RendererSceneCull::Instance &p_instance) { return _prepare_light(p_instance, -1); } + + // Cull according to the regular light planes that were setup in the previous call to prepare_regular_light. + void cull_regular_light(PagedArray &r_instance_shadow_cull_result); + + // Directional lights are prepared in advance, and can be culled multithreaded chopping and changing between + // different directional_light_id. + void prepare_directional_light(const RendererSceneCull::Instance *p_instance, int32_t p_directional_light_id); + + // Return false if the instance is to be culled. + bool cull_directional_light(const RendererSceneCull::InstanceBounds &p_bound, int32_t p_directional_light_id); + + // Can turn on and off from the engine if desired. + void set_caster_culling_active(bool p_active) { data.caster_culling_active = p_active; } + void set_light_culling_active(bool p_active) { data.light_culling_active = p_active; } + +private: + struct LightCullPlanes { + void add_cull_plane(const Plane &p); + Plane cull_planes[MAX_CULL_PLANES]; + int num_cull_planes = 0; +#ifdef LIGHT_CULLER_DEBUG_DIRECTIONAL_LIGHT + uint32_t rejected_count = 0; +#endif + }; + + bool _prepare_light(const RendererSceneCull::Instance &p_instance, int32_t p_directional_light_id = -1); + + // Internal version uses LightSource. + bool _add_light_camera_planes(LightCullPlanes &r_cull_planes, const LightSource &p_light_source); + + // Directional light gives parallel culling planes (as opposed to point lights). + bool add_light_camera_planes_directional(LightCullPlanes &r_cull_planes, const LightSource &p_light_source); + + // Is the light culler active? maybe not in the editor... + bool is_caster_culling_active() const { return data.caster_culling_active; } + bool is_light_culling_active() const { return data.light_culling_active; } + + // Do we want to log some debug output? 
+ bool is_logging() const { return data.debug_count == 0; } + + struct Data { + // Camera frustum planes (world space) - order ePlane. + Vector frustum_planes; + + // Camera frustum corners (world space) - order ePoint. + Vector3 frustum_points[NUM_CAM_POINTS]; + + // Master can have multiple directional lights. + // These need to store their own cull planes individually, as master + // chops and changes between culling different lights + // instead of doing one by one, and we don't want to prepare + // lights multiple times per frame. + LocalVector directional_cull_planes; + + // Single threaded cull planes for regular lights + // (OMNI, SPOT). These lights reuse the same set of cull plane data. + LightCullPlanes regular_cull_planes; + +#ifdef LIGHT_CULLER_DEBUG_REGULAR_LIGHT + uint32_t regular_rejected_count = 0; +#endif + // The whole regular light can be out of range of the view frustum, in which case all casters should be culled. + bool out_of_range = false; + +#ifdef RENDERING_LIGHT_CULLER_DEBUG_STRINGS + static String plane_bitfield_to_string(unsigned int BF); + // Names of the plane and point enums, useful for debugging. + static const char *string_planes[]; + static const char *string_points[]; +#endif + + // Precalculated look up table. + static uint8_t LUT_entry_sizes[LUT_SIZE]; + static uint8_t LUT_entries[LUT_SIZE][8]; + + bool caster_culling_active = true; + bool light_culling_active = true; + + // Light culling is a basic on / off switch. + // Caster culling only works if light culling is also on. + bool is_active() const { return light_culling_active; } + + // Ideally a frame counter, but for ease of implementation + // this is just incremented on each prepare_camera. + // used to turn on and off debugging features. + int debug_count = -1; + } data; + + // This functionality is not required in general use (and is compiled out), + // as the lookup table can normally be hard coded + // (provided order of planes etc does not change). 
+ // It is provided for debugging / future maintenance. +#ifdef RENDERING_LIGHT_CULLER_CALCULATE_LUT + void get_neighbouring_planes(PlaneOrder p_plane, PlaneOrder r_neigh_planes[4]) const; + void get_corners_of_planes(PlaneOrder p_plane_a, PlaneOrder p_plane_b, PointOrder r_points[2]) const; + void create_LUT(); + void compact_LUT_entry(uint32_t p_entry_id); + void debug_print_LUT(); + void debug_print_LUT_as_table(); + void add_LUT(int p_plane_0, int p_plane_1, PointOrder p_pts[2]); + void add_LUT_entry(uint32_t p_entry_id, PointOrder p_pts[2]); + String debug_string_LUT_entry(const LocalVector &p_entry, bool p_pair = false); + String string_LUT_entry(const LocalVector &p_entry); + + // Contains a list of points for each combination of plane facing directions. + LocalVector _calculated_LUT[LUT_SIZE]; +#endif +}; + +#endif // RENDERING_LIGHT_CULLER_H