From aa739907905b195b5e31b921c973a28521cbec88 Mon Sep 17 00:00:00 2001 From: Dario Date: Fri, 22 Nov 2024 11:55:06 -0300 Subject: [PATCH] Implement support for fragment density maps. Co-Authored-By: Bastiaan Olij --- .../d3d12/rendering_device_driver_d3d12.cpp | 79 ++- drivers/d3d12/rendering_device_driver_d3d12.h | 17 +- drivers/metal/rendering_device_driver_metal.h | 6 +- .../metal/rendering_device_driver_metal.mm | 23 +- .../vulkan/rendering_device_driver_vulkan.cpp | 521 +++++++++++++----- .../vulkan/rendering_device_driver_vulkan.h | 49 +- servers/rendering/renderer_rd/effects/vrs.cpp | 30 +- servers/rendering/renderer_rd/effects/vrs.h | 2 + .../render_forward_clustered.cpp | 2 - .../forward_mobile/render_forward_mobile.cpp | 15 +- .../renderer_rd/framebuffer_cache_rd.h | 5 - .../renderer_rd/shaders/effects/vrs.glsl | 9 + .../storage_rd/render_scene_buffers_rd.cpp | 4 +- .../storage_rd/texture_storage.cpp | 12 +- servers/rendering/rendering_device.cpp | 178 ++++-- servers/rendering/rendering_device.h | 47 +- servers/rendering/rendering_device_commons.h | 5 +- servers/rendering/rendering_device_driver.h | 38 +- servers/rendering/rendering_device_graph.cpp | 30 +- servers/rendering/rendering_device_graph.h | 10 +- servers/xr/xr_vrs.cpp | 19 +- 21 files changed, 756 insertions(+), 345 deletions(-) diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index b72a1932f830..80c3c519f968 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -1203,7 +1203,7 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p if ((p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT)) { resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; } - if ((p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT)) { + if ((p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) && (p_format.usage_bits & 
TEXTURE_USAGE_VRS_FRAGMENT_SHADING_RATE_BIT)) { // For VRS images we can't use the typeless format. resource_desc.Format = DXGI_FORMAT_R8_UINT; } @@ -1808,8 +1808,11 @@ static D3D12_BARRIER_ACCESS _rd_texture_layout_access_mask(RDD::TextureLayout p_ return D3D12_BARRIER_ACCESS_RESOLVE_SOURCE; case RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL: return D3D12_BARRIER_ACCESS_RESOLVE_DEST; - case RDD::TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL: + case RDD::TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL: return D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE; + case RDD::TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL: + DEV_ASSERT(false && "Fragment density maps are not supported in D3D12."); + return D3D12_BARRIER_ACCESS_NO_ACCESS; default: return D3D12_BARRIER_ACCESS_NO_ACCESS; } @@ -1928,7 +1931,7 @@ static void _rd_stages_to_d3d12(BitField p_stages, D3D12 r_sync |= D3D12_BARRIER_SYNC_VERTEX_SHADING; } - if (p_stages.has_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT)) { + if (p_stages.has_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT)) { r_sync |= D3D12_BARRIER_SYNC_PIXEL_SHADING; } @@ -2023,8 +2026,11 @@ static D3D12_BARRIER_LAYOUT _rd_texture_layout_to_d3d12_barrier_layout(RDD::Text return D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE; case RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL: return D3D12_BARRIER_LAYOUT_RESOLVE_DEST; - case RDD::TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL: + case RDD::TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL: return D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE; + case RDD::TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL: + DEV_ASSERT(false && "Fragment density maps are not supported in D3D12."); + return D3D12_BARRIER_LAYOUT_UNDEFINED; default: DEV_ASSERT(false && "Unknown texture layout."); return D3D12_BARRIER_LAYOUT_UNDEFINED; @@ -2412,7 +2418,7 @@ RDD::SwapChainID RenderingDeviceDriverD3D12::swap_chain_create(RenderingContextD 
color_ref.aspect.set_flag(RDD::TEXTURE_ASPECT_COLOR_BIT); subpass.color_references.push_back(color_ref); - RenderPassID render_pass = render_pass_create(attachment, subpass, {}, 1); + RenderPassID render_pass = render_pass_create(attachment, subpass, {}, 1, AttachmentReference()); ERR_FAIL_COND_V(!render_pass, SwapChainID()); // Create the empty swap chain until it is resized. @@ -2772,8 +2778,8 @@ RDD::FramebufferID RenderingDeviceDriverD3D12::_framebuffer_create(RenderPassID uint32_t vrs_index = UINT32_MAX; for (const Subpass &E : pass_info->subpasses) { - if (E.vrs_reference.attachment != AttachmentReference::UNUSED) { - vrs_index = E.vrs_reference.attachment; + if (E.fragment_shading_rate_reference.attachment != AttachmentReference::UNUSED) { + vrs_index = E.fragment_shading_rate_reference.attachment; } } @@ -4922,7 +4928,9 @@ Vector RenderingDeviceDriverD3D12::pipeline_cache_serialize() { // ----- SUBPASS ----- -RDD::RenderPassID RenderingDeviceDriverD3D12::render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count) { +RDD::RenderPassID RenderingDeviceDriverD3D12::render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) { + ERR_FAIL_COND_V_MSG(p_fragment_density_map_attachment.attachment != AttachmentReference::UNUSED, RenderPassID(), "Fragment density maps are not supported in D3D12."); + // Pre-bookkeep. 
RenderPassInfo *pass_info = VersatileResource::allocate(resources_allocator); @@ -5023,7 +5031,7 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd } } - if (fb_info->vrs_attachment && vrs_capabilities.ss_image_supported) { + if (fb_info->vrs_attachment && fsr_capabilities.attachment_supported) { ComPtr cmd_list_5; cmd_buf_info->cmd_list->QueryInterface(cmd_list_5.GetAddressOf()); if (cmd_list_5) { @@ -5143,7 +5151,7 @@ void RenderingDeviceDriverD3D12::command_end_render_pass(CommandBufferID p_cmd_b const FramebufferInfo *fb_info = cmd_buf_info->render_pass_state.fb_info; const RenderPassInfo *pass_info = cmd_buf_info->render_pass_state.pass_info; - if (vrs_capabilities.ss_image_supported) { + if (fsr_capabilities.attachment_supported) { ComPtr cmd_list_5; cmd_buf_info->cmd_list->QueryInterface(cmd_list_5.GetAddressOf()); if (cmd_list_5) { @@ -6173,12 +6181,6 @@ uint64_t RenderingDeviceDriverD3D12::limit_get(Limit p_limit) { return subgroup_capabilities.supported_stages_flags_rd(); case LIMIT_SUBGROUP_OPERATIONS: return subgroup_capabilities.supported_operations_flags_rd(); - case LIMIT_VRS_TEXEL_WIDTH: - case LIMIT_VRS_TEXEL_HEIGHT: - return vrs_capabilities.ss_image_tile_size; - case LIMIT_VRS_MAX_FRAGMENT_WIDTH: - case LIMIT_VRS_MAX_FRAGMENT_HEIGHT: - return vrs_capabilities.ss_max_fragment_size; default: { #ifdef DEV_ENABLED WARN_PRINT("Returning maximum value for unknown limit " + itos(p_limit) + "."); @@ -6213,12 +6215,8 @@ uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) { bool RenderingDeviceDriverD3D12::has_feature(Features p_feature) { switch (p_feature) { - case SUPPORTS_MULTIVIEW: - return multiview_capabilities.is_supported && multiview_capabilities.max_view_count > 1; case SUPPORTS_FSR_HALF_FLOAT: return shader_capabilities.native_16bit_ops && storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported; - case SUPPORTS_ATTACHMENT_VRS: - return vrs_capabilities.ss_image_supported; case 
SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: return true; default: @@ -6230,6 +6228,14 @@ const RDD::MultiviewCapabilities &RenderingDeviceDriverD3D12::get_multiview_capa return multiview_capabilities; } +const RDD::FragmentShadingRateCapabilities &RenderingDeviceDriverD3D12::get_fragment_shading_rate_capabilities() { + return fsr_capabilities; +} + +const RDD::FragmentDensityMapCapabilities &RenderingDeviceDriverD3D12::get_fragment_density_map_capabilities() { + return fdm_capabilities; +} + String RenderingDeviceDriverD3D12::get_api_name() const { return "D3D12"; } @@ -6391,12 +6397,6 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() { device_capabilities.version_minor = feature_level % 10; // Assume not supported until proven otherwise. - vrs_capabilities.draw_call_supported = false; - vrs_capabilities.primitive_supported = false; - vrs_capabilities.primitive_in_multiviewport = false; - vrs_capabilities.ss_image_supported = false; - vrs_capabilities.ss_image_tile_size = 1; - vrs_capabilities.additional_rates_supported = false; multiview_capabilities.is_supported = false; multiview_capabilities.geometry_shader_is_supported = false; multiview_capabilities.tessellation_shader_is_supported = false; @@ -6487,14 +6487,12 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() { res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS6, &options6, sizeof(options6)); if (SUCCEEDED(res)) { if (options6.VariableShadingRateTier >= D3D12_VARIABLE_SHADING_RATE_TIER_1) { - vrs_capabilities.draw_call_supported = true; + fsr_capabilities.pipeline_supported = true; if (options6.VariableShadingRateTier >= D3D12_VARIABLE_SHADING_RATE_TIER_2) { - vrs_capabilities.primitive_supported = true; - vrs_capabilities.primitive_in_multiviewport = options6.PerPrimitiveShadingRateSupportedWithViewportIndexing; - vrs_capabilities.ss_image_supported = true; - vrs_capabilities.ss_image_tile_size = options6.ShadingRateImageTileSize; - vrs_capabilities.ss_max_fragment_size = 
8; // TODO figure out if this is supplied and/or needed - vrs_capabilities.additional_rates_supported = options6.AdditionalShadingRatesSupported; + fsr_capabilities.primitive_supported = true; + fsr_capabilities.attachment_supported = true; + fsr_capabilities.min_texel_size = Size2i(options6.ShadingRateImageTileSize, options6.ShadingRateImageTileSize); + fsr_capabilities.max_texel_size = Size2i(8, 8); } } } @@ -6506,19 +6504,16 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() { barrier_capabilities.enhanced_barriers_supported = options12.EnhancedBarriersSupported; } - if (vrs_capabilities.draw_call_supported || vrs_capabilities.primitive_supported || vrs_capabilities.ss_image_supported) { + if (fsr_capabilities.pipeline_supported || fsr_capabilities.primitive_supported || fsr_capabilities.attachment_supported) { print_verbose("- D3D12 Variable Rate Shading supported:"); - if (vrs_capabilities.draw_call_supported) { + if (fsr_capabilities.pipeline_supported) { print_verbose(" Draw call"); } - if (vrs_capabilities.primitive_supported) { - print_verbose(String(" Per-primitive (multi-viewport: ") + (vrs_capabilities.primitive_in_multiviewport ? "yes" : "no") + ")"); - } - if (vrs_capabilities.ss_image_supported) { - print_verbose(String(" Screen-space image (tile size: ") + itos(vrs_capabilities.ss_image_tile_size) + ")"); + if (fsr_capabilities.primitive_supported) { + print_verbose(" Primitive"); } - if (vrs_capabilities.additional_rates_supported) { - print_verbose(String(" Additional rates: ") + (vrs_capabilities.additional_rates_supported ? 
"yes" : "no")); + if (fsr_capabilities.attachment_supported) { + print_verbose(String(" Screen-space image (tile size: ") + itos(fsr_capabilities.min_texel_size.x) + ")"); } } else { print_verbose("- D3D12 Variable Rate Shading not supported"); diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h index b449a9087665..2f29114f7cf6 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.h +++ b/drivers/d3d12/rendering_device_driver_d3d12.h @@ -116,16 +116,6 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { uint32_t supported_operations_flags_rd() const; }; - struct VRSCapabilities { - bool draw_call_supported = false; // We can specify our fragment rate on a draw call level. - bool primitive_supported = false; // We can specify our fragment rate on each drawcall. - bool primitive_in_multiviewport = false; - bool ss_image_supported = false; // We can provide a density map attachment on our framebuffer. - uint32_t ss_image_tile_size = 0; - uint32_t ss_max_fragment_size = 0; - bool additional_rates_supported = false; - }; - struct ShaderCapabilities { D3D_SHADER_MODEL shader_model = (D3D_SHADER_MODEL)0; bool native_16bit_ops = false; @@ -157,7 +147,8 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { uint32_t feature_level = 0; // Major * 10 + minor. 
SubgroupCapabilities subgroup_capabilities; RDD::MultiviewCapabilities multiview_capabilities; - VRSCapabilities vrs_capabilities; + FragmentShadingRateCapabilities fsr_capabilities; + FragmentDensityMapCapabilities fdm_capabilities; ShaderCapabilities shader_capabilities; StorageBufferCapabilities storage_buffer_capabilities; FormatCapabilities format_capabilities; @@ -825,7 +816,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { }; public: - virtual RenderPassID render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count) override final; + virtual RenderPassID render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) override final; virtual void render_pass_free(RenderPassID p_render_pass) override final; // ----- COMMANDS ----- @@ -990,6 +981,8 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { virtual uint64_t api_trait_get(ApiTrait p_trait) override final; virtual bool has_feature(Features p_feature) override final; virtual const MultiviewCapabilities &get_multiview_capabilities() override final; + virtual const FragmentShadingRateCapabilities &get_fragment_shading_rate_capabilities() override final; + virtual const FragmentDensityMapCapabilities &get_fragment_density_map_capabilities() override final; virtual String get_api_name() const override final; virtual String get_api_version() const override final; virtual String get_pipeline_cache_uuid() const override final; diff --git a/drivers/metal/rendering_device_driver_metal.h b/drivers/metal/rendering_device_driver_metal.h index 09ab7601e9c1..2a048b248a09 100644 --- a/drivers/metal/rendering_device_driver_metal.h +++ b/drivers/metal/rendering_device_driver_metal.h @@ -67,6 +67,8 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) RenderingDeviceDriverMetal : public RDD::Capabilities 
capabilities; RDD::MultiviewCapabilities multiview_capabilities; + RDD::FragmentShadingRateCapabilities fsr_capabilities; + RDD::FragmentDensityMapCapabilities fdm_capabilities; id archive = nil; uint32_t archive_count = 0; @@ -315,7 +317,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) RenderingDeviceDriverMetal : public // ----- SUBPASS ----- - virtual RenderPassID render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count) override final; + virtual RenderPassID render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) override final; virtual void render_pass_free(RenderPassID p_render_pass) override final; // ----- COMMANDS ----- @@ -417,6 +419,8 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) RenderingDeviceDriverMetal : public virtual uint64_t api_trait_get(ApiTrait p_trait) override final; virtual bool has_feature(Features p_feature) override final; virtual const MultiviewCapabilities &get_multiview_capabilities() override final; + virtual const FragmentShadingRateCapabilities &get_fragment_shading_rate_capabilities() override final; + virtual const FragmentDensityMapCapabilities &get_fragment_density_map_capabilities() override final; virtual String get_api_name() const override final { return "Metal"; } virtual String get_api_version() const override final; virtual String get_pipeline_cache_uuid() const override final; diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm index 976020abf1df..fd229ab4adb1 100644 --- a/drivers/metal/rendering_device_driver_metal.mm +++ b/drivers/metal/rendering_device_driver_metal.mm @@ -947,7 +947,7 @@ static const API_AVAILABLE(macos(11.0), ios(14.0)) MTLSamplerBorderColor SAMPLER color_ref.aspect.set_flag(RDD::TEXTURE_ASPECT_COLOR_BIT); subpass.color_references.push_back(color_ref); 
- RenderPassID render_pass = render_pass_create(attachment, subpass, {}, 1); + RenderPassID render_pass = render_pass_create(attachment, subpass, {}, 1, RDD::AttachmentReference()); ERR_FAIL_COND_V(!render_pass, SwapChainID()); // Create the empty swap chain until it is resized. @@ -3006,7 +3006,7 @@ bool isArrayTexture(MTLTextureType p_type) { // ----- SUBPASS ----- -RDD::RenderPassID RenderingDeviceDriverMetal::render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count) { +RDD::RenderPassID RenderingDeviceDriverMetal::render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) { PixelFormats &pf = *pixel_formats; size_t subpass_count = p_subpasses.size(); @@ -3886,8 +3886,6 @@ bool isArrayTexture(MTLTextureType p_type) { return (int64_t)limits.subgroupSupportedShaderStages; case LIMIT_SUBGROUP_OPERATIONS: return (int64_t)limits.subgroupSupportedOperations; - UNKNOWN(LIMIT_VRS_TEXEL_WIDTH); - UNKNOWN(LIMIT_VRS_TEXEL_HEIGHT); default: ERR_FAIL_V(0); } @@ -3906,17 +3904,8 @@ bool isArrayTexture(MTLTextureType p_type) { bool RenderingDeviceDriverMetal::has_feature(Features p_feature) { switch (p_feature) { - case SUPPORTS_MULTIVIEW: - return multiview_capabilities.is_supported; case SUPPORTS_FSR_HALF_FLOAT: return true; - case SUPPORTS_ATTACHMENT_VRS: - // TODO(sgc): Maybe supported via https://developer.apple.com/documentation/metal/render_passes/rendering_at_different_rasterization_rates?language=objc - // See also: - // - // * https://forum.beyond3d.com/threads/variable-rate-shading-vs-variable-rate-rasterization.62243/post-2191363 - // - return false; case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: return true; default: @@ -3928,6 +3917,14 @@ bool isArrayTexture(MTLTextureType p_type) { return multiview_capabilities; } +const RDD::FragmentShadingRateCapabilities 
&RenderingDeviceDriverMetal::get_fragment_shading_rate_capabilities() { + return fsr_capabilities; +} + +const RDD::FragmentDensityMapCapabilities &RenderingDeviceDriverMetal::get_fragment_density_map_capabilities() { + return fdm_capabilities; +} + String RenderingDeviceDriverMetal::get_api_version() const { return vformat("%d.%d", version_major, version_minor); } diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index b6e5ed02878b..d570f73d9a1f 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -290,7 +290,8 @@ static VkImageLayout RD_TO_VK_LAYOUT[RDD::TEXTURE_LAYOUT_MAX] = { VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // TEXTURE_LAYOUT_COPY_DST_OPTIMAL VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL - VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR, // TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL + VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR, // TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL + VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT, // TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL }; static VkPipelineStageFlags _rd_to_vk_pipeline_stages(BitField p_stages) { @@ -504,6 +505,8 @@ Error RenderingDeviceDriverVulkan::_initialize_device_extensions() { _register_requested_device_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true); _register_requested_device_extension(VK_KHR_MULTIVIEW_EXTENSION_NAME, false); _register_requested_device_extension(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME, false); + _register_requested_device_extension(VK_EXT_FRAGMENT_DENSITY_MAP_EXTENSION_NAME, false); + _register_requested_device_extension(VK_QCOM_FRAGMENT_DENSITY_MAP_OFFSET_EXTENSION_NAME, false); _register_requested_device_extension(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME, false); 
_register_requested_device_extension(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); _register_requested_device_extension(VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, false); @@ -514,6 +517,10 @@ Error RenderingDeviceDriverVulkan::_initialize_device_extensions() { _register_requested_device_extension(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); _register_requested_device_extension(VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME, false); + // We don't actually use this extension, but some runtime components on some platforms + // can and will fill the validation layers with useless info otherwise if not enabled. + _register_requested_device_extension(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, false); + if (Engine::get_singleton()->is_generate_spirv_debug_info_enabled()) { _register_requested_device_extension(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, true); } @@ -730,7 +737,9 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { void *next_features = nullptr; VkPhysicalDeviceVulkan12Features device_features_vk_1_2 = {}; VkPhysicalDeviceShaderFloat16Int8FeaturesKHR shader_features = {}; - VkPhysicalDeviceFragmentShadingRateFeaturesKHR vrs_features = {}; + VkPhysicalDeviceFragmentShadingRateFeaturesKHR fsr_features = {}; + VkPhysicalDeviceFragmentDensityMapFeaturesEXT fdm_features = {}; + VkPhysicalDeviceFragmentDensityMapOffsetFeaturesQCOM fdmo_features_qcom = {}; VkPhysicalDevice16BitStorageFeaturesKHR storage_feature = {}; VkPhysicalDeviceMultiviewFeatures multiview_features = {}; VkPhysicalDevicePipelineCreationCacheControlFeatures pipeline_cache_control_features = {}; @@ -747,9 +756,21 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { } if (enabled_device_extension_names.has(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { - vrs_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR; - vrs_features.pNext = next_features; - next_features = &vrs_features; + fsr_features.sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR; + fsr_features.pNext = next_features; + next_features = &fsr_features; + } + + if (enabled_device_extension_names.has(VK_EXT_FRAGMENT_DENSITY_MAP_EXTENSION_NAME)) { + fdm_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_FEATURES_EXT; + fdm_features.pNext = next_features; + next_features = &fdm_features; + } + + if (enabled_device_extension_names.has(VK_QCOM_FRAGMENT_DENSITY_MAP_OFFSET_EXTENSION_NAME)) { + fdmo_features_qcom.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_OFFSET_FEATURES_QCOM; + fdmo_features_qcom.pNext = next_features; + next_features = &fdmo_features_qcom; } if (enabled_device_extension_names.has(VK_KHR_16BIT_STORAGE_EXTENSION_NAME)) { @@ -791,11 +812,25 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { } if (enabled_device_extension_names.has(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { - vrs_capabilities.pipeline_vrs_supported = vrs_features.pipelineFragmentShadingRate; - vrs_capabilities.primitive_vrs_supported = vrs_features.primitiveFragmentShadingRate; - vrs_capabilities.attachment_vrs_supported = vrs_features.attachmentFragmentShadingRate; + fsr_capabilities.pipeline_supported = fsr_features.pipelineFragmentShadingRate; + fsr_capabilities.primitive_supported = fsr_features.primitiveFragmentShadingRate; + fsr_capabilities.attachment_supported = fsr_features.attachmentFragmentShadingRate; + } + + if (enabled_device_extension_names.has(VK_EXT_FRAGMENT_DENSITY_MAP_EXTENSION_NAME)) { + fdm_capabilities.attachment_supported = fdm_features.fragmentDensityMap; + fdm_capabilities.dynamic_attachment_supported = fdm_features.fragmentDensityMapDynamic; + fdm_capabilities.non_subsampled_images_supported = fdm_features.fragmentDensityMapNonSubsampledImages; + } + + if (enabled_device_extension_names.has(VK_QCOM_FRAGMENT_DENSITY_MAP_OFFSET_EXTENSION_NAME)) { + fdm_capabilities.offset_supported = 
fdmo_features_qcom.fragmentDensityMapOffset; } + // Multiple VRS techniques can't co-exist during the existence of one device, so we must + // choose one at creation time and only report one of them as available. + _choose_vrs_capabilities(); + if (enabled_device_extension_names.has(VK_KHR_MULTIVIEW_EXTENSION_NAME)) { multiview_capabilities.is_supported = multiview_features.multiview; multiview_capabilities.geometry_shader_is_supported = multiview_features.multiviewGeometryShader; @@ -825,7 +860,9 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { if (functions.GetPhysicalDeviceProperties2 != nullptr) { void *next_properties = nullptr; - VkPhysicalDeviceFragmentShadingRatePropertiesKHR vrs_properties = {}; + VkPhysicalDeviceFragmentShadingRatePropertiesKHR fsr_properties = {}; + VkPhysicalDeviceFragmentDensityMapPropertiesEXT fdm_properties = {}; + VkPhysicalDeviceFragmentDensityMapOffsetPropertiesQCOM fdmo_properties = {}; VkPhysicalDeviceMultiviewProperties multiview_properties = {}; VkPhysicalDeviceSubgroupProperties subgroup_properties = {}; VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control_properties = {}; @@ -851,10 +888,22 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { next_properties = &multiview_properties; } - if (vrs_capabilities.attachment_vrs_supported) { - vrs_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; - vrs_properties.pNext = next_properties; - next_properties = &vrs_properties; + if (fsr_capabilities.attachment_supported) { + fsr_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; + fsr_properties.pNext = next_properties; + next_properties = &fsr_properties; + } + + if (fdm_capabilities.attachment_supported) { + fdm_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_PROPERTIES_EXT; + fdm_properties.pNext = next_properties; + next_properties = &fdm_properties; + } + + if 
(fdm_capabilities.offset_supported) { + fdmo_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_OFFSET_PROPERTIES_QCOM; + fdmo_properties.pNext = next_properties; + next_properties = &fdmo_properties; } physical_device_properties_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; @@ -877,31 +926,62 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { subgroup_capabilities.max_size = subgroup_size_control_properties.maxSubgroupSize; } - if (vrs_capabilities.pipeline_vrs_supported || vrs_capabilities.primitive_vrs_supported || vrs_capabilities.attachment_vrs_supported) { - print_verbose("- Vulkan Variable Rate Shading supported:"); - if (vrs_capabilities.pipeline_vrs_supported) { + if (fsr_capabilities.pipeline_supported || fsr_capabilities.primitive_supported || fsr_capabilities.attachment_supported) { + print_verbose("- Vulkan Fragment Shading Rate supported:"); + if (fsr_capabilities.pipeline_supported) { print_verbose(" Pipeline fragment shading rate"); } - if (vrs_capabilities.primitive_vrs_supported) { + if (fsr_capabilities.primitive_supported) { print_verbose(" Primitive fragment shading rate"); } - if (vrs_capabilities.attachment_vrs_supported) { + if (fsr_capabilities.attachment_supported) { // TODO: Expose these somehow to the end user. 
- vrs_capabilities.min_texel_size.x = vrs_properties.minFragmentShadingRateAttachmentTexelSize.width; - vrs_capabilities.min_texel_size.y = vrs_properties.minFragmentShadingRateAttachmentTexelSize.height; - vrs_capabilities.max_texel_size.x = vrs_properties.maxFragmentShadingRateAttachmentTexelSize.width; - vrs_capabilities.max_texel_size.y = vrs_properties.maxFragmentShadingRateAttachmentTexelSize.height; - vrs_capabilities.max_fragment_size.x = vrs_properties.maxFragmentSize.width; // either 4 or 8 - vrs_capabilities.max_fragment_size.y = vrs_properties.maxFragmentSize.height; // generally the same as width + fsr_capabilities.min_texel_size.x = fsr_properties.minFragmentShadingRateAttachmentTexelSize.width; + fsr_capabilities.min_texel_size.y = fsr_properties.minFragmentShadingRateAttachmentTexelSize.height; + fsr_capabilities.max_texel_size.x = fsr_properties.maxFragmentShadingRateAttachmentTexelSize.width; + fsr_capabilities.max_texel_size.y = fsr_properties.maxFragmentShadingRateAttachmentTexelSize.height; + fsr_capabilities.max_fragment_size.x = fsr_properties.maxFragmentSize.width; // either 4 or 8 + fsr_capabilities.max_fragment_size.y = fsr_properties.maxFragmentSize.height; // generally the same as width + + print_verbose(String(" Attachment fragment shading rate") + + String(", min texel size: (") + itos(fsr_capabilities.min_texel_size.x) + String(", ") + itos(fsr_capabilities.min_texel_size.y) + String(")") + + String(", max texel size: (") + itos(fsr_capabilities.max_texel_size.x) + String(", ") + itos(fsr_capabilities.max_texel_size.y) + String(")") + + String(", max fragment size: (") + itos(fsr_capabilities.max_fragment_size.x) + String(", ") + itos(fsr_capabilities.max_fragment_size.y) + String(")")); + } + + } else { + print_verbose("- Vulkan Variable Rate Shading not supported"); + } + + if (fdm_capabilities.attachment_supported || fdm_capabilities.dynamic_attachment_supported || fdm_capabilities.non_subsampled_images_supported) { + 
print_verbose("- Vulkan Fragment Density Map supported"); - // We'll attempt to default to a texel size of 16x16. - vrs_capabilities.texel_size = Vector2i(16, 16).clamp(vrs_capabilities.min_texel_size, vrs_capabilities.max_texel_size); + fdm_capabilities.min_texel_size.x = fdm_properties.minFragmentDensityTexelSize.width; + fdm_capabilities.min_texel_size.y = fdm_properties.minFragmentDensityTexelSize.height; + fdm_capabilities.max_texel_size.x = fdm_properties.maxFragmentDensityTexelSize.width; + fdm_capabilities.max_texel_size.y = fdm_properties.maxFragmentDensityTexelSize.height; + fdm_capabilities.invocations_supported = fdm_properties.fragmentDensityInvocations; - print_verbose(String(" Attachment fragment shading rate") + String(", min texel size: (") + itos(vrs_capabilities.min_texel_size.x) + String(", ") + itos(vrs_capabilities.min_texel_size.y) + String(")") + String(", max texel size: (") + itos(vrs_capabilities.max_texel_size.x) + String(", ") + itos(vrs_capabilities.max_texel_size.y) + String(")") + String(", max fragment size: (") + itos(vrs_capabilities.max_fragment_size.x) + String(", ") + itos(vrs_capabilities.max_fragment_size.y) + String(")")); + if (fdm_capabilities.dynamic_attachment_supported) { + print_verbose(" - dynamic fragment density map supported"); } + if (fdm_capabilities.non_subsampled_images_supported) { + print_verbose(" - non-subsampled images supported"); + } } else { - print_verbose("- Vulkan Variable Rate Shading not supported"); + print_verbose("- Vulkan Fragment Density Map not supported"); + } + + if (fdm_capabilities.offset_supported) { + print_verbose("- Vulkan Fragment Density Map Offset supported"); + + fdm_capabilities.offset_granularity.x = fdmo_properties.fragmentDensityOffsetGranularity.width; + fdm_capabilities.offset_granularity.y = fdmo_properties.fragmentDensityOffsetGranularity.height; + + print_verbose(vformat(" Offset granularity: (%d, %d)", fdm_capabilities.offset_granularity.x, 
fdm_capabilities.offset_granularity.y)); + } else { + print_verbose("- Vulkan Fragment Density Map Offset not supported"); } if (multiview_capabilities.is_supported) { @@ -929,6 +1009,22 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { return OK; } +void RenderingDeviceDriverVulkan::_choose_vrs_capabilities() { + bool prefer_fdm_on_qualcomm = physical_device_properties.vendorID == RenderingContextDriver::VENDOR_QUALCOMM; + if (fdm_capabilities.attachment_supported && (!fsr_capabilities.attachment_supported || prefer_fdm_on_qualcomm)) { + // If available, we prefer using fragment density maps on Qualcomm as they adjust tile distribution when using + // this technique. Performance as a result is higher than when using fragment shading rate. + fsr_capabilities = FragmentShadingRateCapabilities(); + } else if (fsr_capabilities.attachment_supported) { + // Disable any possibility of fragment density maps being used. + fdm_capabilities = FragmentDensityMapCapabilities(); + } else { + // Do not report or enable any VRS capabilities if attachment is not supported. + fsr_capabilities = FragmentShadingRateCapabilities(); + fdm_capabilities = FragmentDensityMapCapabilities(); + } +} + Error RenderingDeviceDriverVulkan::_add_queue_create_info(LocalVector &r_queue_create_info) { uint32_t queue_family_count = queue_family_properties.size(); queue_families.resize(queue_family_count); @@ -971,14 +1067,24 @@ Error RenderingDeviceDriverVulkan::_initialize_device(const LocalVector RenderingDeviceDriverVulkan::texture_get_usages_ if (!(flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT)) { supported.clear_flag(TEXTURE_USAGE_STORAGE_ATOMIC_BIT); } - // Validation via VK_FORMAT_FEATURE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR fails if VRS attachment is not supported. 
- if (p_format != DATA_FORMAT_R8_UINT) { + if (p_format != DATA_FORMAT_R8_UINT && p_format != DATA_FORMAT_R8G8_UNORM) { supported.clear_flag(TEXTURE_USAGE_VRS_ATTACHMENT_BIT); } @@ -2195,6 +2310,8 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPE static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT, VK_PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT_EXT)); // RDD::BarrierAccessBits == VkAccessFlagBits. static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_ACCESS_INDIRECT_COMMAND_READ_BIT)); @@ -2213,6 +2330,7 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_HOST_WRITE_BIT, VK_ACCESS_H static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_MEMORY_READ_BIT, VK_ACCESS_MEMORY_READ_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_MEMORY_WRITE_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT, VK_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_FRAGMENT_DENSITY_MAP_ATTACHMENT_READ_BIT, VK_ACCESS_FRAGMENT_DENSITY_MAP_READ_BIT_EXT)); void RenderingDeviceDriverVulkan::command_pipeline_barrier( CommandBufferID p_cmd_buffer, @@ -2278,8 +2396,9 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier( } #endif + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; vkCmdPipelineBarrier( - (VkCommandBuffer)p_cmd_buffer.id, + 
command_buffer->vk_command_buffer, _rd_to_vk_pipeline_stages(p_src_stages), _rd_to_vk_pipeline_stages(p_dst_stages), 0, @@ -2478,7 +2597,8 @@ Error RenderingDeviceDriverVulkan::command_queue_execute_and_present(CommandQueu signal_semaphores.clear(); for (uint32_t i = 0; i < p_cmd_buffers.size(); i++) { - command_buffers.push_back(VkCommandBuffer(p_cmd_buffers[i].id)); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)(p_cmd_buffers[i].id); + command_buffers.push_back(command_buffer->vk_command_buffer); } for (uint32_t i = 0; i < p_cmd_semaphores.size(); i++) { @@ -2675,6 +2795,10 @@ void RenderingDeviceDriverVulkan::command_pool_free(CommandPoolID p_cmd_pool) { DEV_ASSERT(p_cmd_pool); CommandPool *command_pool = (CommandPool *)(p_cmd_pool.id); + for (CommandBufferInfo *command_buffer : command_pool->command_buffers_created) { + VersatileResource::free(resources_allocator, command_buffer); + } + vkDestroyCommandPool(vk_device, command_pool->vk_command_pool, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_COMMAND_POOL)); memdelete(command_pool); } @@ -2684,7 +2808,7 @@ void RenderingDeviceDriverVulkan::command_pool_free(CommandPoolID p_cmd_pool) { RDD::CommandBufferID RenderingDeviceDriverVulkan::command_buffer_create(CommandPoolID p_cmd_pool) { DEV_ASSERT(p_cmd_pool); - const CommandPool *command_pool = (const CommandPool *)(p_cmd_pool.id); + CommandPool *command_pool = (CommandPool *)(p_cmd_pool.id); VkCommandBufferAllocateInfo cmd_buf_info = {}; cmd_buf_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; cmd_buf_info.commandPool = command_pool->vk_command_pool; @@ -2696,21 +2820,26 @@ RDD::CommandBufferID RenderingDeviceDriverVulkan::command_buffer_create(CommandP cmd_buf_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; } - VkCommandBuffer vk_cmd_buffer = VK_NULL_HANDLE; - VkResult err = vkAllocateCommandBuffers(vk_device, &cmd_buf_info, &vk_cmd_buffer); + VkCommandBuffer vk_command_buffer = VK_NULL_HANDLE; + VkResult err = 
vkAllocateCommandBuffers(vk_device, &cmd_buf_info, &vk_command_buffer); ERR_FAIL_COND_V_MSG(err, CommandBufferID(), "vkAllocateCommandBuffers failed with error " + itos(err) + "."); - return CommandBufferID(vk_cmd_buffer); + CommandBufferInfo *command_buffer = VersatileResource::allocate(resources_allocator); + command_buffer->vk_command_buffer = vk_command_buffer; + command_pool->command_buffers_created.push_back(command_buffer); + return CommandBufferID(command_buffer); } bool RenderingDeviceDriverVulkan::command_buffer_begin(CommandBufferID p_cmd_buffer) { // Reset is implicit (VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT). + CommandBufferInfo *command_buffer = (CommandBufferInfo *)(p_cmd_buffer.id); + VkCommandBufferBeginInfo cmd_buf_begin_info = {}; cmd_buf_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; cmd_buf_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - VkResult err = vkBeginCommandBuffer((VkCommandBuffer)p_cmd_buffer.id, &cmd_buf_begin_info); + VkResult err = vkBeginCommandBuffer(command_buffer->vk_command_buffer, &cmd_buf_begin_info); ERR_FAIL_COND_V_MSG(err, false, "vkBeginCommandBuffer failed with error " + itos(err) + "."); return true; @@ -2720,10 +2849,12 @@ bool RenderingDeviceDriverVulkan::command_buffer_begin_secondary(CommandBufferID // Reset is implicit (VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT). 
Framebuffer *framebuffer = (Framebuffer *)(p_framebuffer.id); + RenderPassInfo *render_pass = (RenderPassInfo *)(p_render_pass.id); + CommandBufferInfo *command_buffer = (CommandBufferInfo *)(p_cmd_buffer.id); VkCommandBufferInheritanceInfo inheritance_info = {}; inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; - inheritance_info.renderPass = (VkRenderPass)p_render_pass.id; + inheritance_info.renderPass = render_pass->vk_render_pass; inheritance_info.subpass = p_subpass; inheritance_info.framebuffer = framebuffer->vk_framebuffer; @@ -2732,18 +2863,27 @@ bool RenderingDeviceDriverVulkan::command_buffer_begin_secondary(CommandBufferID cmd_buf_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT | VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT; cmd_buf_begin_info.pInheritanceInfo = &inheritance_info; - VkResult err = vkBeginCommandBuffer((VkCommandBuffer)p_cmd_buffer.id, &cmd_buf_begin_info); + VkResult err = vkBeginCommandBuffer(command_buffer->vk_command_buffer, &cmd_buf_begin_info); ERR_FAIL_COND_V_MSG(err, false, "vkBeginCommandBuffer failed with error " + itos(err) + "."); return true; } void RenderingDeviceDriverVulkan::command_buffer_end(CommandBufferID p_cmd_buffer) { - vkEndCommandBuffer((VkCommandBuffer)p_cmd_buffer.id); + CommandBufferInfo *command_buffer = (CommandBufferInfo *)(p_cmd_buffer.id); + vkEndCommandBuffer(command_buffer->vk_command_buffer); } void RenderingDeviceDriverVulkan::command_buffer_execute_secondary(CommandBufferID p_cmd_buffer, VectorView p_secondary_cmd_buffers) { - vkCmdExecuteCommands((VkCommandBuffer)p_cmd_buffer.id, p_secondary_cmd_buffers.size(), (const VkCommandBuffer *)p_secondary_cmd_buffers.ptr()); + thread_local LocalVector secondary_command_buffers; + CommandBufferInfo *command_buffer = (CommandBufferInfo *)(p_cmd_buffer.id); + secondary_command_buffers.resize(p_secondary_cmd_buffers.size()); + for (uint32_t i = 0; i < p_secondary_cmd_buffers.size(); i++) { + CommandBufferInfo 
*secondary_command_buffer = (CommandBufferInfo *)(p_secondary_cmd_buffers[i].id); + secondary_command_buffers[i] = secondary_command_buffer->vk_command_buffer; + } + + vkCmdExecuteCommands(command_buffer->vk_command_buffer, p_secondary_cmd_buffers.size(), secondary_command_buffers.ptr()); } /********************/ @@ -2855,15 +2995,18 @@ RenderingDeviceDriver::SwapChainID RenderingDeviceDriverVulkan::swap_chain_creat pass_info.subpassCount = 1; pass_info.pSubpasses = &subpass; - VkRenderPass render_pass = VK_NULL_HANDLE; - err = _create_render_pass(vk_device, &pass_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS), &render_pass); + VkRenderPass vk_render_pass = VK_NULL_HANDLE; + err = _create_render_pass(vk_device, &pass_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS), &vk_render_pass); ERR_FAIL_COND_V(err != VK_SUCCESS, SwapChainID()); + RenderPassInfo *render_pass_info = VersatileResource::allocate(resources_allocator); + render_pass_info->vk_render_pass = vk_render_pass; + SwapChain *swap_chain = memnew(SwapChain); swap_chain->surface = p_surface; swap_chain->format = format; swap_chain->color_space = color_space; - swap_chain->render_pass = RenderPassID(render_pass); + swap_chain->render_pass = RenderPassID(render_pass_info); return SwapChainID(swap_chain); } @@ -3095,9 +3238,10 @@ Error RenderingDeviceDriverVulkan::swap_chain_resize(CommandQueueID p_cmd_queue, swap_chain->framebuffers.reserve(image_count); + const RenderPassInfo *render_pass = (const RenderPassInfo *)(swap_chain->render_pass.id); VkFramebufferCreateInfo fb_create_info = {}; fb_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - fb_create_info.renderPass = VkRenderPass(swap_chain->render_pass.id); + fb_create_info.renderPass = render_pass->vk_render_pass; fb_create_info.attachmentCount = 1; fb_create_info.width = surface->width; fb_create_info.height = surface->height; @@ -3238,8 +3382,8 @@ void RenderingDeviceDriverVulkan::swap_chain_free(SwapChainID 
p_swap_chain) { SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id); _swap_chain_release(swap_chain); - if (swap_chain->render_pass.id != 0) { - vkDestroyRenderPass(vk_device, VkRenderPass(swap_chain->render_pass.id), VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS)); + if (swap_chain->render_pass) { + render_pass_free(swap_chain->render_pass); } memdelete(swap_chain); @@ -3250,14 +3394,23 @@ void RenderingDeviceDriverVulkan::swap_chain_free(SwapChainID p_swap_chain) { /*********************/ RDD::FramebufferID RenderingDeviceDriverVulkan::framebuffer_create(RenderPassID p_render_pass, VectorView p_attachments, uint32_t p_width, uint32_t p_height) { + RenderPassInfo *render_pass = (RenderPassInfo *)(p_render_pass.id); + + uint32_t fragment_density_map_offsets_layers = 0; VkImageView *vk_img_views = ALLOCA_ARRAY(VkImageView, p_attachments.size()); for (uint32_t i = 0; i < p_attachments.size(); i++) { - vk_img_views[i] = ((const TextureInfo *)p_attachments[i].id)->vk_view; + const TextureInfo *texture = (const TextureInfo *)p_attachments[i].id; + vk_img_views[i] = texture->vk_view; + + if (render_pass->uses_fragment_density_map_offsets && (texture->vk_create_info.usage & VK_IMAGE_USAGE_FRAGMENT_DENSITY_MAP_BIT_EXT)) { + // If the render pass uses the FDM and the usage fits, we store the amount of layers to use it later on the render pass's end. 
+ fragment_density_map_offsets_layers = texture->vk_create_info.arrayLayers; + } } VkFramebufferCreateInfo framebuffer_create_info = {}; framebuffer_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - framebuffer_create_info.renderPass = (VkRenderPass)p_render_pass.id; + framebuffer_create_info.renderPass = render_pass->vk_render_pass; framebuffer_create_info.attachmentCount = p_attachments.size(); framebuffer_create_info.pAttachments = vk_img_views; framebuffer_create_info.width = p_width; @@ -3278,6 +3431,7 @@ RDD::FramebufferID RenderingDeviceDriverVulkan::framebuffer_create(RenderPassID Framebuffer *framebuffer = memnew(Framebuffer); framebuffer->vk_framebuffer = vk_framebuffer; + framebuffer->fragment_density_map_offsets_layers = fragment_density_map_offsets_layers; return FramebufferID(framebuffer); } @@ -4150,14 +4304,16 @@ static void _texture_copy_region_to_vk(const RDD::TextureCopyRegion &p_copy_regi } void RenderingDeviceDriverVulkan::command_clear_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, uint64_t p_offset, uint64_t p_size) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id; - vkCmdFillBuffer((VkCommandBuffer)p_cmd_buffer.id, buf_info->vk_buffer, p_offset, p_size, 0); + vkCmdFillBuffer(command_buffer->vk_command_buffer, buf_info->vk_buffer, p_offset, p_size, 0); } void RenderingDeviceDriverVulkan::command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_dst_buffer, VectorView p_regions) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const BufferInfo *src_buf_info = (const BufferInfo *)p_src_buffer.id; const BufferInfo *dst_buf_info = (const BufferInfo *)p_dst_buffer.id; - vkCmdCopyBuffer((VkCommandBuffer)p_cmd_buffer.id, src_buf_info->vk_buffer, dst_buf_info->vk_buffer, p_regions.size(), (const VkBufferCopy *)p_regions.ptr()); + 
vkCmdCopyBuffer(command_buffer->vk_command_buffer, src_buf_info->vk_buffer, dst_buf_info->vk_buffer, p_regions.size(), (const VkBufferCopy *)p_regions.ptr()); } void RenderingDeviceDriverVulkan::command_copy_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView p_regions) { @@ -4166,12 +4322,14 @@ void RenderingDeviceDriverVulkan::command_copy_texture(CommandBufferID p_cmd_buf _texture_copy_region_to_vk(p_regions[i], &vk_copy_regions[i]); } + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const TextureInfo *src_tex_info = (const TextureInfo *)p_src_texture.id; const TextureInfo *dst_tex_info = (const TextureInfo *)p_dst_texture.id; - vkCmdCopyImage((VkCommandBuffer)p_cmd_buffer.id, src_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], dst_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], p_regions.size(), vk_copy_regions); + vkCmdCopyImage(command_buffer->vk_command_buffer, src_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], dst_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], p_regions.size(), vk_copy_regions); } void RenderingDeviceDriverVulkan::command_resolve_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const TextureInfo *src_tex_info = (const TextureInfo *)p_src_texture.id; const TextureInfo *dst_tex_info = (const TextureInfo *)p_dst_texture.id; @@ -4188,7 +4346,7 @@ void RenderingDeviceDriverVulkan::command_resolve_texture(CommandBufferID p_cmd_ vk_resolve.extent.height = MAX(1u, 
src_tex_info->vk_create_info.extent.height >> p_src_mipmap); vk_resolve.extent.depth = MAX(1u, src_tex_info->vk_create_info.extent.depth >> p_src_mipmap); - vkCmdResolveImage((VkCommandBuffer)p_cmd_buffer.id, src_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], dst_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], 1, &vk_resolve); + vkCmdResolveImage(command_buffer->vk_command_buffer, src_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], dst_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], 1, &vk_resolve); } void RenderingDeviceDriverVulkan::command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) { @@ -4198,8 +4356,9 @@ void RenderingDeviceDriverVulkan::command_clear_color_texture(CommandBufferID p_ VkImageSubresourceRange vk_subresources = {}; _texture_subresource_range_to_vk(p_subresources, &vk_subresources); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const TextureInfo *tex_info = (const TextureInfo *)p_texture.id; - vkCmdClearColorImage((VkCommandBuffer)p_cmd_buffer.id, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_texture_layout], &vk_color, 1, &vk_subresources); + vkCmdClearColorImage(command_buffer->vk_command_buffer, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_texture_layout], &vk_color, 1, &vk_subresources); } void RenderingDeviceDriverVulkan::command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView p_regions) { @@ -4208,9 +4367,10 @@ void RenderingDeviceDriverVulkan::command_copy_buffer_to_texture(CommandBufferID _buffer_texture_copy_region_to_vk(p_regions[i], &vk_copy_regions[i]); } + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; 
const BufferInfo *buf_info = (const BufferInfo *)p_src_buffer.id; const TextureInfo *tex_info = (const TextureInfo *)p_dst_texture.id; - vkCmdCopyBufferToImage((VkCommandBuffer)p_cmd_buffer.id, buf_info->vk_buffer, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], p_regions.size(), vk_copy_regions); + vkCmdCopyBufferToImage(command_buffer->vk_command_buffer, buf_info->vk_buffer, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], p_regions.size(), vk_copy_regions); } void RenderingDeviceDriverVulkan::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView p_regions) { @@ -4219,9 +4379,10 @@ void RenderingDeviceDriverVulkan::command_copy_texture_to_buffer(CommandBufferID _buffer_texture_copy_region_to_vk(p_regions[i], &vk_copy_regions[i]); } + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const TextureInfo *tex_info = (const TextureInfo *)p_src_texture.id; const BufferInfo *buf_info = (const BufferInfo *)p_dst_buffer.id; - vkCmdCopyImageToBuffer((VkCommandBuffer)p_cmd_buffer.id, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], buf_info->vk_buffer, p_regions.size(), vk_copy_regions); + vkCmdCopyImageToBuffer(command_buffer->vk_command_buffer, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], buf_info->vk_buffer, p_regions.size(), vk_copy_regions); } /******************/ @@ -4235,8 +4396,9 @@ void RenderingDeviceDriverVulkan::pipeline_free(PipelineID p_pipeline) { // ----- BINDING ----- void RenderingDeviceDriverVulkan::command_bind_push_constants(CommandBufferID p_cmd_buffer, ShaderID p_shader, uint32_t p_dst_first_index, VectorView p_data) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id; - 
vkCmdPushConstants((VkCommandBuffer)p_cmd_buffer.id, shader_info->vk_pipeline_layout, shader_info->vk_push_constant_stages, p_dst_first_index * sizeof(uint32_t), p_data.size() * sizeof(uint32_t), p_data.ptr()); + vkCmdPushConstants(command_buffer->vk_command_buffer, shader_info->vk_pipeline_layout, shader_info->vk_push_constant_stages, p_dst_first_index * sizeof(uint32_t), p_data.size() * sizeof(uint32_t), p_data.ptr()); } // ----- CACHE ----- @@ -4367,7 +4529,7 @@ static void _attachment_reference_to_vk(const RDD::AttachmentReference &p_attach r_vk_attachment_reference->aspectMask = (VkImageAspectFlags)p_attachment_reference.aspect; } -RDD::RenderPassID RenderingDeviceDriverVulkan::render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count) { +RDD::RenderPassID RenderingDeviceDriverVulkan::render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) { // These are only used if we use multiview but we need to define them in scope. 
const uint32_t view_mask = (1 << p_view_count) - 1; const uint32_t correlation_mask = (1 << p_view_count) - 1; @@ -4422,22 +4584,22 @@ RDD::RenderPassID RenderingDeviceDriverVulkan::render_pass_create(VectorViewsType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR; - vk_subpass_vrs_attachment->attachment = p_subpasses[i].vrs_reference.attachment; - vk_subpass_vrs_attachment->layout = VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR; - - VkFragmentShadingRateAttachmentInfoKHR *vk_vrs_info = ALLOCA_SINGLE(VkFragmentShadingRateAttachmentInfoKHR); - *vk_vrs_info = {}; - vk_vrs_info->sType = VK_STRUCTURE_TYPE_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR; - vk_vrs_info->pFragmentShadingRateAttachment = vk_subpass_vrs_attachment; - vk_vrs_info->shadingRateAttachmentTexelSize.width = vrs_capabilities.texel_size.x; - vk_vrs_info->shadingRateAttachmentTexelSize.height = vrs_capabilities.texel_size.y; - - vk_subpasses[i].pNext = vk_vrs_info; + // Fragment shading rate. + if (fsr_capabilities.attachment_supported && p_subpasses[i].fragment_shading_rate_reference.attachment != AttachmentReference::UNUSED) { + VkAttachmentReference2KHR *vk_subpass_fsr_attachment = ALLOCA_SINGLE(VkAttachmentReference2KHR); + *vk_subpass_fsr_attachment = {}; + vk_subpass_fsr_attachment->sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR; + vk_subpass_fsr_attachment->attachment = p_subpasses[i].fragment_shading_rate_reference.attachment; + vk_subpass_fsr_attachment->layout = VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR; + + VkFragmentShadingRateAttachmentInfoKHR *vk_fsr_info = ALLOCA_SINGLE(VkFragmentShadingRateAttachmentInfoKHR); + *vk_fsr_info = {}; + vk_fsr_info->sType = VK_STRUCTURE_TYPE_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR; + vk_fsr_info->pFragmentShadingRateAttachment = vk_subpass_fsr_attachment; + vk_fsr_info->shadingRateAttachmentTexelSize.width = p_subpasses[i].fragment_shading_rate_texel_size.x; + vk_fsr_info->shadingRateAttachmentTexelSize.height = 
p_subpasses[i].fragment_shading_rate_texel_size.y; + + vk_subpasses[i].pNext = vk_fsr_info; } } @@ -4486,15 +4648,30 @@ RDD::RenderPassID RenderingDeviceDriverVulkan::render_pass_create(VectorViewsType = VK_STRUCTURE_TYPE_RENDER_PASS_FRAGMENT_DENSITY_MAP_CREATE_INFO_EXT; + vk_fdm_info->fragmentDensityMapAttachment.attachment = p_fragment_density_map_attachment.attachment; + vk_fdm_info->fragmentDensityMapAttachment.layout = RD_TO_VK_LAYOUT[p_fragment_density_map_attachment.layout]; + vk_fdm_info->pNext = create_info.pNext; + create_info.pNext = vk_fdm_info; + } + VkRenderPass vk_render_pass = VK_NULL_HANDLE; VkResult res = _create_render_pass(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS), &vk_render_pass); ERR_FAIL_COND_V_MSG(res, RenderPassID(), "vkCreateRenderPass2KHR failed with error " + itos(res) + "."); - return RenderPassID(vk_render_pass); + RenderPassInfo *render_pass = VersatileResource::allocate(resources_allocator); + render_pass->vk_render_pass = vk_render_pass; + return RenderPassID(render_pass); } void RenderingDeviceDriverVulkan::render_pass_free(RenderPassID p_render_pass) { - vkDestroyRenderPass(vk_device, (VkRenderPass)p_render_pass.id, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS)); + RenderPassInfo *render_pass = (RenderPassInfo *)(p_render_pass.id); + vkDestroyRenderPass(vk_device, render_pass->vk_render_pass, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS)); + VersatileResource::free(resources_allocator, render_pass); } // ----- COMMANDS ----- @@ -4502,7 +4679,10 @@ void RenderingDeviceDriverVulkan::render_pass_free(RenderPassID p_render_pass) { static_assert(ARRAYS_COMPATIBLE_FIELDWISE(RDD::RenderPassClearValue, VkClearValue)); void RenderingDeviceDriverVulkan::command_begin_render_pass(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, FramebufferID p_framebuffer, CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView p_clear_values) { + CommandBufferInfo 
*command_buffer = (CommandBufferInfo *)(p_cmd_buffer.id); + RenderPassInfo *render_pass = (RenderPassInfo *)(p_render_pass.id); Framebuffer *framebuffer = (Framebuffer *)(p_framebuffer.id); + if (framebuffer->swap_chain_acquired) { // Insert a barrier to wait for the acquisition of the framebuffer before the render pass begins. VkImageMemoryBarrier image_barrier = {}; @@ -4513,13 +4693,13 @@ void RenderingDeviceDriverVulkan::command_begin_render_pass(CommandBufferID p_cm image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_barrier.image = framebuffer->swap_chain_image; image_barrier.subresourceRange = framebuffer->swap_chain_image_subresource_range; - vkCmdPipelineBarrier((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_barrier); + vkCmdPipelineBarrier(command_buffer->vk_command_buffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_barrier); framebuffer->swap_chain_acquired = false; } VkRenderPassBeginInfo render_pass_begin = {}; render_pass_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - render_pass_begin.renderPass = (VkRenderPass)p_render_pass.id; + render_pass_begin.renderPass = render_pass->vk_render_pass; render_pass_begin.framebuffer = framebuffer->vk_framebuffer; render_pass_begin.renderArea.offset.x = p_rect.position.x; @@ -4531,7 +4711,10 @@ void RenderingDeviceDriverVulkan::command_begin_render_pass(CommandBufferID p_cm render_pass_begin.pClearValues = (const VkClearValue *)p_clear_values.ptr(); VkSubpassContents vk_subpass_contents = p_cmd_buffer_type == COMMAND_BUFFER_TYPE_PRIMARY ? 
VK_SUBPASS_CONTENTS_INLINE : VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS; - vkCmdBeginRenderPass((VkCommandBuffer)p_cmd_buffer.id, &render_pass_begin, vk_subpass_contents); + vkCmdBeginRenderPass(command_buffer->vk_command_buffer, &render_pass_begin, vk_subpass_contents); + + command_buffer->active_framebuffer = framebuffer; + command_buffer->active_render_pass = render_pass; #if PRINT_NATIVE_COMMANDS print_line(vformat("vkCmdBeginRenderPass Pass 0x%uX Framebuffer 0x%uX", p_render_pass.id, p_framebuffer.id)); @@ -4539,7 +4722,34 @@ void RenderingDeviceDriverVulkan::command_begin_render_pass(CommandBufferID p_cm } void RenderingDeviceDriverVulkan::command_end_render_pass(CommandBufferID p_cmd_buffer) { - vkCmdEndRenderPass((VkCommandBuffer)p_cmd_buffer.id); + CommandBufferInfo *command_buffer = (CommandBufferInfo *)(p_cmd_buffer.id); + DEV_ASSERT(command_buffer->active_framebuffer != nullptr && "A framebuffer must be active."); + DEV_ASSERT(command_buffer->active_render_pass != nullptr && "A render pass must be active."); + + if (device_functions.EndRenderPass2KHR != nullptr && fdm_capabilities.offset_supported && command_buffer->active_render_pass->uses_fragment_density_map_offsets) { + // FIXME: This is the framework for using the offsets extension if necessary at some point. Passing the actual offset + // values has not been implemented yet and this branch is currently unused. 
+ thread_local LocalVector fragment_density_offsets; + while (fragment_density_offsets.size() < command_buffer->active_framebuffer->fragment_density_map_offsets_layers) { + fragment_density_offsets.push_back({ 0, 0 }); + } + + VkSubpassFragmentDensityMapOffsetEndInfoQCOM offset_info = {}; + offset_info.sType = VK_STRUCTURE_TYPE_SUBPASS_FRAGMENT_DENSITY_MAP_OFFSET_END_INFO_QCOM; + offset_info.pFragmentDensityOffsets = fragment_density_offsets.ptr(); + offset_info.fragmentDensityOffsetCount = fragment_density_offsets.size(); + + VkSubpassEndInfo subpass_end_info = {}; + subpass_end_info.sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO; + subpass_end_info.pNext = &offset_info; + + device_functions.EndRenderPass2KHR(command_buffer->vk_command_buffer, &subpass_end_info); + } else { + vkCmdEndRenderPass(command_buffer->vk_command_buffer); + } + + command_buffer->active_render_pass = nullptr; + command_buffer->active_framebuffer = nullptr; #if PRINT_NATIVE_COMMANDS print_line("vkCmdEndRenderPass"); @@ -4547,11 +4757,13 @@ void RenderingDeviceDriverVulkan::command_end_render_pass(CommandBufferID p_cmd_ } void RenderingDeviceDriverVulkan::command_next_render_subpass(CommandBufferID p_cmd_buffer, CommandBufferType p_cmd_buffer_type) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; VkSubpassContents vk_subpass_contents = p_cmd_buffer_type == COMMAND_BUFFER_TYPE_PRIMARY ? 
VK_SUBPASS_CONTENTS_INLINE : VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS; - vkCmdNextSubpass((VkCommandBuffer)p_cmd_buffer.id, vk_subpass_contents); + vkCmdNextSubpass(command_buffer->vk_command_buffer, vk_subpass_contents); } void RenderingDeviceDriverVulkan::command_render_set_viewport(CommandBufferID p_cmd_buffer, VectorView p_viewports) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; VkViewport *vk_viewports = ALLOCA_ARRAY(VkViewport, p_viewports.size()); for (uint32_t i = 0; i < p_viewports.size(); i++) { vk_viewports[i] = {}; @@ -4562,14 +4774,17 @@ void RenderingDeviceDriverVulkan::command_render_set_viewport(CommandBufferID p_ vk_viewports[i].minDepth = 0.0f; vk_viewports[i].maxDepth = 1.0f; } - vkCmdSetViewport((VkCommandBuffer)p_cmd_buffer.id, 0, p_viewports.size(), vk_viewports); + vkCmdSetViewport(command_buffer->vk_command_buffer, 0, p_viewports.size(), vk_viewports); } void RenderingDeviceDriverVulkan::command_render_set_scissor(CommandBufferID p_cmd_buffer, VectorView p_scissors) { - vkCmdSetScissor((VkCommandBuffer)p_cmd_buffer.id, 0, p_scissors.size(), (VkRect2D *)p_scissors.ptr()); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + vkCmdSetScissor(command_buffer->vk_command_buffer, 0, p_scissors.size(), (VkRect2D *)p_scissors.ptr()); } void RenderingDeviceDriverVulkan::command_render_clear_attachments(CommandBufferID p_cmd_buffer, VectorView p_attachment_clears, VectorView p_rects) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + VkClearAttachment *vk_clears = ALLOCA_ARRAY(VkClearAttachment, p_attachment_clears.size()); for (uint32_t i = 0; i < p_attachment_clears.size(); i++) { vk_clears[i] = {}; @@ -4589,68 +4804,81 @@ void RenderingDeviceDriverVulkan::command_render_clear_attachments(CommandBuffer vk_rects[i].layerCount = 1; } - vkCmdClearAttachments((VkCommandBuffer)p_cmd_buffer.id, p_attachment_clears.size(), 
vk_clears, p_rects.size(), vk_rects); + vkCmdClearAttachments(command_buffer->vk_command_buffer, p_attachment_clears.size(), vk_clears, p_rects.size(), vk_rects); } void RenderingDeviceDriverVulkan::command_bind_render_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) { - vkCmdBindPipeline((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_BIND_POINT_GRAPHICS, (VkPipeline)p_pipeline.id); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + vkCmdBindPipeline(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, (VkPipeline)p_pipeline.id); } void RenderingDeviceDriverVulkan::command_bind_render_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id; const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_set.id; - vkCmdBindDescriptorSets((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_BIND_POINT_GRAPHICS, shader_info->vk_pipeline_layout, p_set_index, 1, &usi->vk_descriptor_set, 0, nullptr); + vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, shader_info->vk_pipeline_layout, p_set_index, 1, &usi->vk_descriptor_set, 0, nullptr); } void RenderingDeviceDriverVulkan::command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) { - vkCmdDraw((VkCommandBuffer)p_cmd_buffer.id, p_vertex_count, p_instance_count, p_base_vertex, p_first_instance); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + vkCmdDraw(command_buffer->vk_command_buffer, p_vertex_count, p_instance_count, p_base_vertex, p_first_instance); } void RenderingDeviceDriverVulkan::command_render_draw_indexed(CommandBufferID p_cmd_buffer, uint32_t p_index_count, uint32_t 
p_instance_count, uint32_t p_first_index, int32_t p_vertex_offset, uint32_t p_first_instance) { - vkCmdDrawIndexed((VkCommandBuffer)p_cmd_buffer.id, p_index_count, p_instance_count, p_first_index, p_vertex_offset, p_first_instance); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + vkCmdDrawIndexed(command_buffer->vk_command_buffer, p_index_count, p_instance_count, p_first_index, p_vertex_offset, p_first_instance); } void RenderingDeviceDriverVulkan::command_render_draw_indexed_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const BufferInfo *buf_info = (const BufferInfo *)p_indirect_buffer.id; - vkCmdDrawIndexedIndirect((VkCommandBuffer)p_cmd_buffer.id, buf_info->vk_buffer, p_offset, p_draw_count, p_stride); + vkCmdDrawIndexedIndirect(command_buffer->vk_command_buffer, buf_info->vk_buffer, p_offset, p_draw_count, p_stride); } void RenderingDeviceDriverVulkan::command_render_draw_indexed_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const BufferInfo *indirect_buf_info = (const BufferInfo *)p_indirect_buffer.id; const BufferInfo *count_buf_info = (const BufferInfo *)p_count_buffer.id; - vkCmdDrawIndexedIndirectCount((VkCommandBuffer)p_cmd_buffer.id, indirect_buf_info->vk_buffer, p_offset, count_buf_info->vk_buffer, p_count_buffer_offset, p_max_draw_count, p_stride); + vkCmdDrawIndexedIndirectCount(command_buffer->vk_command_buffer, indirect_buf_info->vk_buffer, p_offset, count_buf_info->vk_buffer, p_count_buffer_offset, p_max_draw_count, p_stride); } void 
RenderingDeviceDriverVulkan::command_render_draw_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const BufferInfo *buf_info = (const BufferInfo *)p_indirect_buffer.id; - vkCmdDrawIndirect((VkCommandBuffer)p_cmd_buffer.id, buf_info->vk_buffer, p_offset, p_draw_count, p_stride); + vkCmdDrawIndirect(command_buffer->vk_command_buffer, buf_info->vk_buffer, p_offset, p_draw_count, p_stride); } void RenderingDeviceDriverVulkan::command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const BufferInfo *indirect_buf_info = (const BufferInfo *)p_indirect_buffer.id; const BufferInfo *count_buf_info = (const BufferInfo *)p_count_buffer.id; - vkCmdDrawIndirectCount((VkCommandBuffer)p_cmd_buffer.id, indirect_buf_info->vk_buffer, p_offset, count_buf_info->vk_buffer, p_count_buffer_offset, p_max_draw_count, p_stride); + vkCmdDrawIndirectCount(command_buffer->vk_command_buffer, indirect_buf_info->vk_buffer, p_offset, count_buf_info->vk_buffer, p_count_buffer_offset, p_max_draw_count, p_stride); } void RenderingDeviceDriverVulkan::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + VkBuffer *vk_buffers = ALLOCA_ARRAY(VkBuffer, p_binding_count); for (uint32_t i = 0; i < p_binding_count; i++) { vk_buffers[i] = ((const BufferInfo *)p_buffers[i].id)->vk_buffer; } - vkCmdBindVertexBuffers((VkCommandBuffer)p_cmd_buffer.id, 0, p_binding_count, vk_buffers, p_offsets); + 
vkCmdBindVertexBuffers(command_buffer->vk_command_buffer, 0, p_binding_count, vk_buffers, p_offsets); } void RenderingDeviceDriverVulkan::command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id; - vkCmdBindIndexBuffer((VkCommandBuffer)p_cmd_buffer.id, buf_info->vk_buffer, p_offset, p_format == INDEX_BUFFER_FORMAT_UINT16 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32); + vkCmdBindIndexBuffer(command_buffer->vk_command_buffer, buf_info->vk_buffer, p_offset, p_format == INDEX_BUFFER_FORMAT_UINT16 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32); } void RenderingDeviceDriverVulkan::command_render_set_blend_constants(CommandBufferID p_cmd_buffer, const Color &p_constants) { - vkCmdSetBlendConstants((VkCommandBuffer)p_cmd_buffer.id, p_constants.components); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + vkCmdSetBlendConstants(command_buffer->vk_command_buffer, p_constants.components); } void RenderingDeviceDriverVulkan::command_render_set_line_width(CommandBufferID p_cmd_buffer, float p_width) { - vkCmdSetLineWidth((VkCommandBuffer)p_cmd_buffer.id, p_width); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + vkCmdSetLineWidth(command_buffer->vk_command_buffer, p_width); } // ----- PIPELINE ----- @@ -4922,23 +5150,22 @@ RDD::PipelineID RenderingDeviceDriverVulkan::render_pipeline_create( dynamic_state_create_info.dynamicStateCount = vk_dynamic_states_count; dynamic_state_create_info.pDynamicStates = vk_dynamic_states; - // VRS. - void *graphics_pipeline_nextptr = nullptr; - if (vrs_capabilities.attachment_vrs_supported) { - // If VRS is used, this defines how the different VRS types are combined. 
- // combinerOps[0] decides how we use the output of pipeline and primitive (drawcall) VRS. - // combinerOps[1] decides how we use the output of combinerOps[0] and our attachment VRS. + if (fsr_capabilities.attachment_supported) { + // Fragment shading rate. + // If FSR is used, this defines how the different FSR types are combined. + // combinerOps[0] decides how we use the output of pipeline and primitive (drawcall) FSR. + // combinerOps[1] decides how we use the output of combinerOps[0] and our attachment FSR. - VkPipelineFragmentShadingRateStateCreateInfoKHR *vrs_create_info = ALLOCA_SINGLE(VkPipelineFragmentShadingRateStateCreateInfoKHR); - *vrs_create_info = {}; - vrs_create_info->sType = VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR; - vrs_create_info->fragmentSize = { 4, 4 }; - vrs_create_info->combinerOps[0] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; // We don't use pipeline/primitive VRS so this really doesn't matter. - vrs_create_info->combinerOps[1] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR; // Always use the outcome of attachment VRS if enabled. + VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_create_info = ALLOCA_SINGLE(VkPipelineFragmentShadingRateStateCreateInfoKHR); + *fsr_create_info = {}; + fsr_create_info->sType = VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR; + fsr_create_info->fragmentSize = { 4, 4 }; + fsr_create_info->combinerOps[0] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; // We don't use pipeline/primitive FSR so this really doesn't matter. + fsr_create_info->combinerOps[1] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR; // Always use the outcome of attachment FSR if enabled. - graphics_pipeline_nextptr = vrs_create_info; + graphics_pipeline_nextptr = fsr_create_info; } // Finally, pipeline create info. 
@@ -4978,6 +5205,7 @@ RDD::PipelineID RenderingDeviceDriverVulkan::render_pipeline_create( } } + const RenderPassInfo *render_pass = (const RenderPassInfo *)(p_render_pass.id); pipeline_create_info.pStages = vk_pipeline_stages; pipeline_create_info.pVertexInputState = vertex_input_state_create_info; pipeline_create_info.pInputAssemblyState = &input_assembly_create_info; @@ -4989,7 +5217,7 @@ RDD::PipelineID RenderingDeviceDriverVulkan::render_pipeline_create( pipeline_create_info.pColorBlendState = &color_blend_state_create_info; pipeline_create_info.pDynamicState = &dynamic_state_create_info; pipeline_create_info.layout = shader_info->vk_pipeline_layout; - pipeline_create_info.renderPass = (VkRenderPass)p_render_pass.id; + pipeline_create_info.renderPass = render_pass->vk_render_pass; pipeline_create_info.subpass = p_render_subpass; // --- @@ -5008,22 +5236,26 @@ RDD::PipelineID RenderingDeviceDriverVulkan::render_pipeline_create( // ----- COMMANDS ----- void RenderingDeviceDriverVulkan::command_bind_compute_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) { - vkCmdBindPipeline((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_BIND_POINT_COMPUTE, (VkPipeline)p_pipeline.id); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + vkCmdBindPipeline(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, (VkPipeline)p_pipeline.id); } void RenderingDeviceDriverVulkan::command_bind_compute_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id; const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_set.id; - vkCmdBindDescriptorSets((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_BIND_POINT_COMPUTE, shader_info->vk_pipeline_layout, p_set_index, 1, &usi->vk_descriptor_set, 0, nullptr); + 
vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, shader_info->vk_pipeline_layout, p_set_index, 1, &usi->vk_descriptor_set, 0, nullptr); } void RenderingDeviceDriverVulkan::command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) { - vkCmdDispatch((VkCommandBuffer)p_cmd_buffer.id, p_x_groups, p_y_groups, p_z_groups); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + vkCmdDispatch(command_buffer->vk_command_buffer, p_x_groups, p_y_groups, p_z_groups); } void RenderingDeviceDriverVulkan::command_compute_dispatch_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const BufferInfo *buf_info = (const BufferInfo *)p_indirect_buffer.id; - vkCmdDispatchIndirect((VkCommandBuffer)p_cmd_buffer.id, buf_info->vk_buffer, p_offset); + vkCmdDispatchIndirect(command_buffer->vk_command_buffer, buf_info->vk_buffer, p_offset); } // ----- PIPELINE ----- @@ -5122,11 +5354,13 @@ uint64_t RenderingDeviceDriverVulkan::timestamp_query_result_to_time(uint64_t p_ } void RenderingDeviceDriverVulkan::command_timestamp_query_pool_reset(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_query_count) { - vkCmdResetQueryPool((VkCommandBuffer)p_cmd_buffer.id, (VkQueryPool)p_pool_id.id, 0, p_query_count); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; + vkCmdResetQueryPool(command_buffer->vk_command_buffer, (VkQueryPool)p_pool_id.id, 0, p_query_count); } void RenderingDeviceDriverVulkan::command_timestamp_write(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_index) { - vkCmdWriteTimestamp((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, (VkQueryPool)p_pool_id.id, p_index); + const CommandBufferInfo *command_buffer = (const CommandBufferInfo 
*)p_cmd_buffer.id; + vkCmdWriteTimestamp(command_buffer->vk_command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, (VkQueryPool)p_pool_id.id, p_index); } /****************/ @@ -5134,6 +5368,7 @@ void RenderingDeviceDriverVulkan::command_timestamp_write(CommandBufferID p_cmd_ /****************/ void RenderingDeviceDriverVulkan::command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get(); if (!functions.CmdBeginDebugUtilsLabelEXT) { if (functions.CmdDebugMarkerBeginEXT) { @@ -5146,7 +5381,7 @@ void RenderingDeviceDriverVulkan::command_begin_label(CommandBufferID p_cmd_buff marker.color[1] = p_color[1]; marker.color[2] = p_color[2]; marker.color[3] = p_color[3]; - functions.CmdDebugMarkerBeginEXT((VkCommandBuffer)p_cmd_buffer.id, &marker); + functions.CmdDebugMarkerBeginEXT(command_buffer->vk_command_buffer, &marker); } return; } @@ -5158,19 +5393,20 @@ void RenderingDeviceDriverVulkan::command_begin_label(CommandBufferID p_cmd_buff label.color[1] = p_color[1]; label.color[2] = p_color[2]; label.color[3] = p_color[3]; - functions.CmdBeginDebugUtilsLabelEXT((VkCommandBuffer)p_cmd_buffer.id, &label); + functions.CmdBeginDebugUtilsLabelEXT(command_buffer->vk_command_buffer, &label); } void RenderingDeviceDriverVulkan::command_end_label(CommandBufferID p_cmd_buffer) { + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get(); if (!functions.CmdEndDebugUtilsLabelEXT) { if (functions.CmdDebugMarkerEndEXT) { // Debug marker extensions. 
- functions.CmdDebugMarkerEndEXT((VkCommandBuffer)p_cmd_buffer.id); + functions.CmdDebugMarkerEndEXT(command_buffer->vk_command_buffer); } return; } - functions.CmdEndDebugUtilsLabelEXT((VkCommandBuffer)p_cmd_buffer.id); + functions.CmdEndDebugUtilsLabelEXT(command_buffer->vk_command_buffer); } /****************/ @@ -5182,6 +5418,7 @@ void RenderingDeviceDriverVulkan::command_insert_breadcrumb(CommandBufferID p_cm return; } + const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; if (Engine::get_singleton()->is_accurate_breadcrumbs_enabled()) { // Force a full barrier so commands are not executed in parallel. // This will mean that the last breadcrumb to see was actually the @@ -5221,7 +5458,7 @@ void RenderingDeviceDriverVulkan::command_insert_breadcrumb(CommandBufferID p_cm VK_ACCESS_HOST_WRITE_BIT; vkCmdPipelineBarrier( - (VkCommandBuffer)p_cmd_buffer.id, + command_buffer->vk_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1u, &memoryBarrier, 0u, nullptr, 0u, nullptr); @@ -5229,8 +5466,8 @@ void RenderingDeviceDriverVulkan::command_insert_breadcrumb(CommandBufferID p_cm // We write to a circular buffer. If you're getting barrier sync errors here, // increase the value of BREADCRUMB_BUFFER_ENTRIES. 
- vkCmdFillBuffer((VkCommandBuffer)p_cmd_buffer.id, ((BufferInfo *)breadcrumb_buffer.id)->vk_buffer, breadcrumb_offset, sizeof(uint32_t), breadcrumb_id++); - vkCmdFillBuffer((VkCommandBuffer)p_cmd_buffer.id, ((BufferInfo *)breadcrumb_buffer.id)->vk_buffer, breadcrumb_offset + sizeof(uint32_t), sizeof(uint32_t), p_data); + vkCmdFillBuffer(command_buffer->vk_command_buffer, ((BufferInfo *)breadcrumb_buffer.id)->vk_buffer, breadcrumb_offset, sizeof(uint32_t), breadcrumb_id++); + vkCmdFillBuffer(command_buffer->vk_command_buffer, ((BufferInfo *)breadcrumb_buffer.id)->vk_buffer, breadcrumb_offset + sizeof(uint32_t), sizeof(uint32_t), p_data); breadcrumb_offset += sizeof(uint32_t) * 2u; if (breadcrumb_offset >= BREADCRUMB_BUFFER_ENTRIES * sizeof(uint32_t) * 2u) { breadcrumb_offset = 0u; @@ -5642,14 +5879,6 @@ uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) { return subgroup_capabilities.supported_stages_flags_rd(); case LIMIT_SUBGROUP_OPERATIONS: return subgroup_capabilities.supported_operations_flags_rd(); - case LIMIT_VRS_TEXEL_WIDTH: - return vrs_capabilities.texel_size.x; - case LIMIT_VRS_TEXEL_HEIGHT: - return vrs_capabilities.texel_size.y; - case LIMIT_VRS_MAX_FRAGMENT_WIDTH: - return vrs_capabilities.max_fragment_size.x; - case LIMIT_VRS_MAX_FRAGMENT_HEIGHT: - return vrs_capabilities.max_fragment_size.y; default: ERR_FAIL_V(0); } @@ -5668,12 +5897,8 @@ uint64_t RenderingDeviceDriverVulkan::api_trait_get(ApiTrait p_trait) { bool RenderingDeviceDriverVulkan::has_feature(Features p_feature) { switch (p_feature) { - case SUPPORTS_MULTIVIEW: - return multiview_capabilities.is_supported && multiview_capabilities.max_view_count > 1; case SUPPORTS_FSR_HALF_FLOAT: return shader_capabilities.shader_float16_is_supported && physical_device_features.shaderInt16 && storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported; - case SUPPORTS_ATTACHMENT_VRS: - return vrs_capabilities.attachment_vrs_supported && 
physical_device_features.shaderStorageImageExtendedFormats; case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: return true; default: @@ -5685,6 +5910,14 @@ const RDD::MultiviewCapabilities &RenderingDeviceDriverVulkan::get_multiview_cap return multiview_capabilities; } +const RDD::FragmentShadingRateCapabilities &RenderingDeviceDriverVulkan::get_fragment_shading_rate_capabilities() { + return fsr_capabilities; +} + +const RDD::FragmentDensityMapCapabilities &RenderingDeviceDriverVulkan::get_fragment_density_map_capabilities() { + return fdm_capabilities; +} + String RenderingDeviceDriverVulkan::get_api_name() const { return "Vulkan"; } diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h index 06cd2a31be6e..1865e00f7dcb 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.h +++ b/drivers/vulkan/rendering_device_driver_vulkan.h @@ -54,6 +54,9 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { struct CommandQueue; struct SwapChain; + struct CommandBufferInfo; + struct RenderPassInfo; + struct Framebuffer; struct Queue { VkQueue queue = VK_NULL_HANDLE; @@ -76,18 +79,6 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { String supported_operations_desc() const; }; - struct VRSCapabilities { - bool pipeline_vrs_supported = false; // We can specify our fragment rate on a pipeline level. - bool primitive_vrs_supported = false; // We can specify our fragment rate on each drawcall. - bool attachment_vrs_supported = false; // We can provide a density map attachment on our framebuffer. 
- - Size2i min_texel_size; - Size2i max_texel_size; - Size2i max_fragment_size; - - Size2i texel_size; // The texel size we'll use - }; - struct ShaderCapabilities { bool shader_float16_is_supported = false; bool shader_int8_is_supported = false; @@ -107,6 +98,7 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { PFN_vkAcquireNextImageKHR AcquireNextImageKHR = nullptr; PFN_vkQueuePresentKHR QueuePresentKHR = nullptr; PFN_vkCreateRenderPass2KHR CreateRenderPass2KHR = nullptr; + PFN_vkCmdEndRenderPass2KHR EndRenderPass2KHR = nullptr; // Debug marker extensions. PFN_vkCmdDebugMarkerBeginEXT CmdDebugMarkerBeginEXT = nullptr; @@ -135,7 +127,8 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { RDD::Capabilities device_capabilities; SubgroupCapabilities subgroup_capabilities; MultiviewCapabilities multiview_capabilities; - VRSCapabilities vrs_capabilities; + FragmentShadingRateCapabilities fsr_capabilities; + FragmentDensityMapCapabilities fdm_capabilities; ShaderCapabilities shader_capabilities; StorageBufferCapabilities storage_buffer_capabilities; bool pipeline_cache_control_support = false; @@ -154,6 +147,7 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { Error _initialize_device_extensions(); Error _check_device_features(); Error _check_device_capabilities(); + void _choose_vrs_capabilities(); Error _add_queue_create_info(LocalVector &r_queue_create_info); Error _initialize_device(const LocalVector &p_queue_create_info); Error _initialize_allocator(); @@ -329,14 +323,23 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { struct CommandPool { VkCommandPool vk_command_pool = VK_NULL_HANDLE; CommandBufferType buffer_type = COMMAND_BUFFER_TYPE_PRIMARY; + LocalVector command_buffers_created; }; public: virtual CommandPoolID command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) override final; virtual void command_pool_free(CommandPoolID p_cmd_pool) 
override final; +private: // ----- BUFFER ----- + struct CommandBufferInfo { + VkCommandBuffer vk_command_buffer = VK_NULL_HANDLE; + Framebuffer *active_framebuffer = nullptr; + RenderPassInfo *active_render_pass = nullptr; + }; + +public: virtual CommandBufferID command_buffer_create(CommandPoolID p_cmd_pool) override final; virtual bool command_buffer_begin(CommandBufferID p_cmd_buffer) override final; virtual bool command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) override final; @@ -378,6 +381,7 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { virtual void swap_chain_set_max_fps(SwapChainID p_swap_chain, int p_max_fps) override final; virtual void swap_chain_free(SwapChainID p_swap_chain) override final; +private: /*********************/ /**** FRAMEBUFFER ****/ /*********************/ @@ -385,12 +389,16 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { struct Framebuffer { VkFramebuffer vk_framebuffer = VK_NULL_HANDLE; + // Only filled in if the framebuffer uses a fragment density map with offsets. Unused otherwise. + uint32_t fragment_density_map_offsets_layers = 0; + // Only filled in by a framebuffer created by a swap chain. Unused otherwise. 
VkImage swap_chain_image = VK_NULL_HANDLE; VkImageSubresourceRange swap_chain_image_subresource_range = {}; bool swap_chain_acquired = false; }; +public: virtual FramebufferID framebuffer_create(RenderPassID p_render_pass, VectorView p_attachments, uint32_t p_width, uint32_t p_height) override final; virtual void framebuffer_free(FramebufferID p_framebuffer) override final; @@ -558,9 +566,16 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { /**** RENDERING ****/ /*******************/ +private: // ----- SUBPASS ----- - virtual RenderPassID render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count) override final; + struct RenderPassInfo { + VkRenderPass vk_render_pass = VK_NULL_HANDLE; + bool uses_fragment_density_map_offsets = false; + }; + +public: + virtual RenderPassID render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) override final; virtual void render_pass_free(RenderPassID p_render_pass) override final; // ----- COMMANDS ----- @@ -676,6 +691,8 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { virtual uint64_t api_trait_get(ApiTrait p_trait) override final; virtual bool has_feature(Features p_feature) override final; virtual const MultiviewCapabilities &get_multiview_capabilities() override final; + virtual const FragmentShadingRateCapabilities &get_fragment_shading_rate_capabilities() override final; + virtual const FragmentDensityMapCapabilities &get_fragment_density_map_capabilities() override final; virtual String get_api_name() const override final; virtual String get_api_version() const override final; virtual String get_pipeline_cache_uuid() const override final; @@ -693,7 +710,9 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { TextureInfo, VertexFormatInfo, ShaderInfo, - UniformSetInfo>; + 
UniformSetInfo, + RenderPassInfo, + CommandBufferInfo>; PagedAllocator resources_allocator; /******************/ diff --git a/servers/rendering/renderer_rd/effects/vrs.cpp b/servers/rendering/renderer_rd/effects/vrs.cpp index 9cc22f6f5efc..cbeefdd4d189 100644 --- a/servers/rendering/renderer_rd/effects/vrs.cpp +++ b/servers/rendering/renderer_rd/effects/vrs.cpp @@ -44,6 +44,8 @@ VRS::VRS() { Vector vrs_modes; vrs_modes.push_back("\n"); // VRS_DEFAULT vrs_modes.push_back("\n#define USE_MULTIVIEW\n"); // VRS_MULTIVIEW + vrs_modes.push_back("\n#define SPLIT_RG\n"); // VRS_RG + vrs_modes.push_back("\n#define SPLIT_RG\n#define USE_MULTIVIEW\n"); // VRS_RG_MULTIVIEW vrs_shader.shader.initialize(vrs_modes); @@ -80,14 +82,16 @@ void VRS::copy_vrs(RID p_source_rd_texture, RID p_dest_framebuffer, bool p_multi RD::Uniform u_source_rd_texture(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_source_rd_texture })); + int mode = 0; VRSPushConstant push_constant = {}; - - int mode = p_multiview ? VRS_MULTIVIEW : VRS_DEFAULT; - - // Set maximum texel factor based on maximum fragment size, some GPUs do not support 8x8 (fragment shading rate approach). - if (MIN(RD::get_singleton()->limit_get(RD::LIMIT_VRS_MAX_FRAGMENT_WIDTH), RD::get_singleton()->limit_get(RD::LIMIT_VRS_MAX_FRAGMENT_HEIGHT)) > 4) { - push_constant.max_texel_factor = 3.0; + bool uses_rg_format = RD::get_singleton()->vrs_get_format() == RD::DATA_FORMAT_R8G8_UNORM; + if (uses_rg_format) { + mode = p_multiview ? VRS_RG_MULTIVIEW : VRS_RG; } else { + mode = p_multiview ? VRS_MULTIVIEW : VRS_DEFAULT; + + // Default to 4x4 as it's not possible to query the max fragment size from RenderingDevice. This can be improved to use the largest size + // available if this code is moved over to RenderingDevice at some point. 
push_constant.max_texel_factor = 2.0; } @@ -103,18 +107,8 @@ void VRS::copy_vrs(RID p_source_rd_texture, RID p_dest_framebuffer, bool p_multi } Size2i VRS::get_vrs_texture_size(const Size2i p_base_size) const { - int32_t texel_width = RD::get_singleton()->limit_get(RD::LIMIT_VRS_TEXEL_WIDTH); - int32_t texel_height = RD::get_singleton()->limit_get(RD::LIMIT_VRS_TEXEL_HEIGHT); - - int width = p_base_size.x / texel_width; - if (p_base_size.x % texel_width != 0) { - width++; - } - int height = p_base_size.y / texel_height; - if (p_base_size.y % texel_height != 0) { - height++; - } - return Size2i(width, height); + Size2i vrs_texel_size = RD::get_singleton()->vrs_get_texel_size(); + return Size2i((p_base_size.x + vrs_texel_size.x - 1) / vrs_texel_size.x, (p_base_size.y + vrs_texel_size.y - 1) / vrs_texel_size.y); } void VRS::update_vrs_texture(RID p_vrs_fb, RID p_render_target) { diff --git a/servers/rendering/renderer_rd/effects/vrs.h b/servers/rendering/renderer_rd/effects/vrs.h index 94878e46615f..be28ef59de5e 100644 --- a/servers/rendering/renderer_rd/effects/vrs.h +++ b/servers/rendering/renderer_rd/effects/vrs.h @@ -44,6 +44,8 @@ class VRS { enum VRSMode { VRS_DEFAULT, VRS_MULTIVIEW, + VRS_RG, + VRS_RG_MULTIVIEW, VRS_MAX, }; diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp index d569ce110199..b4d82e83b442 100644 --- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp @@ -143,7 +143,6 @@ RID RenderForwardClustered::RenderBufferDataForwardClustered::get_color_only_fb( if (render_buffers->has_texture(RB_SCOPE_VRS, RB_TEXTURE)) { RID vrs_texture = render_buffers->get_texture(RB_SCOPE_VRS, RB_TEXTURE); - return FramebufferCacheRD::get_singleton()->get_cache_multiview(render_buffers->get_view_count(), color, depth, vrs_texture); } else { 
return FramebufferCacheRD::get_singleton()->get_cache_multiview(render_buffers->get_view_count(), color, depth); @@ -173,7 +172,6 @@ RID RenderForwardClustered::RenderBufferDataForwardClustered::get_color_pass_fb( if (render_buffers->has_texture(RB_SCOPE_VRS, RB_TEXTURE)) { RID vrs_texture = render_buffers->get_texture(RB_SCOPE_VRS, RB_TEXTURE); - return FramebufferCacheRD::get_singleton()->get_cache_multiview(v_count, color, specular, velocity_buffer, depth, vrs_texture); } else { return FramebufferCacheRD::get_singleton()->get_cache_multiview(v_count, color, specular, velocity_buffer, depth); diff --git a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp index e93b7f0339d5..9f2736caf8d3 100644 --- a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp +++ b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp @@ -199,9 +199,6 @@ RID RenderForwardMobile::RenderBufferDataForwardMobile::get_color_fbs(Framebuffe RD::FramebufferPass pass; pass.color_attachments.push_back(0); pass.depth_attachment = 1; - if (vrs_texture.is_valid()) { - pass.vrs_attachment = 2; - } if (use_msaa) { // Add resolve @@ -222,9 +219,6 @@ RID RenderForwardMobile::RenderBufferDataForwardMobile::get_color_fbs(Framebuffe RD::FramebufferPass pass; pass.color_attachments.push_back(0); pass.depth_attachment = 1; - if (vrs_texture.is_valid()) { - pass.vrs_attachment = 2; - } if (use_msaa) { // add resolve @@ -2844,9 +2838,11 @@ static RD::FramebufferFormatID _get_color_framebuffer_format_for_pipeline(RD::Da attachments.push_back(attachment); if (p_vrs) { + // VRS attachment. 
attachment.samples = RD::TEXTURE_SAMPLES_1; attachment.format = RenderSceneBuffersRD::get_vrs_format(); attachment.usage_flags = RenderSceneBuffersRD::get_vrs_usage_bits(); + attachments.push_back(attachment); } if (multisampling) { @@ -2864,10 +2860,6 @@ static RD::FramebufferFormatID _get_color_framebuffer_format_for_pipeline(RD::Da pass.color_attachments.push_back(0); pass.depth_attachment = 1; - if (p_vrs) { - pass.vrs_attachment = 2; - } - if (multisampling) { pass.resolve_attachments.push_back(attachments.size() - 1); } @@ -2893,7 +2885,8 @@ static RD::FramebufferFormatID _get_color_framebuffer_format_for_pipeline(RD::Da passes.push_back(blit_pass); } - return RD::get_singleton()->framebuffer_format_create_multipass(attachments, passes, p_view_count); + int32_t vrs_attachment = p_vrs ? 2 : -1; + return RD::get_singleton()->framebuffer_format_create_multipass(attachments, passes, p_view_count, vrs_attachment); } static RD::FramebufferFormatID _get_reflection_probe_color_framebuffer_format_for_pipeline() { diff --git a/servers/rendering/renderer_rd/framebuffer_cache_rd.h b/servers/rendering/renderer_rd/framebuffer_cache_rd.h index abb2a5808ded..e245e3bd0d0e 100644 --- a/servers/rendering/renderer_rd/framebuffer_cache_rd.h +++ b/servers/rendering/renderer_rd/framebuffer_cache_rd.h @@ -59,7 +59,6 @@ class FramebufferCacheRD : public Object { static _FORCE_INLINE_ uint32_t _hash_pass(const RD::FramebufferPass &p, uint32_t h) { h = hash_murmur3_one_32(p.depth_attachment, h); - h = hash_murmur3_one_32(p.vrs_attachment, h); h = hash_murmur3_one_32(p.color_attachments.size(), h); for (int i = 0; i < p.color_attachments.size(); i++) { @@ -84,10 +83,6 @@ class FramebufferCacheRD : public Object { return false; } - if (a.vrs_attachment != b.vrs_attachment) { - return false; - } - if (a.color_attachments.size() != b.color_attachments.size()) { return false; } diff --git a/servers/rendering/renderer_rd/shaders/effects/vrs.glsl 
b/servers/rendering/renderer_rd/shaders/effects/vrs.glsl index b4fcaa467349..1d3463dd2bf5 100644 --- a/servers/rendering/renderer_rd/shaders/effects/vrs.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/vrs.glsl @@ -59,7 +59,11 @@ layout(location = 0) in vec2 uv_interp; layout(set = 0, binding = 0) uniform sampler2D source_color; #endif /* USE_MULTIVIEW */ +#ifdef SPLIT_RG +layout(location = 0) out vec2 frag_color; +#else layout(location = 0) out uint frag_color; +#endif layout(push_constant, std430) uniform Params { float max_texel_factor; @@ -79,6 +83,10 @@ void main() { // Input is standardized. R for X, G for Y, 0.0 (0) = 1, 0.33 (85) = 2, 0.66 (170) = 3, 1.0 (255) = 8 vec4 color = textureLod(source_color, uv, 0.0); +#ifdef SPLIT_RG + // Density map for VRS according to VK_EXT_fragment_density_map, we can use as is. + frag_color = max(vec2(1.0f) - color.rg, vec2(1.0f / 255.0f)); +#else // Output image shading rate image for VRS according to VK_KHR_fragment_shading_rate. color.r = clamp(floor(color.r * params.max_texel_factor + 0.1), 0.0, params.max_texel_factor); color.g = clamp(floor(color.g * params.max_texel_factor + 0.1), 0.0, params.max_texel_factor); @@ -94,4 +102,5 @@ void main() { // Encode to frag_color; frag_color = int(color.r + 0.1) << 2; frag_color += int(color.g + 0.1); +#endif } diff --git a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp index 6e1259eac176..e7f513e630f9 100644 --- a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp +++ b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp @@ -725,9 +725,9 @@ uint32_t RenderSceneBuffersRD::get_velocity_usage_bits(bool p_resolve, bool p_ms } RD::DataFormat RenderSceneBuffersRD::get_vrs_format() { - return RD::DATA_FORMAT_R8_UINT; + return RD::get_singleton()->vrs_get_format(); } uint32_t RenderSceneBuffersRD::get_vrs_usage_bits() { - return 
RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_VRS_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + return RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_VRS_ATTACHMENT_BIT; } diff --git a/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp b/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp index 697d9490158e..35ec61f340e5 100644 --- a/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp @@ -476,17 +476,15 @@ TextureStorage::TextureStorage() { } } - { //create default VRS - + { + // Create default VRS texture. + bool vrs_supported = RD::get_singleton()->has_feature(RD::SUPPORTS_ATTACHMENT_VRS); RD::TextureFormat tformat; - tformat.format = RD::DATA_FORMAT_R8_UINT; + tformat.format = vrs_supported ? RD::get_singleton()->vrs_get_format() : RD::DATA_FORMAT_R8_UINT; tformat.width = 4; tformat.height = 4; - tformat.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_VRS_ATTACHMENT_BIT; + tformat.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | (vrs_supported ? 
RD::TEXTURE_USAGE_VRS_ATTACHMENT_BIT : 0); tformat.texture_type = RD::TEXTURE_TYPE_2D; - if (!RD::get_singleton()->has_feature(RD::SUPPORTS_ATTACHMENT_VRS)) { - tformat.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT; - } Vector pv; pv.resize(4 * 4); diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index ace1c34fca09..7f03e796f2f9 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -789,7 +789,7 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture ERR_FAIL_COND_V_MSG(required_mipmaps < format.mipmaps, RID(), "Too many mipmaps requested for texture format and dimensions (" + itos(format.mipmaps) + "), maximum allowed: (" + itos(required_mipmaps) + ")."); - uint32_t forced_usage_bits = 0; + uint32_t forced_usage_bits = _texture_vrs_method_to_usage_bits(); if (p_data.size()) { ERR_FAIL_COND_V_MSG(p_data.size() != (int)format.array_layers, RID(), "Default supplied data for image format is of invalid length (" + itos(p_data.size()) + "), should be (" + itos(format.array_layers) + ")."); @@ -804,7 +804,7 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture "Textures created as depth attachments can't be initialized with data directly. 
Use RenderingDevice::texture_update() instead."); if (!(format.usage_bits & TEXTURE_USAGE_CAN_UPDATE_BIT)) { - forced_usage_bits = TEXTURE_USAGE_CAN_UPDATE_BIT; + forced_usage_bits |= TEXTURE_USAGE_CAN_UPDATE_BIT; } } @@ -831,7 +831,7 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture ERR_FAIL_V_MSG(RID(), "Format " + format_text + " does not support usage as atomic storage image."); } if ((format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) && !supported_usage.has_flag(TEXTURE_USAGE_VRS_ATTACHMENT_BIT)) { - ERR_FAIL_V_MSG(RID(), "Format " + format_text + " does not support usage as VRS attachment."); + ERR_FAIL_V_MSG(RID(), "Format " + format_text + " does not support usage as variable shading rate attachment."); } } @@ -889,7 +889,7 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture texture.bound = false; // Textures are only assumed to be immutable if they have initial data and none of the other bits that indicate write usage are enabled. 
- bool texture_mutable_by_default = texture.usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_STORAGE_BIT | TEXTURE_USAGE_STORAGE_ATOMIC_BIT | TEXTURE_USAGE_VRS_ATTACHMENT_BIT); + bool texture_mutable_by_default = texture.usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_STORAGE_BIT | TEXTURE_USAGE_STORAGE_ATOMIC_BIT); if (p_data.is_empty() || texture_mutable_by_default) { _texture_make_mutable(&texture, RID()); } @@ -1701,6 +1701,17 @@ void RenderingDevice::_texture_create_reinterpret_buffer(Texture *p_texture) { p_texture->shared_fallback->buffer_tracker = tracker; } +uint32_t RenderingDevice::_texture_vrs_method_to_usage_bits() const { + switch (vrs_method) { + case VRS_METHOD_FRAGMENT_SHADING_RATE: + return RDD::TEXTURE_USAGE_VRS_FRAGMENT_SHADING_RATE_BIT; + case VRS_METHOD_FRAGMENT_DENSITY_MAP: + return RDD::TEXTURE_USAGE_VRS_FRAGMENT_DENSITY_MAP_BIT; + default: + return 0; + } +} + Vector RenderingDevice::_texture_get_data(Texture *tex, uint32_t p_layer, bool p_2d) { uint32_t width, height, depth; uint32_t tight_mip_size = get_image_format_required_size(tex->format, tex->width, tex->height, p_2d ? 
1 : tex->depth, tex->mipmaps, &width, &height, &depth); @@ -2135,7 +2146,7 @@ bool RenderingDevice::texture_is_format_supported_for_usage(DataFormat p_format, /**** FRAMEBUFFER ****/ /*********************/ -RDD::RenderPassID RenderingDevice::_render_pass_create(RenderingDeviceDriver *p_driver, const Vector &p_attachments, const Vector &p_passes, VectorView p_load_ops, VectorView p_store_ops, uint32_t p_view_count, Vector *r_samples) { +RDD::RenderPassID RenderingDevice::_render_pass_create(RenderingDeviceDriver *p_driver, const Vector &p_attachments, const Vector &p_passes, VectorView p_load_ops, VectorView p_store_ops, uint32_t p_view_count, VRSMethod p_vrs_method, int32_t p_vrs_attachment, Size2i p_vrs_texel_size, Vector *r_samples) { // NOTE: // Before the refactor to RenderingDevice-RenderingDeviceDriver, there was commented out code to // specify dependencies to external subpasses. Since it had been unused for a long timel it wasn't ported @@ -2175,15 +2186,14 @@ RDD::RenderPassID RenderingDevice::_render_pass_create(RenderingDeviceDriver *p_ // We can setup a framebuffer where we write to our VRS texture to set it up. // We make the assumption here that if our texture is actually used as our VRS attachment. // It is used as such for each subpass. This is fairly certain seeing the restrictions on subpasses. 
- bool is_vrs = (p_attachments[i].usage_flags & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) && i == p_passes[0].vrs_attachment; - + bool is_vrs = (p_attachments[i].usage_flags & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) && i == p_vrs_attachment; if (is_vrs) { description.load_op = RDD::ATTACHMENT_LOAD_OP_LOAD; description.store_op = RDD::ATTACHMENT_STORE_OP_DONT_CARE; - description.stencil_load_op = RDD::ATTACHMENT_LOAD_OP_LOAD; + description.stencil_load_op = RDD::ATTACHMENT_LOAD_OP_DONT_CARE; description.stencil_store_op = RDD::ATTACHMENT_STORE_OP_DONT_CARE; - description.initial_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - description.final_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + description.initial_layout = _vrs_layout_from_method(p_vrs_method); + description.final_layout = _vrs_layout_from_method(p_vrs_method); } else { if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { description.load_op = p_load_ops[i]; @@ -2316,14 +2326,15 @@ RDD::RenderPassID RenderingDevice::_render_pass_create(RenderingDeviceDriver *p_ subpass.depth_stencil_reference.layout = RDD::TEXTURE_LAYOUT_UNDEFINED; } - if (pass->vrs_attachment != ATTACHMENT_UNUSED) { - int32_t attachment = pass->vrs_attachment; + if (p_vrs_method == VRS_METHOD_FRAGMENT_SHADING_RATE && p_vrs_attachment >= 0) { + int32_t attachment = p_vrs_attachment; ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), RDD::RenderPassID(), "Invalid framebuffer VRS format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), VRS attachment."); ERR_FAIL_COND_V_MSG(!(p_attachments[attachment].usage_flags & TEXTURE_USAGE_VRS_ATTACHMENT_BIT), RDD::RenderPassID(), "Invalid framebuffer VRS format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it's marked as VRS, but it's not a VRS attachment."); ERR_FAIL_COND_V_MSG(attachment_last_pass[attachment] == i, RDD::RenderPassID(), "Invalid framebuffer VRS attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it already 
was used for something else before in this pass."); - subpass.vrs_reference.attachment = attachment_remap[attachment]; - subpass.vrs_reference.layout = RDD::TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL; + subpass.fragment_shading_rate_reference.attachment = attachment_remap[attachment]; + subpass.fragment_shading_rate_reference.layout = RDD::TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL; + subpass.fragment_shading_rate_texel_size = p_vrs_texel_size; attachment_last_pass[attachment] = i; } @@ -2358,7 +2369,13 @@ RDD::RenderPassID RenderingDevice::_render_pass_create(RenderingDeviceDriver *p_ } } - RDD::RenderPassID render_pass = p_driver->render_pass_create(attachments, subpasses, subpass_dependencies, p_view_count); + RDD::AttachmentReference fragment_density_map_attachment_reference; + if (p_vrs_method == VRS_METHOD_FRAGMENT_DENSITY_MAP && p_vrs_attachment >= 0) { + fragment_density_map_attachment_reference.attachment = p_vrs_attachment; + fragment_density_map_attachment_reference.layout = RDD::TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL; + } + + RDD::RenderPassID render_pass = p_driver->render_pass_create(attachments, subpasses, subpass_dependencies, p_view_count, fragment_density_map_attachment_reference); ERR_FAIL_COND_V(!render_pass, RDD::RenderPassID()); return render_pass; @@ -2372,10 +2389,74 @@ RDD::RenderPassID RenderingDevice::_render_pass_create_from_graph(RenderingDevic // resolving the dependencies between commands. This function creates a render pass for the framebuffer accordingly. 
Framebuffer *framebuffer = (Framebuffer *)(p_user_data); const FramebufferFormatKey &key = framebuffer->rendering_device->framebuffer_formats[framebuffer->format_id].E->key(); - return _render_pass_create(p_driver, key.attachments, key.passes, p_load_ops, p_store_ops, framebuffer->view_count); + return _render_pass_create(p_driver, key.attachments, key.passes, p_load_ops, p_store_ops, framebuffer->view_count, key.vrs_method, key.vrs_attachment, key.vrs_texel_size); +} + +RDG::ResourceUsage RenderingDevice::_vrs_usage_from_method(VRSMethod p_method) { + switch (p_method) { + case VRS_METHOD_FRAGMENT_SHADING_RATE: + return RDG::RESOURCE_USAGE_ATTACHMENT_FRAGMENT_SHADING_RATE_READ; + case VRS_METHOD_FRAGMENT_DENSITY_MAP: + return RDG::RESOURCE_USAGE_ATTACHMENT_FRAGMENT_DENSITY_MAP_READ; + default: + return RDG::RESOURCE_USAGE_NONE; + } +} + +RDD::PipelineStageBits RenderingDevice::_vrs_stages_from_method(VRSMethod p_method) { + switch (p_method) { + case VRS_METHOD_FRAGMENT_SHADING_RATE: + return RDD::PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT; + case VRS_METHOD_FRAGMENT_DENSITY_MAP: + return RDD::PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT; + default: + return RDD::PipelineStageBits(0); + } } -RenderingDevice::FramebufferFormatID RenderingDevice::framebuffer_format_create(const Vector &p_format, uint32_t p_view_count) { +RDD::TextureLayout RenderingDevice::_vrs_layout_from_method(VRSMethod p_method) { + switch (p_method) { + case VRS_METHOD_FRAGMENT_SHADING_RATE: + return RDD::TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL; + case VRS_METHOD_FRAGMENT_DENSITY_MAP: + return RDD::TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL; + default: + return RDD::TEXTURE_LAYOUT_UNDEFINED; + } +} + +void RenderingDevice::_vrs_detect_method() { + const RDD::FragmentShadingRateCapabilities &fsr_capabilities = driver->get_fragment_shading_rate_capabilities(); + const RDD::FragmentDensityMapCapabilities &fdm_capabilities = 
driver->get_fragment_density_map_capabilities(); + if (fsr_capabilities.attachment_supported) { + vrs_method = VRS_METHOD_FRAGMENT_SHADING_RATE; + } else if (fdm_capabilities.attachment_supported) { + vrs_method = VRS_METHOD_FRAGMENT_DENSITY_MAP; + } + + switch (vrs_method) { + case VRS_METHOD_FRAGMENT_SHADING_RATE: + vrs_format = DATA_FORMAT_R8_UINT; + vrs_texel_size = Vector2i(16, 16).clamp(fsr_capabilities.min_texel_size, fsr_capabilities.max_texel_size); + break; + case VRS_METHOD_FRAGMENT_DENSITY_MAP: + vrs_format = DATA_FORMAT_R8G8_UNORM; + vrs_texel_size = Vector2i(32, 32).clamp(fdm_capabilities.min_texel_size, fdm_capabilities.max_texel_size); + break; + default: + break; + } +} + +RD::DataFormat RenderingDevice::vrs_get_format() const { + return vrs_format; +} + +Size2i RenderingDevice::vrs_get_texel_size() const { + return vrs_texel_size; +} + +RenderingDevice::FramebufferFormatID RenderingDevice::framebuffer_format_create(const Vector &p_format, uint32_t p_view_count, int32_t p_fragment_density_map_attachment) { FramebufferPass pass; for (int i = 0; i < p_format.size(); i++) { if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { @@ -2387,16 +2468,19 @@ RenderingDevice::FramebufferFormatID RenderingDevice::framebuffer_format_create( Vector passes; passes.push_back(pass); - return framebuffer_format_create_multipass(p_format, passes, p_view_count); + return framebuffer_format_create_multipass(p_format, passes, p_view_count, p_fragment_density_map_attachment); } -RenderingDevice::FramebufferFormatID RenderingDevice::framebuffer_format_create_multipass(const Vector &p_attachments, const Vector &p_passes, uint32_t p_view_count) { +RenderingDevice::FramebufferFormatID RenderingDevice::framebuffer_format_create_multipass(const Vector &p_attachments, const Vector &p_passes, uint32_t p_view_count, int32_t p_vrs_attachment) { _THREAD_SAFE_METHOD_ FramebufferFormatKey key; key.attachments = p_attachments; key.passes = p_passes; key.view_count 
= p_view_count; + key.vrs_method = vrs_method; + key.vrs_attachment = p_vrs_attachment; + key.vrs_texel_size = vrs_texel_size; const RBMap::Element *E = framebuffer_format_cache.find(key); if (E) { @@ -2412,7 +2496,7 @@ RenderingDevice::FramebufferFormatID RenderingDevice::framebuffer_format_create_ store_ops.push_back(RDD::ATTACHMENT_STORE_OP_STORE); } - RDD::RenderPassID render_pass = _render_pass_create(driver, p_attachments, p_passes, load_ops, store_ops, p_view_count, &samples); // Actions don't matter for this use case. + RDD::RenderPassID render_pass = _render_pass_create(driver, p_attachments, p_passes, load_ops, store_ops, p_view_count, vrs_method, p_vrs_attachment, vrs_texel_size, &samples); // Actions don't matter for this use case. if (!render_pass) { // Was likely invalid. return INVALID_ID; } @@ -2452,7 +2536,7 @@ RenderingDevice::FramebufferFormatID RenderingDevice::framebuffer_format_create_ LocalVector subpass; subpass.resize(1); - RDD::RenderPassID render_pass = driver->render_pass_create({}, subpass, {}, 1); + RDD::RenderPassID render_pass = driver->render_pass_create({}, subpass, {}, 1, RDD::AttachmentReference()); ERR_FAIL_COND_V(!render_pass, FramebufferFormatID()); FramebufferFormatID id = FramebufferFormatID(framebuffer_format_cache.size()) | (FramebufferFormatID(ID_TYPE_FRAMEBUFFER_FORMAT) << FramebufferFormatID(ID_BASE_SHIFT)); @@ -2523,8 +2607,6 @@ RID RenderingDevice::framebuffer_create(const Vector &p_texture_attachments if (texture && texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { pass.depth_attachment = i; - } else if (texture && texture->usage_flags & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) { - pass.vrs_attachment = i; } else { if (texture && texture->is_resolve_buffer) { pass.resolve_attachments.push_back(i); @@ -2546,6 +2628,7 @@ RID RenderingDevice::framebuffer_create_multipass(const Vector &p_texture_a Vector attachments; LocalVector textures; LocalVector trackers; + int32_t vrs_attachment = -1; 
attachments.resize(p_texture_attachments.size()); Size2i size; bool size_set = false; @@ -2560,6 +2643,11 @@ RID RenderingDevice::framebuffer_create_multipass(const Vector &p_texture_a _check_transfer_worker_texture(texture); + if (i != 0 && texture->usage_flags & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) { + // Detect if the texture is the VRS attachment (shading rate image or fragment density map) and it's not the first attachment. + vrs_attachment = i; + } + if (!size_set) { size.width = texture->width; size.height = texture->height; @@ -2587,7 +2675,7 @@ RID RenderingDevice::framebuffer_create_multipass(const Vector &p_texture_a ERR_FAIL_COND_V_MSG(!size_set, RID(), "All attachments unused."); - FramebufferFormatID format_id = framebuffer_format_create_multipass(attachments, p_passes, p_view_count); + FramebufferFormatID format_id = framebuffer_format_create_multipass(attachments, p_passes, p_view_count, vrs_attachment); if (format_id == INVALID_ID) { return RID(); } @@ -3913,7 +4001,7 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin_for_screen(DisplayS clear_value.color = p_clear_color; RDD::RenderPassID render_pass = driver->swap_chain_get_render_pass(sc_it->value); - draw_graph.add_draw_list_begin(render_pass, fb_it->value, viewport, RDG::ATTACHMENT_OPERATION_CLEAR, clear_value, true, false, RDD::BreadcrumbMarker::BLIT_PASS); + draw_graph.add_draw_list_begin(render_pass, fb_it->value, viewport, RDG::ATTACHMENT_OPERATION_CLEAR, clear_value, RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, RDD::BreadcrumbMarker::BLIT_PASS); draw_graph.add_draw_list_set_viewport(viewport); draw_graph.add_draw_list_set_scissor(viewport); @@ -3929,6 +4017,7 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin(RID p_framebuffer, Framebuffer *framebuffer = framebuffer_owner.get_or_null(p_framebuffer); ERR_FAIL_NULL_V(framebuffer, INVALID_ID); + const FramebufferFormatKey &framebuffer_key = framebuffer_formats[framebuffer->format_id].E->key(); Point2i viewport_offset; Point2i viewport_size =
framebuffer->size; @@ -3949,12 +4038,12 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin(RID p_framebuffer, thread_local LocalVector clear_values; thread_local LocalVector resource_trackers; thread_local LocalVector resource_usages; - bool uses_color = false; - bool uses_depth = false; + BitField stages; operations.resize(framebuffer->texture_ids.size()); clear_values.resize(framebuffer->texture_ids.size()); resource_trackers.clear(); resource_usages.clear(); + stages.clear(); uint32_t color_index = 0; for (int i = 0; i < framebuffer->texture_ids.size(); i++) { @@ -3971,7 +4060,11 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin(RID p_framebuffer, RDG::AttachmentOperation operation = RDG::ATTACHMENT_OPERATION_DEFAULT; RDD::RenderPassClearValue clear_value; - if (texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { + if (framebuffer_key.vrs_attachment == i && (texture->usage_flags & TEXTURE_USAGE_VRS_ATTACHMENT_BIT)) { + resource_trackers.push_back(texture->draw_tracker); + resource_usages.push_back(_vrs_usage_from_method(framebuffer_key.vrs_method)); + stages.set_flag(_vrs_stages_from_method(framebuffer_key.vrs_method)); + } else if (texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { if (p_draw_flags.has_flag(DrawFlags(DRAW_CLEAR_COLOR_0 << color_index))) { ERR_FAIL_COND_V_MSG(color_index >= p_clear_color_values.size(), INVALID_ID, vformat("Color texture (%d) was specified to be cleared but no color value was provided.", color_index)); operation = RDG::ATTACHMENT_OPERATION_CLEAR; @@ -3982,7 +4075,7 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin(RID p_framebuffer, resource_trackers.push_back(texture->draw_tracker); resource_usages.push_back(RDG::RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE); - uses_color = true; + stages.set_flag(RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); color_index++; } else if (texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { if 
(p_draw_flags.has_flag(DRAW_CLEAR_DEPTH) || p_draw_flags.has_flag(DRAW_CLEAR_STENCIL)) { @@ -3995,14 +4088,15 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin(RID p_framebuffer, resource_trackers.push_back(texture->draw_tracker); resource_usages.push_back(RDG::RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE); - uses_depth = true; + stages.set_flag(RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT); + stages.set_flag(RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT); } operations[i] = operation; clear_values[i] = clear_value; } - draw_graph.add_draw_list_begin(framebuffer->framebuffer_cache, Rect2i(viewport_offset, viewport_size), operations, clear_values, uses_color, uses_depth, p_breadcrumb); + draw_graph.add_draw_list_begin(framebuffer->framebuffer_cache, Rect2i(viewport_offset, viewport_size), operations, clear_values, stages, p_breadcrumb); draw_graph.add_draw_list_usages(resource_trackers, resource_usages); // Mark textures as bound. @@ -4023,9 +4117,7 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin(RID p_framebuffer, draw_list_framebuffer_format = framebuffer->format_id; #endif draw_list_current_subpass = 0; - - const FramebufferFormatKey &key = framebuffer_formats[framebuffer->format_id].E->key(); - draw_list_subpass_count = key.passes.size(); + draw_list_subpass_count = framebuffer_key.passes.size(); Rect2i viewport_rect(viewport_offset, viewport_size); draw_graph.add_draw_list_set_viewport(viewport_rect); @@ -6246,6 +6338,9 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ } } + // Find the best method available for VRS on the current hardware. + _vrs_detect_method(); + return OK; } @@ -6669,7 +6764,20 @@ RenderingDevice *RenderingDevice::create_local_device() { } bool RenderingDevice::has_feature(const Features p_feature) const { - return driver->has_feature(p_feature); + // Some features can be deduced directly from the driver's capabilities instead of querying the driver's feature support.
+ switch (p_feature) { + case SUPPORTS_MULTIVIEW: { + const RDD::MultiviewCapabilities &multiview_capabilities = driver->get_multiview_capabilities(); + return multiview_capabilities.is_supported && multiview_capabilities.max_view_count > 1; + } + case SUPPORTS_ATTACHMENT_VRS: { + const RDD::FragmentShadingRateCapabilities &fsr_capabilities = driver->get_fragment_shading_rate_capabilities(); + const RDD::FragmentDensityMapCapabilities &fdm_capabilities = driver->get_fragment_density_map_capabilities(); + return fsr_capabilities.attachment_supported || fdm_capabilities.attachment_supported; + } + default: + return driver->has_feature(p_feature); + } } void RenderingDevice::_bind_methods() { diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 6b19d05fca48..a644c5826f30 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -305,6 +305,7 @@ class RenderingDevice : public RenderingDeviceCommons { void _texture_free_shared_fallback(Texture *p_texture); void _texture_copy_shared(RID p_src_texture_rid, Texture *p_src_texture, RID p_dst_texture_rid, Texture *p_dst_texture); void _texture_create_reinterpret_buffer(Texture *p_texture); + uint32_t _texture_vrs_method_to_usage_bits() const; public: struct TextureView { @@ -354,6 +355,30 @@ class RenderingDevice : public RenderingDeviceCommons { void texture_set_discardable(RID p_texture, bool p_discardable); bool texture_is_discardable(RID p_texture); +private: + /*************/ + /**** VRS ****/ + /*************/ + + enum VRSMethod { + VRS_METHOD_NONE, + VRS_METHOD_FRAGMENT_SHADING_RATE, + VRS_METHOD_FRAGMENT_DENSITY_MAP, + }; + + VRSMethod vrs_method = VRS_METHOD_NONE; + DataFormat vrs_format = DATA_FORMAT_MAX; + Size2i vrs_texel_size; + + static RDG::ResourceUsage _vrs_usage_from_method(VRSMethod p_method); + static RDD::PipelineStageBits _vrs_stages_from_method(VRSMethod p_method); + static RDD::TextureLayout 
_vrs_layout_from_method(VRSMethod p_method); + void _vrs_detect_method(); + +public: + DataFormat vrs_get_format() const; + Size2i vrs_get_texel_size() const; + /*********************/ /**** FRAMEBUFFER ****/ /*********************/ @@ -384,7 +409,6 @@ class RenderingDevice : public RenderingDeviceCommons { Vector resolve_attachments; Vector preserve_attachments; int32_t depth_attachment = ATTACHMENT_UNUSED; - int32_t vrs_attachment = ATTACHMENT_UNUSED; // density map for VRS, only used if supported }; typedef int64_t FramebufferFormatID; @@ -394,8 +418,23 @@ class RenderingDevice : public RenderingDeviceCommons { Vector attachments; Vector passes; uint32_t view_count = 1; + VRSMethod vrs_method = VRS_METHOD_NONE; + int32_t vrs_attachment = ATTACHMENT_UNUSED; + Size2i vrs_texel_size; bool operator<(const FramebufferFormatKey &p_key) const { + if (vrs_texel_size != p_key.vrs_texel_size) { + return vrs_texel_size < p_key.vrs_texel_size; + } + + if (vrs_attachment != p_key.vrs_attachment) { + return vrs_attachment < p_key.vrs_attachment; + } + + if (vrs_method != p_key.vrs_method) { + return vrs_method < p_key.vrs_method; + } + if (view_count != p_key.view_count) { return view_count < p_key.view_count; } @@ -500,7 +539,7 @@ class RenderingDevice : public RenderingDeviceCommons { } }; - static RDD::RenderPassID _render_pass_create(RenderingDeviceDriver *p_driver, const Vector &p_attachments, const Vector &p_passes, VectorView p_load_ops, VectorView p_store_ops, uint32_t p_view_count = 1, Vector *r_samples = nullptr); + static RDD::RenderPassID _render_pass_create(RenderingDeviceDriver *p_driver, const Vector &p_attachments, const Vector &p_passes, VectorView p_load_ops, VectorView p_store_ops, uint32_t p_view_count = 1, VRSMethod p_vrs_method = VRS_METHOD_NONE, int32_t p_vrs_attachment = -1, Size2i p_vrs_texel_size = Size2i(), Vector *r_samples = nullptr); static RDD::RenderPassID _render_pass_create_from_graph(RenderingDeviceDriver *p_driver, VectorView p_load_ops, 
VectorView p_store_ops, void *p_user_data); // This is a cache and it's never freed, it ensures @@ -531,8 +570,8 @@ class RenderingDevice : public RenderingDeviceCommons { public: // This ID is warranted to be unique for the same formats, does not need to be freed - FramebufferFormatID framebuffer_format_create(const Vector &p_format, uint32_t p_view_count = 1); - FramebufferFormatID framebuffer_format_create_multipass(const Vector &p_attachments, const Vector &p_passes, uint32_t p_view_count = 1); + FramebufferFormatID framebuffer_format_create(const Vector &p_format, uint32_t p_view_count = 1, int32_t p_vrs_attachment = -1); + FramebufferFormatID framebuffer_format_create_multipass(const Vector &p_attachments, const Vector &p_passes, uint32_t p_view_count = 1, int32_t p_vrs_attachment = -1); FramebufferFormatID framebuffer_format_create_empty(TextureSamples p_samples = TEXTURE_SAMPLES_1); TextureSamples framebuffer_format_get_texture_samples(FramebufferFormatID p_format, uint32_t p_pass = 0); diff --git a/servers/rendering/rendering_device_commons.h b/servers/rendering/rendering_device_commons.h index 9d01b6955085..6bd2e57e1685 100644 --- a/servers/rendering/rendering_device_commons.h +++ b/servers/rendering/rendering_device_commons.h @@ -359,6 +359,7 @@ class RenderingDeviceCommons : public Object { TEXTURE_USAGE_CAN_COPY_TO_BIT = (1 << 8), TEXTURE_USAGE_INPUT_ATTACHMENT_BIT = (1 << 9), TEXTURE_USAGE_VRS_ATTACHMENT_BIT = (1 << 10), + TEXTURE_USAGE_MAX_BIT = TEXTURE_USAGE_VRS_ATTACHMENT_BIT, }; struct TextureFormat { @@ -851,10 +852,6 @@ class RenderingDeviceCommons : public Object { LIMIT_SUBGROUP_MAX_SIZE, LIMIT_SUBGROUP_IN_SHADERS, // Set flags using SHADER_STAGE_VERTEX_BIT, SHADER_STAGE_FRAGMENT_BIT, etc. 
LIMIT_SUBGROUP_OPERATIONS, - LIMIT_VRS_TEXEL_WIDTH, - LIMIT_VRS_TEXEL_HEIGHT, - LIMIT_VRS_MAX_FRAGMENT_WIDTH, - LIMIT_VRS_MAX_FRAGMENT_HEIGHT, }; enum Features { diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h index d2d14676db3f..0b350ddcc521 100644 --- a/servers/rendering/rendering_device_driver.h +++ b/servers/rendering/rendering_device_driver.h @@ -228,7 +228,8 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { TEXTURE_LAYOUT_COPY_DST_OPTIMAL, TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL, TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL, - TEXTURE_LAYOUT_VRS_ATTACHMENT_OPTIMAL, + TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL, + TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL, TEXTURE_LAYOUT_MAX }; @@ -239,6 +240,11 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { TEXTURE_ASPECT_MAX }; + enum TextureUsageMethod { + TEXTURE_USAGE_VRS_FRAGMENT_SHADING_RATE_BIT = TEXTURE_USAGE_MAX_BIT << 1, + TEXTURE_USAGE_VRS_FRAGMENT_DENSITY_MAP_BIT = TEXTURE_USAGE_MAX_BIT << 2, + }; + enum TextureAspectBits { TEXTURE_ASPECT_COLOR_BIT = (1 << TEXTURE_ASPECT_COLOR), TEXTURE_ASPECT_DEPTH_BIT = (1 << TEXTURE_ASPECT_DEPTH), @@ -325,6 +331,8 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { PIPELINE_STAGE_ALL_GRAPHICS_BIT = (1 << 15), PIPELINE_STAGE_ALL_COMMANDS_BIT = (1 << 16), PIPELINE_STAGE_CLEAR_STORAGE_BIT = (1 << 17), + PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT = (1 << 22), + PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT = (1 << 23), }; enum BarrierAccessBits { @@ -346,6 +354,7 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { BARRIER_ACCESS_MEMORY_READ_BIT = (1 << 15), BARRIER_ACCESS_MEMORY_WRITE_BIT = (1 << 16), BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT = (1 << 23), + BARRIER_ACCESS_FRAGMENT_DENSITY_MAP_ATTACHMENT_READ_BIT = (1 << 24), BARRIER_ACCESS_RESOLVE_READ_BIT = (1 << 24), BARRIER_ACCESS_RESOLVE_WRITE_BIT = (1 << 25), 
BARRIER_ACCESS_STORAGE_CLEAR_BIT = (1 << 27), @@ -599,7 +608,8 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { AttachmentReference depth_stencil_reference; LocalVector resolve_references; LocalVector preserve_attachments; - AttachmentReference vrs_reference; + AttachmentReference fragment_shading_rate_reference; + Size2i fragment_shading_rate_texel_size; }; struct SubpassDependency { @@ -611,7 +621,7 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { BitField dst_access; }; - virtual RenderPassID render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count) = 0; + virtual RenderPassID render_pass_create(VectorView p_attachments, VectorView p_subpasses, VectorView p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) = 0; virtual void render_pass_free(RenderPassID p_render_pass) = 0; // ----- COMMANDS ----- @@ -749,6 +759,26 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { uint32_t max_instance_count = 0; }; + struct FragmentShadingRateCapabilities { + Size2i min_texel_size; + Size2i max_texel_size; + Size2i max_fragment_size; + bool pipeline_supported = false; + bool primitive_supported = false; + bool attachment_supported = false; + }; + + struct FragmentDensityMapCapabilities { + Size2i min_texel_size; + Size2i max_texel_size; + Size2i offset_granularity; + bool attachment_supported = false; + bool dynamic_attachment_supported = false; + bool non_subsampled_images_supported = false; + bool invocations_supported = false; + bool offset_supported = false; + }; + enum ApiTrait { API_TRAIT_HONORS_PIPELINE_BARRIERS, API_TRAIT_SHADER_CHANGE_INVALIDATION, @@ -789,6 +819,8 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { virtual uint64_t api_trait_get(ApiTrait p_trait); virtual bool has_feature(Features p_feature) = 0; virtual const MultiviewCapabilities &get_multiview_capabilities() = 0; + virtual 
const FragmentShadingRateCapabilities &get_fragment_shading_rate_capabilities() = 0; + virtual const FragmentDensityMapCapabilities &get_fragment_density_map_capabilities() = 0; virtual String get_api_name() const = 0; virtual String get_api_version() const = 0; virtual String get_pipeline_cache_uuid() const = 0; diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp index b2779af6207f..bc9090da99a1 100644 --- a/servers/rendering/rendering_device_graph.cpp +++ b/servers/rendering/rendering_device_graph.cpp @@ -55,6 +55,8 @@ bool RenderingDeviceGraph::_is_write_usage(ResourceUsage p_usage) { case RESOURCE_USAGE_INDEX_BUFFER_READ: case RESOURCE_USAGE_TEXTURE_SAMPLE: case RESOURCE_USAGE_STORAGE_IMAGE_READ: + case RESOURCE_USAGE_ATTACHMENT_FRAGMENT_SHADING_RATE_READ: + case RESOURCE_USAGE_ATTACHMENT_FRAGMENT_DENSITY_MAP_READ: return false; case RESOURCE_USAGE_COPY_TO: case RESOURCE_USAGE_RESOLVE_TO: @@ -89,6 +91,10 @@ RDD::TextureLayout RenderingDeviceGraph::_usage_to_image_layout(ResourceUsage p_ return RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; case RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE: return RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + case RESOURCE_USAGE_ATTACHMENT_FRAGMENT_SHADING_RATE_READ: + return RDD::TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL; + case RESOURCE_USAGE_ATTACHMENT_FRAGMENT_DENSITY_MAP_READ: + return RDD::TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL; case RESOURCE_USAGE_NONE: return RDD::TEXTURE_LAYOUT_UNDEFINED; default: @@ -133,6 +139,10 @@ RDD::BarrierAccessBits RenderingDeviceGraph::_usage_to_access_bits(ResourceUsage return RDD::BarrierAccessBits(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_READ_BIT | RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); case RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE: return RDD::BarrierAccessBits(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT); + 
case RESOURCE_USAGE_ATTACHMENT_FRAGMENT_SHADING_RATE_READ: + return RDD::BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT; + case RESOURCE_USAGE_ATTACHMENT_FRAGMENT_DENSITY_MAP_READ: + return RDD::BARRIER_ACCESS_FRAGMENT_DENSITY_MAP_ATTACHMENT_READ_BIT; default: DEV_ASSERT(false && "Invalid usage."); return RDD::BarrierAccessBits(0); @@ -874,7 +884,7 @@ void RenderingDeviceGraph::_run_draw_list_command(RDD::CommandBufferID p_command } } -void RenderingDeviceGraph::_add_draw_list_begin(FramebufferCache *p_framebuffer_cache, RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, bool p_uses_color, bool p_uses_depth, uint32_t p_breadcrumb) { +void RenderingDeviceGraph::_add_draw_list_begin(FramebufferCache *p_framebuffer_cache, RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, BitField p_stages, uint32_t p_breadcrumb) { DEV_ASSERT(p_attachment_operations.size() == p_attachment_clear_values.size()); draw_instruction_list.clear(); @@ -883,6 +893,7 @@ void RenderingDeviceGraph::_add_draw_list_begin(FramebufferCache *p_framebuffer_ draw_instruction_list.render_pass = p_render_pass; draw_instruction_list.framebuffer = p_framebuffer; draw_instruction_list.region = p_region; + draw_instruction_list.stages = p_stages; draw_instruction_list.attachment_operations.resize(p_attachment_operations.size()); draw_instruction_list.attachment_clear_values.resize(p_attachment_clear_values.size()); @@ -891,15 +902,6 @@ void RenderingDeviceGraph::_add_draw_list_begin(FramebufferCache *p_framebuffer_ draw_instruction_list.attachment_clear_values[i] = p_attachment_clear_values[i]; } - if (p_uses_color) { - draw_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); - } - - if (p_uses_depth) { - 
draw_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT); - draw_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT); - } - #if defined(DEBUG_ENABLED) || defined(DEV_ENABLED) draw_instruction_list.breadcrumb = p_breadcrumb; #endif @@ -1682,12 +1684,12 @@ void RenderingDeviceGraph::add_compute_list_end() { _add_command_to_graph(compute_instruction_list.command_trackers.ptr(), compute_instruction_list.command_tracker_usages.ptr(), compute_instruction_list.command_trackers.size(), command_index, command); } -void RenderingDeviceGraph::add_draw_list_begin(FramebufferCache *p_framebuffer_cache, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, bool p_uses_color, bool p_uses_depth, uint32_t p_breadcrumb) { - _add_draw_list_begin(p_framebuffer_cache, RDD::RenderPassID(), RDD::FramebufferID(), p_region, p_attachment_operations, p_attachment_clear_values, p_uses_color, p_uses_depth, p_breadcrumb); +void RenderingDeviceGraph::add_draw_list_begin(FramebufferCache *p_framebuffer_cache, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, BitField p_stages, uint32_t p_breadcrumb) { + _add_draw_list_begin(p_framebuffer_cache, RDD::RenderPassID(), RDD::FramebufferID(), p_region, p_attachment_operations, p_attachment_clear_values, p_stages, p_breadcrumb); } -void RenderingDeviceGraph::add_draw_list_begin(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, bool p_uses_color, bool p_uses_depth, uint32_t p_breadcrumb) { - _add_draw_list_begin(nullptr, p_render_pass, p_framebuffer, p_region, p_attachment_operations, p_attachment_clear_values, p_uses_color, p_uses_depth, p_breadcrumb); +void RenderingDeviceGraph::add_draw_list_begin(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, 
VectorView p_attachment_clear_values, BitField p_stages, uint32_t p_breadcrumb) { + _add_draw_list_begin(nullptr, p_render_pass, p_framebuffer, p_region, p_attachment_operations, p_attachment_clear_values, p_stages, p_breadcrumb); } void RenderingDeviceGraph::add_draw_list_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint32_t p_offset) { diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h index adfbb47e840e..8c3d6511d0ef 100644 --- a/servers/rendering/rendering_device_graph.h +++ b/servers/rendering/rendering_device_graph.h @@ -147,7 +147,9 @@ class RenderingDeviceGraph { RESOURCE_USAGE_STORAGE_IMAGE_READ, RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE, RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE, - RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE + RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE, + RESOURCE_USAGE_ATTACHMENT_FRAGMENT_SHADING_RATE_READ, + RESOURCE_USAGE_ATTACHMENT_FRAGMENT_DENSITY_MAP_READ, }; struct ResourceTracker { @@ -726,7 +728,7 @@ class RenderingDeviceGraph { void _run_compute_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); void _get_draw_list_render_pass_and_framebuffer(const RecordedDrawListCommand *p_draw_list_command, RDD::RenderPassID &r_render_pass, RDD::FramebufferID &r_framebuffer); void _run_draw_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); - void _add_draw_list_begin(FramebufferCache *p_framebuffer_cache, RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, bool p_uses_color, bool p_uses_depth, uint32_t p_breadcrumb); + void _add_draw_list_begin(FramebufferCache *p_framebuffer_cache, RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, 
VectorView p_attachment_clear_values, BitField p_stages, uint32_t p_breadcrumb); void _run_secondary_command_buffer_task(const SecondaryCommandBuffer *p_secondary); void _wait_for_secondary_command_buffer_tasks(); void _run_render_commands(int32_t p_level, const RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count, RDD::CommandBufferID &r_command_buffer, CommandBufferPool &r_command_buffer_pool, int32_t &r_current_label_index, int32_t &r_current_label_level); @@ -757,8 +759,8 @@ class RenderingDeviceGraph { void add_compute_list_usage(ResourceTracker *p_tracker, ResourceUsage p_usage); void add_compute_list_usages(VectorView p_trackers, VectorView p_usages); void add_compute_list_end(); - void add_draw_list_begin(FramebufferCache *p_framebuffer_cache, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, bool p_uses_color, bool p_uses_depth, uint32_t p_breadcrumb = 0); - void add_draw_list_begin(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, bool p_uses_color, bool p_uses_depth, uint32_t p_breadcrumb = 0); + void add_draw_list_begin(FramebufferCache *p_framebuffer_cache, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, BitField p_stages, uint32_t p_breadcrumb = 0); + void add_draw_list_begin(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, BitField p_stages, uint32_t p_breadcrumb = 0); void add_draw_list_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint32_t p_offset); void add_draw_list_bind_pipeline(RDD::PipelineID p_pipeline, BitField p_pipeline_stage_bits); void add_draw_list_bind_uniform_set(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index); diff --git a/servers/xr/xr_vrs.cpp 
b/servers/xr/xr_vrs.cpp index 3e283a42f061..6598a2a97a5a 100644 --- a/servers/xr/xr_vrs.cpp +++ b/servers/xr/xr_vrs.cpp @@ -91,13 +91,12 @@ void XRVRS::set_vrs_strength(float p_vrs_strength) { RID XRVRS::make_vrs_texture(const Size2 &p_target_size, const PackedVector2Array &p_eye_foci) { ERR_FAIL_COND_V(p_eye_foci.is_empty(), RID()); - int32_t texel_width = RD::get_singleton()->limit_get(RD::LIMIT_VRS_TEXEL_WIDTH); - int32_t texel_height = RD::get_singleton()->limit_get(RD::LIMIT_VRS_TEXEL_HEIGHT); + Size2i texel_size = RD::get_singleton()->vrs_get_texel_size(); // Should return sensible data or graphics API does not support VRS. - ERR_FAIL_COND_V(texel_width < 1 || texel_height < 1, RID()); + ERR_FAIL_COND_V(texel_size.x < 1 || texel_size.y < 1, RID()); - Size2 vrs_size = Size2(0.5 + p_target_size.x / texel_width, 0.5 + p_target_size.y / texel_height).round(); + Size2 vrs_size = Size2(0.5 + p_target_size.x / texel_size.x, 0.5 + p_target_size.y / texel_size.y).round(); // Make sure we have at least one pixel. vrs_size = vrs_size.maxf(1.0); @@ -130,16 +129,18 @@ RID XRVRS::make_vrs_texture(const Size2 &p_target_size, const PackedVector2Array Vector2i view_center; view_center.x = int(vrs_size.x * (eye_foci[i].x + 1.0) * 0.5); - view_center.y = int(vrs_size.y * (eye_foci[i].y + 1.0) * 0.5); + view_center.y = int(vrs_size.y * (-eye_foci[i].y + 1.0) * 0.5); int d = 0; for (int y = 0; y < vrs_sizei.y; y++) { for (int x = 0; x < vrs_sizei.x; x++) { + // Generate a density map that represents the distance to the view focus point. While this leaves the opportunities + // offered by the density map being different in each direction currently unused, it was found to give better tile + // distribution on hardware that supports the feature natively. This area is open to improvements in the future. 
Vector2 offset = Vector2(x - view_center.x, y - view_center.y); - real_t density = 255.0 * MAX(0.0, (Math::abs(offset.x) - min_radius) / outer_radius); - data_ptr[d++] = MIN(255, density); - density = 255.0 * MAX(0.0, (Math::abs(offset.y) - min_radius) / outer_radius); - data_ptr[d++] = MIN(255, density); + real_t density = MAX(offset.length() - min_radius, 0.0) / outer_radius; + data_ptr[d++] = CLAMP(255.0 * density, 0, 255); + data_ptr[d++] = CLAMP(255.0 * density, 0, 255); } } images.push_back(Image::create_from_data(vrs_sizei.x, vrs_sizei.y, false, Image::FORMAT_RG8, data));