From ffee765a1f65ef6226e1ca5acdf3d8d198701bc1 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Fri, 22 Dec 2023 11:40:00 +0100 Subject: [PATCH 01/38] Add compute sample. --- src/CMakeLists.txt | 1 + src/Samples/Compute/CMakeLists.txt | 110 +++++ .../Compute/shaders/compute_blur_cs.hlsl | 48 +++ .../Compute/shaders/compute_geom_fs.hlsl | 23 + .../Compute/shaders/compute_geom_vs.hlsl | 41 ++ src/Samples/Compute/src/main.cpp | 165 +++++++ src/Samples/Compute/src/sample.cpp | 403 ++++++++++++++++++ src/Samples/Compute/src/sample.h | 113 +++++ 8 files changed, 904 insertions(+) create mode 100644 src/Samples/Compute/CMakeLists.txt create mode 100644 src/Samples/Compute/shaders/compute_blur_cs.hlsl create mode 100644 src/Samples/Compute/shaders/compute_geom_fs.hlsl create mode 100644 src/Samples/Compute/shaders/compute_geom_vs.hlsl create mode 100644 src/Samples/Compute/src/main.cpp create mode 100644 src/Samples/Compute/src/sample.cpp create mode 100644 src/Samples/Compute/src/sample.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 68280db82..5cdc8a79a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -55,6 +55,7 @@ IF(BUILD_EXAMPLES) ADD_SUBDIRECTORY(Samples/Multisampling) ADD_SUBDIRECTORY(Samples/Multithreading) ADD_SUBDIRECTORY(Samples/Bindless) + ADD_SUBDIRECTORY(Samples/Compute) ENDIF(BUILD_EXAMPLES) # Include tests. diff --git a/src/Samples/Compute/CMakeLists.txt b/src/Samples/Compute/CMakeLists.txt new file mode 100644 index 000000000..9132a8fb7 --- /dev/null +++ b/src/Samples/Compute/CMakeLists.txt @@ -0,0 +1,110 @@ +################################################################################################### +##### ##### +##### LiteFX.Samples.Compute - Contains the compute shader sample. 
##### +##### ##### +################################################################################################### + +PROJECT(LiteFX.Samples.Compute VERSION ${LITEFX_VERSION} LANGUAGES CXX) +MESSAGE(STATUS "Initializing: ${PROJECT_NAME}...") + +IF(NOT BUILD_WITH_GLM) + MESSAGE(FATAL_ERROR "This sample requires the glm converters for the math module. Set the BUILD_WITH_GLM option to ON and retry.") +ENDIF(NOT BUILD_WITH_GLM) + +# Resolve package dependencies. +FIND_PACKAGE(glfw3 CONFIG REQUIRED) +FIND_PACKAGE(cli11 CONFIG REQUIRED) +FIND_PATH(RENDERDOC_INCLUDE_DIR "renderdoc_app.h") + +IF(NOT RENDERDOC_INCLUDE_DIR AND BUILD_EXAMPLES_RENDERDOC_LOADER) + MESSAGE(WARNING "RenderDoc is not installed on the system. Loader will not be created.") + SET(BUILD_EXAMPLES_RENDERDOC_LOADER OFF CACHE BOOL "" FORCE) +ENDIF(NOT RENDERDOC_INCLUDE_DIR AND BUILD_EXAMPLES_RENDERDOC_LOADER) + +CONFIGURE_FILE("../config.tmpl" "${CMAKE_CURRENT_BINARY_DIR}/src/config.h") + +# Collect header & source files. +SET(SAMPLE_BASIC_RENDERING_HEADERS + "src/sample.h" +) + +SET(SAMPLE_BASIC_RENDERING_SOURCES + "src/main.cpp" + "src/sample.cpp" +) + +# Add shared library project. +ADD_EXECUTABLE(${PROJECT_NAME} + ${SAMPLE_BASIC_RENDERING_HEADERS} + ${SAMPLE_BASIC_RENDERING_SOURCES} + "${CMAKE_CURRENT_BINARY_DIR}/src/config.h" +) + +# Create source groups for better code organization. +SOURCE_GROUP(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${SAMPLE_BASIC_RENDERING_HEADERS} ${SAMPLE_BASIC_RENDERING_SOURCES}) + +# Setup project properties. +SET_TARGET_PROPERTIES(${PROJECT_NAME} PROPERTIES + FOLDER "Samples" + VERSION ${LITEFX_VERSION} + SOVERSION ${LITEFX_YEAR} +) + +# Setup target include directories. +TARGET_INCLUDE_DIRECTORIES(${PROJECT_NAME} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/src/") + +IF(BUILD_EXAMPLES_RENDERDOC_LOADER) + TARGET_INCLUDE_DIRECTORIES(${PROJECT_NAME} PRIVATE ${RENDERDOC_INCLUDE_DIR}) +ENDIF(BUILD_EXAMPLES_RENDERDOC_LOADER) + +# Link project dependencies. 
+TARGET_LINK_LIBRARIES(${PROJECT_NAME} PRIVATE LiteFX.Core LiteFX.Math LiteFX.AppModel LiteFX.Rendering glfw CLI11::CLI11) + +IF(BUILD_VULKAN_BACKEND) + TARGET_LINK_LIBRARIES(${PROJECT_NAME} PRIVATE LiteFX.Backends.Vulkan) + + ADD_SHADER_MODULE(${PROJECT_NAME}.Vk.Shaders.Geom.VS SOURCE "shaders/compute_geom_vs.hlsl" LANGUAGE HLSL TYPE VERTEX COMPILE_AS SPIRV SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) + ADD_SHADER_MODULE(${PROJECT_NAME}.Vk.Shaders.Geom.FS SOURCE "shaders/compute_geom_fs.hlsl" LANGUAGE HLSL TYPE FRAGMENT COMPILE_AS SPIRV SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) + ADD_SHADER_MODULE(${PROJECT_NAME}.Vk.Shaders.Blur.CS SOURCE "shaders/compute_blur_cs.hlsl" LANGUAGE HLSL TYPE COMPUTE COMPILE_AS SPIRV SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) + SET_TARGET_PROPERTIES(${PROJECT_NAME}.Vk.Shaders.Geom.VS PROPERTIES FOLDER "Samples/Shaders/Vulkan") + SET_TARGET_PROPERTIES(${PROJECT_NAME}.Vk.Shaders.Geom.FS PROPERTIES FOLDER "Samples/Shaders/Vulkan") + SET_TARGET_PROPERTIES(${PROJECT_NAME}.Vk.Shaders.Blur.CS PROPERTIES FOLDER "Samples/Shaders/Vulkan") + + TARGET_LINK_SHADERS(${PROJECT_NAME} + INSTALL_DESTINATION "${CMAKE_INSTALL_BINARY_DIR}/${SHADER_DEFAULT_SUBDIR}" + SHADERS ${PROJECT_NAME}.Vk.Shaders.Geom.VS ${PROJECT_NAME}.Vk.Shaders.Geom.FS ${PROJECT_NAME}.Vk.Shaders.Blur.CS + ) +ENDIF(BUILD_VULKAN_BACKEND) + +IF(BUILD_DIRECTX_12_BACKEND) + TARGET_LINK_LIBRARIES(${PROJECT_NAME} PRIVATE LiteFX.Backends.DirectX12) + + ADD_SHADER_MODULE(${PROJECT_NAME}.Dx.Shaders.Geom.VS SOURCE "shaders/compute_geom_vs.hlsl" LANGUAGE HLSL TYPE VERTEX COMPILE_AS DXIL SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) + ADD_SHADER_MODULE(${PROJECT_NAME}.Dx.Shaders.Geom.PS SOURCE "shaders/compute_geom_fs.hlsl" LANGUAGE HLSL TYPE PIXEL COMPILE_AS DXIL SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) + ADD_SHADER_MODULE(${PROJECT_NAME}.Dx.Shaders.Blur.CS SOURCE "shaders/compute_blur_cs.hlsl" LANGUAGE HLSL TYPE COMPUTE COMPILE_AS DXIL 
SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) + SET_TARGET_PROPERTIES(${PROJECT_NAME}.Dx.Shaders.Geom.VS PROPERTIES FOLDER "Samples/Shaders/DirectX 12") + SET_TARGET_PROPERTIES(${PROJECT_NAME}.Dx.Shaders.Geom.PS PROPERTIES FOLDER "Samples/Shaders/DirectX 12") + SET_TARGET_PROPERTIES(${PROJECT_NAME}.Dx.Shaders.Blur.CS PROPERTIES FOLDER "Samples/Shaders/DirectX 12") + + TARGET_LINK_SHADERS(${PROJECT_NAME} + INSTALL_DESTINATION "${CMAKE_INSTALL_BINARY_DIR}/${SHADER_DEFAULT_SUBDIR}" + SHADERS ${PROJECT_NAME}.Dx.Shaders.Geom.VS ${PROJECT_NAME}.Dx.Shaders.Geom.PS ${PROJECT_NAME}.Dx.Shaders.Blur.CS + ) +ENDIF(BUILD_DIRECTX_12_BACKEND) + +# Re-use pre-compiled core header. +IF(BUILD_PRECOMPILED_HEADERS) + TARGET_PRECOMPILE_HEADERS(${PROJECT_NAME} REUSE_FROM LiteFX.Core) +ENDIF(BUILD_PRECOMPILED_HEADERS) + +# Setup installer. +INSTALL(TARGETS ${PROJECT_NAME} EXPORT LiteFXSamples + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBRARY_DIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBRARY_DIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINARY_DIR} + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDE_DIR} +) + +# Export config. +INSTALL(EXPORT LiteFXSamples DESTINATION ${CMAKE_INSTALL_EXPORT_DIR}) +EXPORT(TARGETS ${PROJECT_NAME} FILE LiteFXSamplesBasicRenderingConfig.cmake) \ No newline at end of file diff --git a/src/Samples/Compute/shaders/compute_blur_cs.hlsl b/src/Samples/Compute/shaders/compute_blur_cs.hlsl new file mode 100644 index 000000000..4bef4629b --- /dev/null +++ b/src/Samples/Compute/shaders/compute_blur_cs.hlsl @@ -0,0 +1,48 @@ +#pragma pack_matrix(row_major) + +#define KERNEL_SIZE 8 // NOTE: Must be even! +#define SIGMA 10 + +RWTexture2D FrameBuffer : register(u0, space0); + +// Evaluates gaussian bell curve for a value n. 
+float gauss(float sigma, int n) +{ + return exp(-pow(n, 2.0) / (2.0 * pow(sigma, 2.0))); +} + +[numthreads(8, 8, 1)] +void main(uint3 id : SV_DispatchThreadID) +{ + int width, height; + FrameBuffer.GetDimensions(width, height); + + // Compute 1D gaussian kernel. + float kernel[KERNEL_SIZE + 1]; + float weight = 0.0; + + [unroll((KERNEL_SIZE / 2) + 1)] + for (int n = 0; n <= KERNEL_SIZE / 2; ++n) + kernel[KERNEL_SIZE / 2 + n] = kernel[KERNEL_SIZE / 2 - n] = gauss(SIGMA, n); + + [unroll(KERNEL_SIZE + 1)] + for (int s = 0; s < KERNEL_SIZE + 1; ++s) + weight += kernel[s]; + + // Compute weighted color (separable kernel: the 2D coefficient is the product of the two 1D weights, normalized below by the squared 1D weight sum). + float3 color = float3(0.0, 0.0, 0.0); + + for (int x = -KERNEL_SIZE / 2; x <= KERNEL_SIZE / 2; ++x) + { + [unroll(KERNEL_SIZE + 1)] + for (int y = -KERNEL_SIZE / 2; y <= KERNEL_SIZE / 2; ++y) + { + // NOTE: Sample locations are clamped to the frame buffer using signed arithmetic, so pixels on the edges are sampled multiple times, which is inaccurate but serves the purpose of demonstration. + float coefficient = kernel[(KERNEL_SIZE / 2) + x] * kernel[(KERNEL_SIZE / 2) + y]; + int2 sampleLocation = int2(clamp(int(id.x) + x, 0, width - 1), clamp(int(id.y) + y, 0, height - 1)); + color += coefficient * FrameBuffer.Load(sampleLocation).rgb; + } + } + + FrameBuffer[id.xy] = float4(color / pow(weight, 2.0), 1.0); +} \ No newline at end of file diff --git a/src/Samples/Compute/shaders/compute_geom_fs.hlsl b/src/Samples/Compute/shaders/compute_geom_fs.hlsl new file mode 100644 index 000000000..5040528e3 --- /dev/null +++ b/src/Samples/Compute/shaders/compute_geom_fs.hlsl @@ -0,0 +1,23 @@ +#pragma pack_matrix(row_major) + +struct VertexData +{ + float4 Position : SV_POSITION; + float4 Color : COLOR; +}; + +struct FragmentData +{ + float4 Color : SV_TARGET; + float Depth : SV_DEPTH; +}; + +FragmentData main(VertexData input) +{ + FragmentData fragment; + + fragment.Depth = input.Position.z; + fragment.Color = input.Color; + + return fragment; +} \ No newline at end of file diff --git a/src/Samples/Compute/shaders/compute_geom_vs.hlsl
b/src/Samples/Compute/shaders/compute_geom_vs.hlsl new file mode 100644 index 000000000..1b21c5ff8 --- /dev/null +++ b/src/Samples/Compute/shaders/compute_geom_vs.hlsl @@ -0,0 +1,41 @@ +#pragma pack_matrix(row_major) + +struct VertexData +{ + float4 Position : SV_POSITION; + float4 Color : COLOR; +}; + +struct VertexInput +{ + //[[vk::location(0)]] + float3 Position : POSITION; + + //[[vk::location(1)]] + float4 Color : COLOR; +}; + +struct CameraData +{ + float4x4 ViewProjection; +}; + +struct TransformData +{ + float4x4 Model; +}; + +ConstantBuffer camera : register(b0, space0); +ConstantBuffer transform : register(b0, space1); + +VertexData main(in VertexInput input) +{ + VertexData vertex; + + float4 position = mul(float4(input.Position, 1.0), transform.Model); + vertex.Position = mul(position, camera.ViewProjection); + + vertex.Color = input.Color; + + return vertex; +} \ No newline at end of file diff --git a/src/Samples/Compute/src/main.cpp b/src/Samples/Compute/src/main.cpp new file mode 100644 index 000000000..bae82f6a8 --- /dev/null +++ b/src/Samples/Compute/src/main.cpp @@ -0,0 +1,165 @@ +#define LITEFX_DEFINE_GLOBAL_EXPORTS +#define LITEFX_AUTO_IMPORT_BACKEND_HEADERS +#include + +#include "sample.h" + +// CLI11 parses optional values as double by default, which yields an implicit-cast warning. +#pragma warning(disable: 4244) + +#include +#include +#include +#include + +#ifdef BUILD_EXAMPLES_DX12_PIX_LOADER +bool loadPixCapturer() +{ + // Check if Pix has already been loaded. + if (::GetModuleHandleW(L"WinPixGpuCapturer.dll") != 0) + return true; + + // Search for latest version of Pix. 
+ LPWSTR programFilesPath = nullptr; + ::SHGetKnownFolderPath(FOLDERID_ProgramFiles, KF_FLAG_DEFAULT, NULL, &programFilesPath); + + std::filesystem::path pixInstallationPath = programFilesPath; + pixInstallationPath /= "Microsoft PIX"; + + std::wstring newestVersionFound; + + for (auto const& directory_entry : std::filesystem::directory_iterator(pixInstallationPath)) + if (directory_entry.is_directory()) + if (newestVersionFound.empty() || newestVersionFound < directory_entry.path().filename().c_str()) + newestVersionFound = directory_entry.path().filename().c_str(); + + if (newestVersionFound.empty()) + return false; + + auto pixPath = pixInstallationPath / newestVersionFound / L"WinPixGpuCapturer.dll"; + std::wcout << "Found PIX: " << pixPath.c_str() << std::endl; + ::LoadLibraryW(pixPath.c_str()); + + return true; +} +#endif // BUILD_EXAMPLES_DX12_PIX_LOADER + +#ifdef BUILD_EXAMPLES_RENDERDOC_LOADER +RENDERDOC_API_1_5_0* renderDoc = nullptr; + +bool loadRenderDocApi() +{ + HMODULE renderDocModule = ::GetModuleHandleW(L"renderdoc.dll"); + + if (renderDocModule != 0) + { + pRENDERDOC_GetAPI RENDERDOC_GetAPI = (pRENDERDOC_GetAPI)::GetProcAddress(renderDocModule, "RENDERDOC_GetAPI"); + int result = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_5_0, reinterpret_cast(&::renderDoc)); + + return result == 1; + } + + return false; +} +#endif // BUILD_EXAMPLES_RENDERDOC_LOADER + +int main(const int argc, const char** argv) +{ +#if WIN32 + // Enable console colors. + HANDLE console = ::GetStdHandle(STD_OUTPUT_HANDLE); + DWORD consoleMode = 0; + + if (console == INVALID_HANDLE_VALUE || !::GetConsoleMode(console, &consoleMode)) + return ::GetLastError(); + + ::SetConsoleMode(console, consoleMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING); +#endif + + // Parse the command line parameters. 
+ const String appName = SampleApp::Name(); + + CLI::App app{ "Demonstrates compute shaders, dedicated queue usage and post-processing techniques.", appName }; + + Optional adapterId; + app.add_option("-a,--adapter", adapterId)->take_first(); + auto validationLayers = app.add_option("-l,--vk-validation-layers")->take_all(); + +#ifdef BUILD_EXAMPLES_DX12_PIX_LOADER + bool loadPix{ false }; + app.add_option("--dx-load-pix", loadPix)->take_first(); +#endif // BUILD_EXAMPLES_DX12_PIX_LOADER + +#ifdef BUILD_EXAMPLES_RENDERDOC_LOADER + bool loadRenderDoc{ false }; + app.add_option("--load-render-doc", loadRenderDoc)->take_first(); +#endif // BUILD_EXAMPLES_RENDERDOC_LOADER + + try + { + app.parse(argc, argv); + } + catch (const CLI::ParseError& ex) + { + return app.exit(ex); + } + +#ifdef BUILD_EXAMPLES_DX12_PIX_LOADER + if (loadPix && !loadPixCapturer()) + std::cout << "No PIX distribution found. Make sure you have installed PIX for Windows." << std::endl; +#endif // BUILD_EXAMPLES_DX12_PIX_LOADER + +#ifdef BUILD_EXAMPLES_RENDERDOC_LOADER + if (loadRenderDoc && !loadRenderDocApi()) + std::cout << "RenderDoc API could not be loaded. Make sure you have version 1.5 or higher installed on your system." << std::endl; +#endif // BUILD_EXAMPLES_RENDERDOC_LOADER + + // Turn the validation layers into a list. + Array enabledLayers; + + if (validationLayers->count() > 0) + for (const auto& result : validationLayers->results()) + enabledLayers.push_back(result); + + // Create glfw window. + if (!::glfwInit()) + throw std::runtime_error("Unable to initialize glfw."); + + ::glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API); + ::glfwWindowHint(GLFW_RESIZABLE, GLFW_TRUE); + + auto window = GlfwWindowPtr(::glfwCreateWindow(800, 600, appName.c_str(), nullptr, nullptr)); + + // Get the required Vulkan extensions from glfw. 
+ uint32_t extensions = 0; + const char** extensionNames = ::glfwGetRequiredInstanceExtensions(&extensions); + Array requiredExtensions; + + for (uint32_t i(0); i < extensions; ++i) + requiredExtensions.push_back(String(extensionNames[i])); + + // Create the app. + try + { + UniquePtr app = App::build(std::move(window), adapterId) + .logTo(LogLevel::Trace) + .logTo("sample.log", LogLevel::Debug) +#ifdef BUILD_VULKAN_BACKEND + .useBackend(requiredExtensions, enabledLayers) +#endif // BUILD_VULKAN_BACKEND +#ifdef BUILD_DIRECTX_12_BACKEND + .useBackend() +#endif // BUILD_DIRECTX_12_BACKEND + ; + + app->run(); + } + catch (const LiteFX::Exception& ex) + { + std::cerr << "\033[3;41;37mUnhandled exception: " << ex.what() << "\033[0m" << std::endl; + + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/src/Samples/Compute/src/sample.cpp b/src/Samples/Compute/src/sample.cpp new file mode 100644 index 000000000..fa309b8b3 --- /dev/null +++ b/src/Samples/Compute/src/sample.cpp @@ -0,0 +1,403 @@ +#include "sample.h" +#include + +enum DescriptorSets : UInt32 +{ + Constant = 0, // All buffers that are immutable. + PerFrame = 1, // All buffers that are updated each frame. 
+}; + +const Array vertices = +{ + { { -0.5f, -0.5f, 0.5f }, { 1.0f, 0.0f, 0.0f, 1.0f }, { 0.0f, 0.0f, 0.0f }, { 0.0f, 0.0f } }, + { { 0.5f, 0.5f, 0.5f }, { 0.0f, 1.0f, 0.0f, 1.0f }, { 0.0f, 0.0f, 0.0f }, { 0.0f, 0.0f } }, + { { -0.5f, 0.5f, -0.5f }, { 0.0f, 0.0f, 1.0f, 1.0f }, { 0.0f, 0.0f, 0.0f }, { 0.0f, 0.0f } }, + { { 0.5f, -0.5f, -0.5f }, { 1.0f, 1.0f, 1.0f, 1.0f }, { 0.0f, 0.0f, 0.0f }, { 0.0f, 0.0f } } +}; + +const Array indices = { 0, 2, 1, 0, 1, 3, 0, 3, 2, 1, 2, 3 }; + +struct CameraBuffer { + glm::mat4 ViewProjection; +} camera; + +struct TransformBuffer { + glm::mat4 World; +} transform; + +template requires + rtti::implements +struct FileExtensions { + static const String SHADER; +}; + +#ifdef BUILD_VULKAN_BACKEND +const String FileExtensions::SHADER = "spv"; +#endif // BUILD_VULKAN_BACKEND +#ifdef BUILD_DIRECTX_12_BACKEND +const String FileExtensions::SHADER = "dxi"; +#endif // BUILD_DIRECTX_12_BACKEND + +template requires + rtti::implements +void initRenderGraph(TRenderBackend* backend, SharedPtr& inputAssemblerState) +{ + using RenderPass = TRenderBackend::render_pass_type; + using RenderPipeline = TRenderBackend::render_pipeline_type; + using PipelineLayout = TRenderBackend::pipeline_layout_type; + using ShaderProgram = TRenderBackend::shader_program_type; + using InputAssembler = TRenderBackend::input_assembler_type; + using Rasterizer = TRenderBackend::rasterizer_type; + + // Get the default device. + auto device = backend->device("Default"); + + // Create input assembler state. 
+ SharedPtr inputAssembler = device->buildInputAssembler() + .topology(PrimitiveTopology::TriangleList) + .indexType(IndexType::UInt16) + .vertexBuffer(sizeof(Vertex), 0) + .withAttribute(0, BufferFormat::XYZ32F, offsetof(Vertex, Position), AttributeSemantic::Position) + .withAttribute(1, BufferFormat::XYZW32F, offsetof(Vertex, Color), AttributeSemantic::Color) + .add(); + + inputAssemblerState = std::static_pointer_cast(inputAssembler); + + // Create a geometry render pass. + UniquePtr renderPass = device->buildRenderPass("Opaque") + .renderTarget("Color Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, {0.1f, 0.1f, 0.1f, 1.f}, true, false, false) + .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, {1.f, 0.f, 0.f, 0.f}, true, false, false); + + // Create the shader program. + SharedPtr shaderProgram = device->buildShaderProgram() + .withVertexShaderModule("shaders/compute_geom_vs." + FileExtensions::SHADER) + .withFragmentShaderModule("shaders/compute_geom_fs." + FileExtensions::SHADER); + + // Create a render pipeline. + UniquePtr renderPipeline = device->buildRenderPipeline(*renderPass, "Geometry") + .inputAssembler(inputAssembler) + .rasterizer(device->buildRasterizer() + .polygonMode(PolygonMode::Solid) + .cullMode(CullMode::BackFaces) + .cullOrder(CullOrder::ClockWise) + .lineWidth(1.f)) + .layout(shaderProgram->reflectPipelineLayout()) + .shaderProgram(shaderProgram); + + // Add the resources to the device state. + device->state().add(std::move(renderPass)); + device->state().add(std::move(renderPipeline)); +} + +void SampleApp::initBuffers(IRenderBackend* backend) +{ + // Get a command buffer + auto commandBuffer = m_device->defaultQueue(QueueType::Transfer).createCommandBuffer(true); + + // Create the staging buffer. + // NOTE: The mapping works, because vertex and index buffers have an alignment of 0, so we can treat the whole buffer as a single element the size of the + // whole buffer. 
+ auto stagedVertices = m_device->factory().createVertexBuffer(*m_inputAssembler->vertexBufferLayout(0), BufferUsage::Staging, vertices.size()); + stagedVertices->map(vertices.data(), vertices.size() * sizeof(::Vertex), 0); + + // Create the actual vertex buffer and transfer the staging buffer into it. + auto vertexBuffer = m_device->factory().createVertexBuffer("Vertex Buffer", *m_inputAssembler->vertexBufferLayout(0), BufferUsage::Resource, vertices.size()); + commandBuffer->transfer(asShared(std::move(stagedVertices)), *vertexBuffer, 0, 0, vertices.size()); + + // Create the staging buffer for the indices. For infos about the mapping see the note about the vertex buffer mapping above. + auto stagedIndices = m_device->factory().createIndexBuffer(*m_inputAssembler->indexBufferLayout(), BufferUsage::Staging, indices.size()); + stagedIndices->map(indices.data(), indices.size() * m_inputAssembler->indexBufferLayout()->elementSize(), 0); + + // Create the actual index buffer and transfer the staging buffer into it. + auto indexBuffer = m_device->factory().createIndexBuffer("Index Buffer", *m_inputAssembler->indexBufferLayout(), BufferUsage::Resource, indices.size()); + commandBuffer->transfer(asShared(std::move(stagedIndices)), *indexBuffer, 0, 0, indices.size()); + + // Initialize the camera buffer. The camera buffer is constant, so we only need to create one buffer, that can be read from all frames. Since this is a + // write-once/read-multiple scenario, we also transfer the buffer to the more efficient memory heap on the GPU. + auto& geometryPipeline = m_device->state().pipeline("Geometry"); + auto& cameraBindingLayout = geometryPipeline.layout()->descriptorSet(DescriptorSets::Constant); + auto cameraBuffer = m_device->factory().createBuffer("Camera", cameraBindingLayout, 0, BufferUsage::Resource); + auto cameraBindings = cameraBindingLayout.allocate({ { .resource = *cameraBuffer } }); + + // Update the camera. 
Since the descriptor set already points to the proper buffer, all changes are implicitly visible. + this->updateCamera(*commandBuffer, *cameraBuffer); + + // Next, we create the descriptor sets for the transform buffer. The transform changes with every frame. Since we have three frames in flight, we + // create a buffer with three elements and bind the appropriate element to the descriptor set for every frame. + auto& transformBindingLayout = geometryPipeline.layout()->descriptorSet(DescriptorSets::PerFrame); + auto transformBuffer = m_device->factory().createBuffer("Transform", transformBindingLayout, 0, BufferUsage::Dynamic, 3); + auto transformBindings = transformBindingLayout.allocateMultiple(3, { + { { .resource = *transformBuffer, .firstElement = 0, .elements = 1 } }, + { { .resource = *transformBuffer, .firstElement = 1, .elements = 1 } }, + { { .resource = *transformBuffer, .firstElement = 2, .elements = 1 } } + }); + + // End and submit the command buffer. + m_transferFence = commandBuffer->submit(); + + // Add everything to the state. + m_device->state().add(std::move(vertexBuffer)); + m_device->state().add(std::move(indexBuffer)); + m_device->state().add(std::move(cameraBuffer)); + m_device->state().add(std::move(transformBuffer)); + m_device->state().add("Camera Bindings", std::move(cameraBindings)); + std::ranges::for_each(transformBindings, [this, i = 0](auto& binding) mutable { m_device->state().add(fmt::format("Transform Bindings {0}", i++), std::move(binding)); }); +} + +void SampleApp::updateCamera(const ICommandBuffer& commandBuffer, IBuffer& buffer) const +{ + // Calculate the camera view/projection matrix. 
+ auto aspectRatio = m_viewport->getRectangle().width() / m_viewport->getRectangle().height(); + glm::mat4 view = glm::lookAt(glm::vec3(1.5f, 1.5f, 1.5f), glm::vec3(0.0f, 0.0f, 0.0f), glm::vec3(0.0f, 0.0f, 1.0f)); + glm::mat4 projection = glm::perspective(glm::radians(60.0f), aspectRatio, 0.0001f, 1000.0f); + camera.ViewProjection = projection * view; + + // Create a staging buffer and use it to transfer the new uniform buffer contents to the camera buffer. + auto cameraStagingBuffer = m_device->factory().createBuffer(m_device->state().pipeline("Geometry"), DescriptorSets::Constant, 0, BufferUsage::Staging); + cameraStagingBuffer->map(reinterpret_cast(&camera), sizeof(camera)); + commandBuffer.transfer(asShared(std::move(cameraStagingBuffer)), buffer); +} + +void SampleApp::onStartup() +{ + // Run application loop until the window is closed. + while (!::glfwWindowShouldClose(m_window.get())) + { + this->handleEvents(); + this->drawFrame(); + this->updateWindowTitle(); + } +} + +void SampleApp::onShutdown() +{ + // Destroy the window through its owning pointer, so that the deleter runs exactly once and no dangling handle is left behind, then shut down GLFW. + m_window.reset(); + ::glfwTerminate(); +} + +void SampleApp::onInit() +{ + ::glfwSetWindowUserPointer(m_window.get(), this); + + ::glfwSetFramebufferSizeCallback(m_window.get(), [](GLFWwindow* window, int width, int height) { + auto app = reinterpret_cast(::glfwGetWindowUserPointer(window)); + app->resize(width, height); + }); + + ::glfwSetKeyCallback(m_window.get(), [](GLFWwindow* window, int key, int scancode, int action, int mods) { + auto app = reinterpret_cast(::glfwGetWindowUserPointer(window)); + app->keyDown(key, scancode, action, mods); + }); + + // Create a callback for backend startup and shutdown. + auto startCallback = [this](TBackend * backend) { + // Store the window handle. + auto window = m_window.get(); + + // Get the proper frame buffer size. + int width, height; + ::glfwGetFramebufferSize(window, &width, &height); + + // Create viewport and scissors.
+ m_viewport = makeShared(RectF(0.f, 0.f, static_cast(width), static_cast(height))); + m_scissor = makeShared(RectF(0.f, 0.f, static_cast(width), static_cast(height))); + + auto adapter = backend->findAdapter(m_adapterId); + + if (adapter == nullptr) + adapter = backend->findAdapter(std::nullopt); + + auto surface = backend->createSurface(::glfwGetWin32Window(window)); + + // Create the device. + m_device = backend->createDevice("Default", *adapter, std::move(surface), Format::B8G8R8A8_UNORM, m_viewport->getRectangle().extent(), 3); + + // Initialize resources. + ::initRenderGraph(backend, m_inputAssembler); + this->initBuffers(backend); + + return true; + }; + + auto stopCallback = [](TBackend * backend) { + backend->releaseDevice("Default"); + }; + +#ifdef BUILD_VULKAN_BACKEND + // Register the Vulkan backend de-/initializer. + this->onBackendStart(startCallback); + this->onBackendStop(stopCallback); +#endif // BUILD_VULKAN_BACKEND + +#ifdef BUILD_DIRECTX_12_BACKEND + // We do not need to provide a root signature for shader reflection (refer to the project wiki for more information: https://github.com/crud89/LiteFX/wiki/Shader-Development). + DirectX12ShaderProgram::suppressMissingRootSignatureWarning(); + + // Register the DirectX 12 backend de-/initializer. + this->onBackendStart(startCallback); + this->onBackendStop(stopCallback); +#endif // BUILD_DIRECTX_12_BACKEND +} + +void SampleApp::onResize(const void* sender, ResizeEventArgs e) +{ + // In order to re-create the swap chain, we need to wait for all frames in flight to finish. + m_device->wait(); + + // Resize the frame buffer and recreate the swap chain. + auto surfaceFormat = m_device->swapChain().surfaceFormat(); + auto renderArea = Size2d(e.width(), e.height()); + m_device->swapChain().reset(surfaceFormat, renderArea, 3); + + // NOTE: Important to do this in order, since dependencies (i.e. 
input attachments) are re-created and might be mapped to images that do no longer exist when a dependency + // gets re-created. This is hard to detect, since some frame buffers can have a constant size, that does not change with the render area and do not need to be + // re-created. We should either think of a clever implicit dependency management for this, or at least document this behavior! + m_device->state().renderPass("Opaque").resizeFrameBuffers(renderArea); + + // Also resize viewport and scissor. + m_viewport->setRectangle(RectF(0.f, 0.f, static_cast(e.width()), static_cast(e.height()))); + m_scissor->setRectangle(RectF(0.f, 0.f, static_cast(e.width()), static_cast(e.height()))); + + // Also update the camera. + auto& cameraBuffer = m_device->state().buffer("Camera"); + auto commandBuffer = m_device->defaultQueue(QueueType::Transfer).createCommandBuffer(true); + this->updateCamera(*commandBuffer, cameraBuffer); + m_transferFence = commandBuffer->submit(); +} + +void SampleApp::keyDown(int key, int scancode, int action, int mods) +{ +#ifdef BUILD_VULKAN_BACKEND + if (key == GLFW_KEY_F9 && action == GLFW_PRESS) + this->startBackend(); +#endif // BUILD_VULKAN_BACKEND + +#ifdef BUILD_DIRECTX_12_BACKEND + if (key == GLFW_KEY_F10 && action == GLFW_PRESS) + this->startBackend(); +#endif // BUILD_DIRECTX_12_BACKEND + + if (key == GLFW_KEY_F8 && action == GLFW_PRESS) + { + static RectI windowRect; + + // Check if we're switching from fullscreen to windowed or the other way around. + if (::glfwGetWindowMonitor(m_window.get()) == nullptr) + { + // Find the monitor, that contains most of the window. 
+ RectI clientRect, monitorRect; + GLFWmonitor* currentMonitor = nullptr; + const GLFWvidmode* currentVideoMode = nullptr; + int monitorCount; + + ::glfwGetWindowPos(m_window.get(), &clientRect.x(), &clientRect.y()); + ::glfwGetWindowSize(m_window.get(), &clientRect.width(), &clientRect.height()); + auto monitors = ::glfwGetMonitors(&monitorCount); + int highestOverlap = 0; + + for (int i(0); i < monitorCount; ++i) + { + auto monitor = monitors[i]; + auto mode = ::glfwGetVideoMode(monitor); + ::glfwGetMonitorPos(monitor, &monitorRect.x(), &monitorRect.y()); + monitorRect.width() = mode->width; + monitorRect.height() = mode->height; + + auto overlap = + std::max(0, std::min(clientRect.x() + clientRect.width(), monitorRect.x() + monitorRect.width()) - std::max(clientRect.x(), monitorRect.x())) * + std::max(0, std::min(clientRect.y() + clientRect.height(), monitorRect.y() + monitorRect.height()) - std::max(clientRect.y(), monitorRect.y())); + + if (highestOverlap < overlap) + { + highestOverlap = overlap; + currentMonitor = monitor; + currentVideoMode = mode; + } + } + + // Save the current window rect in order to restore it later. + windowRect = clientRect; + + // Switch to fullscreen. + if (currentVideoMode != nullptr) + ::glfwSetWindowMonitor(m_window.get(), currentMonitor, 0, 0, currentVideoMode->width, currentVideoMode->height, currentVideoMode->refreshRate); + } + else + { + // NOTE: If we were to launch in fullscreen mode, we should use something like `max(windowRect.width(), defaultWidth)`. + ::glfwSetWindowMonitor(m_window.get(), nullptr, windowRect.x(), windowRect.y(), windowRect.width(), windowRect.height(), 0); + } + } + + if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS) + { + // Close the window with the next loop. 
+ ::glfwSetWindowShouldClose(m_window.get(), GLFW_TRUE); + } +} + +void SampleApp::updateWindowTitle() +{ + static auto lastTime = std::chrono::high_resolution_clock::now(); + auto frameTime = std::chrono::duration(std::chrono::high_resolution_clock::now() - lastTime).count(); + + std::stringstream title; + title << this->name() << " | " << "Backend: " << this->activeBackend(BackendType::Rendering)->name() << " | " << static_cast(1000.0f / frameTime) << " FPS"; + + ::glfwSetWindowTitle(m_window.get(), title.str().c_str()); + lastTime = std::chrono::high_resolution_clock::now(); +} + +void SampleApp::handleEvents() +{ + ::glfwPollEvents(); +} + +void SampleApp::drawFrame() +{ + // Store the initial time this method has been called first. + static auto start = std::chrono::high_resolution_clock::now(); + + // Swap the back buffers for the next frame. + auto backBuffer = m_device->swapChain().swapBackBuffer(); + + // Query state. For performance reasons, those state variables should be cached for more complex applications, instead of looking them up every frame. + auto& renderPass = m_device->state().renderPass("Opaque"); + auto& geometryPipeline = m_device->state().pipeline("Geometry"); + auto& transformBuffer = m_device->state().buffer("Transform"); + auto& cameraBindings = m_device->state().descriptorSet("Camera Bindings"); + auto& transformBindings = m_device->state().descriptorSet(fmt::format("Transform Bindings {0}", backBuffer)); + auto& vertexBuffer = m_device->state().vertexBuffer("Vertex Buffer"); + auto& indexBuffer = m_device->state().indexBuffer("Index Buffer"); + + // Wait for all transfers to finish. + renderPass.commandQueue().waitFor(m_device->defaultQueue(QueueType::Transfer), m_transferFence); + + // Begin rendering on the render pass and use the only pipeline we've created for it. 
+ renderPass.begin(backBuffer); + auto commandBuffer = renderPass.activeFrameBuffer().commandBuffer(0); + commandBuffer->use(geometryPipeline); + commandBuffer->setViewports(m_viewport.get()); + commandBuffer->setScissors(m_scissor.get()); + + // Get the amount of time that has passed since the first frame. + auto now = std::chrono::high_resolution_clock::now(); + auto time = std::chrono::duration(now - start).count(); + + // Compute world transform and update the transform buffer. + transform.World = glm::rotate(glm::mat4(1.0f), time * glm::radians(42.0f), glm::vec3(0.0f, 0.0f, 1.0f)); + transformBuffer.map(reinterpret_cast(&transform), sizeof(transform), backBuffer); + + // Bind both descriptor sets to the pipeline. + commandBuffer->bind(cameraBindings); + commandBuffer->bind(transformBindings); + + // Bind the vertex and index buffers. + commandBuffer->bind(vertexBuffer); + commandBuffer->bind(indexBuffer); + + // Draw the object and present the frame by ending the render pass. + commandBuffer->drawIndexed(indexBuffer.elements()); + renderPass.end(); +} \ No newline at end of file diff --git a/src/Samples/Compute/src/sample.h b/src/Samples/Compute/src/sample.h new file mode 100644 index 000000000..efb4fbcc7 --- /dev/null +++ b/src/Samples/Compute/src/sample.h @@ -0,0 +1,113 @@ +#pragma once + +#define LITEFX_AUTO_IMPORT_BACKEND_HEADERS +#include + +#if (defined _WIN32 || defined WINCE) +# define GLFW_EXPOSE_NATIVE_WIN32 +#else +# pragma message ("Compute Sample: No supported surface platform detected.") +#endif + +#include +#include +#include + +#include "config.h" + +#ifdef BUILD_EXAMPLES_RENDERDOC_LOADER +#include + +extern RENDERDOC_API_1_5_0* renderDoc; +#endif + +using namespace LiteFX; +using namespace LiteFX::Rendering; +using namespace LiteFX::Rendering::Backends; + +struct GlfwWindowDeleter { + void operator()(GLFWwindow* ptr) noexcept { + ::glfwDestroyWindow(ptr); + } +}; + +typedef UniquePtr GlfwWindowPtr; + +class SampleApp : public LiteFX::App { 
+public: + static String Name() noexcept { return "LiteFX Sample: Compute"; } + String name() const noexcept override { return Name(); } + + static AppVersion Version() noexcept { return AppVersion(1, 0, 0, 0); } + AppVersion version() const noexcept override { return Version(); } + +private: + /// + /// Stores the GLFW window pointer. + /// + GlfwWindowPtr m_window; + + /// + /// Stores the preferred adapter ID (std::nullopt, if the default adapter is used). + /// + Optional m_adapterId; + + /// + /// Stores a reference of the input assembler state. + /// + SharedPtr m_inputAssembler; + + /// + /// Stores the viewport. + /// + SharedPtr m_viewport; + + /// + /// Stores the scissor. + /// + SharedPtr m_scissor; + + /// + /// Stores a pointer to the currently active device. + /// + IGraphicsDevice* m_device; + + /// + /// Stores the fence created at application load time. + /// + UInt64 m_transferFence = 0; + +public: + SampleApp(GlfwWindowPtr&& window, Optional adapterId) : + App(), m_window(std::move(window)), m_adapterId(adapterId), m_device(nullptr) + { + this->initializing += std::bind(&SampleApp::onInit, this); + this->startup += std::bind(&SampleApp::onStartup, this); + this->resized += std::bind(&SampleApp::onResize, this, std::placeholders::_1, std::placeholders::_2); + this->shutdown += std::bind(&SampleApp::onShutdown, this); + } + +private: + /// + /// Initializes the buffers. + /// + /// The render backend to use. + void initBuffers(IRenderBackend* backend); + + /// + /// Updates the camera buffer. This needs to be done whenever the frame buffer changes, since we need to pass changes in the aspect ratio to the view/projection matrix. 
+ /// + void updateCamera(const ICommandBuffer& commandBuffer, IBuffer& buffer) const; + +private: + void onInit(); + void onStartup(); + void onShutdown(); + void onResize(const void* sender, ResizeEventArgs e); + +public: + void keyDown(int key, int scancode, int action, int mods); + void handleEvents(); + void drawFrame(); + void updateWindowTitle(); +}; \ No newline at end of file From d774df80303350357faa71a8fd88016ad2cd9fe2 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Fri, 22 Dec 2023 12:24:39 +0100 Subject: [PATCH 02/38] Add shaders for presentation. --- src/Samples/Compute/CMakeLists.txt | 24 ++-- .../Compute/shaders/compute_present_fs.hlsl | 18 +++ .../Compute/shaders/compute_present_vs.hlsl | 36 ++++++ src/Samples/Compute/src/sample.cpp | 107 +++++++++++++----- 4 files changed, 151 insertions(+), 34 deletions(-) create mode 100644 src/Samples/Compute/shaders/compute_present_fs.hlsl create mode 100644 src/Samples/Compute/shaders/compute_present_vs.hlsl diff --git a/src/Samples/Compute/CMakeLists.txt b/src/Samples/Compute/CMakeLists.txt index 9132a8fb7..22c3ba6c1 100644 --- a/src/Samples/Compute/CMakeLists.txt +++ b/src/Samples/Compute/CMakeLists.txt @@ -63,32 +63,40 @@ TARGET_LINK_LIBRARIES(${PROJECT_NAME} PRIVATE LiteFX.Core LiteFX.Math LiteFX.App IF(BUILD_VULKAN_BACKEND) TARGET_LINK_LIBRARIES(${PROJECT_NAME} PRIVATE LiteFX.Backends.Vulkan) - ADD_SHADER_MODULE(${PROJECT_NAME}.Vk.Shaders.Geom.VS SOURCE "shaders/compute_geom_vs.hlsl" LANGUAGE HLSL TYPE VERTEX COMPILE_AS SPIRV SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) - ADD_SHADER_MODULE(${PROJECT_NAME}.Vk.Shaders.Geom.FS SOURCE "shaders/compute_geom_fs.hlsl" LANGUAGE HLSL TYPE FRAGMENT COMPILE_AS SPIRV SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) - ADD_SHADER_MODULE(${PROJECT_NAME}.Vk.Shaders.Blur.CS SOURCE "shaders/compute_blur_cs.hlsl" LANGUAGE HLSL TYPE COMPUTE COMPILE_AS SPIRV SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) + 
ADD_SHADER_MODULE(${PROJECT_NAME}.Vk.Shaders.Geom.VS SOURCE "shaders/compute_geom_vs.hlsl" LANGUAGE HLSL TYPE VERTEX COMPILE_AS SPIRV SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) + ADD_SHADER_MODULE(${PROJECT_NAME}.Vk.Shaders.Geom.FS SOURCE "shaders/compute_geom_fs.hlsl" LANGUAGE HLSL TYPE FRAGMENT COMPILE_AS SPIRV SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) + ADD_SHADER_MODULE(${PROJECT_NAME}.Vk.Shaders.Pres.VS SOURCE "shaders/compute_present_vs.hlsl" LANGUAGE HLSL TYPE VERTEX COMPILE_AS SPIRV SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) + ADD_SHADER_MODULE(${PROJECT_NAME}.Vk.Shaders.Pres.FS SOURCE "shaders/compute_present_fs.hlsl" LANGUAGE HLSL TYPE FRAGMENT COMPILE_AS SPIRV SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) + ADD_SHADER_MODULE(${PROJECT_NAME}.Vk.Shaders.Blur.CS SOURCE "shaders/compute_blur_cs.hlsl" LANGUAGE HLSL TYPE COMPUTE COMPILE_AS SPIRV SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) SET_TARGET_PROPERTIES(${PROJECT_NAME}.Vk.Shaders.Geom.VS PROPERTIES FOLDER "Samples/Shaders/Vulkan") SET_TARGET_PROPERTIES(${PROJECT_NAME}.Vk.Shaders.Geom.FS PROPERTIES FOLDER "Samples/Shaders/Vulkan") + SET_TARGET_PROPERTIES(${PROJECT_NAME}.Vk.Shaders.Pres.VS PROPERTIES FOLDER "Samples/Shaders/Vulkan") + SET_TARGET_PROPERTIES(${PROJECT_NAME}.Vk.Shaders.Pres.FS PROPERTIES FOLDER "Samples/Shaders/Vulkan") SET_TARGET_PROPERTIES(${PROJECT_NAME}.Vk.Shaders.Blur.CS PROPERTIES FOLDER "Samples/Shaders/Vulkan") TARGET_LINK_SHADERS(${PROJECT_NAME} INSTALL_DESTINATION "${CMAKE_INSTALL_BINARY_DIR}/${SHADER_DEFAULT_SUBDIR}" - SHADERS ${PROJECT_NAME}.Vk.Shaders.Geom.VS ${PROJECT_NAME}.Vk.Shaders.Geom.FS ${PROJECT_NAME}.Vk.Shaders.Blur.CS + SHADERS ${PROJECT_NAME}.Vk.Shaders.Geom.VS ${PROJECT_NAME}.Vk.Shaders.Geom.FS ${PROJECT_NAME}.Vk.Shaders.Blur.CS SHADERS ${PROJECT_NAME}.Vk.Shaders.Pres.VS ${PROJECT_NAME}.Vk.Shaders.Pres.FS ) ENDIF(BUILD_VULKAN_BACKEND) IF(BUILD_DIRECTX_12_BACKEND) TARGET_LINK_LIBRARIES(${PROJECT_NAME} PRIVATE 
LiteFX.Backends.DirectX12) - ADD_SHADER_MODULE(${PROJECT_NAME}.Dx.Shaders.Geom.VS SOURCE "shaders/compute_geom_vs.hlsl" LANGUAGE HLSL TYPE VERTEX COMPILE_AS DXIL SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) - ADD_SHADER_MODULE(${PROJECT_NAME}.Dx.Shaders.Geom.PS SOURCE "shaders/compute_geom_fs.hlsl" LANGUAGE HLSL TYPE PIXEL COMPILE_AS DXIL SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) - ADD_SHADER_MODULE(${PROJECT_NAME}.Dx.Shaders.Blur.CS SOURCE "shaders/compute_blur_cs.hlsl" LANGUAGE HLSL TYPE COMPUTE COMPILE_AS DXIL SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) + ADD_SHADER_MODULE(${PROJECT_NAME}.Dx.Shaders.Geom.VS SOURCE "shaders/compute_geom_vs.hlsl" LANGUAGE HLSL TYPE VERTEX COMPILE_AS DXIL SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) + ADD_SHADER_MODULE(${PROJECT_NAME}.Dx.Shaders.Geom.PS SOURCE "shaders/compute_geom_fs.hlsl" LANGUAGE HLSL TYPE PIXEL COMPILE_AS DXIL SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) + ADD_SHADER_MODULE(${PROJECT_NAME}.Dx.Shaders.Pres.VS SOURCE "shaders/compute_present_vs.hlsl" LANGUAGE HLSL TYPE VERTEX COMPILE_AS DXIL SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) + ADD_SHADER_MODULE(${PROJECT_NAME}.Dx.Shaders.Pres.PS SOURCE "shaders/compute_present_fs.hlsl" LANGUAGE HLSL TYPE PIXEL COMPILE_AS DXIL SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) + ADD_SHADER_MODULE(${PROJECT_NAME}.Dx.Shaders.Blur.CS SOURCE "shaders/compute_blur_cs.hlsl" LANGUAGE HLSL TYPE COMPUTE COMPILE_AS DXIL SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) SET_TARGET_PROPERTIES(${PROJECT_NAME}.Dx.Shaders.Geom.VS PROPERTIES FOLDER "Samples/Shaders/DirectX 12") SET_TARGET_PROPERTIES(${PROJECT_NAME}.Dx.Shaders.Geom.PS PROPERTIES FOLDER "Samples/Shaders/DirectX 12") + SET_TARGET_PROPERTIES(${PROJECT_NAME}.Dx.Shaders.Pres.VS PROPERTIES FOLDER "Samples/Shaders/DirectX 12") + SET_TARGET_PROPERTIES(${PROJECT_NAME}.Dx.Shaders.Pres.PS PROPERTIES FOLDER "Samples/Shaders/DirectX 12") 
SET_TARGET_PROPERTIES(${PROJECT_NAME}.Dx.Shaders.Blur.CS PROPERTIES FOLDER "Samples/Shaders/DirectX 12") TARGET_LINK_SHADERS(${PROJECT_NAME} INSTALL_DESTINATION "${CMAKE_INSTALL_BINARY_DIR}/${SHADER_DEFAULT_SUBDIR}" - SHADERS ${PROJECT_NAME}.Dx.Shaders.Geom.VS ${PROJECT_NAME}.Dx.Shaders.Geom.PS ${PROJECT_NAME}.Dx.Shaders.Blur.CS + SHADERS ${PROJECT_NAME}.Dx.Shaders.Geom.VS ${PROJECT_NAME}.Dx.Shaders.Geom.PS ${PROJECT_NAME}.Dx.Shaders.Blur.CS SHADERS ${PROJECT_NAME}.Dx.Shaders.Pres.VS ${PROJECT_NAME}.Dx.Shaders.Pres.PS ) ENDIF(BUILD_DIRECTX_12_BACKEND) diff --git a/src/Samples/Compute/shaders/compute_present_fs.hlsl b/src/Samples/Compute/shaders/compute_present_fs.hlsl new file mode 100644 index 000000000..fc17a9ab5 --- /dev/null +++ b/src/Samples/Compute/shaders/compute_present_fs.hlsl @@ -0,0 +1,18 @@ +#pragma pack_matrix(row_major) + +struct VertexData +{ + float4 Position : SV_POSITION; + float4 Color : COLOR; + float2 TexCoord : TEXCOORD0; +}; + +Texture2D FrameBuffer : register(t0, space0); + +float4 main(VertexData vertex) : SV_TARGET +{ + int width, height; + FrameBuffer.GetDimensions(width, height); + + return FrameBuffer.Load(int3(vertex.TexCoord * float2(width, height), 0)); +} \ No newline at end of file diff --git a/src/Samples/Compute/shaders/compute_present_vs.hlsl b/src/Samples/Compute/shaders/compute_present_vs.hlsl new file mode 100644 index 000000000..a00f35283 --- /dev/null +++ b/src/Samples/Compute/shaders/compute_present_vs.hlsl @@ -0,0 +1,36 @@ +#pragma pack_matrix(row_major) + +struct VertexData +{ + float4 Position : SV_POSITION; + float4 Color : COLOR; + float2 TexCoord : TEXCOORD0; +}; + +static float4 Corners[] = +{ + float4(-1.0f, -1.0f, 0.0f, 1.0f), + float4( 1.0f, -1.0f, 0.0f, 1.0f), + float4(-1.0f, 1.0f, 0.0f, 1.0f), + float4( 1.0f, 1.0f, 0.0f, 1.0f) +}; + +static float2 TexCoords[] = +{ + float2(0.0, 0.0), + float2(1.0, 0.0), + float2(0.0, 1.0), + float2(1.0, 1.0) +}; + +VertexData main(uint id : SV_InstanceID) +{ + // This shader is 
intended to be called without any inputs, just 4 instances for which the corners of the screen quad are created. + VertexData vertex; + + vertex.Position = Corners[id]; + vertex.TexCoord = TexCoords[id]; + vertex.Color = float4(0.0, 0.0, 0.0, 1.0); + + return vertex; +} \ No newline at end of file diff --git a/src/Samples/Compute/src/sample.cpp b/src/Samples/Compute/src/sample.cpp index fa309b8b3..185bae6d2 100644 --- a/src/Samples/Compute/src/sample.cpp +++ b/src/Samples/Compute/src/sample.cpp @@ -44,6 +44,7 @@ void initRenderGraph(TRenderBackend* backend, SharedPtr& inputA { using RenderPass = TRenderBackend::render_pass_type; using RenderPipeline = TRenderBackend::render_pipeline_type; + using ComputePipeline = TRenderBackend::compute_pipeline_type; using PipelineLayout = TRenderBackend::pipeline_layout_type; using ShaderProgram = TRenderBackend::shader_program_type; using InputAssembler = TRenderBackend::input_assembler_type; @@ -65,7 +66,7 @@ void initRenderGraph(TRenderBackend* backend, SharedPtr& inputA // Create a geometry render pass. UniquePtr renderPass = device->buildRenderPass("Opaque") - .renderTarget("Color Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, {0.1f, 0.1f, 0.1f, 1.f}, true, false, false) + .renderTarget("Color Target", RenderTargetType::Color, Format::R8G8B8A8_UNORM, {0.1f, 0.1f, 0.1f, 1.f}, true, false, false) .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, {1.f, 0.f, 0.f, 0.f}, true, false, false); // Create the shader program. @@ -84,9 +85,44 @@ void initRenderGraph(TRenderBackend* backend, SharedPtr& inputA .layout(shaderProgram->reflectPipelineLayout()) .shaderProgram(shaderProgram); + // Create the blur shader program. + SharedPtr blurProgram = device->buildShaderProgram() + .withComputeShaderModule("shaders/compute_blur_cs." + FileExtensions::SHADER); + + // Create a compute pipeline. 
+ UniquePtr blurPipeline = device->buildComputePipeline("Blur") + .layout(blurProgram->reflectPipelineLayout()) + .shaderProgram(blurProgram); + + // Build a present render pass. + UniquePtr presentPass = device->buildRenderPass("Present") + .renderTarget("Present Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, { 0.0f, 0.0f, 0.0f, 1.f }, false, false, false); + + // Create a shader program for resolving the blurred image. + SharedPtr presentProgram = device->buildShaderProgram() + .withVertexShaderModule("shaders/compute_present_vs." + FileExtensions::SHADER) + .withFragmentShaderModule("shaders/compute_present_fs." + FileExtensions::SHADER); + + // Create a render pipeline for presentation. + SharedPtr screenQuadAssembler = device->buildInputAssembler() + .topology(PrimitiveTopology::TriangleStrip); + + UniquePtr presentPipeline = device->buildRenderPipeline(*presentPass, "Present") + .inputAssembler(device->buildInputAssembler() + .topology(PrimitiveTopology::TriangleStrip)) + .rasterizer(device->buildRasterizer() + .polygonMode(PolygonMode::Solid) + .cullMode(CullMode::Disabled) + .cullOrder(CullOrder::ClockWise)) + .layout(presentProgram->reflectPipelineLayout()) + .shaderProgram(presentProgram); + // Add the resources to the device state. device->state().add(std::move(renderPass)); + device->state().add(std::move(presentPass)); device->state().add(std::move(renderPipeline)); + device->state().add(std::move(blurPipeline)); + device->state().add(std::move(presentPipeline)); } void SampleApp::initBuffers(IRenderBackend* backend) @@ -364,6 +400,7 @@ void SampleApp::drawFrame() // Query state. For performance reasons, those state variables should be cached for more complex applications, instead of looking them up every frame. 
auto& renderPass = m_device->state().renderPass("Opaque"); + auto& presentPass = m_device->state().renderPass("Present"); auto& geometryPipeline = m_device->state().pipeline("Geometry"); auto& transformBuffer = m_device->state().buffer("Transform"); auto& cameraBindings = m_device->state().descriptorSet("Camera Bindings"); @@ -371,33 +408,51 @@ void SampleApp::drawFrame() auto& vertexBuffer = m_device->state().vertexBuffer("Vertex Buffer"); auto& indexBuffer = m_device->state().indexBuffer("Index Buffer"); - // Wait for all transfers to finish. - renderPass.commandQueue().waitFor(m_device->defaultQueue(QueueType::Transfer), m_transferFence); - - // Begin rendering on the render pass and use the only pipeline we've created for it. - renderPass.begin(backBuffer); - auto commandBuffer = renderPass.activeFrameBuffer().commandBuffer(0); - commandBuffer->use(geometryPipeline); - commandBuffer->setViewports(m_viewport.get()); - commandBuffer->setScissors(m_scissor.get()); - - // Get the amount of time that has passed since the first frame. - auto now = std::chrono::high_resolution_clock::now(); - auto time = std::chrono::duration(now - start).count(); + // Draw geometry. + { + // Wait for all transfers to finish. + renderPass.commandQueue().waitFor(m_device->defaultQueue(QueueType::Transfer), m_transferFence); + + // Begin rendering on the render pass and use the only pipeline we've created for it. + renderPass.begin(backBuffer); + auto commandBuffer = renderPass.activeFrameBuffer().commandBuffer(0); + commandBuffer->use(geometryPipeline); + commandBuffer->setViewports(m_viewport.get()); + commandBuffer->setScissors(m_scissor.get()); + + // Get the amount of time that has passed since the first frame. + auto now = std::chrono::high_resolution_clock::now(); + auto time = std::chrono::duration(now - start).count(); + + // Compute world transform and update the transform buffer. 
+ transform.World = glm::rotate(glm::mat4(1.0f), time * glm::radians(42.0f), glm::vec3(0.0f, 0.0f, 1.0f)); + transformBuffer.map(reinterpret_cast(&transform), sizeof(transform), backBuffer); + + // Bind both descriptor sets to the pipeline. + commandBuffer->bind(cameraBindings); + commandBuffer->bind(transformBindings); + + // Bind the vertex and index buffers. + commandBuffer->bind(vertexBuffer); + commandBuffer->bind(indexBuffer); + + // Draw the object and present the frame by ending the render pass. + commandBuffer->drawIndexed(indexBuffer.elements()); + renderPass.end(); + } - // Compute world transform and update the transform buffer. - transform.World = glm::rotate(glm::mat4(1.0f), time * glm::radians(42.0f), glm::vec3(0.0f, 0.0f, 1.0f)); - transformBuffer.map(reinterpret_cast(&transform), sizeof(transform), backBuffer); + // Perform post processing on compute queue. + { - // Bind both descriptor sets to the pipeline. - commandBuffer->bind(cameraBindings); - commandBuffer->bind(transformBindings); + } - // Bind the vertex and index buffers. - commandBuffer->bind(vertexBuffer); - commandBuffer->bind(indexBuffer); + // Execute present pass. + { + presentPass.begin(backBuffer); + auto commandBuffer = presentPass.activeFrameBuffer().commandBuffer(0); - // Draw the object and present the frame by ending the render pass. - commandBuffer->drawIndexed(indexBuffer.elements()); - renderPass.end(); + // Draw 4 instances of "nothing" to create the screen quad and end the render pass. + commandBuffer->draw(0, 4); + presentPass.end(); + } } \ No newline at end of file From 5ff693edc12c28fea595cd33a732d50021dc16af Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Fri, 22 Dec 2023 12:42:08 +0100 Subject: [PATCH 03/38] Abstract input attachment mappings, so that they are accessible from `IRenderPass`. 
--- .../include/litefx/backends/dx12.hpp | 2 +- .../Vulkan/include/litefx/backends/vulkan.hpp | 2 +- src/Rendering/include/litefx/rendering.hpp | 21 +++++------------ .../include/litefx/rendering_api.hpp | 23 ++++++++++++++++++- 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/src/Backends/DirectX12/include/litefx/backends/dx12.hpp b/src/Backends/DirectX12/include/litefx/backends/dx12.hpp index 4803ce0d9..f787fd10e 100644 --- a/src/Backends/DirectX12/include/litefx/backends/dx12.hpp +++ b/src/Backends/DirectX12/include/litefx/backends/dx12.hpp @@ -1222,7 +1222,7 @@ namespace LiteFX::Rendering::Backends { /// The name of the render pass state resource. explicit DirectX12RenderPass(const DirectX12Device& device, const String& name = "") noexcept; - // IInputAttachmentMappingSource interface. + // InputAttachmentMappingSource interface. public: /// const DirectX12FrameBuffer& frameBuffer(UInt32 buffer) const override; diff --git a/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp b/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp index e72834e32..f2f50bfa1 100644 --- a/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp +++ b/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp @@ -1216,7 +1216,7 @@ namespace LiteFX::Rendering::Backends { /// The name of the render pass state resource. explicit VulkanRenderPass(const VulkanDevice& device, const String& name = "") noexcept; - // IInputAttachmentMappingSource interface. + // InputAttachmentMappingSource interface. public: /// const VulkanFrameBuffer& frameBuffer(UInt32 buffer) const override; diff --git a/src/Rendering/include/litefx/rendering.hpp b/src/Rendering/include/litefx/rendering.hpp index 2d6dcf84a..6acf0bd82 100644 --- a/src/Rendering/include/litefx/rendering.hpp +++ b/src/Rendering/include/litefx/rendering.hpp @@ -752,36 +752,27 @@ namespace LiteFX::Rendering { /// /// Represents the source for an input attachment mapping. 
/// - /// - /// This interface is implemented by a to return the frame buffer for a given back buffer. It is called by a - /// during initialization or re-creation, in order to resolve input attachment dependencies. - /// /// The type of the frame buffer. Must implement . template requires rtti::implements> - class IInputAttachmentMappingSource { + class InputAttachmentMappingSource : public IInputAttachmentMappingSource { public: using frame_buffer_type = TFrameBuffer; public: - virtual ~IInputAttachmentMappingSource() noexcept = default; + virtual ~InputAttachmentMappingSource() noexcept = default; public: - /// - /// Returns the frame buffer with the index provided in . - /// - /// The index of a frame buffer within the source. - /// The frame buffer with the index provided in . - /// Thrown, if the does not map to a frame buffer within the source. + /// virtual const frame_buffer_type& frameBuffer(UInt32 buffer) const = 0; }; /// /// Represents a mapping between a set of instances and the input attachments of a . /// - /// The type of the input attachment mapping source. Must implement . + /// The type of the input attachment mapping source. Must implement . 
template requires - rtti::implements> + rtti::implements> class IInputAttachmentMapping { public: using input_attachment_mapping_source_type = TInputAttachmentMappingSource; @@ -829,7 +820,7 @@ namespace LiteFX::Rendering { /*rtti::implements> &&*/ rtti::implements> /*&& rtti::implements>*/ - class RenderPass : public virtual StateResource, public IRenderPass, public IInputAttachmentMappingSource { + class RenderPass : public virtual StateResource, public IRenderPass, public InputAttachmentMappingSource { public: using IRenderPass::updateAttachments; diff --git a/src/Rendering/include/litefx/rendering_api.hpp b/src/Rendering/include/litefx/rendering_api.hpp index 67011a11b..09ae30706 100644 --- a/src/Rendering/include/litefx/rendering_api.hpp +++ b/src/Rendering/include/litefx/rendering_api.hpp @@ -4558,10 +4558,31 @@ namespace LiteFX::Rendering { virtual Enumerable getImages() const noexcept = 0; }; + /// + /// Interface for an input attachment mapping source. + /// + /// + /// This interface is implemented by a to return the frame buffer for a given back buffer. It is called by a + /// during initialization or re-creation, in order to resolve input attachment dependencies. + /// + class IInputAttachmentMappingSource { + public: + virtual ~IInputAttachmentMappingSource() noexcept = default; + + public: + /// + /// Returns the frame buffer with the index provided in . + /// + /// The index of a frame buffer within the source. + /// The frame buffer with the index provided in . + /// Thrown, if the does not map to a frame buffer within the source. + virtual const IFrameBuffer& frameBuffer(UInt32 buffer) const = 0; + }; + /// /// The interface for a render pass. /// - class LITEFX_RENDERING_API IRenderPass : public virtual IStateResource { + class LITEFX_RENDERING_API IRenderPass : public virtual IInputAttachmentMappingSource, public virtual IStateResource { public: /// /// Event arguments that are published to subscribers when a render pass is beginning. 
From df26bec2bcdbb1fa7ffa3fb7ff1c43b0c2f9736c Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Fri, 22 Dec 2023 13:01:23 +0100 Subject: [PATCH 04/38] Define overloads to access layout for const resources. --- src/Backends/DirectX12/include/litefx/backends/dx12.hpp | 1 + src/Backends/Vulkan/include/litefx/backends/vulkan.hpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Backends/DirectX12/include/litefx/backends/dx12.hpp b/src/Backends/DirectX12/include/litefx/backends/dx12.hpp index f787fd10e..5e33008d1 100644 --- a/src/Backends/DirectX12/include/litefx/backends/dx12.hpp +++ b/src/Backends/DirectX12/include/litefx/backends/dx12.hpp @@ -136,6 +136,7 @@ namespace LiteFX::Rendering::Backends { private: virtual ImageLayout& layout(UInt32 subresource) = 0; + virtual ImageLayout layout(UInt32 subresource) const = 0; }; /// diff --git a/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp b/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp index f2f50bfa1..040f22015 100644 --- a/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp +++ b/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp @@ -152,6 +152,7 @@ namespace LiteFX::Rendering::Backends { private: virtual ImageLayout& layout(UInt32 subresource) = 0; + virtual ImageLayout layout(UInt32 subresource) const = 0; }; /// From 9de18da8b6e4753735e6f5092e06b2b0edf23cdc Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Fri, 22 Dec 2023 13:11:05 +0100 Subject: [PATCH 05/38] Relax const-ness on swap chain and frame buffer images. 
--- .../DirectX12/include/litefx/backends/dx12.hpp | 8 ++++---- src/Backends/DirectX12/src/frame_buffer.cpp | 12 ++++++------ src/Backends/DirectX12/src/render_pass.cpp | 2 +- src/Backends/DirectX12/src/swapchain.cpp | 6 +++--- .../Vulkan/include/litefx/backends/vulkan.hpp | 8 ++++---- src/Backends/Vulkan/src/frame_buffer.cpp | 6 +++--- src/Backends/Vulkan/src/swapchain.cpp | 6 +++--- src/Rendering/include/litefx/rendering.hpp | 10 +++++----- src/Rendering/include/litefx/rendering_api.hpp | 12 ++++++------ 9 files changed, 35 insertions(+), 35 deletions(-) diff --git a/src/Backends/DirectX12/include/litefx/backends/dx12.hpp b/src/Backends/DirectX12/include/litefx/backends/dx12.hpp index 5e33008d1..ba8409703 100644 --- a/src/Backends/DirectX12/include/litefx/backends/dx12.hpp +++ b/src/Backends/DirectX12/include/litefx/backends/dx12.hpp @@ -1140,10 +1140,10 @@ namespace LiteFX::Rendering::Backends { SharedPtr commandBuffer(UInt32 index) const override; /// - Enumerable images() const noexcept override; + Enumerable images() const noexcept override; /// - const IDirectX12Image& image(UInt32 location) const override; + IDirectX12Image& image(UInt32 location) const override; public: /// @@ -1398,10 +1398,10 @@ namespace LiteFX::Rendering::Backends { const Size2d& renderArea() const noexcept override; /// - const IDirectX12Image* image(UInt32 backBuffer) const override; + IDirectX12Image* image(UInt32 backBuffer) const override; /// - Enumerable images() const noexcept override; + Enumerable images() const noexcept override; /// void present(const DirectX12FrameBuffer& frameBuffer) const override; diff --git a/src/Backends/DirectX12/src/frame_buffer.cpp b/src/Backends/DirectX12/src/frame_buffer.cpp index ccbf44c95..d595ab615 100644 --- a/src/Backends/DirectX12/src/frame_buffer.cpp +++ b/src/Backends/DirectX12/src/frame_buffer.cpp @@ -12,7 +12,7 @@ class DirectX12FrameBuffer::DirectX12FrameBufferImpl : public Implement> m_outputAttachments; - Array m_renderTargetViews; + 
Array m_renderTargetViews; Array> m_commandBuffers; ComPtr m_renderTargetHeap, m_depthStencilHeap; UInt32 m_renderTargetDescriptorSize, m_depthStencilDescriptorSize; @@ -71,7 +71,7 @@ class DirectX12FrameBuffer::DirectX12FrameBufferImpl : public ImplementCreateDepthStencilView(renderTargetView->handle().Get(), &depthStencilViewDesc, depthStencilViewDescriptor); + m_renderPass.device().handle()->CreateDepthStencilView(std::as_const(*renderTargetView).handle().Get(), &depthStencilViewDesc, depthStencilViewDescriptor); depthStencilViewDescriptor = depthStencilViewDescriptor.Offset(m_depthStencilDescriptorSize); } else @@ -108,7 +108,7 @@ class DirectX12FrameBuffer::DirectX12FrameBufferImpl : public ImplementCreateRenderTargetView(renderTargetView->handle().Get(), &renderTargetViewDesc, renderTargetViewDescriptor); + m_renderPass.device().handle()->CreateRenderTargetView(std::as_const(*renderTargetView).handle().Get(), &renderTargetViewDesc, renderTargetViewDescriptor); renderTargetViewDescriptor = renderTargetViewDescriptor.Offset(m_renderTargetDescriptorSize); } @@ -187,12 +187,12 @@ Enumerable> DirectX12FrameBuffer::comman return m_impl->m_commandBuffers; } -Enumerable DirectX12FrameBuffer::images() const noexcept +Enumerable DirectX12FrameBuffer::images() const noexcept { return m_impl->m_renderTargetViews; } -const IDirectX12Image& DirectX12FrameBuffer::image(UInt32 location) const +IDirectX12Image& DirectX12FrameBuffer::image(UInt32 location) const { if (location >= m_impl->m_renderTargetViews.size()) throw ArgumentOutOfRangeException("No render target is mapped to location {0}.", location); diff --git a/src/Backends/DirectX12/src/render_pass.cpp b/src/Backends/DirectX12/src/render_pass.cpp index 8368794d1..90150fae4 100644 --- a/src/Backends/DirectX12/src/render_pass.cpp +++ b/src/Backends/DirectX12/src/render_pass.cpp @@ -145,7 +145,7 @@ class DirectX12RenderPass::DirectX12RenderPassImpl : public 
Implementhandle()->SetName(Widen(m_renderTargets[renderTarget++].name()).c_str()); + std::as_const(*image).handle()->SetName(Widen(m_renderTargets[renderTarget++].name()).c_str()); auto secondaryCommandBuffers = frameBuffer->commandBuffers(); int commandBuffer = 0; diff --git a/src/Backends/DirectX12/src/swapchain.cpp b/src/Backends/DirectX12/src/swapchain.cpp index 7fc0e1ea7..d675237f8 100644 --- a/src/Backends/DirectX12/src/swapchain.cpp +++ b/src/Backends/DirectX12/src/swapchain.cpp @@ -261,7 +261,7 @@ const Size2d& DirectX12SwapChain::renderArea() const noexcept return m_impl->m_renderArea; } -const IDirectX12Image* DirectX12SwapChain::image(UInt32 backBuffer) const +IDirectX12Image* DirectX12SwapChain::image(UInt32 backBuffer) const { if (backBuffer >= m_impl->m_presentImages.size()) [[unlikely]] throw ArgumentOutOfRangeException("The back buffer must be a valid index."); @@ -269,9 +269,9 @@ const IDirectX12Image* DirectX12SwapChain::image(UInt32 backBuffer) const return m_impl->m_presentImages[backBuffer].get(); } -Enumerable DirectX12SwapChain::images() const noexcept +Enumerable DirectX12SwapChain::images() const noexcept { - return m_impl->m_presentImages | std::views::transform([](const UniquePtr& image) { return image.get(); }); + return m_impl->m_presentImages | std::views::transform([](UniquePtr& image) { return image.get(); }); } void DirectX12SwapChain::present(const DirectX12FrameBuffer& frameBuffer) const diff --git a/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp b/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp index 040f22015..7cc8509a9 100644 --- a/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp +++ b/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp @@ -1134,10 +1134,10 @@ namespace LiteFX::Rendering::Backends { Enumerable> commandBuffers() const noexcept override; /// - Enumerable images() const noexcept override; + Enumerable images() const noexcept override; /// - const IVulkanImage& image(UInt32 location) const 
override; + IVulkanImage& image(UInt32 location) const override; public: /// @@ -1391,10 +1391,10 @@ namespace LiteFX::Rendering::Backends { const Size2d& renderArea() const noexcept override; /// - const IVulkanImage* image(UInt32 backBuffer) const override; + IVulkanImage* image(UInt32 backBuffer) const override; /// - Enumerable images() const noexcept override; + Enumerable images() const noexcept override; /// void present(const VulkanFrameBuffer& frameBuffer) const override; diff --git a/src/Backends/Vulkan/src/frame_buffer.cpp b/src/Backends/Vulkan/src/frame_buffer.cpp index 50d971e3e..426790183 100644 --- a/src/Backends/Vulkan/src/frame_buffer.cpp +++ b/src/Backends/Vulkan/src/frame_buffer.cpp @@ -13,7 +13,7 @@ class VulkanFrameBuffer::VulkanFrameBufferImpl : public Implement> m_outputAttachments; - Array m_renderTargetViews; + Array m_renderTargetViews; Array> m_commandBuffers; Size2d m_size; VkSemaphore m_semaphore; @@ -175,12 +175,12 @@ Enumerable> VulkanFrameBuffer::commandBuffe return m_impl->m_commandBuffers; } -Enumerable VulkanFrameBuffer::images() const noexcept +Enumerable VulkanFrameBuffer::images() const noexcept { return m_impl->m_renderTargetViews; } -const IVulkanImage& VulkanFrameBuffer::image(UInt32 location) const +IVulkanImage& VulkanFrameBuffer::image(UInt32 location) const { if (location >= m_impl->m_renderTargetViews.size()) throw ArgumentOutOfRangeException("No render target is mapped to location {0}.", location); diff --git a/src/Backends/Vulkan/src/swapchain.cpp b/src/Backends/Vulkan/src/swapchain.cpp index b9d147ce5..42dc44957 100644 --- a/src/Backends/Vulkan/src/swapchain.cpp +++ b/src/Backends/Vulkan/src/swapchain.cpp @@ -899,7 +899,7 @@ const Size2d& VulkanSwapChain::renderArea() const noexcept return m_impl->m_renderArea; } -const IVulkanImage* VulkanSwapChain::image(UInt32 backBuffer) const +IVulkanImage* VulkanSwapChain::image(UInt32 backBuffer) const { if (backBuffer >= m_impl->m_presentImages.size()) [[unlikely]] throw 
ArgumentOutOfRangeException("The back buffer must be a valid index."); @@ -907,9 +907,9 @@ const IVulkanImage* VulkanSwapChain::image(UInt32 backBuffer) const return m_impl->m_presentImages[backBuffer].get(); } -Enumerable VulkanSwapChain::images() const noexcept +Enumerable VulkanSwapChain::images() const noexcept { - return m_impl->m_presentImages | std::views::transform([](const UniquePtr& image) { return image.get(); }); + return m_impl->m_presentImages | std::views::transform([](UniquePtr& image) { return image.get(); }); } void VulkanSwapChain::present(const VulkanFrameBuffer& frameBuffer) const diff --git a/src/Rendering/include/litefx/rendering.hpp b/src/Rendering/include/litefx/rendering.hpp index 6acf0bd82..83cdd7fbe 100644 --- a/src/Rendering/include/litefx/rendering.hpp +++ b/src/Rendering/include/litefx/rendering.hpp @@ -730,10 +730,10 @@ namespace LiteFX::Rendering { virtual SharedPtr commandBuffer(UInt32 index) const = 0; /// - virtual Enumerable images() const noexcept = 0; + virtual Enumerable images() const noexcept = 0; /// - virtual const image_type& image(UInt32 location) const = 0; + virtual image_type& image(UInt32 location) const = 0; private: inline SharedPtr getCommandBuffer(UInt32 index) const noexcept override { @@ -744,7 +744,7 @@ namespace LiteFX::Rendering { return this->commandBuffers(); } - inline Enumerable getImages() const noexcept override { + inline Enumerable getImages() const noexcept override { return this->images(); } }; @@ -889,7 +889,7 @@ namespace LiteFX::Rendering { public: /// - virtual Enumerable images() const noexcept = 0; + virtual Enumerable images() const noexcept = 0; /// /// Queues a present that gets executed after signals its readiness. 
@@ -903,7 +903,7 @@ namespace LiteFX::Rendering { } private: - inline Enumerable getImages() const noexcept override { + inline Enumerable getImages() const noexcept override { return this->images(); } }; diff --git a/src/Rendering/include/litefx/rendering_api.hpp b/src/Rendering/include/litefx/rendering_api.hpp index 09ae30706..ce4e9e688 100644 --- a/src/Rendering/include/litefx/rendering_api.hpp +++ b/src/Rendering/include/litefx/rendering_api.hpp @@ -4529,7 +4529,7 @@ namespace LiteFX::Rendering { /// Returns the images that store the output attachments for the render targets of the . /// /// The images that store the output attachments for the render targets of the . - inline Enumerable images() const noexcept { + inline Enumerable images() const noexcept { return this->getImages(); } @@ -4537,7 +4537,7 @@ namespace LiteFX::Rendering { /// Returns the image that stores the output attachment for the render target mapped the location passed with . /// /// The image that stores the output attachment for the render target mapped the location passed with . - virtual const IImage& image(UInt32 location) const = 0; + virtual IImage& image(UInt32 location) const = 0; public: /// @@ -4555,7 +4555,7 @@ namespace LiteFX::Rendering { private: virtual SharedPtr getCommandBuffer(UInt32 index) const noexcept = 0; virtual Enumerable> getCommandBuffers() const noexcept = 0; - virtual Enumerable getImages() const noexcept = 0; + virtual Enumerable getImages() const noexcept = 0; }; /// @@ -4879,13 +4879,13 @@ namespace LiteFX::Rendering { /// /// The index of the back buffer for which to return the swap chain present image. /// A pointer to the back buffers swap chain present image. - virtual const IImage* image(UInt32 backBuffer) const = 0; + virtual IImage* image(UInt32 backBuffer) const = 0; /// /// Returns an array of the swap chain present images. /// /// Returns an array of the swap chain present images. 
- inline Enumerable images() const noexcept { + inline Enumerable images() const noexcept { return this->getImages(); }; @@ -4939,7 +4939,7 @@ namespace LiteFX::Rendering { [[nodiscard]] virtual UInt32 swapBackBuffer() const = 0; private: - virtual Enumerable getImages() const noexcept = 0; + virtual Enumerable getImages() const noexcept = 0; virtual void addTimingEvent(SharedPtr timingEvent) = 0; }; From 08f8f4fd63611bf3fc324ead7771672ae93069c2 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Fri, 22 Dec 2023 13:56:59 +0100 Subject: [PATCH 06/38] Release finished command buffers on submit. --- src/Backends/DirectX12/src/queue.cpp | 32 ++++++++++++++++++-------- src/Backends/Vulkan/src/queue.cpp | 34 ++++++++++++++++++++-------- 2 files changed, 48 insertions(+), 18 deletions(-) diff --git a/src/Backends/DirectX12/src/queue.cpp b/src/Backends/DirectX12/src/queue.cpp index 6f19a56f9..ad920d0cd 100644 --- a/src/Backends/DirectX12/src/queue.cpp +++ b/src/Backends/DirectX12/src/queue.cpp @@ -73,6 +73,20 @@ class DirectX12Queue::DirectX12QueueImpl : public Implement { return commandQueue; } + + void releaseCommandBuffers(UInt64 beforeFence) + { + // Release all shared command buffers until this point. + const auto [from, to] = std::ranges::remove_if(m_submittedCommandBuffers, [this, &beforeFence](auto& pair) { + if (std::get<0>(pair) > beforeFence) + return false; + + this->m_parent->releaseSharedState(*std::get<1>(pair)); + return true; + }); + + this->m_submittedCommandBuffers.erase(from, to); + } }; // ------------------------------------------------------------------------------------------------ @@ -137,6 +151,10 @@ UInt64 DirectX12Queue::submit(SharedPtr commandBuf // Begin event. this->submitting(this, { { std::static_pointer_cast(commandBuffer) } }); + // Remove all previously submitted command buffers, that have already finished. 
+ auto completedValue = m_impl->m_fence->GetCompletedValue(); + m_impl->releaseCommandBuffers(completedValue); + // End the command buffer. commandBuffer->end(); @@ -172,6 +190,10 @@ UInt64 DirectX12Queue::submit(const Enumerable>>(); this->submitting(this, { buffers }); + // Remove all previously submitted command buffers, that have already finished. + auto completedValue = m_impl->m_fence->GetCompletedValue(); + m_impl->releaseCommandBuffers(completedValue); + // End and submit the command buffers. auto handles = [&commandBuffers]() -> std::generator { for (auto buffer = commandBuffers.begin(); buffer != commandBuffers.end(); ++buffer) { @@ -210,15 +232,7 @@ void DirectX12Queue::waitFor(UInt64 fence) const noexcept raiseIfFailed(hr, "Unable to register fence completion event."); } - // Release all shared command buffers until this point. - const auto [from, to] = std::ranges::remove_if(m_impl->m_submittedCommandBuffers, [this, &completedValue](auto& pair) { - if (std::get<0>(pair) > completedValue) - return false; - - this->releaseSharedState(*std::get<1>(pair)); - return true; - }); - m_impl->m_submittedCommandBuffers.erase(from, to); + m_impl->releaseCommandBuffers(fence); } void DirectX12Queue::waitFor(const DirectX12Queue& queue, UInt64 fence) const noexcept diff --git a/src/Backends/Vulkan/src/queue.cpp b/src/Backends/Vulkan/src/queue.cpp index 5e3f57e78..c7992d0ac 100644 --- a/src/Backends/Vulkan/src/queue.cpp +++ b/src/Backends/Vulkan/src/queue.cpp @@ -70,6 +70,20 @@ class VulkanQueue::VulkanQueueImpl : public Implement { return queue; } + + void releaseCommandBuffers(UInt64 beforeFence) + { + // Release all shared command buffers until this point. 
+ const auto [from, to] = std::ranges::remove_if(m_submittedCommandBuffers, [this, &beforeFence](auto& pair) { + if (std::get<0>(pair) > beforeFence) + return false; + + this->m_parent->releaseSharedState(*std::get<1>(pair)); + return true; + }); + + this->m_submittedCommandBuffers.erase(from, to); + } }; // ------------------------------------------------------------------------------------------------ @@ -166,6 +180,11 @@ UInt64 VulkanQueue::submit(SharedPtr commandBuffer, S // Begin event. this->submitting(this, { { std::static_pointer_cast(commandBuffer) } }); + // Remove all previously submitted command buffers, that have already finished. + UInt64 completedValue = 0; + ::vkGetSemaphoreCounterValue(m_impl->m_device.handle(), m_impl->m_timelineSemaphore, &completedValue); + m_impl->releaseCommandBuffers(completedValue); + // End the command buffer. commandBuffer->end(); @@ -231,6 +250,11 @@ UInt64 VulkanQueue::submit(const Enumerable auto buffers = commandBuffers | std::views::transform([](auto& buffer) { return std::static_pointer_cast(buffer); }); this->submitting(this, { buffers }); + // Remove all previously submitted command buffers, that have already finished. + UInt64 completedValue = 0; + ::vkGetSemaphoreCounterValue(m_impl->m_device.handle(), m_impl->m_timelineSemaphore, &completedValue); + m_impl->releaseCommandBuffers(completedValue); + // End the command buffer. auto handles = [&commandBuffers]() -> std::generator { for (auto buffer = commandBuffers.begin(); buffer != commandBuffers.end(); ++buffer) { @@ -301,15 +325,7 @@ void VulkanQueue::waitFor(UInt64 fence) const noexcept ::vkWaitSemaphores(m_impl->m_device.handle(), &waitInfo, std::numeric_limits::max()); } - // Release all shared command buffers until this point. 
- const auto [from, to] = std::ranges::remove_if(m_impl->m_submittedCommandBuffers, [this, &completedValue](auto& pair) { - if (std::get<0>(pair) > completedValue) - return false; - - this->releaseSharedState(*std::get<1>(pair)); - return true; - }); - m_impl->m_submittedCommandBuffers.erase(from, to); + m_impl->releaseCommandBuffers(fence); } void VulkanQueue::waitFor(const VulkanQueue& queue, UInt64 fence) const noexcept From e1f0225e31b41b0fc75f50eb81051013bcca4b54 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Fri, 22 Dec 2023 14:11:27 +0100 Subject: [PATCH 07/38] Use proper image layout for non-interop swap chain. --- src/Backends/Vulkan/src/swapchain.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Backends/Vulkan/src/swapchain.cpp b/src/Backends/Vulkan/src/swapchain.cpp index 42dc44957..420ea5138 100644 --- a/src/Backends/Vulkan/src/swapchain.cpp +++ b/src/Backends/Vulkan/src/swapchain.cpp @@ -128,7 +128,7 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { ::vkGetSwapchainImagesKHR(m_device.handle(), swapChain, &images, imageChain.data()); m_presentImages = imageChain | - std::views::transform([this, &actualRenderArea, &selectedFormat](const VkImage& image) { return makeUnique(m_device, image, Size3d{ actualRenderArea.width(), actualRenderArea.height(), 1 }, selectedFormat, ImageDimensions::DIM_2, 1, 1, MultiSamplingLevel::x1, false, ResourceState::Undefined); }) | + std::views::transform([this, &actualRenderArea, &selectedFormat](const VkImage& image) { return makeUnique(m_device, image, Size3d{ actualRenderArea.width(), actualRenderArea.height(), 1 }, selectedFormat, ImageDimensions::DIM_2, 1, 1, MultiSamplingLevel::x1, false, ImageLayout::Present); }) | std::ranges::to>>(); // Store state variables. 
From f69e3f953c76644848f15e7f4e94466e59224ead Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Fri, 22 Dec 2023 14:21:18 +0100 Subject: [PATCH 08/38] Prototype compute and present workflows. --- src/Samples/Compute/src/sample.cpp | 75 ++++++++++++++++++++++++++++-- 1 file changed, 70 insertions(+), 5 deletions(-) diff --git a/src/Samples/Compute/src/sample.cpp b/src/Samples/Compute/src/sample.cpp index 185bae6d2..70ab8128e 100644 --- a/src/Samples/Compute/src/sample.cpp +++ b/src/Samples/Compute/src/sample.cpp @@ -107,7 +107,7 @@ void initRenderGraph(TRenderBackend* backend, SharedPtr& inputA SharedPtr screenQuadAssembler = device->buildInputAssembler() .topology(PrimitiveTopology::TriangleStrip); - UniquePtr presentPipeline = device->buildRenderPipeline(*presentPass, "Present") + UniquePtr presentPipeline = device->buildRenderPipeline(*presentPass, "Resolve") .inputAssembler(device->buildInputAssembler() .topology(PrimitiveTopology::TriangleStrip)) .rasterizer(device->buildRasterizer() @@ -157,6 +157,7 @@ void SampleApp::initBuffers(IRenderBackend* backend) // Update the camera. Since the descriptor set already points to the proper buffer, all changes are implicitly visible. this->updateCamera(*commandBuffer, *cameraBuffer); + m_transferFence = commandBuffer->submit(); // Next, we create the descriptor sets for the transform buffer. The transform changes with every frame. Since we have three frames in flight, we // create a buffer with three elements and bind the appropriate element to the descriptor set for every frame. @@ -167,9 +168,15 @@ void SampleApp::initBuffers(IRenderBackend* backend) { { .resource = *transformBuffer, .firstElement = 1, .elements = 1 } }, { { .resource = *transformBuffer, .firstElement = 2, .elements = 1 } } }); - - // End and submit the command buffer. - m_transferFence = commandBuffer->submit(); + + // Allocate bindings for the blur pass and presentation. 
+ auto& blurPipeline = m_device->state().pipeline("Blur"); + auto& blurInputLayout = blurPipeline.layout()->descriptorSet(0); + auto blurBindings = blurInputLayout.allocate({ { } }); + + auto& presentPipeline = m_device->state().pipeline("Resolve"); + auto& presentInputLayout = presentPipeline.layout()->descriptorSet(0); + auto presentBindings = presentInputLayout.allocate({ { } }); // Add everything to the state. m_device->state().add(std::move(vertexBuffer)); @@ -177,6 +184,8 @@ void SampleApp::initBuffers(IRenderBackend* backend) m_device->state().add(std::move(cameraBuffer)); m_device->state().add(std::move(transformBuffer)); m_device->state().add("Camera Bindings", std::move(cameraBindings)); + m_device->state().add("Blur Bindings", std::move(blurBindings)); + m_device->state().add("Present Bindings", std::move(presentBindings)); std::ranges::for_each(transformBindings, [this, i = 0](auto& binding) mutable { m_device->state().add(fmt::format("Transform Bindings {0}", i++), std::move(binding)); }); } @@ -290,6 +299,7 @@ void SampleApp::onResize(const void* sender, ResizeEventArgs e) // gets re-created. This is hard to detect, since some frame buffers can have a constant size, that does not change with the render area and do not need to be // re-created. We should either think of a clever implicit dependency management for this, or at least document this behavior! m_device->state().renderPass("Opaque").resizeFrameBuffers(renderArea); + m_device->state().renderPass("Present").resizeFrameBuffers(renderArea); // Also resize viewport and scissor. m_viewport->setRectangle(RectF(0.f, 0.f, static_cast(e.width()), static_cast(e.height()))); @@ -401,9 +411,13 @@ void SampleApp::drawFrame() // Query state. For performance reasons, those state variables should be cached for more complex applications, instead of looking them up every frame. 
auto& renderPass = m_device->state().renderPass("Opaque"); auto& presentPass = m_device->state().renderPass("Present"); + auto& blurPipeline = m_device->state().pipeline("Blur"); auto& geometryPipeline = m_device->state().pipeline("Geometry"); + auto& resolvePipeline = m_device->state().pipeline("Resolve"); auto& transformBuffer = m_device->state().buffer("Transform"); auto& cameraBindings = m_device->state().descriptorSet("Camera Bindings"); + auto& blurBindings = m_device->state().descriptorSet("Blur Bindings"); + auto& presentBindings = m_device->state().descriptorSet("Present Bindings"); auto& transformBindings = m_device->state().descriptorSet(fmt::format("Transform Bindings {0}", backBuffer)); auto& vertexBuffer = m_device->state().vertexBuffer("Vertex Buffer"); auto& indexBuffer = m_device->state().indexBuffer("Index Buffer"); @@ -442,17 +456,68 @@ void SampleApp::drawFrame() } // Perform post processing on compute queue. + UInt64 postProcessFence = 0; + { + // Create a command buffer. + auto commandBuffer = m_device->defaultQueue(QueueType::Compute).createCommandBuffer(true); + commandBuffer->use(blurPipeline); + + // Get the image from the back buffer of the geometry pass. + auto& frameBuffer = renderPass.frameBuffer(backBuffer); + auto& image = frameBuffer.image(0); + + // Create a barrier that handles image transition. + auto barrier = m_device->makeBarrier(PipelineStage::Fragment, PipelineStage::Compute); + barrier->transition(image, ResourceAccess::RenderTarget, ResourceAccess::ShaderReadWrite, ImageLayout::ReadWrite); + commandBuffer->barrier(*barrier); + + // Bind the image to the texture descriptor. + blurBindings.update(0, image); + commandBuffer->bind(blurBindings); + // Dispatch the blur pass. + commandBuffer->dispatch({ static_cast(image.extent().x()), static_cast(image.extent().y()), 1 }); + + // Submit the command buffer. 
+ //m_device->defaultQueue(QueueType::Compute).waitFor(renderPass.commandQueue(), frameBuffer.lastFence()); + postProcessFence = commandBuffer->submit(); + + // NOTE: Since the queues might have different priorities, we have to wait for the dispatch either later by using a barrier, or explicitly somewhere. Otherwise more + // command buffers will be allocated than actually being processed. } // Execute present pass. { presentPass.begin(backBuffer); auto commandBuffer = presentPass.activeFrameBuffer().commandBuffer(0); + commandBuffer->use(resolvePipeline); + commandBuffer->setViewports(m_viewport.get()); + commandBuffer->setScissors(m_scissor.get()); + + // Get the image from the back buffer of the geometry pass, as it is the one that was previously handled in the compute queue. + auto& frameBuffer = renderPass.frameBuffer(backBuffer); + auto& image = frameBuffer.image(0); - // Draw 4 instances of "nothing" to create the screen quad and end the render pass. + // Transition the image back to a shader resource. + auto barrier = m_device->makeBarrier(PipelineStage::Compute, PipelineStage::Vertex); + barrier->transition(image, ResourceAccess::ShaderReadWrite, ResourceAccess::ShaderRead, ImageLayout::ShaderResource); + commandBuffer->barrier(*barrier); + + // Bind the image to the texture descriptor. + presentBindings.update(0, image); + commandBuffer->bind(presentBindings); + + // Draw 4 instances of "nothing" to create the screen quad. commandBuffer->draw(0, 4); + + // Important: transition the image back to a render target, for the next iteration of this back buffer to be able to render into it. + barrier = m_device->makeBarrier(PipelineStage::Fragment, PipelineStage::Fragment); + barrier->transition(image, ResourceAccess::ShaderRead, ResourceAccess::RenderTarget, ImageLayout::RenderTarget); + commandBuffer->barrier(*barrier); + + // End the render pass in order to present the image. 
+ //presentPass.commandQueue().waitFor(m_device->defaultQueue(QueueType::Compute), postProcessFence); presentPass.end(); } } \ No newline at end of file From 11522bc43b841188dd920d43c554fcb39bc3d199 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Sun, 24 Dec 2023 13:36:27 +0100 Subject: [PATCH 09/38] Return the last frame buffer fence when ending a render pass. --- .../include/litefx/backends/dx12.hpp | 22 ++++++++++--------- src/Backends/DirectX12/src/frame_buffer.cpp | 7 +++++- src/Backends/DirectX12/src/render_pass.cpp | 9 +++++--- .../Vulkan/include/litefx/backends/vulkan.hpp | 16 ++++++++------ src/Backends/Vulkan/src/frame_buffer.cpp | 7 +++++- src/Backends/Vulkan/src/render_pass.cpp | 9 +++++--- .../include/litefx/rendering_api.hpp | 12 +++++++++- 7 files changed, 56 insertions(+), 26 deletions(-) diff --git a/src/Backends/DirectX12/include/litefx/backends/dx12.hpp b/src/Backends/DirectX12/include/litefx/backends/dx12.hpp index 495317532..61691b161 100644 --- a/src/Backends/DirectX12/include/litefx/backends/dx12.hpp +++ b/src/Backends/DirectX12/include/litefx/backends/dx12.hpp @@ -1083,7 +1083,7 @@ namespace LiteFX::Rendering::Backends { /// A pointer to the descriptor heap that allocates the render targets for this frame buffer. /// /// - virtual ID3D12DescriptorHeap* renderTargetHeap() const noexcept; + ID3D12DescriptorHeap* renderTargetHeap() const noexcept; /// /// Returns a pointer to the descriptor heap that allocates the depth/stencil views for this frame buffer. @@ -1094,33 +1094,35 @@ namespace LiteFX::Rendering::Backends { /// A pointer to the descriptor heap that allocates the depth/stencil views for this frame buffer. /// /// - virtual ID3D12DescriptorHeap* depthStencilTargetHeap() const noexcept; + ID3D12DescriptorHeap* depthStencilTargetHeap() const noexcept; /// /// Returns the size of a descriptor for a render target within the frame buffer. 
/// /// The size of a descriptor for a render target within the frame buffer. /// - virtual UInt32 renderTargetDescriptorSize() const noexcept; + UInt32 renderTargetDescriptorSize() const noexcept; /// /// Returns the size of a descriptor for a depth/stencil view within the frame buffer. /// /// The size of a descriptor for a depth/stencil view within the frame buffer. /// - virtual UInt32 depthStencilTargetDescriptorSize() const noexcept; + UInt32 depthStencilTargetDescriptorSize() const noexcept; - /// - /// Returns a reference of the last fence value for the frame buffer. + /// Returns a reference to the value of the fence that indicates the last submission drawing into the frame buffer. /// /// - /// The frame buffer must only be re-used, if this fence is reached in the graphics queue. + /// The frame buffer must only be re-used if this fence has been passed in the command queue that executes the parent render pass. /// - /// A reference of the last fence value for the frame buffer. - virtual UInt64& lastFence() const noexcept; + /// A reference to the of the last submission targeting the frame buffer. + UInt64& lastFence() noexcept; // FrameBuffer interface. 
public: + /// + UInt64 lastFence() const noexcept override; + /// UInt32 bufferIndex() const noexcept override; @@ -1268,7 +1270,7 @@ namespace LiteFX::Rendering::Backends { void begin(UInt32 buffer) override; /// - void end() const override; + UInt64 end() const override; /// void resizeFrameBuffers(const Size2d& renderArea) override; diff --git a/src/Backends/DirectX12/src/frame_buffer.cpp b/src/Backends/DirectX12/src/frame_buffer.cpp index 9731a2b7b..f121e2a37 100644 --- a/src/Backends/DirectX12/src/frame_buffer.cpp +++ b/src/Backends/DirectX12/src/frame_buffer.cpp @@ -149,7 +149,12 @@ UInt32 DirectX12FrameBuffer::depthStencilTargetDescriptorSize() const noexcept return m_impl->m_depthStencilDescriptorSize; } -UInt64& DirectX12FrameBuffer::lastFence() const noexcept +UInt64& DirectX12FrameBuffer::lastFence() noexcept +{ + return m_impl->m_lastFence; +} + +UInt64 DirectX12FrameBuffer::lastFence() const noexcept { return m_impl->m_lastFence; } diff --git a/src/Backends/DirectX12/src/render_pass.cpp b/src/Backends/DirectX12/src/render_pass.cpp index 906d0c8ca..c44c6d44a 100644 --- a/src/Backends/DirectX12/src/render_pass.cpp +++ b/src/Backends/DirectX12/src/render_pass.cpp @@ -20,7 +20,7 @@ class DirectX12RenderPass::DirectX12RenderPassImpl : public Implement m_inputAttachments; Array> m_frameBuffers; Array> m_beginCommandBuffers, m_endCommandBuffers; - const DirectX12FrameBuffer* m_activeFrameBuffer = nullptr; + DirectX12FrameBuffer* m_activeFrameBuffer = nullptr; UInt32 m_backBuffer{ 0 }; const RenderTarget* m_presentTarget = nullptr; const RenderTarget* m_depthStencilTarget = nullptr; @@ -340,7 +340,7 @@ void DirectX12RenderPass::begin(UInt32 buffer) this->beginning(this, { buffer }); } -void DirectX12RenderPass::end() const +UInt64 DirectX12RenderPass::end() const { // Check if we are running. 
if (m_impl->m_activeFrameBuffer == nullptr) @@ -420,7 +420,7 @@ void DirectX12RenderPass::end() const commandBuffers.push_back(endCommandBuffer); // Submit and store the fence. - m_impl->m_activeFrameBuffer->lastFence() = m_impl->m_queue->submit(commandBuffers | std::ranges::to>>()); + UInt64 fence = m_impl->m_activeFrameBuffer->lastFence() = m_impl->m_queue->submit(commandBuffers | std::ranges::to>>()); if (!m_impl->m_name.empty()) m_impl->m_queue->endDebugRegion(); @@ -433,6 +433,9 @@ void DirectX12RenderPass::end() const // Reset the frame buffer. m_impl->m_activeFrameBuffer = nullptr; + + // Return the last fence of the frame buffer. + return fence; } void DirectX12RenderPass::resizeFrameBuffers(const Size2d& renderArea) diff --git a/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp b/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp index 4b532011b..3c2d07c1b 100644 --- a/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp +++ b/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp @@ -1102,19 +1102,21 @@ namespace LiteFX::Rendering::Backends { /// Returns a reference of the semaphore, that can be used to signal, that the frame buffer is finished. /// /// A reference of the semaphore, that can be used to signal, that the frame buffer is finished. - virtual const VkSemaphore& semaphore() const noexcept; + const VkSemaphore& semaphore() const noexcept; - /// - /// Returns a reference of the last fence value for the frame buffer. + /// Returns a reference to the value of the fence that indicates the last submission drawing into the frame buffer. /// /// - /// The frame buffer must only be re-used, if this fence is reached in the graphics queue. + /// The frame buffer must only be re-used if this fence has been passed in the command queue that executes the parent render pass. /// - /// A reference of the last fence value for the frame buffer. 
- virtual UInt64& lastFence() const noexcept; + /// A reference to the of the last submission targeting the frame buffer. + UInt64& lastFence() noexcept; // FrameBuffer interface. public: + /// + UInt64 lastFence() const noexcept override; + /// UInt32 bufferIndex() const noexcept override; @@ -1262,7 +1264,7 @@ namespace LiteFX::Rendering::Backends { void begin(UInt32 buffer) override; /// - void end() const override; + UInt64 end() const override; /// void resizeFrameBuffers(const Size2d& renderArea) override; diff --git a/src/Backends/Vulkan/src/frame_buffer.cpp b/src/Backends/Vulkan/src/frame_buffer.cpp index f622e21c7..a264b1073 100644 --- a/src/Backends/Vulkan/src/frame_buffer.cpp +++ b/src/Backends/Vulkan/src/frame_buffer.cpp @@ -137,7 +137,12 @@ const VkSemaphore& VulkanFrameBuffer::semaphore() const noexcept return m_impl->m_semaphore; } -UInt64& VulkanFrameBuffer::lastFence() const noexcept +UInt64& VulkanFrameBuffer::lastFence() noexcept +{ + return m_impl->m_lastFence; +} + +UInt64 VulkanFrameBuffer::lastFence() const noexcept { return m_impl->m_lastFence; } diff --git a/src/Backends/Vulkan/src/render_pass.cpp b/src/Backends/Vulkan/src/render_pass.cpp index 2a88f9fc3..bf0529c47 100644 --- a/src/Backends/Vulkan/src/render_pass.cpp +++ b/src/Backends/Vulkan/src/render_pass.cpp @@ -18,7 +18,7 @@ class VulkanRenderPass::VulkanRenderPassImpl : public Implement m_inputAttachments; Array> m_frameBuffers; Array> m_primaryCommandBuffers; - const VulkanFrameBuffer* m_activeFrameBuffer = nullptr; + VulkanFrameBuffer* m_activeFrameBuffer = nullptr; SharedPtr m_activeCommandBuffer; Array m_clearValues; UInt32 m_backBuffer{ 0 }; @@ -421,7 +421,7 @@ void VulkanRenderPass::begin(UInt32 buffer) VkRenderPassBeginInfo renderPassInfo{}; renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; renderPassInfo.renderPass = this->handle(); - renderPassInfo.framebuffer = frameBuffer->handle(); + renderPassInfo.framebuffer = std::as_const(*frameBuffer).handle(); 
renderPassInfo.renderArea.offset = { 0, 0 }; renderPassInfo.renderArea.extent.width = static_cast(frameBuffer->getWidth()); renderPassInfo.renderArea.extent.height = static_cast(frameBuffer->getHeight()); @@ -437,7 +437,7 @@ void VulkanRenderPass::begin(UInt32 buffer) this->beginning(this, { buffer }); } -void VulkanRenderPass::end() const +UInt64 VulkanRenderPass::end() const { // Check if we are running. if (m_impl->m_activeFrameBuffer == nullptr) [[unlikely]] @@ -475,6 +475,9 @@ void VulkanRenderPass::end() const // Reset the frame buffer. m_impl->m_activeFrameBuffer = nullptr; m_impl->m_activeCommandBuffer = nullptr; + + // Return the last fence. + return frameBuffer->lastFence(); } void VulkanRenderPass::resizeFrameBuffers(const Size2d& renderArea) diff --git a/src/Rendering/include/litefx/rendering_api.hpp b/src/Rendering/include/litefx/rendering_api.hpp index ce4e9e688..6d65d65d0 100644 --- a/src/Rendering/include/litefx/rendering_api.hpp +++ b/src/Rendering/include/litefx/rendering_api.hpp @@ -4468,6 +4468,15 @@ namespace LiteFX::Rendering { virtual ~IFrameBuffer() noexcept = default; public: + /// + /// Returns the value of the fence that indicates the last submission drawing into the frame buffer. + /// + /// + /// The frame buffer must only be re-used if this fence has been passed in the command queue that executes the parent render pass. + /// + /// The value of the last submission targeting the frame buffer. + virtual UInt64 lastFence() const noexcept = 0; + /// /// Returns the index of the buffer within the . /// @@ -4717,7 +4726,8 @@ namespace LiteFX::Rendering { /// If the frame buffer has a present render target, this causes the render pass to synchronize with the swap chain and issue a present command. /// /// The back buffer to use. Typically this is the same as the value returned from . - virtual void end() const = 0; + /// The value of the fence that indicates the end of the render pass. 
+ virtual UInt64 end() const = 0; /// /// Resets the frame buffers of the render pass. From 770d91adf0678aa28f5bc08004673533512bb0b5 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Sun, 24 Dec 2023 13:38:03 +0100 Subject: [PATCH 10/38] Set argument name when throwing. --- src/Backends/DirectX12/src/buffer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Backends/DirectX12/src/buffer.cpp b/src/Backends/DirectX12/src/buffer.cpp index 305973746..6eb6ff4c1 100644 --- a/src/Backends/DirectX12/src/buffer.cpp +++ b/src/Backends/DirectX12/src/buffer.cpp @@ -224,7 +224,7 @@ UniquePtr DirectX12VertexBuffer::allocate(const DirectX1 UniquePtr DirectX12VertexBuffer::allocate(const String& name, const DirectX12VertexBufferLayout& layout, AllocatorPtr allocator, UInt32 elements, const D3D12_RESOURCE_DESC1& resourceDesc, const D3D12MA::ALLOCATION_DESC& allocationDesc) { if (allocator == nullptr) [[unlikely]] - throw ArgumentNotInitializedException("The allocator must be initialized."); + throw ArgumentNotInitializedException("allocator", "The allocator must be initialized."); ComPtr resource; D3D12MA::Allocation* allocation; @@ -294,7 +294,7 @@ UniquePtr DirectX12IndexBuffer::allocate(const DirectX12I UniquePtr DirectX12IndexBuffer::allocate(const String& name, const DirectX12IndexBufferLayout& layout, AllocatorPtr allocator, UInt32 elements, const D3D12_RESOURCE_DESC1& resourceDesc, const D3D12MA::ALLOCATION_DESC& allocationDesc) { if (allocator == nullptr) [[unlikely]] - throw ArgumentNotInitializedException("The allocator must be initialized."); + throw ArgumentNotInitializedException("allocator", "The allocator must be initialized."); ComPtr resource; D3D12MA::Allocation* allocation; From 6d66e5b0598585dde1adb01a4feda7772890a709 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Fri, 29 Dec 2023 11:39:09 +0100 Subject: [PATCH 11/38] Refactor render target 
creation flags. --- README.md | 4 +- docs/tutorials/quick-start.markdown | 6 +- docs/website/content/homepage/fluent.md | 3 +- .../include/litefx/backends/dx12.hpp | 4 +- src/Backends/DirectX12/src/factory.cpp | 24 ++-- src/Backends/DirectX12/src/frame_buffer.cpp | 2 +- .../Vulkan/include/litefx/backends/vulkan.hpp | 4 +- src/Backends/Vulkan/src/factory.cpp | 29 ++-- src/Backends/Vulkan/src/frame_buffer.cpp | 2 +- src/Rendering/include/litefx/rendering.hpp | 12 +- .../include/litefx/rendering_api.hpp | 125 +++++++++++++++--- .../include/litefx/rendering_builders.hpp | 70 ++++------ src/Rendering/src/render_target.cpp | 54 +++++--- src/Samples/BasicRendering/src/sample.cpp | 4 +- src/Samples/Bindless/src/sample.cpp | 4 +- src/Samples/Multisampling/src/sample.cpp | 4 +- src/Samples/Multithreading/src/sample.cpp | 4 +- src/Samples/PushConstants/src/sample.cpp | 4 +- src/Samples/RenderPasses/src/sample.cpp | 8 +- src/Samples/Textures/src/sample.cpp | 4 +- src/Samples/UniformArrays/src/sample.cpp | 4 +- 21 files changed, 242 insertions(+), 133 deletions(-) diff --git a/README.md b/README.md index 090ea7b01..4fe9f78ab 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,8 @@ The engine design follows an descriptive approach, which means that an applicati ```cxx UniquePtr renderPass = device->buildRenderPass("Geometry") - .renderTarget(RenderTargetType::Present, Format::B8G8R8A8_UNORM, MultiSamplingLevel::x1, { 0.f, 0.f, 0.f, 1.f }, true, false) - .renderTarget(RenderTargetType::DepthStencil, Format::D32_SFLOAT, MultiSamplingLevel::x1, { 1.f, 0.f, 0.f, 0.f }, true, false); + .renderTarget(RenderTargetType::Present, Format::B8G8R8A8_UNORM, MultiSamplingLevel::x1, RenderTargetFlags::Clear, { 0.f, 0.f, 0.f, 1.f }) + .renderTarget(RenderTargetType::DepthStencil, Format::D32_SFLOAT, MultiSamplingLevel::x1, RenderTargetFlags::Clear, { 1.f, 0.f, 0.f, 0.f }); UniquePtr renderPipeline = device->buildRenderPipeline(*renderPass, "Geometry") .inputAssembler(inputAssembler) diff --git 
a/docs/tutorials/quick-start.markdown b/docs/tutorials/quick-start.markdown index e84807e8b..d2ab7a1f6 100644 --- a/docs/tutorials/quick-start.markdown +++ b/docs/tutorials/quick-start.markdown @@ -237,13 +237,11 @@ The other values that are provided to a render target are: - The render target format, which in our example is dictated by the swap chain format we've chosen earlier. - A clear value vector, which contains the values that the render target will be cleared with when starting the render pass. For our *BGRA* image, we want to clear it with black and an alpha value of `0.0`. -- A boolean switch to enable or disable clearing the values, which we set to true, since we want to clear our image with the clear values specified earlier. -- A boolean switch to enable clearing for stencil buffers. This switch is only used, if the render target is a `DepthStencil` target and the format supports stencil values. It can be used to disable clearing stencil values and only clear depth values for depth/stencil targets. -- A boolean switch that states, if we want to preserve the contents of the image after the render pass has finished. Since we do not want to use our render target as input attachment for another render pass, we also set this value to `false`. +- A flag set that in our example enables clearing the render target when starting the render pass. 
```cxx m_renderPass = m_device->buildRenderPass() - .renderTarget(RenderTargetType::Present, Format::B8G8R8A8_SRGB, { 0.f, 0.f, 0.f, 0.f }, true, false, false); + .renderTarget(RenderTargetType::Present, Format::B8G8R8A8_SRGB, RenderTargetFlags::Clear, { 0.f, 0.f, 0.f, 0.f }); ``` #### Creating a Render Pipeline diff --git a/docs/website/content/homepage/fluent.md b/docs/website/content/homepage/fluent.md index bcf89e0e6..136562122 100644 --- a/docs/website/content/homepage/fluent.md +++ b/docs/website/content/homepage/fluent.md @@ -19,7 +19,8 @@ SharedPtr inputAssembler = device->buildInputAssembler() UniquePtr renderPass = device-> buildRenderPass("Render Pass") .renderTarget(RenderTargetType::Present, - Format::B8G8R8A8_UNORM, { 0.0f, 0.0f, 0.0f, 1.f }) + Format::B8G8R8A8_UNORM, RenderTargetFlags::Clear, + { 0.0f, 0.0f, 0.0f, 1.f }) UniquePtr renderPipeline = device->buildRenderPipeline(*renderPass, "Render Pipeline") diff --git a/src/Backends/DirectX12/include/litefx/backends/dx12.hpp b/src/Backends/DirectX12/include/litefx/backends/dx12.hpp index 61691b161..7891a3d73 100644 --- a/src/Backends/DirectX12/include/litefx/backends/dx12.hpp +++ b/src/Backends/DirectX12/include/litefx/backends/dx12.hpp @@ -1556,10 +1556,10 @@ namespace LiteFX::Rendering::Backends { UniquePtr createIndexBuffer(const String& name, const DirectX12IndexBufferLayout& layout, BufferUsage usage, UInt32 elements) const override; /// - UniquePtr createAttachment(Format format, const Size2d& size, MultiSamplingLevel samples = MultiSamplingLevel::x1) const override; + UniquePtr createAttachment(const RenderTarget& target, const Size2d& size, MultiSamplingLevel samples = MultiSamplingLevel::x1) const override; /// - UniquePtr createAttachment(const String& name, Format format, const Size2d& size, MultiSamplingLevel samples = MultiSamplingLevel::x1) const override; + UniquePtr createAttachment(const String& name, const RenderTarget& target, const Size2d& size, MultiSamplingLevel samples = 
MultiSamplingLevel::x1) const override; /// UniquePtr createTexture(Format format, const Size3d& size, ImageDimensions dimension = ImageDimensions::DIM_2, UInt32 levels = 1, UInt32 layers = 1, MultiSamplingLevel samples = MultiSamplingLevel::x1, bool allowWrite = false) const override; diff --git a/src/Backends/DirectX12/src/factory.cpp b/src/Backends/DirectX12/src/factory.cpp index 3c714fa0a..8afce9fcf 100644 --- a/src/Backends/DirectX12/src/factory.cpp +++ b/src/Backends/DirectX12/src/factory.cpp @@ -174,15 +174,16 @@ UniquePtr DirectX12GraphicsFactory::createIndexBuffer(con return DirectX12IndexBuffer::allocate(name, layout, m_impl->m_allocator, elements, resourceDesc, allocationDesc); } -UniquePtr DirectX12GraphicsFactory::createAttachment(Format format, const Size2d& size, MultiSamplingLevel samples) const +UniquePtr DirectX12GraphicsFactory::createAttachment(const RenderTarget& target, const Size2d& size, MultiSamplingLevel samples) const { - return this->createAttachment("", format, size, samples); + return this->createAttachment("", target, size, samples); } -UniquePtr DirectX12GraphicsFactory::createAttachment(const String& name, Format format, const Size2d& size, MultiSamplingLevel samples) const +UniquePtr DirectX12GraphicsFactory::createAttachment(const String& name, const RenderTarget& target, const Size2d& size, MultiSamplingLevel samples) const { - auto width = std::max(1, size.width()); - auto height = std::max(1, size.height()); + const auto format = target.format(); + const auto width = std::max(1, size.width()); + const auto height = std::max(1, size.height()); D3D12_RESOURCE_DESC1 resourceDesc { }; resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; @@ -194,18 +195,25 @@ UniquePtr DirectX12GraphicsFactory::createAttachment(const Stri resourceDesc.Format = DX12::getFormat(format); resourceDesc.SampleDesc = samples == MultiSamplingLevel::x1 ? 
DXGI_SAMPLE_DESC{ 1, 0 } : DXGI_SAMPLE_DESC{ static_cast(samples), DXGI_STANDARD_MULTISAMPLE_QUALITY_PATTERN }; resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + if (target.allowStorage()) + resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + + if (target.multiQueueAccess()) + resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS; D3D12MA::ALLOCATION_DESC allocationDesc { .HeapType = D3D12_HEAP_TYPE_DEFAULT }; if (::hasDepth(format) || ::hasStencil(format)) { - resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; return DirectX12Image::allocate(name, m_impl->m_device, m_impl->m_allocator, { width, height, 1 }, format, ImageDimensions::DIM_2, 1, 1, samples, false, ImageLayout::DepthRead, resourceDesc, allocationDesc); } else { - resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; - return DirectX12Image::allocate(name, m_impl->m_device, m_impl->m_allocator, { width, height, 1 }, format, ImageDimensions::DIM_2, 1, 1, samples, false, ImageLayout::Common, resourceDesc, allocationDesc); + resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + return DirectX12Image::allocate(name, m_impl->m_device, m_impl->m_allocator, { width, height, 1 }, format, ImageDimensions::DIM_2, 1, 1, samples, true, ImageLayout::Common, resourceDesc, allocationDesc); } } diff --git a/src/Backends/DirectX12/src/frame_buffer.cpp b/src/Backends/DirectX12/src/frame_buffer.cpp index f121e2a37..af51d44c0 100644 --- a/src/Backends/DirectX12/src/frame_buffer.cpp +++ b/src/Backends/DirectX12/src/frame_buffer.cpp @@ -84,7 +84,7 @@ class DirectX12FrameBuffer::DirectX12FrameBufferImpl : public Implement createIndexBuffer(const String& name, const VulkanIndexBufferLayout& layout, BufferUsage usage, UInt32 elements) const override; /// - UniquePtr createAttachment(Format format, const Size2d& size, MultiSamplingLevel samples = 
MultiSamplingLevel::x1) const override; + UniquePtr createAttachment(const RenderTarget& target, const Size2d& size, MultiSamplingLevel samples = MultiSamplingLevel::x1) const override; /// - UniquePtr createAttachment(const String& name, Format format, const Size2d& size, MultiSamplingLevel samples = MultiSamplingLevel::x1) const override; + UniquePtr createAttachment(const String& name, const RenderTarget& target, const Size2d& size, MultiSamplingLevel samples = MultiSamplingLevel::x1) const override; /// UniquePtr createTexture(Format format, const Size3d& size, ImageDimensions dimension = ImageDimensions::DIM_2, UInt32 levels = 1, UInt32 layers = 1, MultiSamplingLevel samples = MultiSamplingLevel::x1, bool allowWrite = false) const override; diff --git a/src/Backends/Vulkan/src/factory.cpp b/src/Backends/Vulkan/src/factory.cpp index 38c54f335..705d51bd5 100644 --- a/src/Backends/Vulkan/src/factory.cpp +++ b/src/Backends/Vulkan/src/factory.cpp @@ -229,15 +229,16 @@ UniquePtr VulkanGraphicsFactory::createIndexBuffer(const Str return buffer; } -UniquePtr VulkanGraphicsFactory::createAttachment(Format format, const Size2d& size, MultiSamplingLevel samples) const +UniquePtr VulkanGraphicsFactory::createAttachment(const RenderTarget& target, const Size2d& size, MultiSamplingLevel samples) const { - return this->createAttachment("", format, size, samples); + return this->createAttachment("", target, size, samples); } -UniquePtr VulkanGraphicsFactory::createAttachment(const String& name, Format format, const Size2d& size, MultiSamplingLevel samples) const +UniquePtr VulkanGraphicsFactory::createAttachment(const String& name, const RenderTarget& target, const Size2d& size, MultiSamplingLevel samples) const { - auto width = std::max(1, size.width()); - auto height = std::max(1, size.height()); + const auto format = target.format(); + const auto width = std::max(1, size.width()); + const auto height = std::max(1, size.height()); VkImageCreateInfo imageInfo{}; 
imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; @@ -251,12 +252,20 @@ UniquePtr VulkanGraphicsFactory::createAttachment(const String& na imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL; imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; imageInfo.samples = Vk::getSamples(samples); - imageInfo.usage = (::hasDepth(format) ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; + imageInfo.usage = (::hasDepth(format) || ::hasStencil(format) ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; - auto queueFamilies = m_impl->m_device.queueFamilyIndices() | std::ranges::to(); - imageInfo.sharingMode = VK_SHARING_MODE_CONCURRENT; - imageInfo.queueFamilyIndexCount = static_cast(queueFamilies.size()); - imageInfo.pQueueFamilyIndices = queueFamilies.data(); + if (target.allowStorage()) + imageInfo.usage |= VK_IMAGE_USAGE_STORAGE_BIT; + + if (!target.multiQueueAccess()) + imageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + else + { + auto queueFamilies = m_impl->m_device.queueFamilyIndices() | std::ranges::to(); + imageInfo.sharingMode = VK_SHARING_MODE_CONCURRENT; + imageInfo.queueFamilyIndexCount = static_cast(queueFamilies.size()); + imageInfo.pQueueFamilyIndices = queueFamilies.data(); + } VmaAllocationCreateInfo allocInfo = {}; allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; diff --git a/src/Backends/Vulkan/src/frame_buffer.cpp b/src/Backends/Vulkan/src/frame_buffer.cpp index a264b1073..86a10853d 100644 --- a/src/Backends/Vulkan/src/frame_buffer.cpp +++ b/src/Backends/Vulkan/src/frame_buffer.cpp @@ -85,7 +85,7 @@ class VulkanFrameBuffer::VulkanFrameBufferImpl : public ImplementimageView()); m_renderTargetViews.push_back(image.get()); m_outputAttachments.push_back(std::move(image)); diff --git a/src/Rendering/include/litefx/rendering.hpp b/src/Rendering/include/litefx/rendering.hpp index 83cdd7fbe..7652176bc 100644 --- 
a/src/Rendering/include/litefx/rendering.hpp +++ b/src/Rendering/include/litefx/rendering.hpp @@ -1016,10 +1016,10 @@ namespace LiteFX::Rendering { virtual UniquePtr createIndexBuffer(const String& name, const index_buffer_layout_type& layout, BufferUsage usage, UInt32 elements) const = 0; /// - virtual UniquePtr createAttachment(Format format, const Size2d& size, MultiSamplingLevel samples = MultiSamplingLevel::x1) const = 0; + virtual UniquePtr createAttachment(const RenderTarget& target, const Size2d& size, MultiSamplingLevel samples = MultiSamplingLevel::x1) const = 0; /// - virtual UniquePtr createAttachment(const String& name, Format format, const Size2d& size, MultiSamplingLevel samples = MultiSamplingLevel::x1) const = 0; + virtual UniquePtr createAttachment(const String& name, const RenderTarget& target, const Size2d& size, MultiSamplingLevel samples = MultiSamplingLevel::x1) const = 0; /// virtual UniquePtr createTexture(Format format, const Size3d& size, ImageDimensions dimension = ImageDimensions::DIM_2, UInt32 levels = 1, UInt32 layers = 1, MultiSamplingLevel samples = MultiSamplingLevel::x1, bool allowWrite = false) const = 0; @@ -1064,12 +1064,12 @@ namespace LiteFX::Rendering { return this->createIndexBuffer(name, dynamic_cast(layout), usage, elements); } - inline UniquePtr getAttachment(Format format, const Size2d& size, MultiSamplingLevel samples) const override { - return this->createAttachment(format, size, samples); + inline UniquePtr getAttachment(const RenderTarget& target, const Size2d& size, MultiSamplingLevel samples) const override { + return this->createAttachment(target, size, samples); } - inline UniquePtr getAttachment(const String& name, Format format, const Size2d& size, MultiSamplingLevel samples) const override { - return this->createAttachment(name, format, size, samples); + inline UniquePtr getAttachment(const String& name, const RenderTarget& target, const Size2d& size, MultiSamplingLevel samples) const override { + return 
this->createAttachment(name, target, size, samples); } inline UniquePtr getTexture(Format format, const Size3d& size, ImageDimensions dimension, UInt32 levels, UInt32 layers, MultiSamplingLevel samples, bool allowWrite) const override { diff --git a/src/Rendering/include/litefx/rendering_api.hpp b/src/Rendering/include/litefx/rendering_api.hpp index 6d65d65d0..44b119693 100644 --- a/src/Rendering/include/litefx/rendering_api.hpp +++ b/src/Rendering/include/litefx/rendering_api.hpp @@ -774,6 +774,63 @@ namespace LiteFX::Rendering { Present = 0x00000004 }; + /// + /// Describes the behavior of render targets. + /// + enum class LITEFX_RENDERING_API RenderTargetFlags { + /// + /// No flags are enabled. + /// + None = 0x00, + + /// + /// If enabled, color or depth (depending on the render target type) are cleared when starting a render pass that renders to the render target. + /// + Clear = 0x01, + + /// + /// If enabled and the render target format supports stencil storage, the stencil part is cleared when the render pass that renders to the render target is started. + /// + ClearStencil = 0x02, + + /// + /// If enabled, the render target is discarded after ending the render pass. + /// + /// + /// When this flag is set, the render target storage is freed after the render pass has finished. The main use of this is to have depth/stencil targets on a render + /// pass that are only required during this render pass. It is not valid to attempt accessing the render target before or after the render pass. + /// + Volatile = 0x04, + + /// + /// If enabled, the render target is initialized with storage/unordered access enabled. + /// + /// + /// If this flag is set, the render target image is created with storage/unordered access enabled. This allows the render target to be transitioned into a read/write + /// resource outside of the render pass. 
However, enabling this might be less efficient than creating a new texture and copying the render target into it on some + /// hardware. + /// + /// This flag must not be combined with depth/stencil formats. + /// + /// + AllowStorage = 0x08, + + /// + /// If enabled, the image can be used simultaneously from multiple queues. + /// + /// + /// If this flag is specified, the render target image is created with support for multi-queue access. This allows multiple queues to read from the image + /// simultaneously, as long as writes are properly synchronized using fences and barriers. If this flag is not specified, the render target image can only be + /// accessed by the queue that first uses the render target (which must be the queue that executes the render pass). + /// + /// Note that it is currently not possible to transfer ownership between queues, so if multi-queue access is required, this flag must be specified when creating the + /// render target. + /// + /// This flag must not be combined with depth/stencil formats. + /// + Shared = 0x10 + }; + /// /// Describes the dimensions of a image resource, i.e. the dimensions that are required to access a texel or describe the image extent. /// @@ -1448,6 +1505,7 @@ namespace LiteFX::Rendering { LITEFX_DEFINE_FLAGS(ResourceAccess); LITEFX_DEFINE_FLAGS(BufferFormat); LITEFX_DEFINE_FLAGS(WriteMask); + LITEFX_DEFINE_FLAGS(RenderTargetFlags); #pragma endregion @@ -1997,6 +2055,12 @@ namespace LiteFX::Rendering { /// The type of the render target. virtual RenderTargetType type() const noexcept = 0; + /// + /// Returns the flags that control the behavior of the render target. + /// + /// The flags that control the behavior of the render target. + virtual RenderTargetFlags flags() const noexcept = 0; + /// /// Returns the internal format of the render target. 
/// @@ -2010,6 +2074,8 @@ namespace LiteFX::Rendering { /// true, if the render target should be cleared, when the render pass is started /// /// + /// + /// virtual bool clearBuffer() const noexcept = 0; /// @@ -2019,6 +2085,8 @@ namespace LiteFX::Rendering { /// true, if the render target stencil should be cleared, when the render pass is started /// /// + /// + /// virtual bool clearStencil() const noexcept = 0; /// @@ -2042,8 +2110,26 @@ namespace LiteFX::Rendering { /// the GPU memory again in the first place. /// /// true, if the target should not be made persistent for access after the render pass has finished. + /// + /// virtual bool isVolatile() const noexcept = 0; + /// + /// Returns true, if the render target image can be used for storage/unordered access. + /// + /// true, if the render target image can be used for storage/unordered access. + /// + /// + virtual bool allowStorage() const noexcept = 0; + + /// + /// Returns true, if the render target image can be accessed simultaneously from different queues. + /// + /// true, if the render target image can be accessed simultaneously from different queues. + /// + /// + virtual bool multiQueueAccess() const noexcept = 0; + /// /// Returns the render targets blend state. /// @@ -2067,26 +2153,22 @@ namespace LiteFX::Rendering { /// The location of the render target output attachment. /// The type of the render target. /// The format of the render target. - /// true, if the render target should be cleared, when a render pass is started. + /// The flags that control the behavior of the render target. /// The values with which the render target gets cleared. - /// true, if the render target stencil should be cleared, when a render pass is started. - /// true, if the target should not be made persistent for access after the render pass has finished. /// The render target blend state. 
- explicit RenderTarget(UInt32 location, RenderTargetType type, Format format, bool clearBuffer, const Vector4f& clearValues = { 0.f , 0.f, 0.f, 0.f }, bool clearStencil = true, bool isVolatile = false, const BlendState& blendState = {}); + explicit RenderTarget(UInt32 location, RenderTargetType type, Format format, RenderTargetFlags flags = RenderTargetFlags::None, const Vector4f& clearValues = { 0.f , 0.f, 0.f, 0.f }, const BlendState& blendState = {}); /// /// Initializes the render target. /// - /// The name of the render target. + /// The name of the render target. /// The location of the render target output attachment. /// The type of the render target. /// The format of the render target. - /// true, if the render target should be cleared, when a render pass is started. + /// The flags that control the behavior of the render target. /// The values with which the render target gets cleared. - /// true, if the render target stencil should be cleared, when a render pass is started. - /// true, if the target should not be made persistent for access after the render pass has finished. /// The render target blend state. 
- explicit RenderTarget(const String& name, UInt32 location, RenderTargetType type, Format format, bool clearBuffer, const Vector4f& clearValues = { 0.f , 0.f, 0.f, 0.f }, bool clearStencil = true, bool isVolatile = false, const BlendState& blendState = {}); + explicit RenderTarget(const String& name, UInt32 location, RenderTargetType type, Format format, RenderTargetFlags flags = RenderTargetFlags::None, const Vector4f& clearValues = { 0.f , 0.f, 0.f, 0.f }, const BlendState& blendState = {}); RenderTarget(const RenderTarget&) noexcept; RenderTarget(RenderTarget&&) noexcept; virtual ~RenderTarget() noexcept; @@ -2105,6 +2187,9 @@ namespace LiteFX::Rendering { /// RenderTargetType type() const noexcept override; + /// + RenderTargetFlags flags() const noexcept override; + /// Format format() const noexcept override; @@ -2120,6 +2205,12 @@ namespace LiteFX::Rendering { /// bool isVolatile() const noexcept override; + /// + bool allowStorage() const noexcept override; + + /// + bool multiQueueAccess() const noexcept override; + /// const BlendState& blendState() const noexcept override; }; @@ -5415,24 +5506,24 @@ namespace LiteFX::Rendering { /// /// Creates an image that is used as render target attachment. /// - /// The format of the image. + /// The render target description. /// The extent of the image. /// The number of samples, the image should be sampled with. /// The instance of the attachment image. - inline UniquePtr createAttachment(Format format, const Size2d& size, MultiSamplingLevel samples = MultiSamplingLevel::x1) const { - return this->getAttachment(format, size, samples); + inline UniquePtr createAttachment(const RenderTarget& target, const Size2d& size, MultiSamplingLevel samples = MultiSamplingLevel::x1) const { + return this->getAttachment(target, size, samples); } /// /// Creates an image that is used as render target attachment. /// /// The name of the image. - /// The format of the image. + /// The render target description. 
/// The extent of the image. /// The number of samples, the image should be sampled with. /// The instance of the attachment image. - inline UniquePtr createAttachment(const String& name, Format format, const Size2d& size, MultiSamplingLevel samples = MultiSamplingLevel::x1) const { - return this->getAttachment(name, format, size, samples); + inline UniquePtr createAttachment(const String& name, const RenderTarget& target, const Size2d& size, MultiSamplingLevel samples = MultiSamplingLevel::x1) const { + return this->getAttachment(name, target, size, samples); } /// @@ -5559,8 +5650,8 @@ namespace LiteFX::Rendering { virtual UniquePtr getVertexBuffer(const String& name, const IVertexBufferLayout& layout, BufferUsage usage, UInt32 elements) const = 0; virtual UniquePtr getIndexBuffer(const IIndexBufferLayout& layout, BufferUsage usage, UInt32 elements) const = 0; virtual UniquePtr getIndexBuffer(const String& name, const IIndexBufferLayout& layout, BufferUsage usage, UInt32 elements) const = 0; - virtual UniquePtr getAttachment(Format format, const Size2d& size, MultiSamplingLevel samples) const = 0; - virtual UniquePtr getAttachment(const String& name, Format format, const Size2d& size, MultiSamplingLevel samples) const = 0; + virtual UniquePtr getAttachment(const RenderTarget& target, const Size2d& size, MultiSamplingLevel samples) const = 0; + virtual UniquePtr getAttachment(const String& name, const RenderTarget& target, const Size2d& size, MultiSamplingLevel samples) const = 0; virtual UniquePtr getTexture(Format format, const Size3d& size, ImageDimensions dimension, UInt32 levels, UInt32 layers, MultiSamplingLevel samples, bool allowWrite) const = 0; virtual UniquePtr getTexture(const String& name, Format format, const Size3d& size, ImageDimensions dimension, UInt32 levels, UInt32 layers, MultiSamplingLevel samples, bool allowWrite) const = 0; virtual Enumerable> getTextures(UInt32 elements, Format format, const Size3d& size, ImageDimensions dimension, UInt32 
layers, UInt32 levels, MultiSamplingLevel samples, bool allowWrite) const = 0; diff --git a/src/Rendering/include/litefx/rendering_builders.hpp b/src/Rendering/include/litefx/rendering_builders.hpp index 2f342c77c..7aef33b2b 100644 --- a/src/Rendering/include/litefx/rendering_builders.hpp +++ b/src/Rendering/include/litefx/rendering_builders.hpp @@ -567,37 +567,37 @@ namespace LiteFX::Rendering { /// /// The polygon draw mode. /// - PolygonMode polygonMode; + PolygonMode polygonMode{ PolygonMode::Solid }; /// /// The polygon cull mode. /// - CullMode cullMode; + CullMode cullMode{ CullMode::BackFaces }; /// /// The polygon cull order. /// - CullOrder cullOrder; + CullOrder cullOrder{ CullOrder::ClockWise }; /// /// The line width, if line rasterization is supported. /// - Float lineWidth; + Float lineWidth{ 1.0f }; /// /// The depth bias state. /// - DepthStencilState::DepthBias depthBias; + DepthStencilState::DepthBias depthBias{ }; /// /// The depth state. /// - DepthStencilState::DepthState depthState; + DepthStencilState::DepthState depthState{ }; /// /// The stencil state. /// - DepthStencilState::StencilState stencilState; + DepthStencilState::StencilState stencilState{ }; } m_state; public: @@ -1414,13 +1414,11 @@ namespace LiteFX::Rendering { /// /// The type of the render target. /// The color format of the render target. + /// The flags that control the behavior of the render target. /// The fixed clear value for the render target. - /// true, if the render target color or depth should be cleared. - /// true, if the render target stencil should be cleared. - /// true to mark the render target as volatile, so is not required to be preserved after the render pass has ended. 
template - constexpr inline auto renderTarget(this TSelf&& self, RenderTargetType type, Format format, const Vector4f& clearValues = { 0.0f, 0.0f, 0.0f, 0.0f }, bool clearColor = true, bool clearStencil = true, bool isVolatile = false) -> TSelf& { - self.renderTarget("", static_cast(self.m_state.renderTargets.size()), type, format, clearValues, clearColor, clearStencil, isVolatile); + constexpr inline auto renderTarget(this TSelf&& self, RenderTargetType type, Format format, RenderTargetFlags flags = RenderTargetFlags::None, const Vector4f& clearValues = { 0.0f, 0.0f, 0.0f, 0.0f }) -> TSelf& { + self.renderTarget("", static_cast(self.m_state.renderTargets.size()), type, format, flags, clearValues); return self; } @@ -1430,13 +1428,11 @@ namespace LiteFX::Rendering { /// The name of the render target. /// The type of the render target. /// The color format of the render target. + /// The flags that control the behavior of the render target. /// The fixed clear value for the render target. - /// true, if the render target color or depth should be cleared. - /// true, if the render target stencil should be cleared. - /// true to mark the render target as volatile, so is not required to be preserved after the render pass has ended. 
template - constexpr inline auto renderTarget(this TSelf&& self, const String& name, RenderTargetType type, Format format, const Vector4f& clearValues = { 0.0f, 0.0f, 0.0f, 0.0f }, bool clearColor = true, bool clearStencil = true, bool isVolatile = false) -> TSelf& { - self.renderTarget(name, static_cast(self.m_state.renderTargets.size()), type, format, clearValues, clearColor, clearStencil, isVolatile); + constexpr inline auto renderTarget(this TSelf&& self, const String& name, RenderTargetType type, Format format, RenderTargetFlags flags = RenderTargetFlags::None, const Vector4f& clearValues = { 0.0f, 0.0f, 0.0f, 0.0f }) -> TSelf& { + self.renderTarget(name, static_cast(self.m_state.renderTargets.size()), type, format, flags, clearValues); return self; } @@ -1446,13 +1442,11 @@ namespace LiteFX::Rendering { /// The location of the render target. /// The type of the render target. /// The color format of the render target. + /// The flags that control the behavior of the render target. /// The fixed clear value for the render target. - /// true, if the render target color or depth should be cleared. - /// true, if the render target stencil should be cleared. - /// true to mark the render target as volatile, so is not required to be preserved after the render pass has ended. 
template - constexpr inline auto renderTarget(this TSelf&& self, UInt32 location, RenderTargetType type, Format format, const Vector4f& clearValues = { 0.0f, 0.0f, 0.0f, 0.0f }, bool clearColor = true, bool clearStencil = true, bool isVolatile = false) -> TSelf& { - self.renderTarget("", location, type, format, clearValues, clearColor, clearStencil, isVolatile); + constexpr inline auto renderTarget(this TSelf&& self, UInt32 location, RenderTargetType type, Format format, RenderTargetFlags flags = RenderTargetFlags::None, const Vector4f& clearValues = { 0.0f, 0.0f, 0.0f, 0.0f }) -> TSelf& { + self.renderTarget("", location, type, format, flags, clearValues); return self; } @@ -1462,14 +1456,12 @@ namespace LiteFX::Rendering { /// The name of the render target. /// The location of the render target. /// The type of the render target. + /// The flags that control the behavior of the render target. /// The color format of the render target. /// The fixed clear value for the render target. - /// true, if the render target color or depth should be cleared. - /// true, if the render target stencil should be cleared. - /// true to mark the render target as volatile, so is not required to be preserved after the render pass has ended. 
template - constexpr inline auto renderTarget(this TSelf&& self, const String& name, UInt32 location, RenderTargetType type, Format format, const Vector4f& clearValues = { 0.0f, 0.0f, 0.0f, 0.0f }, bool clearColor = true, bool clearStencil = true, bool isVolatile = false) -> TSelf& { - self.m_state.renderTargets.push_back(RenderTarget(name, location, type, format, clearColor, clearValues, clearStencil, isVolatile)); + constexpr inline auto renderTarget(this TSelf&& self, const String& name, UInt32 location, RenderTargetType type, Format format, RenderTargetFlags flags = RenderTargetFlags::None, const Vector4f& clearValues = { 0.0f, 0.0f, 0.0f, 0.0f }) -> TSelf& { + self.m_state.renderTargets.push_back(RenderTarget(name, location, type, format, flags, clearValues)); return self; } @@ -1479,13 +1471,11 @@ namespace LiteFX::Rendering { /// The input attachment mapping to map to. /// The type of the render target. /// The color format of the render target. + /// The flags that control the behavior of the render target. /// The fixed clear value for the render target. - /// true, if the render target color or depth should be cleared. - /// true, if the render target stencil should be cleared. - /// true to mark the render target as volatile, so is not required to be preserved after the render pass has ended. 
template - constexpr inline auto renderTarget(this TSelf&& self, input_attachment_mapping_type& output, RenderTargetType type, Format format, const Vector4f& clearValues = { 0.0f, 0.0f, 0.0f, 0.0f }, bool clearColor = true, bool clearStencil = true, bool isVolatile = false) -> TSelf& { - self.renderTarget("", output, static_cast(self.m_state.m_renderTargets.size()), type, format, clearValues, clearColor, clearStencil, isVolatile); + constexpr inline auto renderTarget(this TSelf&& self, input_attachment_mapping_type& output, RenderTargetType type, Format format, RenderTargetFlags flags = RenderTargetFlags::None, const Vector4f& clearValues = { 0.0f, 0.0f, 0.0f, 0.0f }) -> TSelf& { + self.renderTarget("", output, static_cast(self.m_state.m_renderTargets.size()), type, format, flags, clearValues); return self; } @@ -1496,13 +1486,11 @@ namespace LiteFX::Rendering { /// The input attachment mapping to map to. /// The type of the render target. /// The color format of the render target. + /// The flags that control the behavior of the render target. /// The fixed clear value for the render target. - /// true, if the render target color or depth should be cleared. - /// true, if the render target stencil should be cleared. - /// true to mark the render target as volatile, so is not required to be preserved after the render pass has ended. 
template - constexpr inline auto renderTarget(this TSelf&& self, const String& name, input_attachment_mapping_type& output, RenderTargetType type, Format format, const Vector4f& clearValues = { 0.0f, 0.0f, 0.0f, 0.0f }, bool clearColor = true, bool clearStencil = true, bool isVolatile = false) -> TSelf& { - self.renderTarget(name, output, static_cast(self.m_state.renderTargets.size()), type, format, clearValues, clearColor, clearStencil, isVolatile); + constexpr inline auto renderTarget(this TSelf&& self, const String& name, input_attachment_mapping_type& output, RenderTargetType type, Format format, RenderTargetFlags flags = RenderTargetFlags::None, const Vector4f& clearValues = { 0.0f, 0.0f, 0.0f, 0.0f }) -> TSelf& { + self.renderTarget(name, output, static_cast(self.m_state.renderTargets.size()), type, format, flags, clearValues); return self; } @@ -1513,13 +1501,11 @@ namespace LiteFX::Rendering { /// The location of the render target. /// The type of the render target. /// The color format of the render target. + /// The flags that control the behavior of the render target. /// The fixed clear value for the render target. - /// true, if the render target color or depth should be cleared. - /// true, if the render target stencil should be cleared. - /// true to mark the render target as volatile, so is not required to be preserved after the render pass has ended. 
template - constexpr inline auto renderTarget(this TSelf&& self, input_attachment_mapping_type& output, UInt32 location, RenderTargetType type, Format format, const Vector4f& clearValues = { 0.0f, 0.0f, 0.0f, 0.0f }, bool clearColor = true, bool clearStencil = true, bool isVolatile = false) -> TSelf& { - self.renderTarget("", output, location, type, format, clearValues, clearColor, clearStencil, isVolatile); + constexpr inline auto renderTarget(this TSelf&& self, input_attachment_mapping_type& output, UInt32 location, RenderTargetType type, Format format, RenderTargetFlags flags = RenderTargetFlags::None, const Vector4f& clearValues = { 0.0f, 0.0f, 0.0f, 0.0f }) -> TSelf& { + self.renderTarget("", output, location, type, format, flags, clearValues); return self; } diff --git a/src/Rendering/src/render_target.cpp b/src/Rendering/src/render_target.cpp index c7ce79bf5..bd4013f95 100644 --- a/src/Rendering/src/render_target.cpp +++ b/src/Rendering/src/render_target.cpp @@ -13,16 +13,21 @@ class RenderTarget::RenderTargetImpl : public Implement { private: RenderTargetType m_type = RenderTargetType::Color; Format m_format = Format::B8G8R8A8_SRGB; - bool m_clearBuffer = false, m_clearStencil = false, m_volatile = false; + RenderTargetFlags m_flags; Vector4f m_clearValues; UInt32 m_location; BlendState m_blendState; String m_name; public: - RenderTargetImpl(RenderTarget* parent, const String& name, UInt32 location, RenderTargetType type, Format format, bool clearBuffer, const Vector4f& clearValues, bool clearStencil, bool isVolatile, const BlendState& blendState) : - base(parent), m_name(name), m_location(location), m_type(type), m_format(format), m_clearBuffer(clearBuffer), m_clearValues(clearValues), m_clearStencil(clearStencil), m_volatile(isVolatile), m_blendState(blendState) + RenderTargetImpl(RenderTarget* parent, const String& name, UInt32 location, RenderTargetType type, Format format, RenderTargetFlags flags, const Vector4f& clearValues, const BlendState& 
blendState) : + base(parent), m_name(name), m_location(location), m_type(type), m_format(format), m_flags(flags), m_clearValues(clearValues), m_blendState(blendState) { + if ((::hasDepth(format) || ::hasStencil(format)) && LITEFX_FLAG_IS_SET(flags, RenderTargetFlags::AllowStorage)) [[unlikely]] + throw InvalidArgumentException("flags", "Depth/Stencil formats cannot be used for storage/unordered access."); + + if ((::hasDepth(format) || ::hasStencil(format)) && LITEFX_FLAG_IS_SET(flags, RenderTargetFlags::Shared)) [[unlikely]] + throw InvalidArgumentException("flags", "Depth/Stencil formats cannot be used shared between queues."); } }; @@ -31,27 +36,27 @@ class RenderTarget::RenderTargetImpl : public Implement { // ------------------------------------------------------------------------------------------------ RenderTarget::RenderTarget() noexcept : - RenderTarget(0, RenderTargetType::Color, Format::None, false, Vector4f{0.f, 0.f, 0.f, 0.f}, false, false, BlendState{}) + RenderTarget(0, RenderTargetType::Color, Format::None, RenderTargetFlags::None, Vector4f{0.f, 0.f, 0.f, 0.f}, BlendState{}) { } -RenderTarget::RenderTarget(UInt32 location, RenderTargetType type, Format format, bool clearBuffer, const Vector4f& clearValues, bool clearStencil, bool isVolatile, const BlendState& blendState) : - RenderTarget("", location, type, format, clearBuffer, clearValues, clearStencil, isVolatile, blendState) +RenderTarget::RenderTarget(UInt32 location, RenderTargetType type, Format format, RenderTargetFlags flags, const Vector4f& clearValues, const BlendState& blendState) : + RenderTarget("", location, type, format, flags, clearValues, blendState) { } -RenderTarget::RenderTarget(const String& name, UInt32 location, RenderTargetType type, Format format, bool clearBuffer, const Vector4f& clearValues, bool clearStencil, bool isVolatile, const BlendState& blendState) : - m_impl(makePimpl(this, name, location, type, format, clearBuffer, clearValues, clearStencil, isVolatile, 
blendState)) +RenderTarget::RenderTarget(const String& name, UInt32 location, RenderTargetType type, Format format, RenderTargetFlags flags, const Vector4f& clearValues, const BlendState& blendState) : + m_impl(makePimpl(this, name, location, type, format, flags, clearValues, blendState)) { } RenderTarget::RenderTarget(const RenderTarget& _other) noexcept : - m_impl(makePimpl(this, _other.name(), _other.location(), _other.type(), _other.format(), _other.clearBuffer(), _other.clearValues(), _other.clearStencil(), _other.isVolatile(), _other.blendState())) + m_impl(makePimpl(this, _other.name(), _other.location(), _other.type(), _other.format(), _other.flags(), _other.clearValues(), _other.blendState())) { } RenderTarget::RenderTarget(RenderTarget&& _other) noexcept : - m_impl(makePimpl(this, std::move(_other.m_impl->m_name), std::move(_other.m_impl->m_location), std::move(_other.m_impl->m_type), std::move(_other.m_impl->m_format), std::move(_other.m_impl->m_clearBuffer), std::move(_other.m_impl->m_clearValues), std::move(_other.m_impl->m_clearStencil), std::move(_other.m_impl->m_volatile), std::move(_other.m_impl->m_blendState))) + m_impl(makePimpl(this, std::move(_other.m_impl->m_name), std::move(_other.m_impl->m_location), std::move(_other.m_impl->m_type), std::move(_other.m_impl->m_format), std::move(_other.m_impl->m_flags), std::move(_other.m_impl->m_clearValues), std::move(_other.m_impl->m_blendState))) { } @@ -63,10 +68,8 @@ RenderTarget& RenderTarget::operator=(const RenderTarget& _other) noexcept m_impl->m_location = _other.m_impl->m_location; m_impl->m_type = _other.m_impl->m_type; m_impl->m_format = _other.m_impl->m_format; - m_impl->m_clearBuffer = _other.m_impl->m_clearBuffer; + m_impl->m_flags = _other.m_impl->m_flags; m_impl->m_clearValues = _other.m_impl->m_clearValues; - m_impl->m_clearStencil = _other.m_impl->m_clearStencil; - m_impl->m_volatile = _other.m_impl->m_volatile; m_impl->m_blendState = _other.m_impl->m_blendState; return *this; @@ -78,10 
+81,8 @@ RenderTarget& RenderTarget::operator=(RenderTarget&& _other) noexcept m_impl->m_location = std::move(_other.m_impl->m_location); m_impl->m_type = std::move(_other.m_impl->m_type); m_impl->m_format = std::move(_other.m_impl->m_format); - m_impl->m_clearBuffer = std::move(_other.m_impl->m_clearBuffer); + m_impl->m_flags = std::move(_other.m_impl->m_flags); m_impl->m_clearValues = std::move(_other.m_impl->m_clearValues); - m_impl->m_clearStencil = std::move(_other.m_impl->m_clearStencil); - m_impl->m_volatile = std::move(_other.m_impl->m_volatile); m_impl->m_blendState = std::move(_other.m_impl->m_blendState); return *this; @@ -102,6 +103,11 @@ RenderTargetType RenderTarget::type() const noexcept return m_impl->m_type; } +RenderTargetFlags RenderTarget::flags() const noexcept +{ + return m_impl->m_flags; +} + Format RenderTarget::format() const noexcept { return m_impl->m_format; @@ -109,12 +115,12 @@ Format RenderTarget::format() const noexcept bool RenderTarget::clearBuffer() const noexcept { - return m_impl->m_clearBuffer; + return LITEFX_FLAG_IS_SET(m_impl->m_flags, RenderTargetFlags::Clear); } bool RenderTarget::clearStencil() const noexcept { - return m_impl->m_clearStencil; + return LITEFX_FLAG_IS_SET(m_impl->m_flags, RenderTargetFlags::ClearStencil); } const Vector4f& RenderTarget::clearValues() const noexcept @@ -124,7 +130,17 @@ const Vector4f& RenderTarget::clearValues() const noexcept bool RenderTarget::isVolatile() const noexcept { - return m_impl->m_volatile; + return LITEFX_FLAG_IS_SET(m_impl->m_flags, RenderTargetFlags::Volatile); +} + +bool RenderTarget::allowStorage() const noexcept +{ + return LITEFX_FLAG_IS_SET(m_impl->m_flags, RenderTargetFlags::AllowStorage); +} + +bool RenderTarget::multiQueueAccess() const noexcept +{ + return LITEFX_FLAG_IS_SET(m_impl->m_flags, RenderTargetFlags::Shared); } const IRenderTarget::BlendState& RenderTarget::blendState() const noexcept diff --git a/src/Samples/BasicRendering/src/sample.cpp 
b/src/Samples/BasicRendering/src/sample.cpp index 915cc1ae1..59b008510 100644 --- a/src/Samples/BasicRendering/src/sample.cpp +++ b/src/Samples/BasicRendering/src/sample.cpp @@ -65,8 +65,8 @@ void initRenderGraph(TRenderBackend* backend, SharedPtr& inputA // Create a geometry render pass. UniquePtr renderPass = device->buildRenderPass("Opaque") - .renderTarget("Color Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, {0.1f, 0.1f, 0.1f, 1.f}, true, false, false) - .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, {1.f, 0.f, 0.f, 0.f}, true, false, false); + .renderTarget("Color Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, RenderTargetFlags::Clear, { 0.1f, 0.1f, 0.1f, 1.f }) + .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, RenderTargetFlags::Clear, { 1.f, 0.f, 0.f, 0.f }); // Create the shader program. SharedPtr shaderProgram = device->buildShaderProgram() diff --git a/src/Samples/Bindless/src/sample.cpp b/src/Samples/Bindless/src/sample.cpp index 7ef5504cd..35cb9dba9 100644 --- a/src/Samples/Bindless/src/sample.cpp +++ b/src/Samples/Bindless/src/sample.cpp @@ -95,8 +95,8 @@ void initRenderGraph(TRenderBackend* backend, SharedPtr& inputA // Create a geometry render pass. UniquePtr renderPass = device->buildRenderPass("Opaque") - .renderTarget("Color Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, {0.1f, 0.1f, 0.1f, 1.f}, true, false, false) - .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, {1.f, 0.f, 0.f, 0.f}, true, false, false); + .renderTarget("Color Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, RenderTargetFlags::Clear, { 0.1f, 0.1f, 0.1f, 1.f }) + .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, RenderTargetFlags::Clear, { 1.f, 0.f, 0.f, 0.f }); // Create the shader program. 
SharedPtr shaderProgram = device->buildShaderProgram() diff --git a/src/Samples/Multisampling/src/sample.cpp b/src/Samples/Multisampling/src/sample.cpp index 90fd7948d..f138f6bb5 100644 --- a/src/Samples/Multisampling/src/sample.cpp +++ b/src/Samples/Multisampling/src/sample.cpp @@ -65,8 +65,8 @@ void initRenderGraph(TRenderBackend* backend, SharedPtr& inputA // Create a geometry render pass. UniquePtr renderPass = device->buildRenderPass("Opaque", MultiSamplingLevel::x4) - .renderTarget("Color Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, { 0.1f, 0.1f, 0.1f, 1.f }, true, false, false) - .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, {1.f, 0.f, 0.f, 0.f}, true, false, false); + .renderTarget("Color Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, RenderTargetFlags::Clear, { 0.1f, 0.1f, 0.1f, 1.f }) + .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, RenderTargetFlags::Clear, { 1.f, 0.f, 0.f, 0.f }); // Create a shader program. SharedPtr shaderProgram = device->buildShaderProgram() diff --git a/src/Samples/Multithreading/src/sample.cpp b/src/Samples/Multithreading/src/sample.cpp index 234704da9..e9100b731 100644 --- a/src/Samples/Multithreading/src/sample.cpp +++ b/src/Samples/Multithreading/src/sample.cpp @@ -78,8 +78,8 @@ void initRenderGraph(TRenderBackend* backend, SharedPtr& inputA // Create a geometry render pass. 
UniquePtr renderPass = device->buildRenderPass("Opaque", MultiSamplingLevel::x1, NUM_WORKERS) - .renderTarget("Color Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, { 0.1f, 0.1f, 0.1f, 1.f }, true, false, false) - .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, {1.f, 0.f, 0.f, 0.f}, true, false, false); + .renderTarget("Color Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, RenderTargetFlags::Clear, { 0.1f, 0.1f, 0.1f, 1.f }) + .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, RenderTargetFlags::Clear, { 1.f, 0.f, 0.f, 0.f }); // Create a shader program. SharedPtr shaderProgram = device->buildShaderProgram() diff --git a/src/Samples/PushConstants/src/sample.cpp b/src/Samples/PushConstants/src/sample.cpp index 09c5b3926..c4cdb2cd9 100644 --- a/src/Samples/PushConstants/src/sample.cpp +++ b/src/Samples/PushConstants/src/sample.cpp @@ -92,8 +92,8 @@ void initRenderGraph(TRenderBackend* backend, SharedPtr& inputA // Create a geometry render pass. UniquePtr renderPass = device->buildRenderPass("Opaque") - .renderTarget("Color Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, { 0.1f, 0.1f, 0.1f, 1.f }, true, false, false) - .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, { 1.f, 0.f, 0.f, 0.f }, true, false, false); + .renderTarget("Color Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, RenderTargetFlags::Clear, { 0.1f, 0.1f, 0.1f, 1.f }) + .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, RenderTargetFlags::Clear, { 1.f, 0.f, 0.f, 0.f }); // Create a shader program. 
SharedPtr shaderProgram = device->buildShaderProgram() diff --git a/src/Samples/RenderPasses/src/sample.cpp b/src/Samples/RenderPasses/src/sample.cpp index 3f2be2c86..c1750879a 100644 --- a/src/Samples/RenderPasses/src/sample.cpp +++ b/src/Samples/RenderPasses/src/sample.cpp @@ -77,14 +77,14 @@ void initRenderGraph(TRenderBackend* backend, SharedPtr& inputA // Create a geometry and lighting render passes. // NOTE: For Vulkan, input attachments need to be in a continuous range, starting at index 0. UniquePtr geometryPass = device->buildRenderPass("Geometry Pass") - .renderTarget("G-Buffer Color", 0, RenderTargetType::Color, Format::B8G8R8A8_UNORM, { 0.1f, 0.1f, 0.1f, 1.f }, true, false, false) - .renderTarget("G-Buffer Depth/Stencil", 1, RenderTargetType::DepthStencil, Format::D32_SFLOAT, { 1.f, 0.f, 0.f, 0.f }, true, true, false); + .renderTarget("G-Buffer Color", 0, RenderTargetType::Color, Format::B8G8R8A8_UNORM, RenderTargetFlags::Clear, { 0.1f, 0.1f, 0.1f, 1.f }) + .renderTarget("G-Buffer Depth/Stencil", 1, RenderTargetType::DepthStencil, Format::D32_SFLOAT, RenderTargetFlags::Clear | RenderTargetFlags::ClearStencil, { 1.f, 0.f, 0.f, 0.f }); UniquePtr lightingPass = device->buildRenderPass("Lighting Pass") .inputAttachment(0, *geometryPass, 0) // Map color attachment from geometry pass render target 0 to location 0. .inputAttachment(1, *geometryPass, 1) // Map depth/stencil attachment from geometry pass render target 1 to location 1. 
- .renderTarget("Color Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, { 0.1f, 0.1f, 0.1f, 1.f }, true, false, false) - .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, { 1.f, 0.f, 0.f, 0.f }, true, true, false); + .renderTarget("Color Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, RenderTargetFlags::Clear, { 0.1f, 0.1f, 0.1f, 1.f }) + .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, RenderTargetFlags::Clear | RenderTargetFlags::ClearStencil, { 1.f, 0.f, 0.f, 0.f }); // Create the shader programs. SharedPtr geometryPassShader = device->buildShaderProgram() diff --git a/src/Samples/Textures/src/sample.cpp b/src/Samples/Textures/src/sample.cpp index 3388ed652..1644a589e 100644 --- a/src/Samples/Textures/src/sample.cpp +++ b/src/Samples/Textures/src/sample.cpp @@ -70,8 +70,8 @@ void initRenderGraph(TRenderBackend* backend, SharedPtr& inputA // Create a geometry render pass. UniquePtr renderPass = device->buildRenderPass("Opaque") - .renderTarget("Color Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, { 0.1f, 0.1f, 0.1f, 1.f }, true, false, false) - .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, { 1.f, 0.f, 0.f, 0.f }, true, false, false); + .renderTarget("Color Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, RenderTargetFlags::Clear, { 0.1f, 0.1f, 0.1f, 1.f }) + .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, RenderTargetFlags::Clear, { 1.f, 0.f, 0.f, 0.f }); // Create a shader program. 
SharedPtr shaderProgram = device->buildShaderProgram() diff --git a/src/Samples/UniformArrays/src/sample.cpp b/src/Samples/UniformArrays/src/sample.cpp index cdee7eb32..9372c4ddb 100644 --- a/src/Samples/UniformArrays/src/sample.cpp +++ b/src/Samples/UniformArrays/src/sample.cpp @@ -102,8 +102,8 @@ void initRenderGraph(TRenderBackend* backend, SharedPtr& inputA // Create a geometry render pass. UniquePtr renderPass = device->buildRenderPass("Opaque") - .renderTarget("Color Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, { 0.1f, 0.1f, 0.1f, 1.f }, true, false, false) - .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, { 1.f, 0.f, 0.f, 0.f }, true, false, false); + .renderTarget("Color Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, RenderTargetFlags::Clear, { 0.1f, 0.1f, 0.1f, 1.f }) + .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, RenderTargetFlags::Clear, { 1.f, 0.f, 0.f, 0.f }); // Create a shader program. SharedPtr shaderProgram = device->buildShaderProgram() From fd3d916ea178ad9e29e80f19c469add7f937d17d Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Tue, 2 Jan 2024 11:49:38 +0100 Subject: [PATCH 12/38] Add render target flag to support transition into general image layouts. 
--- src/Backends/DirectX12/src/render_pass.cpp | 8 +++-- src/Backends/Vulkan/src/device.cpp | 1 + src/Backends/Vulkan/src/render_pass.cpp | 26 +++++++++----- .../include/litefx/rendering_api.hpp | 34 +++++++++++++++++-- src/Rendering/src/render_target.cpp | 5 +++ .../shaders/lighting_pass_fs.hlsl | 6 ++++ src/Samples/RenderPasses/src/sample.cpp | 5 +-- 7 files changed, 70 insertions(+), 15 deletions(-) diff --git a/src/Backends/DirectX12/src/render_pass.cpp b/src/Backends/DirectX12/src/render_pass.cpp index c44c6d44a..9ec47c686 100644 --- a/src/Backends/DirectX12/src/render_pass.cpp +++ b/src/Backends/DirectX12/src/render_pass.cpp @@ -60,6 +60,10 @@ class DirectX12RenderPass::DirectX12RenderPassImpl : public ImplementrenderTargetDescriptorSize()); diff --git a/src/Backends/Vulkan/src/device.cpp b/src/Backends/Vulkan/src/device.cpp index 7678417d5..2cd392c48 100644 --- a/src/Backends/Vulkan/src/device.cpp +++ b/src/Backends/Vulkan/src/device.cpp @@ -272,6 +272,7 @@ class VulkanDevice::VulkanDeviceImpl : public Implement { .descriptorBindingPartiallyBound = true, .descriptorBindingVariableDescriptorCount = true, .runtimeDescriptorArray = true, + .separateDepthStencilLayouts = true, .hostQueryReset = true, .timelineSemaphore = true }; diff --git a/src/Backends/Vulkan/src/render_pass.cpp b/src/Backends/Vulkan/src/render_pass.cpp index bf0529c47..b286c9727 100644 --- a/src/Backends/Vulkan/src/render_pass.cpp +++ b/src/Backends/Vulkan/src/render_pass.cpp @@ -93,17 +93,27 @@ class VulkanRenderPass::VulkanRenderPassImpl : public Implement(currentIndex), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }); attachments.push_back(attachment); break; case RenderTargetType::DepthStencil: if (::hasDepth(inputAttachment.renderTarget().format()) && ::hasStencil(inputAttachment.renderTarget().format())) [[likely]] - attachment.initialLayout = attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + { + attachment.initialLayout = 
inputAttachment.renderTarget().attachment() ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; + attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + } else if (::hasDepth(inputAttachment.renderTarget().format())) - attachment.initialLayout = attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL; + { + attachment.initialLayout = inputAttachment.renderTarget().attachment() ? VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL; + attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL; + } else if (::hasStencil(inputAttachment.renderTarget().format())) - attachment.initialLayout = attachment.finalLayout = VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL; + { + attachment.initialLayout = inputAttachment.renderTarget().attachment() ? VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL; + attachment.finalLayout = VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL; + } else [[unlikely]] { LITEFX_WARNING(VULKAN_LOG, "The depth/stencil input attachment at location {0} does not have a valid depth/stencil format ({1}). Falling back to VK_IMAGE_LAYOUT_GENERAL.", currentIndex, inputAttachment.renderTarget().format()); @@ -147,16 +157,16 @@ class VulkanRenderPass::VulkanRenderPassImpl : public Implement(currentIndex + inputAttachments.size()), attachment.finalLayout }); break; case RenderTargetType::DepthStencil: if (::hasDepth(renderTarget.format()) || ::hasStencil(renderTarget.format())) [[likely]] - attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + attachment.finalLayout = renderTarget.attachment() ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; else if (::hasDepth(renderTarget.format())) - attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL; + attachment.finalLayout = renderTarget.attachment() ? 
VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL; else if (::hasStencil(renderTarget.format())) - attachment.finalLayout = VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL; + attachment.finalLayout = renderTarget.attachment() ? VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL; else [[unlikely]] { LITEFX_WARNING(VULKAN_LOG, "The depth/stencil render target at location {0} does not have a valid depth/stencil format ({1}). Falling back to VK_IMAGE_LAYOUT_GENERAL.", currentIndex, renderTarget.format()); diff --git a/src/Rendering/include/litefx/rendering_api.hpp b/src/Rendering/include/litefx/rendering_api.hpp index 44b119693..6c4fe4038 100644 --- a/src/Rendering/include/litefx/rendering_api.hpp +++ b/src/Rendering/include/litefx/rendering_api.hpp @@ -828,7 +828,24 @@ namespace LiteFX::Rendering { /// /// This flag must not be combined with depth/stencil formats. /// - Shared = 0x10 + Shared = 0x10, + + /// + /// If enabled and supported, the render target will transition into an optimized attachment layout instead of a general image layout. + /// + /// + /// In Vulkan, render passes are more elaborate compared to DirectX, which does not have any concept similar to "attachments". In DirectX, all attachments are regular + /// images and later render passes use them as they would with any image in a shader. In Vulkan, however, there is a special set of instructions to load data from + /// input attachments. If a render target is mapped to an input attachment and should use optimized layouts, this flag can be specified to enable it. Note that this + /// might require providing different shaders based on which backend is used. The shader targets created by the engine will define a macro (`DXIL`/`SPIRV`) to + /// support targeting different backends.
+ /// + /// If attachments are unsupported, creating a render pass with a render target that has this flag enabled will issue a warning to remind you to pay attention + /// to the differences in the backends. + /// + /// This flag is ignored for present targets. + /// + Attachment = 0x20 }; /// @@ -2115,7 +2132,7 @@ namespace LiteFX::Rendering { virtual bool isVolatile() const noexcept = 0; /// - /// Return true, if the render target image can be used for storage/unordered access. + /// Returns true, if the render target image can be used for storage/unordered access. /// /// true, if the render target image can be used for storage/unordered access. /// @@ -2123,13 +2140,21 @@ namespace LiteFX::Rendering { virtual bool allowStorage() const noexcept = 0; /// - /// Return true, if the render target image can be accessed simultaneously from different queues. + /// Returns true, if the render target image can be accessed simultaneously from different queues. /// /// true, if the render target image can be accessed simultaneously from different queues. /// /// virtual bool multiQueueAccess() const noexcept = 0; + /// + /// Returns true, if the render target should transition into an optimized attachment layout, rather than a general image layout after executing the render pass. + /// + /// true, if the render target should transition into an optimized attachment layout. + /// + /// + virtual bool attachment() const noexcept = 0; + /// /// Returns the render targets blend state.
/// @@ -2211,6 +2236,9 @@ namespace LiteFX::Rendering { /// bool multiQueueAccess() const noexcept override; + /// + bool attachment() const noexcept override; + /// const BlendState& blendState() const noexcept override; }; diff --git a/src/Rendering/src/render_target.cpp b/src/Rendering/src/render_target.cpp index bd4013f95..c1c0ba3a2 100644 --- a/src/Rendering/src/render_target.cpp +++ b/src/Rendering/src/render_target.cpp @@ -143,6 +143,11 @@ bool RenderTarget::multiQueueAccess() const noexcept return LITEFX_FLAG_IS_SET(m_impl->m_flags, RenderTargetFlags::Shared); } +bool RenderTarget::attachment() const noexcept +{ + return LITEFX_FLAG_IS_SET(m_impl->m_flags, RenderTargetFlags::Attachment); +} + const IRenderTarget::BlendState& RenderTarget::blendState() const noexcept { return m_impl->m_blendState; diff --git a/src/Samples/RenderPasses/shaders/lighting_pass_fs.hlsl b/src/Samples/RenderPasses/shaders/lighting_pass_fs.hlsl index 100804938..22613885f 100644 --- a/src/Samples/RenderPasses/shaders/lighting_pass_fs.hlsl +++ b/src/Samples/RenderPasses/shaders/lighting_pass_fs.hlsl @@ -25,6 +25,12 @@ FragmentData main(VertexData input) { FragmentData fragment; + // NOTE: Unfortunately, specifying static samplers in the shader is currently not supported by dxc (see https://github.com/microsoft/DirectXShaderCompiler/issues/4137). This would + // allow us to use the same code path for both backends without a backend-specific branch where the input attachments are sampled. In this case, the `RenderTargetFlags::Attachment` should not be provided + // and `SubpassLoad` should not be used for the Vulkan backend. As an alternative, the static sampler can be provided from the render pipeline description instead of the root + // signature. However, in both cases whilst the code path would be the same, we would lose out on the possible optimization from input attachment formats, which is only supported + // in Vulkan and demonstrated in this sample.
Note that it is still possible to use input attachments with less optimal image layouts (i.e., without the `Attachment` flag). + #ifdef SPIRV fragment.Color = gDiffuse.SubpassLoad(); fragment.Depth = gDepth.SubpassLoad(); diff --git a/src/Samples/RenderPasses/src/sample.cpp b/src/Samples/RenderPasses/src/sample.cpp index c1750879a..af7c88212 100644 --- a/src/Samples/RenderPasses/src/sample.cpp +++ b/src/Samples/RenderPasses/src/sample.cpp @@ -76,9 +76,10 @@ void initRenderGraph(TRenderBackend* backend, SharedPtr& inputA // Create a geometry and lighting render passes. // NOTE: For Vulkan, input attachments need to be in a continuous range, starting at index 0. + // NOTE: RenderTargetFlags::Attachment is not required UniquePtr geometryPass = device->buildRenderPass("Geometry Pass") - .renderTarget("G-Buffer Color", 0, RenderTargetType::Color, Format::B8G8R8A8_UNORM, RenderTargetFlags::Clear, { 0.1f, 0.1f, 0.1f, 1.f }) - .renderTarget("G-Buffer Depth/Stencil", 1, RenderTargetType::DepthStencil, Format::D32_SFLOAT, RenderTargetFlags::Clear | RenderTargetFlags::ClearStencil, { 1.f, 0.f, 0.f, 0.f }); + .renderTarget("G-Buffer Color", 0, RenderTargetType::Color, Format::B8G8R8A8_UNORM, RenderTargetFlags::Clear | RenderTargetFlags::Attachment, { 0.1f, 0.1f, 0.1f, 1.f }) + .renderTarget("G-Buffer Depth/Stencil", 1, RenderTargetType::DepthStencil, Format::D32_SFLOAT, RenderTargetFlags::Clear | RenderTargetFlags::ClearStencil | RenderTargetFlags::Attachment, { 1.f, 0.f, 0.f, 0.f }); UniquePtr lightingPass = device->buildRenderPass("Lighting Pass") .inputAttachment(0, *geometryPass, 0) // Map color attachment from geometry pass render target 0 to location 0. From d3ccc1d8b578b6a21de917dc9a5f1260b402e4ce Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Tue, 2 Jan 2024 11:53:11 +0100 Subject: [PATCH 13/38] Allow sampling on general layout attachments. 
--- src/Backends/Vulkan/src/factory.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Backends/Vulkan/src/factory.cpp b/src/Backends/Vulkan/src/factory.cpp index 705d51bd5..3229798c2 100644 --- a/src/Backends/Vulkan/src/factory.cpp +++ b/src/Backends/Vulkan/src/factory.cpp @@ -254,6 +254,9 @@ UniquePtr VulkanGraphicsFactory::createAttachment(const String& na imageInfo.samples = Vk::getSamples(samples); imageInfo.usage = (::hasDepth(format) || ::hasStencil(format) ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; + if (!target.attachment()) + imageInfo.usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + if (target.allowStorage()) imageInfo.usage |= VK_IMAGE_USAGE_STORAGE_BIT; From 3583ac326343ab5fc69d323b52c738fdec2d6944 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Tue, 2 Jan 2024 11:54:43 +0100 Subject: [PATCH 14/38] Fix scope of queue family indices. --- src/Backends/Vulkan/src/factory.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Backends/Vulkan/src/factory.cpp b/src/Backends/Vulkan/src/factory.cpp index 3229798c2..6ccea09a1 100644 --- a/src/Backends/Vulkan/src/factory.cpp +++ b/src/Backends/Vulkan/src/factory.cpp @@ -260,11 +260,12 @@ UniquePtr VulkanGraphicsFactory::createAttachment(const String& na if (target.allowStorage()) imageInfo.usage |= VK_IMAGE_USAGE_STORAGE_BIT; + auto queueFamilies = m_impl->m_device.queueFamilyIndices() | std::ranges::to(); + if (!target.multiQueueAccess()) imageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; else { - auto queueFamilies = m_impl->m_device.queueFamilyIndices() | std::ranges::to(); imageInfo.sharingMode = VK_SHARING_MODE_CONCURRENT; imageInfo.queueFamilyIndexCount = static_cast(queueFamilies.size()); imageInfo.pQueueFamilyIndices = queueFamilies.data(); From f1be1b7a62961f3c6d05d96bd993d052e7f0c246 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph 
<18394207+crud89@users.noreply.github.com> Date: Tue, 2 Jan 2024 14:51:45 +0100 Subject: [PATCH 15/38] Don't care about previous layout. --- src/Backends/Vulkan/src/command_buffer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Backends/Vulkan/src/command_buffer.cpp b/src/Backends/Vulkan/src/command_buffer.cpp index 94181abbb..4ab9636f1 100644 --- a/src/Backends/Vulkan/src/command_buffer.cpp +++ b/src/Backends/Vulkan/src/command_buffer.cpp @@ -278,7 +278,7 @@ void VulkanCommandBuffer::transfer(IVulkanBuffer& source, IVulkanImage& target, target.resolveSubresource(subresource, plane, layer, level); if (static_cast(target).layout(subresource) != ImageLayout::CopyDestination) - barrier.transition(target, level, 1, layer, 1, plane, ResourceAccess::None, ResourceAccess::TransferWrite, ImageLayout::CopyDestination); + barrier.transition(target, level, 1, layer, 1, plane, ResourceAccess::None, ResourceAccess::TransferWrite, ImageLayout::Undefined, ImageLayout::CopyDestination); return VkBufferImageCopy { .bufferOffset = source.alignedElementSize() * sourceElement, @@ -322,7 +322,7 @@ void VulkanCommandBuffer::transfer(IVulkanImage& source, IVulkanImage& target, U // barrier.transition(source, sourceLayer, 1, sourceLevel, 1, sourcePlane, ResourceAccess::None, ResourceAccess::TransferRead, ImageLayout::CopySource); if (static_cast(target).layout(targetSubresource) != ImageLayout::CopyDestination) - barrier.transition(target, targetLayer, 1, targetLevel, 1, targetPlane, ResourceAccess::None, ResourceAccess::TransferWrite, ImageLayout::CopyDestination); + barrier.transition(target, targetLayer, 1, targetLevel, 1, targetPlane, ResourceAccess::None, ResourceAccess::TransferWrite, ImageLayout::Undefined, ImageLayout::CopyDestination); return VkImageCopy { .srcSubresource = VkImageSubresourceLayers { From b0e94e2a6ecbb777759636dcc60f27696db9de33 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Tue, 2 Jan 
2024 14:52:04 +0100 Subject: [PATCH 16/38] Render targets can be transfer destination/sources. --- src/Backends/Vulkan/src/factory.cpp | 12 +++++++++--- src/Backends/Vulkan/src/swapchain.cpp | 4 ++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/Backends/Vulkan/src/factory.cpp b/src/Backends/Vulkan/src/factory.cpp index 6ccea09a1..fd8dd7b00 100644 --- a/src/Backends/Vulkan/src/factory.cpp +++ b/src/Backends/Vulkan/src/factory.cpp @@ -252,14 +252,20 @@ UniquePtr VulkanGraphicsFactory::createAttachment(const String& na imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL; imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; imageInfo.samples = Vk::getSamples(samples); - imageInfo.usage = (::hasDepth(format) || ::hasStencil(format) ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; + imageInfo.usage = ::hasDepth(format) || ::hasStencil(format) ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - if (!target.attachment()) - imageInfo.usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + if (target.attachment()) + imageInfo.usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; + else + imageInfo.usage |= VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; if (target.allowStorage()) imageInfo.usage |= VK_IMAGE_USAGE_STORAGE_BIT; + // Present targets should also allow copying. 
+ if (target.allowStorage()) + imageInfo.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; + auto queueFamilies = m_impl->m_device.queueFamilyIndices() | std::ranges::to(); if (!target.multiQueueAccess()) diff --git a/src/Backends/Vulkan/src/swapchain.cpp b/src/Backends/Vulkan/src/swapchain.cpp index fc455e63b..d63cc4845 100644 --- a/src/Backends/Vulkan/src/swapchain.cpp +++ b/src/Backends/Vulkan/src/swapchain.cpp @@ -78,7 +78,7 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { createInfo.surface = surface; createInfo.minImageCount = images; createInfo.imageArrayLayers = 1; - createInfo.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + createInfo.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; createInfo.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; createInfo.imageFormat = Vk::getFormat(selectedFormat); createInfo.imageColorSpace = this->findColorSpace(adapter, surface, selectedFormat); @@ -566,7 +566,7 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { .arrayLayers = 1, .samples = VK_SAMPLE_COUNT_1_BIT, .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE }; From 0f879b468d84d51899e5c56a609652989998986d Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Tue, 2 Jan 2024 14:52:30 +0100 Subject: [PATCH 17/38] Switch post-processing example from blur to RGB to grayscale conversion. 
--- src/Samples/Compute/CMakeLists.txt | 12 +- .../Compute/shaders/compute_blur_cs.hlsl | 48 ------- .../Compute/shaders/compute_lum_cs.hlsl | 13 ++ src/Samples/Compute/src/sample.cpp | 133 +++++++----------- 4 files changed, 71 insertions(+), 135 deletions(-) delete mode 100644 src/Samples/Compute/shaders/compute_blur_cs.hlsl create mode 100644 src/Samples/Compute/shaders/compute_lum_cs.hlsl diff --git a/src/Samples/Compute/CMakeLists.txt b/src/Samples/Compute/CMakeLists.txt index 22c3ba6c1..13bdb3c55 100644 --- a/src/Samples/Compute/CMakeLists.txt +++ b/src/Samples/Compute/CMakeLists.txt @@ -67,16 +67,16 @@ IF(BUILD_VULKAN_BACKEND) ADD_SHADER_MODULE(${PROJECT_NAME}.Vk.Shaders.Geom.FS SOURCE "shaders/compute_geom_fs.hlsl" LANGUAGE HLSL TYPE FRAGMENT COMPILE_AS SPIRV SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) ADD_SHADER_MODULE(${PROJECT_NAME}.Vk.Shaders.Pres.VS SOURCE "shaders/compute_present_vs.hlsl" LANGUAGE HLSL TYPE VERTEX COMPILE_AS SPIRV SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) ADD_SHADER_MODULE(${PROJECT_NAME}.Vk.Shaders.Pres.FS SOURCE "shaders/compute_present_fs.hlsl" LANGUAGE HLSL TYPE FRAGMENT COMPILE_AS SPIRV SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) - ADD_SHADER_MODULE(${PROJECT_NAME}.Vk.Shaders.Blur.CS SOURCE "shaders/compute_blur_cs.hlsl" LANGUAGE HLSL TYPE COMPUTE COMPILE_AS SPIRV SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) + ADD_SHADER_MODULE(${PROJECT_NAME}.Vk.Shaders.Lumi.CS SOURCE "shaders/compute_lum_cs.hlsl" LANGUAGE HLSL TYPE COMPUTE COMPILE_AS SPIRV SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) SET_TARGET_PROPERTIES(${PROJECT_NAME}.Vk.Shaders.Geom.VS PROPERTIES FOLDER "Samples/Shaders/Vulkan") SET_TARGET_PROPERTIES(${PROJECT_NAME}.Vk.Shaders.Geom.FS PROPERTIES FOLDER "Samples/Shaders/Vulkan") SET_TARGET_PROPERTIES(${PROJECT_NAME}.Vk.Shaders.Pres.VS PROPERTIES FOLDER "Samples/Shaders/Vulkan") SET_TARGET_PROPERTIES(${PROJECT_NAME}.Vk.Shaders.Pres.FS PROPERTIES FOLDER 
"Samples/Shaders/Vulkan") - SET_TARGET_PROPERTIES(${PROJECT_NAME}.Vk.Shaders.Blur.CS PROPERTIES FOLDER "Samples/Shaders/Vulkan") + SET_TARGET_PROPERTIES(${PROJECT_NAME}.Vk.Shaders.Lumi.CS PROPERTIES FOLDER "Samples/Shaders/Vulkan") TARGET_LINK_SHADERS(${PROJECT_NAME} INSTALL_DESTINATION "${CMAKE_INSTALL_BINARY_DIR}/${SHADER_DEFAULT_SUBDIR}" - SHADERS ${PROJECT_NAME}.Vk.Shaders.Geom.VS ${PROJECT_NAME}.Vk.Shaders.Geom.FS ${PROJECT_NAME}.Vk.Shaders.Blur.CS SHADERS ${PROJECT_NAME}.Vk.Shaders.Pres.VS ${PROJECT_NAME}.Vk.Shaders.Pres.FS + SHADERS ${PROJECT_NAME}.Vk.Shaders.Geom.VS ${PROJECT_NAME}.Vk.Shaders.Geom.FS ${PROJECT_NAME}.Vk.Shaders.Lumi.CS SHADERS ${PROJECT_NAME}.Vk.Shaders.Pres.VS ${PROJECT_NAME}.Vk.Shaders.Pres.FS ) ENDIF(BUILD_VULKAN_BACKEND) @@ -87,16 +87,16 @@ IF(BUILD_DIRECTX_12_BACKEND) ADD_SHADER_MODULE(${PROJECT_NAME}.Dx.Shaders.Geom.PS SOURCE "shaders/compute_geom_fs.hlsl" LANGUAGE HLSL TYPE PIXEL COMPILE_AS DXIL SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) ADD_SHADER_MODULE(${PROJECT_NAME}.Dx.Shaders.Pres.VS SOURCE "shaders/compute_present_vs.hlsl" LANGUAGE HLSL TYPE VERTEX COMPILE_AS DXIL SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) ADD_SHADER_MODULE(${PROJECT_NAME}.Dx.Shaders.Pres.PS SOURCE "shaders/compute_present_fs.hlsl" LANGUAGE HLSL TYPE PIXEL COMPILE_AS DXIL SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) - ADD_SHADER_MODULE(${PROJECT_NAME}.Dx.Shaders.Blur.CS SOURCE "shaders/compute_blur_cs.hlsl" LANGUAGE HLSL TYPE COMPUTE COMPILE_AS DXIL SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) + ADD_SHADER_MODULE(${PROJECT_NAME}.Dx.Shaders.Lumi.CS SOURCE "shaders/compute_lum_cs.hlsl" LANGUAGE HLSL TYPE COMPUTE COMPILE_AS DXIL SHADER_MODEL ${BUILD_HLSL_SHADER_MODEL} COMPILER DXC) SET_TARGET_PROPERTIES(${PROJECT_NAME}.Dx.Shaders.Geom.VS PROPERTIES FOLDER "Samples/Shaders/DirectX 12") SET_TARGET_PROPERTIES(${PROJECT_NAME}.Dx.Shaders.Geom.PS PROPERTIES FOLDER "Samples/Shaders/DirectX 12") 
SET_TARGET_PROPERTIES(${PROJECT_NAME}.Dx.Shaders.Pres.VS PROPERTIES FOLDER "Samples/Shaders/DirectX 12") SET_TARGET_PROPERTIES(${PROJECT_NAME}.Dx.Shaders.Pres.PS PROPERTIES FOLDER "Samples/Shaders/DirectX 12") - SET_TARGET_PROPERTIES(${PROJECT_NAME}.Dx.Shaders.Blur.CS PROPERTIES FOLDER "Samples/Shaders/DirectX 12") + SET_TARGET_PROPERTIES(${PROJECT_NAME}.Dx.Shaders.Lumi.CS PROPERTIES FOLDER "Samples/Shaders/DirectX 12") TARGET_LINK_SHADERS(${PROJECT_NAME} INSTALL_DESTINATION "${CMAKE_INSTALL_BINARY_DIR}/${SHADER_DEFAULT_SUBDIR}" - SHADERS ${PROJECT_NAME}.Dx.Shaders.Geom.VS ${PROJECT_NAME}.Dx.Shaders.Geom.PS ${PROJECT_NAME}.Dx.Shaders.Blur.CS SHADERS ${PROJECT_NAME}.Dx.Shaders.Pres.VS ${PROJECT_NAME}.Dx.Shaders.Pres.PS + SHADERS ${PROJECT_NAME}.Dx.Shaders.Geom.VS ${PROJECT_NAME}.Dx.Shaders.Geom.PS ${PROJECT_NAME}.Dx.Shaders.Lumi.CS SHADERS ${PROJECT_NAME}.Dx.Shaders.Pres.VS ${PROJECT_NAME}.Dx.Shaders.Pres.PS ) ENDIF(BUILD_DIRECTX_12_BACKEND) diff --git a/src/Samples/Compute/shaders/compute_blur_cs.hlsl b/src/Samples/Compute/shaders/compute_blur_cs.hlsl deleted file mode 100644 index 4bef4629b..000000000 --- a/src/Samples/Compute/shaders/compute_blur_cs.hlsl +++ /dev/null @@ -1,48 +0,0 @@ -#pragma pack_matrix(row_major) - -#define KERNEL_SIZE 8 // NOTE: Must be even! -#define SIGMA 10 - -RWTexture2D FrameBuffer : register(u0, space0); - -// Evaluates gaussian bell curve for a value n. -float gauss(float sigma, int n) -{ - return exp(-pow(n, 2.0) / (2.0 * pow(sigma, 2.0))); -} - -[numthreads(8, 8, 1)] -void main(uint3 id : SV_DispatchThreadID) -{ - int width, height; - FrameBuffer.GetDimensions(width, height); - - // Compute 1D gaussian kernel. 
- float kernel[KERNEL_SIZE + 1]; - float weight = 0.0; - - [unroll((KERNEL_SIZE / 2) + 1)] - for (int n = 0; n <= KERNEL_SIZE / 2; ++n) - kernel[KERNEL_SIZE / 2 + n] = kernel[KERNEL_SIZE / 2 - n] = gauss(SIGMA, n); - - [unroll(KERNEL_SIZE + 1)] - for (int s = 0; s < KERNEL_SIZE + 1; ++s) - weight += kernel[s]; - - // Compute weighted color. - float3 color = float3(0.0, 0.0, 0.0); - - for (int x = -KERNEL_SIZE / 2; x <= KERNEL_SIZE / 2; ++x) - { - [unroll(KERNEL_SIZE + 1)] - for (int y = -KERNEL_SIZE / 2; y <= KERNEL_SIZE / 2; ++y) - { - // NOTE: This samples pixels on the edges of the frame buffer multiple times, which is inaccurate but serves the purpose of demonstration. - float coefficient = kernel[(KERNEL_SIZE / 2) + x] * kernel[(KERNEL_SIZE / 2) + y]; - int2 sampleLocation = int2(max(min(id.x + x, width), 0), max(min(id.y + y, height), 0)); - color += FrameBuffer.Load(sampleLocation).rgb; - } - } - - FrameBuffer[id.xy] = float4(color / pow(weight, 2.0), 1.0); -} \ No newline at end of file diff --git a/src/Samples/Compute/shaders/compute_lum_cs.hlsl b/src/Samples/Compute/shaders/compute_lum_cs.hlsl new file mode 100644 index 000000000..9d29e6272 --- /dev/null +++ b/src/Samples/Compute/shaders/compute_lum_cs.hlsl @@ -0,0 +1,13 @@ +#pragma pack_matrix(row_major) + +RWTexture2D FrameBuffer : register(u0, space0); + +[numthreads(8, 8, 1)] +void main(uint3 id : SV_DispatchThreadID) +{ + // Read the color at the current position and compute luminosity. + float3 color = FrameBuffer.Load(id.xy).rgb; + float Y = 0.2126 * color.r + 0.7152 * color.g + 0.0722 * color.b; + + FrameBuffer[id.xy] = float4(Y, Y, Y, 1.0); +} \ No newline at end of file diff --git a/src/Samples/Compute/src/sample.cpp b/src/Samples/Compute/src/sample.cpp index 70ab8128e..f47689c4a 100644 --- a/src/Samples/Compute/src/sample.cpp +++ b/src/Samples/Compute/src/sample.cpp @@ -66,8 +66,8 @@ void initRenderGraph(TRenderBackend* backend, SharedPtr& inputA // Create a geometry render pass. 
UniquePtr renderPass = device->buildRenderPass("Opaque") - .renderTarget("Color Target", RenderTargetType::Color, Format::R8G8B8A8_UNORM, {0.1f, 0.1f, 0.1f, 1.f}, true, false, false) - .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, {1.f, 0.f, 0.f, 0.f}, true, false, false); + .renderTarget("Color Target", RenderTargetType::Color, Format::R8G8B8A8_UNORM, RenderTargetFlags::Clear | RenderTargetFlags::Shared | RenderTargetFlags::AllowStorage, { 0.1f, 0.1f, 0.1f, 1.f }) + .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, RenderTargetFlags::Clear, { 1.f, 0.f, 0.f, 0.f }); // Create the shader program. SharedPtr shaderProgram = device->buildShaderProgram() @@ -85,44 +85,24 @@ void initRenderGraph(TRenderBackend* backend, SharedPtr& inputA .layout(shaderProgram->reflectPipelineLayout()) .shaderProgram(shaderProgram); - // Create the blur shader program. - SharedPtr blurProgram = device->buildShaderProgram() - .withComputeShaderModule("shaders/compute_blur_cs." + FileExtensions::SHADER); + // Create the post-processing shader program. + SharedPtr postProgram = device->buildShaderProgram() + .withComputeShaderModule("shaders/compute_lum_cs." + FileExtensions::SHADER); // RGB -> Luminosity // Create a compute pipeline. - UniquePtr blurPipeline = device->buildComputePipeline("Blur") - .layout(blurProgram->reflectPipelineLayout()) - .shaderProgram(blurProgram); + UniquePtr postPipeline = device->buildComputePipeline("Post") + .layout(postProgram->reflectPipelineLayout()) + .shaderProgram(postProgram); // Build a present render pass. UniquePtr presentPass = device->buildRenderPass("Present") - .renderTarget("Present Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, { 0.0f, 0.0f, 0.0f, 1.f }, false, false, false); - - // Create a shader program for resolving the blurred image. 
- SharedPtr presentProgram = device->buildShaderProgram() - .withVertexShaderModule("shaders/compute_present_vs." + FileExtensions::SHADER) - .withFragmentShaderModule("shaders/compute_present_fs." + FileExtensions::SHADER); - - // Create a render pipeline for presentation. - SharedPtr screenQuadAssembler = device->buildInputAssembler() - .topology(PrimitiveTopology::TriangleStrip); - - UniquePtr presentPipeline = device->buildRenderPipeline(*presentPass, "Resolve") - .inputAssembler(device->buildInputAssembler() - .topology(PrimitiveTopology::TriangleStrip)) - .rasterizer(device->buildRasterizer() - .polygonMode(PolygonMode::Solid) - .cullMode(CullMode::Disabled) - .cullOrder(CullOrder::ClockWise)) - .layout(presentProgram->reflectPipelineLayout()) - .shaderProgram(presentProgram); + .renderTarget("Present Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, RenderTargetFlags::None, { 0.0f, 0.0f, 0.0f, 1.f }); // Add the resources to the device state. device->state().add(std::move(renderPass)); device->state().add(std::move(presentPass)); device->state().add(std::move(renderPipeline)); - device->state().add(std::move(blurPipeline)); - device->state().add(std::move(presentPipeline)); + device->state().add(std::move(postPipeline)); } void SampleApp::initBuffers(IRenderBackend* backend) @@ -169,14 +149,15 @@ void SampleApp::initBuffers(IRenderBackend* backend) { { .resource = *transformBuffer, .firstElement = 2, .elements = 1 } } }); - // Allocate bindings for the blur pass and presentation. - auto& blurPipeline = m_device->state().pipeline("Blur"); - auto& blurInputLayout = blurPipeline.layout()->descriptorSet(0); - auto blurBindings = blurInputLayout.allocate({ { } }); - - auto& presentPipeline = m_device->state().pipeline("Resolve"); - auto& presentInputLayout = presentPipeline.layout()->descriptorSet(0); - auto presentBindings = presentInputLayout.allocate({ { } }); + // Allocate bindings for the post-processing pass. 
+ auto& renderPass = m_device->state().renderPass("Opaque"); + auto& postPipeline = m_device->state().pipeline("Post"); + auto& postInputLayout = postPipeline.layout()->descriptorSet(0); + auto postBindings = postInputLayout.allocateMultiple(3, { + { { .resource = renderPass.frameBuffer(0).image(0) } }, + { { .resource = renderPass.frameBuffer(1).image(0) } }, + { { .resource = renderPass.frameBuffer(2).image(0) } } + }); // Add everything to the state. m_device->state().add(std::move(vertexBuffer)); @@ -184,8 +165,7 @@ void SampleApp::initBuffers(IRenderBackend* backend) m_device->state().add(std::move(cameraBuffer)); m_device->state().add(std::move(transformBuffer)); m_device->state().add("Camera Bindings", std::move(cameraBindings)); - m_device->state().add("Blur Bindings", std::move(blurBindings)); - m_device->state().add("Present Bindings", std::move(presentBindings)); + std::ranges::for_each(postBindings, [this, i = 0](auto& binding) mutable { m_device->state().add(fmt::format("Post Bindings {0}", i++), std::move(binding)); }); std::ranges::for_each(transformBindings, [this, i = 0](auto& binding) mutable { m_device->state().add(fmt::format("Transform Bindings {0}", i++), std::move(binding)); }); } @@ -411,18 +391,18 @@ void SampleApp::drawFrame() // Query state. For performance reasons, those state variables should be cached for more complex applications, instead of looking them up every frame. 
auto& renderPass = m_device->state().renderPass("Opaque"); auto& presentPass = m_device->state().renderPass("Present"); - auto& blurPipeline = m_device->state().pipeline("Blur"); + auto& postPipeline = m_device->state().pipeline("Post"); auto& geometryPipeline = m_device->state().pipeline("Geometry"); - auto& resolvePipeline = m_device->state().pipeline("Resolve"); auto& transformBuffer = m_device->state().buffer("Transform"); auto& cameraBindings = m_device->state().descriptorSet("Camera Bindings"); - auto& blurBindings = m_device->state().descriptorSet("Blur Bindings"); - auto& presentBindings = m_device->state().descriptorSet("Present Bindings"); + auto& postBindings = m_device->state().descriptorSet(fmt::format("Post Bindings {0}", backBuffer)); auto& transformBindings = m_device->state().descriptorSet(fmt::format("Transform Bindings {0}", backBuffer)); auto& vertexBuffer = m_device->state().vertexBuffer("Vertex Buffer"); auto& indexBuffer = m_device->state().indexBuffer("Index Buffer"); // Draw geometry. + UInt64 geometryFence = 0; + { // Wait for all transfers to finish. renderPass.commandQueue().waitFor(m_device->defaultQueue(QueueType::Transfer), m_transferFence); @@ -452,7 +432,7 @@ void SampleApp::drawFrame() // Draw the object and present the frame by ending the render pass. commandBuffer->drawIndexed(indexBuffer.elements()); - renderPass.end(); + geometryFence = renderPass.end(); } // Perform post processing on compute queue. @@ -461,63 +441,54 @@ void SampleApp::drawFrame() { // Create a command buffer. auto commandBuffer = m_device->defaultQueue(QueueType::Compute).createCommandBuffer(true); - commandBuffer->use(blurPipeline); + commandBuffer->use(postPipeline); // Get the image from the back buffer of the geometry pass. auto& frameBuffer = renderPass.frameBuffer(backBuffer); auto& image = frameBuffer.image(0); // Create a barrier that handles image transition. 
- auto barrier = m_device->makeBarrier(PipelineStage::Fragment, PipelineStage::Compute); - barrier->transition(image, ResourceAccess::RenderTarget, ResourceAccess::ShaderReadWrite, ImageLayout::ReadWrite); + // NOTE: Since we did not specify the `RenderTargetFlags::Attachment` flag for the render target during pipeline creation, the render target is in `Common` layout and only needs + // transitioning into a writeable state. + auto barrier = m_device->makeBarrier(PipelineStage::None, PipelineStage::Compute); + barrier->transition(image, ResourceAccess::None, ResourceAccess::ShaderReadWrite, ImageLayout::Common, ImageLayout::ReadWrite); commandBuffer->barrier(*barrier); // Bind the image to the texture descriptor. - blurBindings.update(0, image); - commandBuffer->bind(blurBindings); + commandBuffer->bind(postBindings); - // Dispatch the blur pass. + // Dispatch the post-processing pass. commandBuffer->dispatch({ static_cast(image.extent().x()), static_cast(image.extent().y()), 1 }); + // After post-processing, transition the image back into a state where it can be copied from. + barrier = m_device->makeBarrier(PipelineStage::Compute, PipelineStage::None); + barrier->transition(image, ResourceAccess::ShaderReadWrite, ResourceAccess::None, ImageLayout::CopySource); + commandBuffer->barrier(*barrier); + // Submit the command buffer. - //m_device->defaultQueue(QueueType::Compute).waitFor(renderPass.commandQueue(), frameBuffer.lastFence()); + m_device->defaultQueue(QueueType::Compute).waitFor(renderPass.commandQueue(), geometryFence); postProcessFence = commandBuffer->submit(); - - // NOTE: Since the queues might have different priorities, we have to wait for the dispatch either later by using a barrier, or explicitly somewhere. Otherwise more - // command buffers will be allocated than actually being processed. } // Execute present pass. 
{ - presentPass.begin(backBuffer); - auto commandBuffer = presentPass.activeFrameBuffer().commandBuffer(0); - commandBuffer->use(resolvePipeline); - commandBuffer->setViewports(m_viewport.get()); - commandBuffer->setScissors(m_scissor.get()); - - // Get the image from the back buffer of the geometry pass, as it is the one that was previously handled in the compute queue. - auto& frameBuffer = renderPass.frameBuffer(backBuffer); - auto& image = frameBuffer.image(0); - - // Transition the image back to a shader resource. - auto barrier = m_device->makeBarrier(PipelineStage::Compute, PipelineStage::Vertex); - barrier->transition(image, ResourceAccess::ShaderReadWrite, ResourceAccess::ShaderRead, ImageLayout::ShaderResource); + // Copy the post-processed image into the render target. + // NOTE: This implicitly transitions the image into `CopyDestination` layout. + auto& queue = presentPass.commandQueue(); + auto commandBuffer = queue.createCommandBuffer(true); + commandBuffer->transfer(renderPass.frameBuffer(backBuffer).image(0), presentPass.frameBuffer(backBuffer).image(0)); + + // Transition the image back into `Present` layout. + auto barrier = m_device->makeBarrier(PipelineStage::Transfer, PipelineStage::Resolve); + barrier->transition(presentPass.frameBuffer(backBuffer).image(0), ResourceAccess::TransferWrite, ResourceAccess::ResolveRead, ImageLayout::CopyDestination, ImageLayout::Present); commandBuffer->barrier(*barrier); - // Bind the image to the - presentBindings.update(0, image); - commandBuffer->bind(presentBindings); - - // Draw 4 instances of "nothing" to create the screen quad. - commandBuffer->draw(0, 4); + // Wait for the compute queue to finish before performing the transfer. + queue.waitFor(m_device->defaultQueue(QueueType::Compute), postProcessFence); + queue.submit(commandBuffer); - // Important: transition the image back to a render target, for the next iteration of this back buffer to be able to render into it. 
- barrier = m_device->makeBarrier(PipelineStage::Fragment, PipelineStage::Fragment); - barrier->transition(image, ResourceAccess::ShaderRead, ResourceAccess::RenderTarget, ImageLayout::RenderTarget); - commandBuffer->barrier(*barrier); - - // End the render pass in order to present the image. - //presentPass.commandQueue().waitFor(m_device->defaultQueue(QueueType::Compute), postProcessFence); + // Begin and immediately end the present pass (that does not do any actual work except presenting on end). + presentPass.begin(backBuffer); presentPass.end(); } } \ No newline at end of file From 17235eefaa1d6793c86ef27f9fecb3a5eadc748e Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Tue, 2 Jan 2024 16:04:18 +0100 Subject: [PATCH 18/38] Fix documentation markup. --- src/Rendering/include/litefx/rendering_api.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Rendering/include/litefx/rendering_api.hpp b/src/Rendering/include/litefx/rendering_api.hpp index 6c4fe4038..6c0b52d68 100644 --- a/src/Rendering/include/litefx/rendering_api.hpp +++ b/src/Rendering/include/litefx/rendering_api.hpp @@ -1989,7 +1989,7 @@ namespace LiteFX::Rendering { /// /// Represents a render target, i.e. an abstract view of the output of an . - /// + /// /// /// A render target represents one output of a render pass, stored within an . It is contained by a , that contains /// the , that stores the actual render target image resource. From 667b41856a4c6dd48e709e66acfe1e2f26178201 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Tue, 2 Jan 2024 17:04:07 +0100 Subject: [PATCH 19/38] Don't use footprints for image to image copies. 
--- src/Backends/DirectX12/src/command_buffer.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Backends/DirectX12/src/command_buffer.cpp b/src/Backends/DirectX12/src/command_buffer.cpp index 5ac9de989..a4283c0a7 100644 --- a/src/Backends/DirectX12/src/command_buffer.cpp +++ b/src/Backends/DirectX12/src/command_buffer.cpp @@ -281,13 +281,9 @@ void DirectX12CommandBuffer::transfer(IDirectX12Image& source, IDirectX12Image& if (target.elements() < targetSubresource + subresources) [[unlikely]] throw ArgumentOutOfRangeException("targetElement", "The target image has only {0} sub-resources, but a transfer for {1} sub-resources starting from sub-resources {2} has been requested.", target.elements(), subresources, targetSubresource); - D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint; - const auto& targetDesc = std::as_const(target).handle()->GetDesc(); - for (int sr(0); sr < subresources; ++sr) { - m_impl->m_queue.device().handle()->GetCopyableFootprints(&targetDesc, sourceSubresource + sr, 1, 0, &footprint, nullptr, nullptr, nullptr); - CD3DX12_TEXTURE_COPY_LOCATION sourceLocation(std::as_const(source).handle().Get(), footprint), targetLocation(std::as_const(target).handle().Get(), targetSubresource + sr); + CD3DX12_TEXTURE_COPY_LOCATION sourceLocation(std::as_const(source).handle().Get(), sourceSubresource + sr), targetLocation(std::as_const(target).handle().Get(), targetSubresource + sr); this->handle()->CopyTextureRegion(&targetLocation, 0, 0, 0, &sourceLocation, nullptr); } } From 009ed6969a6ecdcb28e3264c7506ee1b01930be4 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Tue, 2 Jan 2024 17:04:48 +0100 Subject: [PATCH 20/38] No longer implicitly barrier on transfers. 
--- src/Backends/Vulkan/src/command_buffer.cpp | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/Backends/Vulkan/src/command_buffer.cpp b/src/Backends/Vulkan/src/command_buffer.cpp index 4ab9636f1..5b1f9d922 100644 --- a/src/Backends/Vulkan/src/command_buffer.cpp +++ b/src/Backends/Vulkan/src/command_buffer.cpp @@ -269,17 +269,11 @@ void VulkanCommandBuffer::transfer(IVulkanBuffer& source, IVulkanImage& target, if (target.elements() < firstSubresource + elements) [[unlikely]] throw ArgumentOutOfRangeException("targetElement", "The target image has only {0} sub-resources, but a transfer for {1} elements starting from element {2} has been requested.", target.elements(), elements, firstSubresource); - // Create a copy command and add it to the command buffer. - VulkanBarrier barrier(PipelineStage::None, PipelineStage::Transfer); - Array copyInfos(elements); std::ranges::generate(copyInfos, [&, this, i = firstSubresource]() mutable { UInt32 subresource = i++, layer = 0, level = 0, plane = 0; target.resolveSubresource(subresource, plane, layer, level); - if (static_cast(target).layout(subresource) != ImageLayout::CopyDestination) - barrier.transition(target, level, 1, layer, 1, plane, ResourceAccess::None, ResourceAccess::TransferWrite, ImageLayout::Undefined, ImageLayout::CopyDestination); - return VkBufferImageCopy { .bufferOffset = source.alignedElementSize() * sourceElement, .bufferRowLength = 0, @@ -295,7 +289,6 @@ void VulkanCommandBuffer::transfer(IVulkanBuffer& source, IVulkanImage& target, }; }); - this->barrier(barrier); ::vkCmdCopyBufferToImage(this->handle(), std::as_const(source).handle(), std::as_const(target).handle(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast(copyInfos.size()), copyInfos.data()); } @@ -307,9 +300,6 @@ void VulkanCommandBuffer::transfer(IVulkanImage& source, IVulkanImage& target, U if (target.elements() < targetSubresource + subresources) [[unlikely]] throw ArgumentOutOfRangeException("targetElement", "The 
target image has only {0} sub-resources, but a transfer for {1} sub-resources starting from sub-resources {2} has been requested.", target.elements(), subresources, targetSubresource); - // Create a copy command and add it to the command buffer. - VulkanBarrier barrier(PipelineStage::None, PipelineStage::Transfer); - Array copyInfos(subresources); std::ranges::generate(copyInfos, [&, this, i = 0]() mutable { UInt32 sourceRsc = sourceSubresource + i, sourceLayer = 0, sourceLevel = 0, sourcePlane = 0; @@ -318,12 +308,6 @@ void VulkanCommandBuffer::transfer(IVulkanImage& source, IVulkanImage& target, U target.resolveSubresource(targetRsc, targetLayer, targetLevel, targetPlane); i++; - //if (static_cast(source).layout(sourceSubresource) != ImageLayout::CopySource) - // barrier.transition(source, sourceLayer, 1, sourceLevel, 1, sourcePlane, ResourceAccess::None, ResourceAccess::TransferRead, ImageLayout::CopySource); - - if (static_cast(target).layout(targetSubresource) != ImageLayout::CopyDestination) - barrier.transition(target, targetLayer, 1, targetLevel, 1, targetPlane, ResourceAccess::None, ResourceAccess::TransferWrite, ImageLayout::Undefined, ImageLayout::CopyDestination); - return VkImageCopy { .srcSubresource = VkImageSubresourceLayers { .aspectMask = source.aspectMask(sourcePlane), @@ -343,7 +327,6 @@ void VulkanCommandBuffer::transfer(IVulkanImage& source, IVulkanImage& target, U }; }); - this->barrier(barrier); ::vkCmdCopyImage(this->handle(), std::as_const(source).handle(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, std::as_const(target).handle(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast(copyInfos.size()), copyInfos.data()); } From 07d8dc69dff31739a4ebdf05f9e9f5718e3c6fa0 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Tue, 2 Jan 2024 17:05:49 +0100 Subject: [PATCH 21/38] Fix validation errors for sample. 
--- src/Backends/DirectX12/src/factory.cpp | 4 ++-- src/Backends/DirectX12/src/render_pass.cpp | 23 ++++++++++++++++------ src/Samples/Compute/src/sample.cpp | 13 +++++++++--- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/Backends/DirectX12/src/factory.cpp b/src/Backends/DirectX12/src/factory.cpp index 8afce9fcf..0b8e9316a 100644 --- a/src/Backends/DirectX12/src/factory.cpp +++ b/src/Backends/DirectX12/src/factory.cpp @@ -200,8 +200,8 @@ UniquePtr DirectX12GraphicsFactory::createAttachment(const Stri if (target.allowStorage()) resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; - if (target.multiQueueAccess()) - resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS; + //if (target.multiQueueAccess()) + // resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS; D3D12MA::ALLOCATION_DESC allocationDesc { .HeapType = D3D12_HEAP_TYPE_DEFAULT }; diff --git a/src/Backends/DirectX12/src/render_pass.cpp b/src/Backends/DirectX12/src/render_pass.cpp index 9ec47c686..15e86b19c 100644 --- a/src/Backends/DirectX12/src/render_pass.cpp +++ b/src/Backends/DirectX12/src/render_pass.cpp @@ -322,8 +322,10 @@ void DirectX12RenderPass::begin(UInt32 buffer) if (renderTarget.type() == RenderTargetType::DepthStencil) depthStencilBarrier.transition(image, ResourceAccess::DepthStencilRead, ResourceAccess::DepthStencilWrite, ImageLayout::DepthWrite); - else - renderTargetBarrier.transition(image, ResourceAccess::ShaderRead, ResourceAccess::RenderTarget, ImageLayout::RenderTarget); + else //if (!renderTarget.multiQueueAccess()) + renderTargetBarrier.transition(image, ResourceAccess::None, ResourceAccess::RenderTarget, ImageLayout::Undefined, ImageLayout::RenderTarget); + //else // Resources with simultaneous access enabled don't need to be transitioned. 
+ // renderTargetBarrier.transition(image, ResourceAccess::ShaderRead, ResourceAccess::RenderTarget, ImageLayout::Common); }); beginCommandBuffer->barrier(renderTargetBarrier); @@ -373,16 +375,25 @@ UInt64 DirectX12RenderPass::end() const DirectX12Barrier renderTargetBarrier(PipelineStage::RenderTarget, PipelineStage::Fragment), depthStencilBarrier(PipelineStage::DepthStencil, PipelineStage::DepthStencil), resolveBarrier(PipelineStage::RenderTarget, PipelineStage::Resolve), presentBarrier(PipelineStage::RenderTarget, PipelineStage::None); std::ranges::for_each(m_impl->m_renderTargets, [&](const RenderTarget& renderTarget) { + //if (renderTarget.multiQueueAccess()) + // return; // Resources with simultaneous access enabled don't need to be transitioned. + switch (renderTarget.type()) { default: - case RenderTargetType::Color: return renderTargetBarrier.transition(const_cast(frameBuffer->image(renderTarget.location())), ResourceAccess::RenderTarget, ResourceAccess::ShaderRead, ImageLayout::ShaderResource); - case RenderTargetType::DepthStencil: return depthStencilBarrier.transition(const_cast(frameBuffer->image(renderTarget.location())), ResourceAccess::DepthStencilWrite, ResourceAccess::DepthStencilRead, ImageLayout::DepthRead); + case RenderTargetType::Color: + renderTargetBarrier.transition(const_cast(frameBuffer->image(renderTarget.location())), ResourceAccess::RenderTarget, ResourceAccess::ShaderRead, renderTarget.attachment() ? 
ImageLayout::ShaderResource : ImageLayout::Common); + break; + case RenderTargetType::DepthStencil: + depthStencilBarrier.transition(const_cast(frameBuffer->image(renderTarget.location())), ResourceAccess::DepthStencilWrite, ResourceAccess::DepthStencilRead, ImageLayout::DepthRead); + break; case RenderTargetType::Present: if (requiresResolve) - return resolveBarrier.transition(const_cast(frameBuffer->image(renderTarget.location())), ResourceAccess::RenderTarget, ResourceAccess::ResolveRead, ImageLayout::ResolveSource); + resolveBarrier.transition(const_cast(frameBuffer->image(renderTarget.location())), ResourceAccess::RenderTarget, ResourceAccess::ResolveRead, ImageLayout::ResolveSource); else - return presentBarrier.transition(const_cast(frameBuffer->image(renderTarget.location())), ResourceAccess::RenderTarget, ResourceAccess::None, ImageLayout::Present); + presentBarrier.transition(const_cast(frameBuffer->image(renderTarget.location())), ResourceAccess::RenderTarget, ResourceAccess::None, ImageLayout::Present); + + break; } }); diff --git a/src/Samples/Compute/src/sample.cpp b/src/Samples/Compute/src/sample.cpp index f47689c4a..69aa05752 100644 --- a/src/Samples/Compute/src/sample.cpp +++ b/src/Samples/Compute/src/sample.cpp @@ -66,7 +66,7 @@ void initRenderGraph(TRenderBackend* backend, SharedPtr& inputA // Create a geometry render pass. UniquePtr renderPass = device->buildRenderPass("Opaque") - .renderTarget("Color Target", RenderTargetType::Color, Format::R8G8B8A8_UNORM, RenderTargetFlags::Clear | RenderTargetFlags::Shared | RenderTargetFlags::AllowStorage, { 0.1f, 0.1f, 0.1f, 1.f }) + .renderTarget("Color Target", RenderTargetType::Color, Format::B8G8R8A8_UNORM, RenderTargetFlags::Clear | RenderTargetFlags::Shared | RenderTargetFlags::AllowStorage, { 0.1f, 0.1f, 0.1f, 1.f }) .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, RenderTargetFlags::Clear, { 1.f, 0.f, 0.f, 0.f }); // Create the shader program. 
@@ -476,11 +476,18 @@ void SampleApp::drawFrame() // NOTE: This implicitly transitions the image into `CopyDestination` layout. auto& queue = presentPass.commandQueue(); auto commandBuffer = queue.createCommandBuffer(true); + + // Transition the image back into `CopyDestination` layout. + auto barrier = m_device->makeBarrier(PipelineStage::None, PipelineStage::Transfer); + barrier->transition(presentPass.frameBuffer(backBuffer).image(0), ResourceAccess::None, ResourceAccess::TransferWrite, ImageLayout::Undefined, ImageLayout::CopyDestination); + commandBuffer->barrier(*barrier); + + // Copy the image. commandBuffer->transfer(renderPass.frameBuffer(backBuffer).image(0), presentPass.frameBuffer(backBuffer).image(0)); // Transition the image back into `Present` layout. - auto barrier = m_device->makeBarrier(PipelineStage::Transfer, PipelineStage::Resolve); - barrier->transition(presentPass.frameBuffer(backBuffer).image(0), ResourceAccess::TransferWrite, ResourceAccess::ResolveRead, ImageLayout::CopyDestination, ImageLayout::Present); + barrier = m_device->makeBarrier(PipelineStage::Transfer, PipelineStage::Resolve); + barrier->transition(presentPass.frameBuffer(backBuffer).image(0), ResourceAccess::TransferWrite, ResourceAccess::Common, ImageLayout::CopyDestination, ImageLayout::Present); commandBuffer->barrier(*barrier); // Wait for the compute queue to finish before performing the transfer. From 1902aa23ab7d0f21e0e288cfabb34d6e397a1fe2 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Wed, 3 Jan 2024 11:01:03 +0100 Subject: [PATCH 22/38] Rebind frame buffers after resize. 
--- src/Samples/Compute/src/sample.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Samples/Compute/src/sample.cpp b/src/Samples/Compute/src/sample.cpp index 69aa05752..969b3b7ec 100644 --- a/src/Samples/Compute/src/sample.cpp +++ b/src/Samples/Compute/src/sample.cpp @@ -281,6 +281,12 @@ void SampleApp::onResize(const void* sender, ResizeEventArgs e) m_device->state().renderPass("Opaque").resizeFrameBuffers(renderArea); m_device->state().renderPass("Present").resizeFrameBuffers(renderArea); + // Update the post-processing bindings that reference the "opaque" frame buffer. + auto opaqueFrameBuffers = m_device->state().renderPass("Opaque").frameBuffers(); + + for (size_t i{ 0 }; auto& frameBuffer : opaqueFrameBuffers) + m_device->state().descriptorSet(fmt::format("Post Bindings {0}", i++)).update(0, frameBuffer->image(0)); + // Also resize viewport and scissor. m_viewport->setRectangle(RectF(0.f, 0.f, static_cast(e.width()), static_cast(e.height()))); m_scissor->setRectangle(RectF(0.f, 0.f, static_cast(e.width()), static_cast(e.height()))); From a149af5283b1a5b38b572cef1f6391d911d0ccef Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Wed, 3 Jan 2024 11:55:43 +0100 Subject: [PATCH 23/38] Support multi-sampled UAV bindings. 
--- src/Backends/DirectX12/src/descriptor_set.cpp | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/Backends/DirectX12/src/descriptor_set.cpp b/src/Backends/DirectX12/src/descriptor_set.cpp index 385aab840..eec78c3e7 100644 --- a/src/Backends/DirectX12/src/descriptor_set.cpp +++ b/src/Backends/DirectX12/src/descriptor_set.cpp @@ -319,15 +319,31 @@ void DirectX12DescriptorSet::update(UInt32 binding, const IDirectX12Image& textu break; case ImageDimensions::DIM_2: - if (texture.layers() == 1) + if (texture.samples() == MultiSamplingLevel::x1) { - textureView.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; - textureView.Texture2D = { .MipSlice = firstLevel, .PlaneSlice = 0 }; + if (texture.layers() == 1) + { + textureView.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + textureView.Texture2D = { .MipSlice = firstLevel, .PlaneSlice = 0 }; + } + else + { + textureView.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + textureView.Texture2DArray = { .MipSlice = firstLevel, .FirstArraySlice = firstLayer, .ArraySize = numLayers, .PlaneSlice = 0 }; + } } else { - textureView.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; - textureView.Texture2DArray = { .MipSlice = firstLevel, .FirstArraySlice = firstLayer, .ArraySize = numLayers, .PlaneSlice = 0 }; + if (texture.layers() == 1) + { + textureView.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DMS; + textureView.Texture2D = { .MipSlice = firstLevel, .PlaneSlice = 0 }; + } + else + { + textureView.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DMSARRAY; + textureView.Texture2DArray = { .MipSlice = firstLevel, .FirstArraySlice = firstLayer, .ArraySize = numLayers, .PlaneSlice = 0 }; + } } break; From dabc63409f2dbe7a0fdeee1433d77f728cf840d2 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Thu, 4 Jan 2024 12:27:41 +0100 Subject: [PATCH 24/38] Improve runtime exception readability. 
--- src/Backends/DirectX12/include/litefx/backends/dx12_api.hpp | 2 +- src/Backends/Vulkan/include/litefx/backends/vulkan_api.hpp | 2 +- src/Core/include/litefx/exceptions.hpp | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Backends/DirectX12/include/litefx/backends/dx12_api.hpp b/src/Backends/DirectX12/include/litefx/backends/dx12_api.hpp index 71b0bcd33..8bf2098ab 100644 --- a/src/Backends/DirectX12/include/litefx/backends/dx12_api.hpp +++ b/src/Backends/DirectX12/include/litefx/backends/dx12_api.hpp @@ -297,7 +297,7 @@ namespace LiteFX::Rendering::Backends { /// The error code returned by the operation. /// The error message. explicit DX12PlatformException(HRESULT result, StringView message) noexcept : - m_code(result), m_error(result), RuntimeException("{2} {1} (HRESULT 0x{0:08X})", message, static_cast(result), m_error.ErrorMessage()) { } + m_code(result), m_error(result), RuntimeException("{2} {1} (HRESULT 0x{0:08X})", static_cast(result), m_error.ErrorMessage(), message) { } /// /// Initializes a new exception. diff --git a/src/Backends/Vulkan/include/litefx/backends/vulkan_api.hpp b/src/Backends/Vulkan/include/litefx/backends/vulkan_api.hpp index 0aff94140..559de3048 100644 --- a/src/Backends/Vulkan/include/litefx/backends/vulkan_api.hpp +++ b/src/Backends/Vulkan/include/litefx/backends/vulkan_api.hpp @@ -342,7 +342,7 @@ namespace LiteFX::Rendering::Backends { /// The error code returned by the operation. /// The error message. explicit VulkanPlatformException(VkResult result, StringView message) noexcept : - m_code(result), RuntimeException("{1} Operation returned {0}.", message, result) { } + m_code(result), RuntimeException("{1} Operation returned {0}.", result, message) { } /// /// Initializes a new exception. 
diff --git a/src/Core/include/litefx/exceptions.hpp b/src/Core/include/litefx/exceptions.hpp index feddf4dc9..4d42cf7f4 100644 --- a/src/Core/include/litefx/exceptions.hpp +++ b/src/Core/include/litefx/exceptions.hpp @@ -237,7 +237,7 @@ namespace LiteFX { /// /// The error message. explicit RuntimeException(std::string_view message) noexcept : - Exception(fmt::format("The operation could not be executed. {}", message), std::source_location::current(), std::stacktrace::current()) { } + Exception(fmt::format("The operation could not be executed: {}", message), std::source_location::current(), std::stacktrace::current()) { } /// /// Initializes a new exception. @@ -246,7 +246,7 @@ namespace LiteFX { /// The arguments passed to the error message format string. template explicit RuntimeException(std::string_view format, TArgs&&... args) noexcept : - Exception(fmt::format("The operation could not be executed. {}", fmt::vformat(format, fmt::make_format_args(args...))), std::source_location::current(), std::stacktrace::current()) { } + Exception(fmt::format("The operation could not be executed: {}", fmt::vformat(format, fmt::make_format_args(args...))), std::source_location::current(), std::stacktrace::current()) { } RuntimeException(const RuntimeException&) = delete; RuntimeException(RuntimeException&&) = delete; From 9e15152717346d635964128f801a58221cdf04fd Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Thu, 4 Jan 2024 12:28:16 +0100 Subject: [PATCH 25/38] Improve documentation on required extensions. 
--- src/Backends/Vulkan/src/device.cpp | 35 ++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/src/Backends/Vulkan/src/device.cpp b/src/Backends/Vulkan/src/device.cpp index 2cd392c48..48af7282a 100644 --- a/src/Backends/Vulkan/src/device.cpp +++ b/src/Backends/Vulkan/src/device.cpp @@ -151,6 +151,10 @@ class VulkanDevice::VulkanDeviceImpl : public Implement { private: void defineMandatoryExtensions() noexcept { + // NOTE: If an extension is not supported, update the graphics driver to the most recent one. You can look up extension support for individual drivers here: + // https://vulkan.gpuinfo.org/listdevicescoverage.php?extension=VK_KHR_present_wait (replace the extension name to adjust the filter). + + // Required to query image and buffer requirements. m_extensions.push_back(VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME); #ifdef BUILD_DIRECTX_12_BACKEND @@ -165,6 +169,7 @@ class VulkanDevice::VulkanDeviceImpl : public Implement { #ifndef NDEBUG auto availableExtensions = m_adapter.getAvailableDeviceExtensions(); + // Required to set debug names. if (auto match = std::ranges::find_if(availableExtensions, [](const String& extension) { return extension == VK_EXT_DEBUG_MARKER_EXTENSION_NAME; }); match != availableExtensions.end()) m_extensions.push_back(VK_EXT_DEBUG_MARKER_EXTENSION_NAME); #endif @@ -237,17 +242,23 @@ class VulkanDevice::VulkanDeviceImpl : public Implement { }) | std::ranges::to>(); // Allow geometry and tessellation shader stages. - VkPhysicalDeviceFeatures deviceFeatures = { - .geometryShader = true, - .tessellationShader = true, - .samplerAnisotropy = true + VkPhysicalDeviceFeatures2 deviceFeatures = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, + .features = { + .geometryShader = true, + .tessellationShader = true, + .samplerAnisotropy = true + } }; + // Enable synchronization overhaul.
VkPhysicalDeviceVulkan13Features deviceFeatures13 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES, + .pNext = &deviceFeatures, .synchronization2 = true }; + // Enable various descriptor related features, as well as timeline semaphores and other little QoL improvements. VkPhysicalDeviceVulkan12Features deviceFeatures12 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, .pNext = &deviceFeatures13, @@ -285,14 +296,14 @@ class VulkanDevice::VulkanDeviceImpl : public Implement { }; // Define the device itself. - VkDeviceCreateInfo createInfo = {}; - createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; - createInfo.pNext = &extendedDynamicStateFeatures; - createInfo.queueCreateInfoCount = static_cast(queueCreateInfos.size()); - createInfo.pQueueCreateInfos = queueCreateInfos.data(); - createInfo.pEnabledFeatures = &deviceFeatures; - createInfo.enabledExtensionCount = static_cast(requiredExtensions.size()); - createInfo.ppEnabledExtensionNames = requiredExtensions.data(); + VkDeviceCreateInfo createInfo = { + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .pNext = &extendedDynamicStateFeatures, + .queueCreateInfoCount = static_cast(queueCreateInfos.size()), + .pQueueCreateInfos = queueCreateInfos.data(), + .enabledExtensionCount = static_cast(requiredExtensions.size()), + .ppEnabledExtensionNames = requiredExtensions.data() + }; // Create the device. // NOTE: This can time-out under very mysterious circumstances, in which case the event log shows a TDR error. Unfortunately, the only way I found From 779a435520cdfafc815a34f3c8e82bcb4090eda5 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Thu, 4 Jan 2024 12:28:39 +0100 Subject: [PATCH 26/38] Print stack trace on error.
--- src/Samples/Compute/src/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Samples/Compute/src/main.cpp b/src/Samples/Compute/src/main.cpp index bae82f6a8..7badb86f6 100644 --- a/src/Samples/Compute/src/main.cpp +++ b/src/Samples/Compute/src/main.cpp @@ -156,7 +156,7 @@ int main(const int argc, const char** argv) } catch (const LiteFX::Exception& ex) { - std::cerr << "\033[3;41;37mUnhandled exception: " << ex.what() << "\033[0m" << std::endl; + std::cerr << "\033[3;41;37mUnhandled exception: " << ex.what() << '\n' << "at: " << ex.trace() << "\033[0m" << std::endl; return EXIT_FAILURE; } From 195727c9adba2f0ba6c7b7fe92cd7e6f53c6cb5e Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Thu, 4 Jan 2024 16:21:59 +0100 Subject: [PATCH 27/38] Apply sRGB curve correction on individual vector elements. --- src/Backends/DirectX12/shaders/blit.hlsl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Backends/DirectX12/shaders/blit.hlsl b/src/Backends/DirectX12/shaders/blit.hlsl index 0d6c98d8d..afd4a3c85 100644 --- a/src/Backends/DirectX12/shaders/blit.hlsl +++ b/src/Backends/DirectX12/shaders/blit.hlsl @@ -14,7 +14,11 @@ float3 applySRGB(float3 x) { // See: https://github.com/Microsoft/DirectX-Graphics-Samples/blob/master/MiniEngine/Core/Shaders/GenerateMipsCS.hlsli#L55 //return x < 0.0031308 ? 12.92 * x : 1.055 * pow(abs(x), 1.0 / 2.4) - 0.055; - return x < 0.0031308 ? 12.92 * x : 1.13005 * sqrt(abs(x - 0.00228)) - 0.13448 * x + 0.005719; + return float3( + x.r < 0.0031308 ? 12.92 * x.r : 1.13005 * sqrt(abs(x.r - 0.00228)) - 0.13448 * x.r + 0.005719, + x.g < 0.0031308 ? 12.92 * x.g : 1.13005 * sqrt(abs(x.g - 0.00228)) - 0.13448 * x.g + 0.005719, + x.b < 0.0031308 ? 
12.92 * x.b : 1.13005 * sqrt(abs(x.b - 0.00228)) - 0.13448 * x.b + 0.005719 + ); } float4 packColor(float4 color) From 6149a3053a6e6c95d3ebde8978a0c31304ed5df5 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Thu, 4 Jan 2024 17:23:19 +0100 Subject: [PATCH 28/38] Improve swap chain implementation to no longer require binary semaphores. --- .../Vulkan/include/litefx/backends/vulkan.hpp | 40 ----- src/Backends/Vulkan/src/convert.cpp | 4 + src/Backends/Vulkan/src/device.cpp | 4 + src/Backends/Vulkan/src/queue.cpp | 63 +++---- src/Backends/Vulkan/src/render_pass.cpp | 22 +-- src/Backends/Vulkan/src/swapchain.cpp | 155 ++++++++---------- 6 files changed, 103 insertions(+), 185 deletions(-) diff --git a/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp b/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp index c6ea3688a..5c03a9213 100644 --- a/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp +++ b/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp @@ -1357,12 +1357,6 @@ namespace LiteFX::Rendering::Backends { // Vulkan Swap Chain interface. public: - /// - /// Returns a reference of the current swap semaphore, a command queue can wait on for presenting. - /// - /// A reference of the current swap semaphore, a command queue can wait on for presenting. - virtual const VkSemaphore& semaphore() const noexcept; - /// /// Returns the query pool for the current frame. /// @@ -1466,40 +1460,6 @@ namespace LiteFX::Rendering::Backends { /// The internal timeline semaphore. virtual const VkSemaphore& timelineSemaphore() const noexcept; - /// - /// Submits a single command buffer and inserts a fence to wait for it. - /// - /// - /// By calling this method, the queue takes shared ownership over the until the fence is passed. The reference will be released - /// during a , if the awaited fence is inserted after the associated one. 
- /// - /// Note that submitting a command buffer that is currently recording will implicitly close the command buffer. - /// - /// The command buffer to submit to the command queue. - /// The semaphores to wait for on each pipeline stage. There must be a semaphore for each entry in the array. - /// The pipeline stages of the current render pass to wait for before submitting the command buffer. - /// The semaphores to signal, when the command buffer is executed. - /// The value of the fence, inserted after the command buffer. - /// - virtual UInt64 submit(SharedPtr commandBuffer, Span waitForSemaphores, Span waitForStages, Span signalSemaphores = { }) const; - - /// - /// Submits a set of command buffers and inserts a fence to wait for them. - /// - /// - /// By calling this method, the queue takes shared ownership over the until the fence is passed. The reference will be released - /// during a , if the awaited fence is inserted after the associated one. - /// - /// Note that submitting a command buffer that is currently recording will implicitly close the command buffer. - /// - /// The command buffers to submit to the command queue. - /// The semaphores to wait for on each pipeline stage. There must be a semaphore for each entry in the array. - /// The pipeline stages of the current render pass to wait for before submitting the command buffer. - /// The semaphores to signal, when the command buffer is executed. - /// The value of the fence, inserted after the command buffers. - /// - virtual UInt64 submit(const Enumerable>& commandBuffers, Span waitForSemaphores, Span waitForStages, Span signalSemaphores = { }) const; - // CommandQueue interface. 
public: /// diff --git a/src/Backends/Vulkan/src/convert.cpp b/src/Backends/Vulkan/src/convert.cpp index 2eabd9676..3e2065e85 100644 --- a/src/Backends/Vulkan/src/convert.cpp +++ b/src/Backends/Vulkan/src/convert.cpp @@ -1063,7 +1063,11 @@ constexpr VkImageLayout LITEFX_VULKAN_API LiteFX::Rendering::Backends::Vk::getIm case ImageLayout::RenderTarget: return VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; case ImageLayout::DepthRead: return VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; case ImageLayout::DepthWrite: return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; +#ifdef BUILD_DIRECTX_12_BACKEND // Images from interop swap chain must not be transitioned into present state. + case ImageLayout::Present: return VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; +#else case ImageLayout::Present: return VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; +#endif case ImageLayout::ResolveSource: return VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; case ImageLayout::ResolveDestination: return VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; case ImageLayout::Undefined: return VK_IMAGE_LAYOUT_UNDEFINED; diff --git a/src/Backends/Vulkan/src/device.cpp b/src/Backends/Vulkan/src/device.cpp index 48af7282a..83977607a 100644 --- a/src/Backends/Vulkan/src/device.cpp +++ b/src/Backends/Vulkan/src/device.cpp @@ -162,6 +162,10 @@ class VulkanDevice::VulkanDeviceImpl : public Implement { m_extensions.push_back(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME); m_extensions.push_back(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME); m_extensions.push_back(VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME); + + // Required to synchronize Vulkan command execution with D3D presentation. 
+ m_extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME); + //m_extensions.push_back(VK_KHR_EXTERNAL_FENCE_WIN32_EXTENSION_NAME); #else m_extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); #endif // BUILD_DIRECTX_12_BACKEND diff --git a/src/Backends/Vulkan/src/queue.cpp b/src/Backends/Vulkan/src/queue.cpp index 65a3c16b2..2be425839 100644 --- a/src/Backends/Vulkan/src/queue.cpp +++ b/src/Backends/Vulkan/src/queue.cpp @@ -164,15 +164,10 @@ SharedPtr VulkanQueue::createCommandBuffer(bool beginRecord UInt64 VulkanQueue::submit(SharedPtr commandBuffer) const { - return this->submit(commandBuffer, {}, {}, {}); -} - -UInt64 VulkanQueue::submit(SharedPtr commandBuffer, Span waitForSemaphores, Span waitForStages, Span signalSemaphores) const -{ - if (commandBuffer == nullptr) + if (commandBuffer == nullptr) [[unlikely]] throw InvalidArgumentException("commandBuffer", "The command buffer must be initialized."); - if (commandBuffer->isSecondary()) + if (commandBuffer->isSecondary()) [[unlikely]] throw InvalidArgumentException("commandBuffer", "The command buffer must be a primary command buffer."); std::lock_guard lock(m_impl->m_mutex); @@ -189,31 +184,22 @@ UInt64 VulkanQueue::submit(SharedPtr commandBuffer, S commandBuffer->end(); // Create an array of all signal semaphores. - Array semaphoresToSignal(signalSemaphores.size()); - std::ranges::generate(semaphoresToSignal, [&signalSemaphores, i = 0]() mutable { return signalSemaphores[i++]; }); - semaphoresToSignal.insert(semaphoresToSignal.begin(), m_impl->m_timelineSemaphore); + std::array semaphoresToSignal = { m_impl->m_timelineSemaphore }; // Submit the command buffer. 
auto fence = ++m_impl->m_fenceValue; - Array waitValues(waitForSemaphores.size(), 0); - Array signalValues(semaphoresToSignal.size(), 0); - signalValues[0] = fence; + std::array signalValues = { fence }; - VkTimelineSemaphoreSubmitInfo timelineInfo{ + VkTimelineSemaphoreSubmitInfo timelineInfo { .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, .pNext = nullptr, - .waitSemaphoreValueCount = static_cast(waitValues.size()), - .pWaitSemaphoreValues = waitValues.data(), .signalSemaphoreValueCount = static_cast(signalValues.size()), .pSignalSemaphoreValues = signalValues.data() }; - VkSubmitInfo submitInfo{ + VkSubmitInfo submitInfo { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .pNext = &timelineInfo, - .waitSemaphoreCount = static_cast(waitForSemaphores.size()), - .pWaitSemaphores = waitForSemaphores.data(), - .pWaitDstStageMask = waitForStages.data(), .commandBufferCount = 1, .pCommandBuffers = &commandBuffer->handle(), .signalSemaphoreCount = static_cast(semaphoresToSignal.size()), @@ -233,15 +219,10 @@ UInt64 VulkanQueue::submit(SharedPtr commandBuffer, S UInt64 VulkanQueue::submit(const Enumerable>& commandBuffers) const { - return this->submit(commandBuffers, {}, {}, {}); -} - -UInt64 VulkanQueue::submit(const Enumerable>& commandBuffers, Span waitForSemaphores, Span waitForStages, Span signalSemaphores) const -{ - if (!std::ranges::all_of(commandBuffers, [](const auto& buffer) { return buffer != nullptr; })) + if (!std::ranges::all_of(commandBuffers, [](const auto& buffer) { return buffer != nullptr; })) [[unlikely]] throw InvalidArgumentException("commandBuffers", "At least one command buffer is not initialized."); - if (!std::ranges::all_of(commandBuffers, [](const auto& buffer) { return !buffer->isSecondary(); })) + if (!std::ranges::all_of(commandBuffers, [](const auto& buffer) { return !buffer->isSecondary(); })) [[unlikely]] throw InvalidArgumentException("commandBuffers", "At least one command buffer is a secondary command buffer, which is not allowed 
to be submitted to a command queue."); std::lock_guard lock(m_impl->m_mutex); @@ -264,31 +245,22 @@ UInt64 VulkanQueue::submit(const Enumerable }() | std::ranges::to>(); // Create an array of all signal semaphores. - Array semaphoresToSignal(signalSemaphores.size()); - std::ranges::generate(semaphoresToSignal, [&signalSemaphores, i = 0]() mutable { return signalSemaphores[i++]; }); - semaphoresToSignal.insert(semaphoresToSignal.begin(), m_impl->m_timelineSemaphore); + std::array semaphoresToSignal = { m_impl->m_timelineSemaphore }; // Submit the command buffer. auto fence = ++m_impl->m_fenceValue; - Array waitValues(waitForSemaphores.size(), 0); - Array signalValues(semaphoresToSignal.size(), 0); - signalValues[0] = fence; + std::array signalValues = { fence }; - VkTimelineSemaphoreSubmitInfo timelineInfo{ + VkTimelineSemaphoreSubmitInfo timelineInfo { .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, .pNext = nullptr, - .waitSemaphoreValueCount = static_cast(waitValues.size()), - .pWaitSemaphoreValues = waitValues.data(), .signalSemaphoreValueCount = static_cast(signalValues.size()), .pSignalSemaphoreValues = signalValues.data() }; - VkSubmitInfo submitInfo{ + VkSubmitInfo submitInfo { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .pNext = &timelineInfo, - .waitSemaphoreCount = static_cast(waitForSemaphores.size()), - .pWaitSemaphores = waitForSemaphores.data(), - .pWaitDstStageMask = waitForStages.data(), .commandBufferCount = static_cast(handles.size()), .pCommandBuffers = handles.data(), .signalSemaphoreCount = static_cast(semaphoresToSignal.size()), @@ -309,7 +281,7 @@ UInt64 VulkanQueue::submit(const Enumerable void VulkanQueue::waitFor(UInt64 fence) const noexcept { UInt64 completedValue{ 0 }; - //raiseIfFailed(::vkGetSemaphoreCounterValue(this->getDevice()->handle(), m_impl->m_timelineSemaphore, &completedValue), "Unable to query current queue timeline semaphore value."); + //raiseIfFailed(::vkGetSemaphoreCounterValue(m_impl->m_device.handle(), 
m_impl->m_timelineSemaphore, &completedValue), "Unable to query current queue timeline semaphore value."); ::vkGetSemaphoreCounterValue(m_impl->m_device.handle(), m_impl->m_timelineSemaphore, &completedValue); if (completedValue < fence) @@ -321,7 +293,7 @@ void VulkanQueue::waitFor(UInt64 fence) const noexcept .pValues = &fence }; - //raiseIfFailed(::vkWaitSemaphores(this->getDevice()->handle(), &waitInfo, std::numeric_limits::max()), "Unable to wait for queue timeline semaphore."); + //raiseIfFailed(::vkWaitSemaphores(m_impl->m_device.handle(), &waitInfo, std::numeric_limits::max()), "Unable to wait for queue timeline semaphore."); ::vkWaitSemaphores(m_impl->m_device.handle(), &waitInfo, std::numeric_limits::max()); } @@ -330,17 +302,20 @@ void VulkanQueue::waitFor(UInt64 fence) const noexcept void VulkanQueue::waitFor(const VulkanQueue& queue, UInt64 fence) const noexcept { + static const std::array waitStages = { VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT }; + VkTimelineSemaphoreSubmitInfo timelineInfo { .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, .waitSemaphoreValueCount = 1, .pWaitSemaphoreValues = &fence }; - VkSubmitInfo submitInfo{ + VkSubmitInfo submitInfo { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .pNext = &timelineInfo, .waitSemaphoreCount = 1, - .pWaitSemaphores = &queue.m_impl->m_timelineSemaphore + .pWaitSemaphores = &queue.m_impl->m_timelineSemaphore, + .pWaitDstStageMask = waitStages.data() }; ::vkQueueSubmit(this->handle(), 1, &submitInfo, VK_NULL_HANDLE); diff --git a/src/Backends/Vulkan/src/render_pass.cpp b/src/Backends/Vulkan/src/render_pass.cpp index b286c9727..42fa5e4d6 100644 --- a/src/Backends/Vulkan/src/render_pass.cpp +++ b/src/Backends/Vulkan/src/render_pass.cpp @@ -181,7 +181,11 @@ class VulkanRenderPass::VulkanRenderPassImpl : public ImplementstencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; presentResolveAttachment->stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; presentResolveAttachment->initialLayout = 
VK_IMAGE_LAYOUT_UNDEFINED; +#ifdef BUILD_DIRECTX_12_BACKEND + presentResolveAttachment->finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; +#else presentResolveAttachment->finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; +#endif } presentTarget = VkAttachmentReference { static_cast(currentIndex + inputAttachments.size()), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL }; @@ -468,19 +476,11 @@ UInt64 VulkanRenderPass::end() const ::vkCmdEndRenderPass(std::as_const(*commandBuffer).handle()); // Submit the command buffer. - if (!this->hasPresentTarget()) - frameBuffer->lastFence() = m_impl->m_queue->submit(commandBuffer); - else - { - // Draw the frame, if the result of the render pass it should be presented to the swap chain. - std::array waitForSemaphores = { m_impl->m_device.swapChain().semaphore() }; - std::array waitForStages = { VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT }; - std::array signalSemaphores = { frameBuffer->semaphore() }; - frameBuffer->lastFence() = m_impl->m_queue->submit(commandBuffer, waitForSemaphores, waitForStages, signalSemaphores); + frameBuffer->lastFence() = m_impl->m_queue->submit(commandBuffer); - // Present the swap chain. + // Present the swap chain. + if (this->hasPresentTarget()) m_impl->m_device.swapChain().present(*frameBuffer); - } // Reset the frame buffer. 
m_impl->m_activeFrameBuffer = nullptr; diff --git a/src/Backends/Vulkan/src/swapchain.cpp b/src/Backends/Vulkan/src/swapchain.cpp index d63cc4845..4f7c650c5 100644 --- a/src/Backends/Vulkan/src/swapchain.cpp +++ b/src/Backends/Vulkan/src/swapchain.cpp @@ -17,11 +17,11 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { Size2d m_renderArea { }; Format m_format { Format::None }; UInt32 m_buffers { }; - Array m_swapSemaphores { }; UInt32 m_currentImage { }; Array> m_presentImages { }; const VulkanDevice& m_device; VkSwapchainKHR m_handle = VK_NULL_HANDLE; + VkFence m_waitForImage = VK_NULL_HANDLE; Array> m_timingEvents; Array m_timestamps; @@ -110,17 +110,9 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { VkSwapchainKHR swapChain; raiseIfFailed(::vkCreateSwapchainKHR(m_device.handle(), &createInfo, nullptr, &swapChain), "Swap chain could not be created."); - // Create a semaphore for swapping images. - VkSemaphoreCreateInfo semaphoreInfo{}; - semaphoreInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - - m_swapSemaphores.resize(images); - std::ranges::generate(m_swapSemaphores, [&]() mutable { - VkSemaphore semaphore; - raiseIfFailed(::vkCreateSemaphore(m_device.handle(), &semaphoreInfo, nullptr, &semaphore), "Unable to create swap semaphore."); - - return semaphore; - }); + // Initialize the fence used to wait for image access. + VkFenceCreateInfo fenceInfo { .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO }; + raiseIfFailed(::vkCreateFence(m_device.handle(), &fenceInfo, nullptr, &m_waitForImage), "Unable to create image acquisition fence."); // Create the swap chain images. auto actualRenderArea = Size2d(static_cast(createInfo.imageExtent.width), static_cast(createInfo.imageExtent.height)); @@ -186,11 +178,10 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { // Destroy the swap chain itself. ::vkDestroySwapchainKHR(m_device.handle(), m_handle, nullptr); - // Destroy the image swap semaphores. 
- std::ranges::for_each(m_swapSemaphores, [&](const auto& semaphore) { ::vkDestroySemaphore(m_device.handle(), semaphore, nullptr); }); + // Destroy the fence used to wait for image acquisition. + ::vkDestroyFence(m_device.handle(), m_waitForImage, nullptr); // Destroy state. - m_swapSemaphores.clear(); m_buffers = 0; m_renderArea = {}; m_format = Format::None; @@ -199,14 +190,16 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { UInt32 swapBackBuffer() { - // NOTE: m_currentImage may overflow, but this is okay. If we use nextImage to index semaphores, however, this may result in errors, since the image order is not necessarily preserved. + // Queue an image acquisition request, then wait for the fence and reset it for the next iteration. Note how this is similar to the DirectX behavior, where the swap call blocks until the + // image is acquired and ready. UInt32 nextImage; - m_currentImage++; - raiseIfFailed(::vkAcquireNextImageKHR(m_device.handle(), m_handle, UINT64_MAX, this->currentSemaphore(), VK_NULL_HANDLE, &nextImage), "Unable to swap front buffer."); + raiseIfFailed(::vkAcquireNextImageKHR(m_device.handle(), m_handle, UINT64_MAX, VK_NULL_HANDLE, m_waitForImage, &nextImage), "Unable to swap front buffer. Make sure that all previously acquired images are actually presented before acquiring another image."); + raiseIfFailed(::vkWaitForFences(m_device.handle(), 1, &m_waitForImage, VK_TRUE, UINT64_MAX), "Unable to wait for image acquisition."); + raiseIfFailed(::vkResetFences(m_device.handle(), 1, &m_waitForImage), "Unable to reset image acquisition fence."); // Query the timing events. // TODO: In rare situations, and only when using this swap chain implementation, the validation layers will complain about query pools not being reseted, when writing time stamps. I could - // not find out why and when this happens, but I maybe waiting explicitly on the last frame's fence (for the respective image) will fix the issue. 
+ // not find out why and when this happens, but maybe waiting explicitly on the last frame's fence (for the respective image) will fix the issue. if (m_supportsTiming && !m_timingEvents.empty()) [[likely]] { m_currentQueryPool = m_timingQueryPools[nextImage]; @@ -226,14 +219,10 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { { // Draw the frame, if the result of the render pass it should be presented to the swap chain. std::array swapChains = { m_handle }; - std::array signalSemaphores = { frameBuffer.semaphore() }; const auto bufferIndex = frameBuffer.bufferIndex(); - VkPresentInfoKHR presentInfo { + VkPresentInfoKHR presentInfo = { .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, - .pNext = nullptr, - .waitSemaphoreCount = static_cast(signalSemaphores.size()), - .pWaitSemaphores = signalSemaphores.data(), .swapchainCount = static_cast(swapChains.size()), .pSwapchains = swapChains.data(), .pImageIndices = &bufferIndex, @@ -243,11 +232,6 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { raiseIfFailed(::vkQueuePresentKHR(m_device.defaultQueue(QueueType::Graphics).handle(), &presentInfo), "Unable to present swap chain."); } - const VkSemaphore& currentSemaphore() - { - return m_swapSemaphores[m_currentImage % m_buffers]; - } - const VkQueryPool& currentTimestampQueryPool() { return m_currentQueryPool; @@ -282,6 +266,15 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { #else #include +#if VK_HEADER_VERSION < 268 +// Warn about this bug: https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues/5295. +#if __cplusplus >= 202302L +# warning "Vulkan SDK version is below 1.3.268.0, which will cause false validation errors about invalid command buffer resets. This bug has been fixed in later versions." +#else +# pragma message("Note: Vulkan SDK version is below 1.3.268.0, which will cause false validation errors about invalid command buffer resets. 
This bug has been fixed in later versions.") +#endif +#endif + namespace D3D { /// @@ -331,21 +324,26 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { Size2d m_renderArea{ }; Format m_format{ Format::None }; UInt32 m_buffers{ }; - Array m_swapSemaphores{ }; UInt32 m_currentImage{ }; Array> m_presentImages{ }; Array m_imageResources; + Array m_presentFences; + //Array m_presentFences; const VulkanDevice& m_device; ComPtr m_d3dDevice; ComPtr m_swapChain; ComPtr m_presentQueue; bool m_supportsTearing = false; + ComPtr m_fence = nullptr; + HANDLE m_fenceHandle; Array> m_timingEvents; Array m_timestamps; Array m_timingQueryPools; bool m_supportsTiming = false; + PFN_vkImportSemaphoreWin32HandleKHR importSemaphoreWin32HandleKHR = nullptr; + public: VulkanSwapChainImpl(VulkanSwapChain* parent, const VulkanDevice& device) : base(parent), m_device(device) @@ -354,6 +352,8 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { if (!m_supportsTiming) LITEFX_WARNING(VULKAN_LOG, "Timestamp queries are not supported and will be disabled. 
Reading timestamps will always return 0."); + + importSemaphoreWin32HandleKHR = reinterpret_cast(::vkGetDeviceProcAddr(device.handle(), "vkImportSemaphoreWin32HandleKHR")); } ~VulkanSwapChainImpl() @@ -432,8 +432,8 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { { infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, TRUE); infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE); - infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE); - //infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_INFO, TRUE); + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, FALSE); + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_INFO, FALSE); // Suppress individual messages by their ID D3D12_MESSAGE_ID suppressIds[] = { D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE, D3D12_MESSAGE_ID_CLEARDEPTHSTENCILVIEW_MISMATCHINGCLEARVALUE }; @@ -452,7 +452,7 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { // Create a command queue. D3D12_COMMAND_QUEUE_DESC presentQueueDesc { .Type = D3D12_COMMAND_LIST_TYPE_DIRECT, - .Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, + .Priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH, .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE, //.NodeMask = m_d3dDevice->GetNodeCount() <= 1 ? 0 : pdidProps.deviceNodeMask }; @@ -467,12 +467,12 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { .Format = DX12::getFormat(selectedFormat), .Stereo = FALSE, .SampleDesc = { 1, 0 }, - .BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT, + .BufferUsage = DXGI_USAGE_BACK_BUFFER, // DXGI_USAGE_RENDER_TARGET_OUTPUT .BufferCount = images, .Scaling = DXGI_SCALING_STRETCH, .SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD, .AlphaMode = DXGI_ALPHA_MODE_UNSPECIFIED, - .Flags = (m_supportsTearing = static_cast(tearingSupport)) ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : (UInt32)0, + .Flags = (m_supportsTearing = static_cast(tearingSupport)) ? 
DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : (UInt32)0 }; ComPtr swapChain; @@ -486,6 +486,23 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { // Disable Alt+Enter shortcut for fullscreen-toggle. if (FAILED(factory->MakeWindowAssociation(hwnd, DXGI_MWA_NO_ALT_ENTER))) [[unlikely]] LITEFX_WARNING(VULKAN_LOG, "Unable disable keyboard control sequence for full-screen switching."); + + // Initialize the present fences array. + m_presentFences.resize(images, 0ul); + + // Create a fence for synchronization. + D3D::raiseIfFailed(m_d3dDevice->CreateFence(0, D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(&m_fence)), "Unable to create interop synchronization fence for swap chain."); + D3D::raiseIfFailed(m_d3dDevice->CreateSharedHandle(m_fence.Get(), nullptr, GENERIC_ALL, L"", &m_fenceHandle), "Unable to create shared handle for swap chain interop synchronization fence."); + + // Import the fence handle to signal it from Vulkan workloads. + VkImportSemaphoreWin32HandleInfoKHR fenceImportInfo = { + .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR, + .semaphore = m_device.defaultQueue(QueueType::Graphics).timelineSemaphore(), + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE_BIT, + .handle = m_fenceHandle + }; + + raiseIfFailed(importSemaphoreWin32HandleKHR(m_device.handle(), &fenceImportInfo), "Unable to import interop synchronization fence for swap chain."); } void reset(Format format, const Size2d& renderArea, UInt32 buffers) @@ -535,6 +552,9 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { // Initialize the query pools. if (m_timingQueryPools.size() != images) this->resetQueryPools(m_timingEvents); + + // Reset the present fences array. 
+ m_presentFences.resize(images, 0ul); } void createImages(Format format, const Size2d& renderArea, UInt32 buffers) @@ -625,21 +645,6 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { return makeUnique(m_device, backBuffer, Size3d { imageInfo.extent.width, imageInfo.extent.height, imageInfo.extent.depth }, format, ImageDimensions::DIM_2, 1, 1, MultiSamplingLevel::x1, false, ImageLayout::Present); }); - // Destroy the image swap semaphores. - std::ranges::for_each(m_swapSemaphores, [&](const auto& semaphore) { ::vkDestroySemaphore(m_device.handle(), semaphore, nullptr); }); - - // Create a semaphore for swapping images. - VkSemaphoreCreateInfo semaphoreInfo{}; - semaphoreInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - - m_swapSemaphores.resize(buffers); - std::ranges::generate(m_swapSemaphores, [&]() mutable { - VkSemaphore semaphore; - raiseIfFailed(::vkCreateSemaphore(m_device.handle(), &semaphoreInfo, nullptr, &semaphore), "Unable to create swap semaphore."); - - return semaphore; - }); - // Store state variables. m_renderArea = renderArea; m_format = format; @@ -691,11 +696,9 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { m_swapChain.Reset(); m_d3dDevice.Reset(); - // Destroy the image swap semaphores. - std::ranges::for_each(m_swapSemaphores, [&](const auto& semaphore) { ::vkDestroySemaphore(m_device.handle(), semaphore, nullptr); }); - // Destroy state. - m_swapSemaphores.clear(); + ::CloseHandle(m_fenceHandle); + m_presentFences.clear(); m_buffers = 0; m_renderArea = {}; m_format = Format::None; @@ -707,15 +710,9 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { // Get the current back buffer index. m_currentImage = m_swapChain->GetCurrentBackBufferIndex(); - // We need to manually signal the current semaphore on the graphics queue, to inform it that the swap chain image is ready to be written. 
- VkSubmitInfo submitInfo = { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .signalSemaphoreCount = 1, - .pSignalSemaphores = &this->currentSemaphore() - }; - - raiseIfFailed(::vkQueueSubmit(m_device.defaultQueue(QueueType::Graphics).handle(), 1, &submitInfo, VK_NULL_HANDLE), "Unable to submit the present queue signal."); - + // Wait for all workloads on this image to finish in order to be able to re-use the associated command buffers. + m_device.defaultQueue(QueueType::Graphics).waitFor(m_presentFences[m_currentImage]); + // Query the timing events. if (m_supportsTiming && !m_timingEvents.empty()) [[likely]] { @@ -734,30 +731,13 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { void present(const VulkanFrameBuffer& frameBuffer) { - // We need to manually signal the current semaphore on the graphics queue, to inform it, that the swap chain is ready. - Array waitForStages = { VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT }; - - VkSubmitInfo submitInfo = { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .waitSemaphoreCount = 1, - .pWaitSemaphores = &frameBuffer.semaphore(), - .pWaitDstStageMask = waitForStages.data() - }; - - // Wait for the frame buffer semaphore, as well as for the rendering fence to complete. - auto& queue = m_device.defaultQueue(QueueType::Graphics); - raiseIfFailed(::vkQueueSubmit(queue.handle(), 1, &submitInfo, VK_NULL_HANDLE), "Unable to submit the present queue signal."); - - // Present needs to happen on UI thread, so we cannot do this asynchronously. - queue.waitFor(frameBuffer.lastFence()); + // Wait for all commands to finish on the default graphics queue. We assume that this is the last queue that receives (synchronized) workloads, as it is expected to + // handle presentation by convention. + // TODO: In some rare situations, there are visible scanlines that look like the wait does not actually wait for all writes on the back buffer, but I am not sure how to debug this properly. 
+ m_presentQueue->Wait(m_fence.Get(), m_presentFences[m_currentImage] = frameBuffer.lastFence()); D3D::raiseIfFailed(m_swapChain->Present(0, m_supportsTearing ? DXGI_PRESENT_ALLOW_TEARING : 0), "Unable to queue present event on swap chain."); } - - const VkSemaphore& currentSemaphore() - { - return m_swapSemaphores[m_currentImage]; - } - + const VkQueryPool& currentTimestampQueryPool() { return m_timingQueryPools[m_currentImage]; @@ -836,11 +816,6 @@ VulkanSwapChain::VulkanSwapChain(const VulkanDevice& device, Format surfaceForma VulkanSwapChain::~VulkanSwapChain() noexcept = default; -const VkSemaphore& VulkanSwapChain::semaphore() const noexcept -{ - return m_impl->currentSemaphore(); -} - const VkQueryPool& VulkanSwapChain::timestampQueryPool() const noexcept { return m_impl->currentTimestampQueryPool(); From feb31ff37710dbf307c1ab2470d25083b9622010 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Thu, 4 Jan 2024 17:23:40 +0100 Subject: [PATCH 29/38] Update Vulkan SDK version on CI builds to prevent false positive validation errors. 
--- .github/workflows/ci.yml | 2 +- .github/workflows/release.yml | 2 +- .github/workflows/weekly.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 84ad954a2..dad553584 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,7 @@ on: - ready_for_review env: - vulkanSdkVersion: '1.3.239.0' # Lowest version that ships with DXC is 1.2.148.0 + vulkanSdkVersion: '1.3.268.0' jobs: job: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9cf810457..706db54ce 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -18,7 +18,7 @@ on: required: true env: - vulkanSdkVersion: '1.3.239.0' # Lowest version that ships with DXC is 1.2.148.0 + vulkanSdkVersion: '1.3.268.0' jobs: build: diff --git a/.github/workflows/weekly.yml b/.github/workflows/weekly.yml index fd3cc984e..5aad20247 100644 --- a/.github/workflows/weekly.yml +++ b/.github/workflows/weekly.yml @@ -13,7 +13,7 @@ on: - cron: '1 0 * * 1' # Run every monday night at 00:01 AM (UTC). env: - vulkanSdkVersion: '1.3.239.0' # Lowest version that ships with DXC is 1.2.148.0 + vulkanSdkVersion: '1.3.268.0' jobs: job: From 75d338aadd572f7d2cc6c09788ad6bfe6c338c1e Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Thu, 4 Jan 2024 17:24:16 +0100 Subject: [PATCH 30/38] Adjust breakpoint behavior. 
--- src/Backends/DirectX12/src/device.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Backends/DirectX12/src/device.cpp b/src/Backends/DirectX12/src/device.cpp index 975926391..a637de443 100644 --- a/src/Backends/DirectX12/src/device.cpp +++ b/src/Backends/DirectX12/src/device.cpp @@ -121,8 +121,8 @@ class DirectX12Device::DirectX12DeviceImpl : public Implement { { infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, TRUE); infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE); - infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE); - //infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_INFO, TRUE); + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, FALSE); + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_INFO, FALSE); // Suppress individual messages by their ID D3D12_MESSAGE_ID suppressIds[] = { From ba77bdeaaeb6fe4b30380a3da3adceced57126f1 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Thu, 4 Jan 2024 17:24:38 +0100 Subject: [PATCH 31/38] Clarify presentation logic. --- src/Backends/DirectX12/src/swapchain.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Backends/DirectX12/src/swapchain.cpp b/src/Backends/DirectX12/src/swapchain.cpp index 20440bc51..0219ddba7 100644 --- a/src/Backends/DirectX12/src/swapchain.cpp +++ b/src/Backends/DirectX12/src/swapchain.cpp @@ -168,7 +168,10 @@ class DirectX12SwapChain::DirectX12SwapChainImpl : public Implementhandle()->GetCurrentBackBufferIndex(); + + // Wait for all rendering commands to finish on the image index (otherwise we would not be able to re-use the command buffers). m_device.defaultQueue(QueueType::Graphics).waitFor(m_presentFences[m_currentImage]); // Read back the timestamps. 
@@ -276,9 +279,8 @@ Enumerable DirectX12SwapChain::images() const noexcept void DirectX12SwapChain::present(const DirectX12FrameBuffer& frameBuffer) const { - // NOTE: Present is similar to issuing a command on the graphics queue, so there is no need to wait for the fence here. However, - // we must wait for the fence before handing out the back-buffer to a new frame again, so we queue up the fence to be able - // to wait for it later. + // Store the last fence here that marks the end of the rendering to this frame buffer. Presenting is queued after rendering anyway, but when swapping the back buffers buffers, + // we need to wait for all commands to finish before being able to re-use the command buffers associated with queued commands. m_impl->m_presentFences[m_impl->m_currentImage] = frameBuffer.lastFence(); raiseIfFailed(this->handle()->Present(0, this->supportsVariableRefreshRate() ? DXGI_PRESENT_ALLOW_TEARING : 0), "Unable to present swap chain"); } From f1a06632afbd0df867dea7c19103bc665a7174c8 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Thu, 4 Jan 2024 17:25:06 +0100 Subject: [PATCH 32/38] Add initial explicit barrier for texture. --- src/Samples/Textures/src/sample.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Samples/Textures/src/sample.cpp b/src/Samples/Textures/src/sample.cpp index 1644a589e..1f929dc52 100644 --- a/src/Samples/Textures/src/sample.cpp +++ b/src/Samples/Textures/src/sample.cpp @@ -120,13 +120,18 @@ void loadTexture(TDevice& device, UniquePtr& texture, UniquePtr barrier = device.buildBarrier() + .waitFor(PipelineStage::None).toContinueWith(PipelineStage::Transfer) + .blockAccessTo(*texture, ResourceAccess::TransferWrite).transitionLayout(ImageLayout::CopyDestination).whenFinishedWith(ResourceAccess::None); + + commandBuffer->barrier(*barrier); commandBuffer->transfer(asShared(std::move(stagedTexture)), *texture); // Generate the rest of the mip maps. 
commandBuffer->generateMipMaps(*texture); // Create a barrier to ensure the texture is readable. - UniquePtr barrier = device.buildBarrier() + barrier = device.buildBarrier() .waitFor(PipelineStage::None).toContinueWith(PipelineStage::Fragment) .blockAccessTo(*texture, ResourceAccess::ShaderRead).transitionLayout(ImageLayout::ShaderResource).whenFinishedWith(ResourceAccess::None); From be1c2d01774148bc65e1b4ab3aae2e949c15eb5c Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Fri, 5 Jan 2024 12:28:26 +0100 Subject: [PATCH 33/38] Use dedicated resources for interop swap-chain. --- src/Backends/Vulkan/src/swapchain.cpp | 139 ++++++++++++++++++++++++-- 1 file changed, 128 insertions(+), 11 deletions(-) diff --git a/src/Backends/Vulkan/src/swapchain.cpp b/src/Backends/Vulkan/src/swapchain.cpp index 4f7c650c5..30ab14b86 100644 --- a/src/Backends/Vulkan/src/swapchain.cpp +++ b/src/Backends/Vulkan/src/swapchain.cpp @@ -328,13 +328,15 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { Array> m_presentImages{ }; Array m_imageResources; Array m_presentFences; - //Array m_presentFences; const VulkanDevice& m_device; - ComPtr m_d3dDevice; + ComPtr m_d3dDevice; ComPtr m_swapChain; ComPtr m_presentQueue; + ComPtr m_workloadFence = nullptr, m_presentationFence = nullptr; + Array> m_presentCommandAllocators; + Array> m_presentCommandLists; + bool m_supportsTearing = false; - ComPtr m_fence = nullptr; HANDLE m_fenceHandle; Array> m_timingEvents; @@ -354,6 +356,9 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { LITEFX_WARNING(VULKAN_LOG, "Timestamp queries are not supported and will be disabled. Reading timestamps will always return 0."); importSemaphoreWin32HandleKHR = reinterpret_cast(::vkGetDeviceProcAddr(device.handle(), "vkImportSemaphoreWin32HandleKHR")); + + if (importSemaphoreWin32HandleKHR == nullptr) [[unlikely]] + throw RuntimeException("Semaphore importing is not available. 
Check if all required extensions are available."); } ~VulkanSwapChainImpl() @@ -490,9 +495,10 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { // Initialize the present fences array. m_presentFences.resize(images, 0ul); - // Create a fence for synchronization. - D3D::raiseIfFailed(m_d3dDevice->CreateFence(0, D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(&m_fence)), "Unable to create interop synchronization fence for swap chain."); - D3D::raiseIfFailed(m_d3dDevice->CreateSharedHandle(m_fence.Get(), nullptr, GENERIC_ALL, L"", &m_fenceHandle), "Unable to create shared handle for swap chain interop synchronization fence."); + // Create fences for synchronization. + D3D::raiseIfFailed(m_d3dDevice->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_presentationFence)), "Unable to create presentation synchronization fence for swap chain."); + D3D::raiseIfFailed(m_d3dDevice->CreateFence(0, D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(&m_workloadFence)), "Unable to create interop synchronization fence for swap chain."); + D3D::raiseIfFailed(m_d3dDevice->CreateSharedHandle(m_workloadFence.Get(), nullptr, GENERIC_ALL, L"", &m_fenceHandle), "Unable to create shared handle for swap chain interop synchronization fence."); // Import the fence handle to signal it from Vulkan workloads. VkImportSemaphoreWin32HandleInfoKHR fenceImportInfo = { @@ -503,6 +509,18 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { }; raiseIfFailed(importSemaphoreWin32HandleKHR(m_device.handle(), &fenceImportInfo), "Unable to import interop synchronization fence for swap chain."); + + // Allocate command lists. 
+ m_presentCommandAllocators.clear(); + m_presentCommandLists.clear(); + m_presentCommandAllocators.resize(images); + m_presentCommandLists.resize(images); + + for (UInt32 i{ 0 }; i < images; ++i) + { + D3D::raiseIfFailed(m_d3dDevice->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_presentCommandAllocators[i])), "Unable to create command allocator for present queue commands."); + D3D::raiseIfFailed(m_d3dDevice->CreateCommandList1(0, D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_LIST_FLAG_NONE, IID_PPV_ARGS(&m_presentCommandLists[i])), "Unable to create command list for present queue commands."); + } } void reset(Format format, const Size2d& renderArea, UInt32 buffers) @@ -555,10 +573,27 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { // Reset the present fences array. m_presentFences.resize(images, 0ul); + + // Resize and re-allocate command lists. + m_presentCommandAllocators.clear(); + m_presentCommandLists.clear(); + m_presentCommandAllocators.resize(images); + m_presentCommandLists.resize(images); + + for (UInt32 i{ 0 }; i < images; ++i) + { + D3D::raiseIfFailed(m_d3dDevice->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_presentCommandAllocators[i])), "Unable to create command allocator for present queue commands."); + D3D::raiseIfFailed(m_d3dDevice->CreateCommandList1(0, D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_LIST_FLAG_NONE, IID_PPV_ARGS(&m_presentCommandLists[i])), "Unable to create command list for present queue commands."); + } } void createImages(Format format, const Size2d& renderArea, UInt32 buffers) - { + { + // NOTE: We maintain two sets of images: the swap chain back buffers and separate image resources that are shared and written to by the Vulkan renderer. During present + // the `m_workloadFence` is waited upon before copying the shared images into the swap chain back buffers. 
While it is possible to share and write the back buffers + // directly, they are not synchronized (even waiting for the workload fence before presenting is not enough). This causes back buffers to be written whilst they + // are presented, resulting in artifacts or flickering. + // Acquire the swap chain images. m_presentImages.resize(buffers); m_imageResources.resize(buffers); @@ -567,7 +602,24 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { ComPtr resource; HANDLE resourceHandle = nullptr; const int image = i++; - D3D::raiseIfFailed(m_swapChain->GetBuffer(image, IID_PPV_ARGS(&resource)), "Unable to acquire image resource from swap chain back buffer {0}.", image); + //D3D::raiseIfFailed(m_swapChain->GetBuffer(image, IID_PPV_ARGS(&resource)), "Unable to acquire image resource from swap chain back buffer {0}.", image); + + // Create an image resource. + D3D12_RESOURCE_DESC imageDesc = { + .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D, + .Width = renderArea.width(), + .Height = static_cast(renderArea.height()), + .DepthOrArraySize = 1, + .MipLevels = 1, + .Format = DX12::getFormat(format), + .SampleDesc = { 1, 0 }, + .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, + .Flags = D3D12_RESOURCE_FLAG_NONE + }; + + auto heapInfo = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); + + D3D::raiseIfFailed(m_d3dDevice->CreateCommittedResource(&heapInfo, D3D12_HEAP_FLAG_SHARED, &imageDesc, D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&resource)), "Unable to create image resource to interop back buffer."); D3D::raiseIfFailed(m_d3dDevice->CreateSharedHandle(resource.Get(), nullptr, GENERIC_ALL, nullptr, &resourceHandle), "Unable to create shared handle for interop back buffer."); // Wrap the back buffer images in an vulkan image. @@ -712,7 +764,20 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { // Wait for all workloads on this image to finish in order to be able to re-use the associated command buffers.
m_device.defaultQueue(QueueType::Graphics).waitFor(m_presentFences[m_currentImage]); - + + // Wait for the last presentation on the current image to finish, so that we can re-use the command buffers associated with it. + if (m_presentationFence->GetCompletedValue() < m_presentFences[m_currentImage]) + { + HANDLE eventHandle = ::CreateEvent(nullptr, false, false, nullptr); + HRESULT hr = m_presentationFence->SetEventOnCompletion(m_presentFences[m_currentImage], eventHandle); + + if (SUCCEEDED(hr)) + ::WaitForSingleObject(eventHandle, INFINITE); + + ::CloseHandle(eventHandle); + raiseIfFailed(hr, "Unable to register presentation fence completion event."); + } + // Query the timing events. if (m_supportsTiming && !m_timingEvents.empty()) [[likely]] { @@ -733,9 +798,61 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { { // Wait for all commands to finish on the default graphics queue. We assume that this is the last queue that receives (synchronized) workloads, as it is expected to // handle presentation by convention. - // TODO: In some rare situations, there are visible scanlines that look like the wait does not actually wait for all writes on the back buffer, but I am not sure how to debug this properly. - m_presentQueue->Wait(m_fence.Get(), m_presentFences[m_currentImage] = frameBuffer.lastFence()); + m_presentQueue->Wait(m_workloadFence.Get(), m_presentFences[m_currentImage] = frameBuffer.lastFence()); + + // Copy shared images to back buffers. See `createImages` for details on why we do this. 
+ ComPtr resource; + D3D::raiseIfFailed(m_swapChain->GetBuffer(m_currentImage, IID_PPV_ARGS(&resource)), "Unable to acquire image resource from swap chain back buffer {0}.", m_currentImage); + + auto& allocator = m_presentCommandAllocators[m_currentImage]; + auto& commandList = m_presentCommandLists[m_currentImage]; + D3D::raiseIfFailed(allocator->Reset(), "Unable to reset command allocator before presentation."); + D3D::raiseIfFailed(commandList->Reset(allocator.Get(), nullptr), "Unable to reset command list before presentation."); + + // Transition into copy destination state and copy the resource. + D3D12_TEXTURE_BARRIER barrier = { + .SyncBefore = D3D12_BARRIER_SYNC_NONE, + .SyncAfter = D3D12_BARRIER_SYNC_COPY, + .AccessBefore = D3D12_BARRIER_ACCESS_NO_ACCESS, + .AccessAfter = D3D12_BARRIER_ACCESS_COPY_DEST, + .LayoutBefore = D3D12_BARRIER_LAYOUT_UNDEFINED, + .LayoutAfter = D3D12_BARRIER_LAYOUT_COPY_DEST, + .pResource = resource.Get(), + .Subresources = {.NumMipLevels = 1, .NumArraySlices = 1, .NumPlanes = 1 } + }; + + D3D12_BARRIER_GROUP barrierGroup = { + .Type = D3D12_BARRIER_TYPE_TEXTURE, + .NumBarriers = 1, + .pTextureBarriers = &barrier + }; + + commandList->Barrier(1, &barrierGroup); + commandList->CopyResource(resource.Get(), m_imageResources[m_currentImage].image.Get()); + + // Transition into present state and close the command list. + barrier = { + .SyncBefore = D3D12_BARRIER_SYNC_COPY, + .SyncAfter = D3D12_BARRIER_SYNC_NONE, + .AccessBefore = D3D12_BARRIER_ACCESS_COPY_DEST, + .AccessAfter = D3D12_BARRIER_ACCESS_NO_ACCESS, + .LayoutBefore = D3D12_BARRIER_LAYOUT_COPY_DEST, + .LayoutAfter = D3D12_BARRIER_LAYOUT_PRESENT, + .pResource = resource.Get(), + .Subresources = { .NumMipLevels = 1, .NumArraySlices = 1, .NumPlanes = 1 } + }; + + commandList->Barrier(1, &barrierGroup); + + D3D::raiseIfFailed(commandList->Close(), "Unable to close command list for presentation."); + + // Submit the command buffer. 
+ std::array commandBuffers = { commandList.Get() }; + m_presentQueue->ExecuteCommandLists(commandBuffers.size(), commandBuffers.data()); + + // Do the presentation. D3D::raiseIfFailed(m_swapChain->Present(0, m_supportsTearing ? DXGI_PRESENT_ALLOW_TEARING : 0), "Unable to queue present event on swap chain."); + D3D::raiseIfFailed(m_presentQueue->Signal(m_presentationFence.Get(), m_presentFences[m_currentImage]), "Unable to signal presentation fence."); } const VkQueryPool& currentTimestampQueryPool() From 3324d22e783bb17255040f87ea88ab0fd24d91f7 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Fri, 5 Jan 2024 13:09:38 +0100 Subject: [PATCH 34/38] Remove semaphore from frame buffer interface. --- .../Vulkan/include/litefx/backends/vulkan.hpp | 6 ------ src/Backends/Vulkan/src/frame_buffer.cpp | 16 ---------------- 2 files changed, 22 deletions(-) diff --git a/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp b/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp index 5c03a9213..8dc067572 100644 --- a/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp +++ b/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp @@ -1098,12 +1098,6 @@ namespace LiteFX::Rendering::Backends { // Vulkan frame buffer interface. public: - /// - /// Returns a reference of the semaphore, that can be used to signal, that the frame buffer is finished. - /// - /// A reference of the semaphore, that can be used to signal, that the frame buffer is finished. - const VkSemaphore& semaphore() const noexcept; - /// Returns a reference to the value of the fence that indicates the last submission drawing into the frame buffer. 
/// /// diff --git a/src/Backends/Vulkan/src/frame_buffer.cpp b/src/Backends/Vulkan/src/frame_buffer.cpp index 86a10853d..9599b776c 100644 --- a/src/Backends/Vulkan/src/frame_buffer.cpp +++ b/src/Backends/Vulkan/src/frame_buffer.cpp @@ -16,7 +16,6 @@ class VulkanFrameBuffer::VulkanFrameBufferImpl : public Implement m_renderTargetViews; Array> m_commandBuffers; Size2d m_size; - VkSemaphore m_semaphore; UInt32 m_bufferIndex; UInt64 m_lastFence{ 0 }; @@ -26,21 +25,11 @@ class VulkanFrameBuffer::VulkanFrameBufferImpl : public Implementm_renderPass.device().handle(), this->handle(), nullptr); } -const VkSemaphore& VulkanFrameBuffer::semaphore() const noexcept -{ - return m_impl->m_semaphore; -} - UInt64& VulkanFrameBuffer::lastFence() noexcept { return m_impl->m_lastFence; From ff5f4bca74ab65e9b88a6aa2fbc266ca7e5fd030 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Fri, 5 Jan 2024 13:17:59 +0100 Subject: [PATCH 35/38] Allow swap chain presentation without providing a frame buffer. 
--- .../include/litefx/backends/dx12.hpp | 3 ++ src/Backends/DirectX12/src/swapchain.cpp | 7 ++++- .../Vulkan/include/litefx/backends/vulkan.hpp | 3 ++ src/Backends/Vulkan/src/swapchain.cpp | 30 ++++++++++++++----- .../include/litefx/rendering_api.hpp | 12 ++++++++ 5 files changed, 47 insertions(+), 8 deletions(-) diff --git a/src/Backends/DirectX12/include/litefx/backends/dx12.hpp b/src/Backends/DirectX12/include/litefx/backends/dx12.hpp index 7891a3d73..54ad4251a 100644 --- a/src/Backends/DirectX12/include/litefx/backends/dx12.hpp +++ b/src/Backends/DirectX12/include/litefx/backends/dx12.hpp @@ -1408,6 +1408,9 @@ namespace LiteFX::Rendering::Backends { /// void present(const DirectX12FrameBuffer& frameBuffer) const override; + /// + void present(UInt64 fence) const override; + public: /// Enumerable getSurfaceFormats() const noexcept override; diff --git a/src/Backends/DirectX12/src/swapchain.cpp b/src/Backends/DirectX12/src/swapchain.cpp index 0219ddba7..f4ad06711 100644 --- a/src/Backends/DirectX12/src/swapchain.cpp +++ b/src/Backends/DirectX12/src/swapchain.cpp @@ -278,10 +278,15 @@ Enumerable DirectX12SwapChain::images() const noexcept } void DirectX12SwapChain::present(const DirectX12FrameBuffer& frameBuffer) const +{ + this->present(frameBuffer.lastFence()); +} + +void DirectX12SwapChain::present(UInt64 fence) const { // Store the last fence here that marks the end of the rendering to this frame buffer. Presenting is queued after rendering anyway, but when swapping the back buffers buffers, // we need to wait for all commands to finish before being able to re-use the command buffers associated with queued commands. - m_impl->m_presentFences[m_impl->m_currentImage] = frameBuffer.lastFence(); + m_impl->m_presentFences[m_impl->m_currentImage] = fence; raiseIfFailed(this->handle()->Present(0, this->supportsVariableRefreshRate() ? 
DXGI_PRESENT_ALLOW_TEARING : 0), "Unable to present swap chain"); } diff --git a/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp b/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp index 8dc067572..fbda5b474 100644 --- a/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp +++ b/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp @@ -1389,6 +1389,9 @@ namespace LiteFX::Rendering::Backends { /// void present(const VulkanFrameBuffer& frameBuffer) const override; + /// + void present(UInt64 fence) const override; + public: /// Enumerable getSurfaceFormats() const noexcept override; diff --git a/src/Backends/Vulkan/src/swapchain.cpp b/src/Backends/Vulkan/src/swapchain.cpp index 30ab14b86..7137567fb 100644 --- a/src/Backends/Vulkan/src/swapchain.cpp +++ b/src/Backends/Vulkan/src/swapchain.cpp @@ -192,8 +192,7 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { { // Queue an image acquisition request, then wait for the fence and reset it for the next iteration. Note how this is similar to the DirectX behavior, where the swap call blocks until the // image is acquired and ready. - UInt32 nextImage; - raiseIfFailed(::vkAcquireNextImageKHR(m_device.handle(), m_handle, UINT64_MAX, VK_NULL_HANDLE, m_waitForImage, &nextImage), "Unable to swap front buffer. Make sure that all previously acquired images are actually presented before acquiring another image."); + raiseIfFailed(::vkAcquireNextImageKHR(m_device.handle(), m_handle, UINT64_MAX, VK_NULL_HANDLE, m_waitForImage, &m_currentImage), "Unable to swap front buffer. 
Make sure that all previously acquired images are actually presented before acquiring another image."); raiseIfFailed(::vkWaitForFences(m_device.handle(), 1, &m_waitForImage, VK_TRUE, UINT64_MAX), "Unable to wait for image acquisition."); raiseIfFailed(::vkResetFences(m_device.handle(), 1, &m_waitForImage), "Unable to reset image acquisition fence."); @@ -202,7 +201,7 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { // not find out why and when this happens, but maybe waiting explicitly on the last frame's fence (for the respective image) will fix the issue. if (m_supportsTiming && !m_timingEvents.empty()) [[likely]] { - m_currentQueryPool = m_timingQueryPools[nextImage]; + m_currentQueryPool = m_timingQueryPools[m_currentImage]; auto result = ::vkGetQueryPoolResults(m_device.handle(), m_currentQueryPool, 0, m_timestamps.size(), m_timestamps.size() * sizeof(UInt64), m_timestamps.data(), sizeof(UInt64), VK_QUERY_RESULT_64_BIT); if (result != VK_NOT_READY) // Initial frames do not yet contain query results. @@ -212,14 +211,21 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { ::vkResetQueryPool(m_device.handle(), m_currentQueryPool, 0, m_timestamps.size()); } - return nextImage; + return m_currentImage; } void present(const VulkanFrameBuffer& frameBuffer) + { + this->present(frameBuffer.lastFence()); + } + + void present(UInt64 fence) { // Draw the frame, if the result of the render pass it should be presented to the swap chain. 
std::array swapChains = { m_handle }; - const auto bufferIndex = frameBuffer.bufferIndex(); + const auto bufferIndex = m_currentImage; + const auto& queue = m_device.defaultQueue(QueueType::Graphics); + queue.waitFor(fence); VkPresentInfoKHR presentInfo = { .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, @@ -229,7 +235,7 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { .pResults = nullptr }; - raiseIfFailed(::vkQueuePresentKHR(m_device.defaultQueue(QueueType::Graphics).handle(), &presentInfo), "Unable to present swap chain."); + raiseIfFailed(::vkQueuePresentKHR(queue.handle(), &presentInfo), "Unable to present swap chain."); } const VkQueryPool& currentTimestampQueryPool() @@ -795,10 +801,15 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { } void present(const VulkanFrameBuffer& frameBuffer) + { + this->present(frameBuffer.lastFence()); + } + + void present(UInt64 fence) { // Wait for all commands to finish on the default graphics queue. We assume that this is the last queue that receives (synchronized) workloads, as it is expected to // handle presentation by convention. - m_presentQueue->Wait(m_workloadFence.Get(), m_presentFences[m_currentImage] = frameBuffer.lastFence()); + m_presentQueue->Wait(m_workloadFence.Get(), m_presentFences[m_currentImage] = fence); // Copy shared images to back buffers. See `createImages` for details on why we do this. 
ComPtr resource; @@ -1012,6 +1023,11 @@ void VulkanSwapChain::present(const VulkanFrameBuffer& frameBuffer) const m_impl->present(frameBuffer); } +void VulkanSwapChain::present(UInt64 fence) const +{ + m_impl->present(fence); +} + Enumerable VulkanSwapChain::getSurfaceFormats() const noexcept { return m_impl->getSurfaceFormats(m_impl->m_device.adapter().handle(), m_impl->m_device.surface().handle()); diff --git a/src/Rendering/include/litefx/rendering_api.hpp b/src/Rendering/include/litefx/rendering_api.hpp index 6c0b52d68..941ff977d 100644 --- a/src/Rendering/include/litefx/rendering_api.hpp +++ b/src/Rendering/include/litefx/rendering_api.hpp @@ -5024,6 +5024,18 @@ namespace LiteFX::Rendering { /// The frame buffer for which the present should wait. virtual void present(const IFrameBuffer& frameBuffer) const = 0; + /// + /// Queues a present that gets executed after `fence` has been signalled on the default graphics queue. + /// + /// + /// You can use this overload in situations where you do not have a render pass or frame buffer to render into before presenting. Instead, you typically + /// copy into the swap chain back buffer images directly. This copy is done in a command buffer that must be submitted to the default graphics queue. The swap + /// chain can then wait for the copy to finish before presenting it. This is useful, for example, when you want to write to the back buffer from a compute shader, which + /// does not have an equivalent to render passes. + /// + /// The fence to pass on the default graphics queue after which the present is executed. + virtual void present(UInt64 fence) const = 0; + public: /// /// Invoked, when the swap chain has swapped the back buffers. From 3cf1372ff492033d5de3cb1bb65572748c4b1fc0 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Fri, 5 Jan 2024 13:19:06 +0100 Subject: [PATCH 36/38] Remove empty render pass for presentation.
--- src/Samples/Compute/src/sample.cpp | 45 +++++++++++------------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/src/Samples/Compute/src/sample.cpp b/src/Samples/Compute/src/sample.cpp index 969b3b7ec..5680a001a 100644 --- a/src/Samples/Compute/src/sample.cpp +++ b/src/Samples/Compute/src/sample.cpp @@ -94,13 +94,8 @@ void initRenderGraph(TRenderBackend* backend, SharedPtr& inputA .layout(postProgram->reflectPipelineLayout()) .shaderProgram(postProgram); - // Build a present render pass. - UniquePtr presentPass = device->buildRenderPass("Present") - .renderTarget("Present Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, RenderTargetFlags::None, { 0.0f, 0.0f, 0.0f, 1.f }); - // Add the resources to the device state. device->state().add(std::move(renderPass)); - device->state().add(std::move(presentPass)); device->state().add(std::move(renderPipeline)); device->state().add(std::move(postPipeline)); } @@ -274,13 +269,12 @@ void SampleApp::onResize(const void* sender, ResizeEventArgs e) auto surfaceFormat = m_device->swapChain().surfaceFormat(); auto renderArea = Size2d(e.width(), e.height()); m_device->swapChain().reset(surfaceFormat, renderArea, 3); - + // NOTE: Important to do this in order, since dependencies (i.e. input attachments) are re-created and might be mapped to images that do no longer exist when a dependency // gets re-created. This is hard to detect, since some frame buffers can have a constant size, that does not change with the render area and do not need to be // re-created. We should either think of a clever implicit dependency management for this, or at least document this behavior! m_device->state().renderPass("Opaque").resizeFrameBuffers(renderArea); - m_device->state().renderPass("Present").resizeFrameBuffers(renderArea); - + // Update the post-processing bindings that reference the "opaque" frame buffer. 
auto opaqueFrameBuffers = m_device->state().renderPass("Opaque").frameBuffers(); @@ -396,7 +390,6 @@ void SampleApp::drawFrame() // Query state. For performance reasons, those state variables should be cached for more complex applications, instead of looking them up every frame. auto& renderPass = m_device->state().renderPass("Opaque"); - auto& presentPass = m_device->state().renderPass("Present"); auto& postPipeline = m_device->state().pipeline("Post"); auto& geometryPipeline = m_device->state().pipeline("Geometry"); auto& transformBuffer = m_device->state().buffer("Transform"); @@ -436,17 +429,16 @@ void SampleApp::drawFrame() commandBuffer->bind(vertexBuffer); commandBuffer->bind(indexBuffer); - // Draw the object and present the frame by ending the render pass. + // Draw the object and end the render pass. commandBuffer->drawIndexed(indexBuffer.elements()); geometryFence = renderPass.end(); } // Perform post processing on compute queue. - UInt64 postProcessFence = 0; - { // Create a command buffer. - auto commandBuffer = m_device->defaultQueue(QueueType::Compute).createCommandBuffer(true); + auto& computeQueue = m_device->defaultQueue(QueueType::Compute); + auto commandBuffer = computeQueue.createCommandBuffer(true); commandBuffer->use(postPipeline); // Get the image from the back buffer of the geometry pass. @@ -473,35 +465,30 @@ void SampleApp::drawFrame() // Submit the command buffer. m_device->defaultQueue(QueueType::Compute).waitFor(renderPass.commandQueue(), geometryFence); - postProcessFence = commandBuffer->submit(); - } + auto postProcessFence = computeQueue.submit(commandBuffer); - // Execute present pass. - { // Copy the post-processed image into the render target. - // NOTE: This implicitly transitions the image into `CopyDestination` layout. 
- auto& queue = presentPass.commandQueue(); - auto commandBuffer = queue.createCommandBuffer(true); + auto& graphicsQueue = m_device->defaultQueue(QueueType::Graphics); + commandBuffer = graphicsQueue.createCommandBuffer(true); // Transition the image back into `CopyDestination` layout. - auto barrier = m_device->makeBarrier(PipelineStage::None, PipelineStage::Transfer); - barrier->transition(presentPass.frameBuffer(backBuffer).image(0), ResourceAccess::None, ResourceAccess::TransferWrite, ImageLayout::Undefined, ImageLayout::CopyDestination); + barrier = m_device->makeBarrier(PipelineStage::None, PipelineStage::Transfer); + barrier->transition(*m_device->swapChain().image(backBuffer), ResourceAccess::None, ResourceAccess::TransferWrite, ImageLayout::Undefined, ImageLayout::CopyDestination); commandBuffer->barrier(*barrier); // Copy the image. - commandBuffer->transfer(renderPass.frameBuffer(backBuffer).image(0), presentPass.frameBuffer(backBuffer).image(0)); + commandBuffer->transfer(renderPass.frameBuffer(backBuffer).image(0), *m_device->swapChain().image(backBuffer)); // Transition the image back into `Present` layout. barrier = m_device->makeBarrier(PipelineStage::Transfer, PipelineStage::Resolve); - barrier->transition(presentPass.frameBuffer(backBuffer).image(0), ResourceAccess::TransferWrite, ResourceAccess::Common, ImageLayout::CopyDestination, ImageLayout::Present); + barrier->transition(*m_device->swapChain().image(backBuffer), ResourceAccess::TransferWrite, ResourceAccess::Common, ImageLayout::CopyDestination, ImageLayout::Present); commandBuffer->barrier(*barrier); // Wait for the compute queue to finish before performing the transfer. 
- queue.waitFor(m_device->defaultQueue(QueueType::Compute), postProcessFence); - queue.submit(commandBuffer); + graphicsQueue.waitFor(m_device->defaultQueue(QueueType::Compute), postProcessFence); + auto fence = graphicsQueue.submit(commandBuffer); - // Begin and immediately end the present pass (that does not do any actual work except presenting on end). - presentPass.begin(backBuffer); - presentPass.end(); + // Present after the transfer is finished. + m_device->swapChain().present(fence); } } \ No newline at end of file From 49e93e74c350da7e8900a33fb024aacb648ce04d Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Fri, 5 Jan 2024 13:50:48 +0100 Subject: [PATCH 37/38] Document changes from multi-queue example PR. --- docs/release-logs/0.4.1.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/release-logs/0.4.1.md b/docs/release-logs/0.4.1.md index 01755f5c3..848765e05 100644 --- a/docs/release-logs/0.4.1.md +++ b/docs/release-logs/0.4.1.md @@ -20,13 +20,17 @@ - Command buffers can now be submitted with shared ownership to a command queue, which then stores them and releases the references, if the submit fence is passed (during `waitFor`). - Command buffer transfers can now receive resources with shared ownership. Resource references are released in a similar fashion. - To share ownership, the `asShared` function can be used. -- Allow manual command queue allocation for advanced parallel workloads. ([See PR #112](https://github.com/crud89/LiteFX/pull/112)) +- Allow manual command queue allocation for advanced parallel workloads. ([See PR #112](https://github.com/crud89/LiteFX/pull/112) and [PR #114](https://github.com/crud89/LiteFX/pull/114)) + - New "Compute" example demonstrates how to use and synchronize different graphics and compute queues. - Make most of the render pipeline state dynamic (viewports, scissors, ...). 
([See PR #86](https://github.com/crud89/LiteFX/pull/86)) - Vector conversion to math types can now be done for constant vectors. ([See PR #87](https://github.com/crud89/LiteFX/pull/87)) - Backend types now import contra-variant interface functions instead of hiding them. ([See PR #91](https://github.com/crud89/LiteFX/pull/91)) - Add support for GPU time measurements (*Timing Events*). ([See PR #94](https://github.com/crud89/LiteFX/pull/94)) - Add builder interface for barriers and extent barrier flexibility. ([See PR #97](https://github.com/crud89/LiteFX/pull/97)) - Add support for static secondary command buffers aka. bundles. ([See PR #100](https://github.com/crud89/LiteFX/pull/100)) +- Render targets are now created with a set of flags instead of individual boolean switches. ([See PR #114](https://github.com/crud89/LiteFX/pull/114)) + - This also enables more use cases, like using render targets in read-write bindings or sharing between different queues. +- Swap chains can now accept `present` calls without explicitly providing a frame buffer. ([See PR #114](https://github.com/crud89/LiteFX/pull/114)) **🌋 Vulkan:** @@ -36,6 +40,8 @@ - Command buffers no longer share a command pool, improving multi-threading behavior. ([See PR #112](https://github.com/crud89/LiteFX/pull/112)) - Queue allocation has also been reworked so that a queue from the most specialized queue family for a provided `QueueType` is returned. - Empty descriptor sets are now allowed and may be automatically created to fill gaps in descriptor set space indices. ([See PR#110](https://github.com/crud89/LiteFX/pull/110)) +- Swap chain presentation uses fences instead of convoluted binary semaphores for synchronization. ([See PR #114](https://github.com/crud89/LiteFX/pull/114)) + - Furthermore, the D3D interop version of the swap chain has been reworked to support proper frames in flight (as opposed to doing a full CPU-wait before presenting).
**❎ DirectX 12:** From a6f8f798b6c3ebc20cacfaccd941d63c5c1115b7 Mon Sep 17 00:00:00 2001 From: Carsten Rudolph <18394207+crud89@users.noreply.github.com> Date: Fri, 5 Jan 2024 14:04:14 +0100 Subject: [PATCH 38/38] Add debug groups. --- src/Samples/Compute/src/sample.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Samples/Compute/src/sample.cpp b/src/Samples/Compute/src/sample.cpp index 5680a001a..6d2d36412 100644 --- a/src/Samples/Compute/src/sample.cpp +++ b/src/Samples/Compute/src/sample.cpp @@ -438,6 +438,7 @@ void SampleApp::drawFrame() { // Create a command buffer. auto& computeQueue = m_device->defaultQueue(QueueType::Compute); + computeQueue.beginDebugRegion("Post-Processing"); auto commandBuffer = computeQueue.createCommandBuffer(true); commandBuffer->use(postPipeline); @@ -466,9 +467,11 @@ void SampleApp::drawFrame() // Submit the command buffer. m_device->defaultQueue(QueueType::Compute).waitFor(renderPass.commandQueue(), geometryFence); auto postProcessFence = computeQueue.submit(commandBuffer); + computeQueue.endDebugRegion(); // Copy the post-processed image into the render target. auto& graphicsQueue = m_device->defaultQueue(QueueType::Graphics); + graphicsQueue.beginDebugRegion("Presentation"); commandBuffer = graphicsQueue.createCommandBuffer(true); // Transition the image back into `CopyDestination` layout. @@ -489,6 +492,7 @@ void SampleApp::drawFrame() auto fence = graphicsQueue.submit(commandBuffer); // Present after the transfer is finished. + graphicsQueue.endDebugRegion(); m_device->swapChain().present(fence); } } \ No newline at end of file