gpu: Defer GPL instrumentation until linking

KhronosGroup · Sep 18, 2024 · dea352a · dea352a
1 parent e0feb83
commit dea352a
Show file tree

Hide file tree

Showing 7 changed files with 617 additions and 55 deletions.
diff --git a/layers/gpu/instrumentation/gpu_shader_instrumentor.cpp b/layers/gpu/instrumentation/gpu_shader_instrumentor.cpp
diff --git a/layers/gpu/instrumentation/gpu_shader_instrumentor.h b/layers/gpu/instrumentation/gpu_shader_instrumentor.h
@@ -185,13 +185,26 @@ class GpuShaderInstrumentor : public ValidationStateTracker {
                                             VkDeviceQueueCreateFlags flags,
                                             const VkQueueFamilyProperties &queueFamilyProperties) override;
 
+    bool NeedPipelineCreationShaderInstrumentation(vvl::Pipeline &pipeline_state);  // NOTE(review): presumably gates the Pre/PostCallRecord hooks below - confirm in the .cpp
+    bool HasBindlessDescriptors(vvl::Pipeline &pipeline_state);       // overload taking a pipeline state object
+    bool HasBindlessDescriptors(VkShaderCreateInfoEXT &create_info);  // overload taking a VkShaderCreateInfoEXT
+
     template <typename SafeCreateInfo>  // generic over the create-info type passed as new_pipeline_ci
     void PreCallRecordPipelineCreationShaderInstrumentation(
         const VkAllocationCallbacks *pAllocator, vvl::Pipeline &pipeline_state, SafeCreateInfo &new_pipeline_ci,
         const Location &loc, std::vector<chassis::ShaderInstrumentationMetadata> &shader_instrumentation_metadata);
     void PostCallRecordPipelineCreationShaderInstrumentation(  // not templated; takes the same metadata vector type by reference
         vvl::Pipeline &pipeline_state, std::vector<chassis::ShaderInstrumentationMetadata> &shader_instrumentation_metadata);
 
+    // GPL (graphics pipeline library) variants of the two hooks above; graphics only, as GPL defers instrumentation until linking
+    void PreCallRecordPipelineCreationShaderInstrumentationGPL(
+        const VkAllocationCallbacks *pAllocator, vvl::Pipeline &pipeline_state,
+        vku::safe_VkGraphicsPipelineCreateInfo &new_pipeline_ci, const Location &loc,
+        std::vector<chassis::ShaderInstrumentationMetadata> &shader_instrumentation_metadata);
+    void PostCallRecordPipelineCreationShaderInstrumentationGPL(
+        vvl::Pipeline &pipeline_state, const VkAllocationCallbacks *pAllocator,  // takes pAllocator, unlike the non-GPL Post hook
+        std::vector<chassis::ShaderInstrumentationMetadata> &shader_instrumentation_metadata);
+
     // GPU-AV and DebugPrint are using the same way to do the actual shader instrumentation logic
     // Returns if shader was instrumented successfully or not
     bool InstrumentShader(const vvl::span<const uint32_t> &input_spirv, uint32_t unique_shader_id, bool has_bindless_descriptors,

diff --git a/layers/state_tracker/pipeline_state.h b/layers/state_tracker/pipeline_state.h
@@ -140,6 +140,9 @@ class Pipeline : public StateObject {
         std::vector<VkShaderModule> instrumented_shader_module;
         // TODO - For GPL, this doesn't get passed down from linked shaders
         bool was_instrumented = false;
+        // When we instrument GPL at link time, we need to hold the new libraries until they are done
+        VkPipeline pre_raster_lib = VK_NULL_HANDLE;
+        VkPipeline frag_out_lib = VK_NULL_HANDLE;
     } instrumentation_data;
 
     // Executable or legacy pipeline

diff --git a/tests/unit/debug_printf.cpp b/tests/unit/debug_printf.cpp
@@ -11,6 +11,7 @@
  *     http://www.apache.org/licenses/LICENSE-2.0
  */
 
+#include <vulkan/vulkan_core.h>
 #include <cstdint>
 #include "../framework/layer_validation_tests.h"
 #include "../framework/pipeline_helper.h"
@@ -2430,6 +2431,154 @@ TEST_F(NegativeDebugPrintf, UseAllDescriptorSlotsPipelineNotReserved) {
     }
 }
 
+TEST_F(NegativeDebugPrintf, UseAllDescriptorSlotsPipelineGraphics) {  // two cases: layout with set_limit sets, then set_limit - 1
+    TEST_DESCRIPTION("Do not reserve a descriptor slot and proceed to use them all anyway so debug printf can't");
+    RETURN_IF_SKIP(InitDebugPrintfFramework());
+    RETURN_IF_SKIP(InitState());
+    InitRenderTarget();
+    m_errorMonitor->ExpectSuccess(kErrorBit | kWarningBit | kInformationBit);
+
+    char const *shader_source = R"glsl(
+        #version 450
+        #extension GL_EXT_debug_printf : enable
+        void main() {
+            float myfloat = 3.1415f;
+            debugPrintfEXT("float == %f", myfloat);
+        }
+    )glsl";
+    VkShaderObj vs(this, shader_source, VK_SHADER_STAGE_VERTEX_BIT);  // the printf shader is used as the vertex stage
+
+    const uint32_t set_limit = m_device->phy().limits_.maxBoundDescriptorSets;
+    OneOffDescriptorSet descriptor_set(m_device, {{0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr}});
+    // First case: the pipeline layout uses all maxBoundDescriptorSets sets, which triggers the "too many descriptor sets" warning
+    {
+        m_errorMonitor->SetDesiredWarning(
+            "This Pipeline Layout has too many descriptor sets that will not allow GPU shader instrumentation to be setup for "
+            "pipelines created with it");
+        std::vector<const vkt::DescriptorSetLayout *> layouts(set_limit);
+        for (uint32_t i = 0; i < set_limit; i++) {
+            layouts[i] = &descriptor_set.layout_;  // the same one-binding layout is reused for every set index
+        }
+        vkt::PipelineLayout pipe_layout(*m_device, layouts);
+        m_errorMonitor->VerifyFound();  // the warning is checked right after pipeline layout creation
+
+        CreatePipelineHelper pipe(*this);
+        pipe.shader_stages_ = {vs.GetStageCreateInfo(), pipe.fs_->GetStageCreateInfo()};
+        pipe.gp_ci_.layout = pipe_layout.handle();
+        pipe.CreateGraphicsPipeline();
+
+        m_commandBuffer->begin();
+        m_commandBuffer->BeginRenderPass(m_renderPassBeginInfo);
+        vk::CmdBindPipeline(m_commandBuffer->handle(), VK_PIPELINE_BIND_POINT_GRAPHICS, pipe.Handle());
+        vk::CmdDraw(m_commandBuffer->handle(), 3, 1, 0, 0);
+        m_commandBuffer->EndRenderPass();
+        m_commandBuffer->end();
+
+        // No printf message is registered as expected here: there was no free set slot for the output buffer
+        m_default_queue->Submit(*m_commandBuffer);
+        m_default_queue->Wait();
+    }
+
+    // Second case: use one fewer set in the layout (so there is room now) and expect the printf output
+    {
+        std::vector<const vkt::DescriptorSetLayout *> layouts(set_limit - 1);
+        for (uint32_t i = 0; i < set_limit - 1; i++) {
+            layouts[i] = &descriptor_set.layout_;
+        }
+        vkt::PipelineLayout pipe_layout(*m_device, layouts);
+
+        CreatePipelineHelper pipe(*this);
+        pipe.shader_stages_ = {vs.GetStageCreateInfo(), pipe.fs_->GetStageCreateInfo()};
+        pipe.gp_ci_.layout = pipe_layout.handle();
+        pipe.CreateGraphicsPipeline();
+
+        m_commandBuffer->begin();
+        m_commandBuffer->BeginRenderPass(m_renderPassBeginInfo);
+        vk::CmdBindPipeline(m_commandBuffer->handle(), VK_PIPELINE_BIND_POINT_GRAPHICS, pipe.Handle());
+        vk::CmdDraw(m_commandBuffer->handle(), 3, 1, 0, 0);
+        m_commandBuffer->EndRenderPass();
+        m_commandBuffer->end();
+
+        m_errorMonitor->SetDesiredFailureMsg(kInformationBit, "float == 3.141500");  // expected three times - presumably once per vertex of the 3-vertex draw (TODO confirm)
+        m_errorMonitor->SetDesiredFailureMsg(kInformationBit, "float == 3.141500");
+        m_errorMonitor->SetDesiredFailureMsg(kInformationBit, "float == 3.141500");
+        m_default_queue->Submit(*m_commandBuffer);
+        m_default_queue->Wait();
+        m_errorMonitor->VerifyFound();
+    }
+}
+
+TEST_F(NegativeDebugPrintf, UseAllDescriptorSlotsPipelineGPL) {  // same two cases as the graphics test, but built through vkt::SimpleGPL
+    TEST_DESCRIPTION("Do not reserve a descriptor slot and proceed to use them all anyway so debug printf can't");
+    AddRequiredExtensions(VK_EXT_GRAPHICS_PIPELINE_LIBRARY_EXTENSION_NAME);
+    AddRequiredFeature(vkt::Feature::graphicsPipelineLibrary);
+    RETURN_IF_SKIP(InitDebugPrintfFramework());
+    RETURN_IF_SKIP(InitState());
+    InitRenderTarget();
+    m_errorMonitor->ExpectSuccess(kErrorBit | kWarningBit | kInformationBit);
+
+    char const *shader_source = R"glsl(
+        #version 450
+        #extension GL_EXT_debug_printf : enable
+        void main() {
+            float myfloat = 3.1415f;
+            debugPrintfEXT("float == %f", myfloat);
+        }
+    )glsl";
+
+    const uint32_t set_limit = m_device->phy().limits_.maxBoundDescriptorSets;
+    OneOffDescriptorSet descriptor_set(m_device, {{0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr}});
+    // First case: the pipeline layout uses all maxBoundDescriptorSets sets, which triggers the "too many descriptor sets" warning
+    {
+        m_errorMonitor->SetDesiredWarning(
+            "This Pipeline Layout has too many descriptor sets that will not allow GPU shader instrumentation to be setup for "
+            "pipelines created with it");
+        std::vector<const vkt::DescriptorSetLayout *> layouts(set_limit);
+        for (uint32_t i = 0; i < set_limit; i++) {
+            layouts[i] = &descriptor_set.layout_;  // the same one-binding layout is reused for every set index
+        }
+        vkt::PipelineLayout pipe_layout(*m_device, layouts);
+        m_errorMonitor->VerifyFound();  // the warning is checked right after pipeline layout creation
+
+        vkt::SimpleGPL pipe(*this, pipe_layout.handle(), shader_source);  // NOTE(review): presumably shader_source becomes the vertex shader - confirm in SimpleGPL
+
+        m_commandBuffer->begin();
+        m_commandBuffer->BeginRenderPass(m_renderPassBeginInfo);
+        vk::CmdBindPipeline(m_commandBuffer->handle(), VK_PIPELINE_BIND_POINT_GRAPHICS, pipe.Handle());
+        vk::CmdDraw(m_commandBuffer->handle(), 3, 1, 0, 0);
+        m_commandBuffer->EndRenderPass();
+        m_commandBuffer->end();
+
+        // No printf message is registered as expected here: there was no free set slot for the output buffer
+        m_default_queue->Submit(*m_commandBuffer);
+        m_default_queue->Wait();
+    }
+
+    // Second case: use one fewer set in the layout (so there is room now) and expect the printf output
+    {
+        std::vector<const vkt::DescriptorSetLayout *> layouts(set_limit - 1);
+        for (uint32_t i = 0; i < set_limit - 1; i++) {
+            layouts[i] = &descriptor_set.layout_;
+        }
+        vkt::PipelineLayout pipe_layout(*m_device, layouts);
+        vkt::SimpleGPL pipe(*this, pipe_layout.handle(), shader_source);
+
+        m_commandBuffer->begin();
+        m_commandBuffer->BeginRenderPass(m_renderPassBeginInfo);
+        vk::CmdBindPipeline(m_commandBuffer->handle(), VK_PIPELINE_BIND_POINT_GRAPHICS, pipe.Handle());
+        vk::CmdDraw(m_commandBuffer->handle(), 3, 1, 0, 0);
+        m_commandBuffer->EndRenderPass();
+        m_commandBuffer->end();
+
+        m_errorMonitor->SetDesiredFailureMsg(kInformationBit, "float == 3.141500");  // expected three times - presumably once per vertex of the 3-vertex draw (TODO confirm)
+        m_errorMonitor->SetDesiredFailureMsg(kInformationBit, "float == 3.141500");
+        m_errorMonitor->SetDesiredFailureMsg(kInformationBit, "float == 3.141500");
+        m_default_queue->Submit(*m_commandBuffer);
+        m_default_queue->Wait();
+        m_errorMonitor->VerifyFound();
+    }
+}
+
 // TODO - https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues/7178
 TEST_F(NegativeDebugPrintf, DISABLED_UseAllDescriptorSlotsShaderObjectReserved) {
     TEST_DESCRIPTION("Reserve a descriptor slot and proceed to use them all anyway so debug printf can't");

diff --git a/tests/unit/gpu_av_oob.cpp b/tests/unit/gpu_av_oob.cpp
@@ -887,8 +887,103 @@ TEST_F(NegativeGpuAVOOB, GPLImageLoadStoreIndependentSets) {
     m_errorMonitor->VerifyFound();
 }
 
+TEST_F(NegativeGpuAVOOB, GPLNonInlined) {  // builds the four GPL libraries by hand, links them, and expects an OOB storage-buffer error
+    TEST_DESCRIPTION("Make sure GPL works when shader modules are not inlined at pipeline creation time");
+    AddRequiredExtensions(VK_EXT_GRAPHICS_PIPELINE_LIBRARY_EXTENSION_NAME);
+    AddRequiredFeature(vkt::Feature::graphicsPipelineLibrary);
+    AddDisabledFeature(vkt::Feature::robustBufferAccess);  // NOTE(review): presumably so robustness does not mask the OOB write - confirm
+    RETURN_IF_SKIP(InitGpuAvFramework());
+    RETURN_IF_SKIP(InitState());
+    InitRenderTarget();
+
+    VkMemoryPropertyFlags reqs = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+    vkt::Buffer offset_buffer(*m_device, 4, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, reqs);
+    vkt::Buffer write_buffer(*m_device, 16, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, reqs);
+
+    OneOffDescriptorSet descriptor_set(m_device, {{0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr},
+                                                  {1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr}});
+    const vkt::PipelineLayout pipeline_layout(*m_device, {&descriptor_set.layout_});
+    descriptor_set.WriteDescriptorBufferInfo(0, offset_buffer.handle(), 0, VK_WHOLE_SIZE);
+    descriptor_set.WriteDescriptorBufferInfo(1, write_buffer.handle(), 0, VK_WHOLE_SIZE, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+    descriptor_set.UpdateDescriptorSets();
+
+    uint32_t *offset_buffer_ptr = (uint32_t *)offset_buffer.memory().map();
+    *offset_buffer_ptr = 8;  // the shader uses this as a uint index into write_buffer, which is only 16 bytes (4 uints)
+    offset_buffer.memory().unmap();
+
+    static const char vertshader[] = R"glsl(
+        #version 450
+        layout(set = 0, binding = 0) uniform Uniform { uint offset_buffer[]; };
+        layout(set = 0, binding = 1) buffer StorageBuffer { uint write_buffer[]; };
+        void main() {
+            uint index = offset_buffer[0];
+            write_buffer[index] = 0xdeadca71;
+        }
+    )glsl";
+    // Create the shaders as VkShaderObj first; their stage create infos are passed to the library pipelines below
+    VkShaderObj vs(this, vertshader, VK_SHADER_STAGE_VERTEX_BIT);
+    VkShaderObj fs(this, kFragmentMinimalGlsl, VK_SHADER_STAGE_FRAGMENT_BIT);
+
+    CreatePipelineHelper vertex_input_lib(*this);  // library 1 of 4: vertex input
+    vertex_input_lib.InitVertexInputLibInfo();
+    vertex_input_lib.CreateGraphicsPipeline(false);
+
+    // For GPU-AV tests this shrinks things so only a single fragment is executed
+    VkViewport viewport = {0, 0, 1, 1, 0, 1};
+    VkRect2D scissor = {{0, 0}, {1, 1}};
+
+    CreatePipelineHelper pre_raster_lib(*this);  // library 2 of 4: pre-rasterization, carries the vertex shader with the OOB write
+    {
+        pre_raster_lib.InitPreRasterLibInfo(&vs.GetStageCreateInfo());
+        pre_raster_lib.vp_state_ci_.pViewports = &viewport;
+        pre_raster_lib.vp_state_ci_.pScissors = &scissor;
+        pre_raster_lib.gp_ci_.layout = pipeline_layout.handle();
+        pre_raster_lib.CreateGraphicsPipeline();
+    }
+
+    CreatePipelineHelper frag_shader_lib(*this);  // library 3 of 4: fragment shader
+    {
+        frag_shader_lib.InitFragmentLibInfo(&fs.GetStageCreateInfo());
+        frag_shader_lib.gp_ci_.layout = pipeline_layout.handle();
+        frag_shader_lib.CreateGraphicsPipeline(false);
+    }
+
+    CreatePipelineHelper frag_out_lib(*this);  // library 4 of 4: fragment output
+    frag_out_lib.InitFragmentOutputLibInfo();
+    frag_out_lib.CreateGraphicsPipeline(false);
+
+    VkPipeline libraries[4] = {
+        vertex_input_lib.Handle(),
+        pre_raster_lib.Handle(),
+        frag_shader_lib.Handle(),
+        frag_out_lib.Handle(),
+    };
+    VkPipelineLibraryCreateInfoKHR link_info = vku::InitStructHelper();  // lists the four libraries for the linked pipeline below
+    link_info.libraryCount = size(libraries);
+    link_info.pLibraries = libraries;
+
+    VkGraphicsPipelineCreateInfo exe_pipe_ci = vku::InitStructHelper(&link_info);
+    exe_pipe_ci.layout = pipeline_layout.handle();
+    vkt::Pipeline exe_pipe(*m_device, exe_pipe_ci);
+
+    m_commandBuffer->begin();
+    m_commandBuffer->BeginRenderPass(m_renderPassBeginInfo);
+    vk::CmdBindPipeline(m_commandBuffer->handle(), VK_PIPELINE_BIND_POINT_GRAPHICS, exe_pipe.handle());
+    vk::CmdBindDescriptorSets(m_commandBuffer->handle(), VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout.handle(), 0, 1,
+                              &descriptor_set.set_, 0, nullptr);
+    vk::CmdDraw(m_commandBuffer->handle(), 3, 1, 0, 0);
+    m_commandBuffer->EndRenderPass();
+    m_commandBuffer->end();
+
+    m_errorMonitor->SetDesiredError("VUID-vkCmdDraw-storageBuffers-06936", 3);  // expected 3 times - presumably once per vertex of the 3-vertex draw (TODO confirm)
+
+    m_default_queue->Submit(*m_commandBuffer);
+    m_default_queue->Wait();
+    m_errorMonitor->VerifyFound();
+}
+
 TEST_F(NegativeGpuAVOOB, StorageBuffer) {
-    TEST_DESCRIPTION("Make suree OOB is still checked when result is from a BufferDeviceAddress");
+    TEST_DESCRIPTION("Make sure OOB is still checked when result is from a BufferDeviceAddress");
     SetTargetApiVersion(VK_API_VERSION_1_2);
     AddRequiredExtensions(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME);
     AddRequiredFeature(vkt::Feature::bufferDeviceAddress);