Skip to content

Commit

Permalink
Merge pull request #1590 from colincornaby/apple-silicon-gamma-correc…
Browse files Browse the repository at this point in the history
…tion-optimization

Optimized gamma correction on Apple Silicon
  • Loading branch information
Hoikas authored Jul 2, 2024
2 parents 4442c40 + 8b1d619 commit 0116eff
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 35 deletions.
117 changes: 88 additions & 29 deletions Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,17 +334,22 @@ void plMetalDevice::BeginNewRenderPass()
renderPassDescriptor->depthAttachment()->setStoreAction(MTL::StoreActionDontCare);

if (fSampleCount == 1) {
if (NeedsPostprocessing()) {
// We only need the intermediate texture for post processing on
// non-tilers. Tilers can direct read/write on the fragment texture.
if (NeedsPostprocessing() && !SupportsTileMemory()) {
renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentUnprocessedOutputTexture);
} else {
renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentOutputTexture);
}
} else {
renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentMSAAOutputTexture);

// if we need postprocessing, output to the main pass texture
// if we need postprocessing, output to the intermediate main pass texture
// otherwise we can go straight to the drawable
if (NeedsPostprocessing()) {

// We only need the intermediate texture for post processing on
// non-tilers. Tilers can direct read/write on the fragment texture.
if (NeedsPostprocessing() && !SupportsTileMemory()) {
renderPassDescriptor->colorAttachments()->object(0)->setResolveTexture(fCurrentUnprocessedOutputTexture);
} else {
renderPassDescriptor->colorAttachments()->object(0)->setResolveTexture(fCurrentFragmentOutputTexture);
Expand Down Expand Up @@ -450,6 +455,10 @@ plMetalDevice::plMetalDevice()

fMetalDevice = MTL::CreateSystemDefaultDevice();
fCommandQueue = fMetalDevice->newCommandQueue();

// Only known tiler on Apple devices are Apple GPUs.
// Apple recommends a family check for tile memory support.
fSupportsTileMemory = fMetalDevice->supportsFamily(MTL::GPUFamilyApple1);

// set up all the depth stencil states
MTL::DepthStencilDescriptor* depthDescriptor = MTL::DepthStencilDescriptor::alloc()->init();
Expand Down Expand Up @@ -1041,15 +1050,18 @@ void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable)
fCurrentDrawableDepthTexture = fMetalDevice->newTexture(depthTextureDescriptor);
}
}

// Do we need to create a unprocessed output texture?
// If the depth needs to be rebuilt - we probably need to rebuild this one too
if ((fCurrentUnprocessedOutputTexture && depthNeedsRebuild) || (fCurrentUnprocessedOutputTexture == nullptr && NeedsPostprocessing())) {
MTL::TextureDescriptor* mainPassDescriptor = MTL::TextureDescriptor::texture2DDescriptor(drawable->texture()->pixelFormat(), drawable->texture()->width(), drawable->texture()->height(), false);
mainPassDescriptor->setStorageMode(MTL::StorageModePrivate);
mainPassDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageRenderTarget);
fCurrentUnprocessedOutputTexture->release();
fCurrentUnprocessedOutputTexture = fMetalDevice->newTexture(mainPassDescriptor);

// We only need to allocate an intermediate texture if we don't have tile memory.
if (!SupportsTileMemory()) {
// Do we need to create a unprocessed output texture?
// If the depth needs to be rebuilt - we probably need to rebuild this one too
if ((fCurrentUnprocessedOutputTexture && depthNeedsRebuild) || (fCurrentUnprocessedOutputTexture == nullptr && NeedsPostprocessing())) {
MTL::TextureDescriptor* mainPassDescriptor = MTL::TextureDescriptor::texture2DDescriptor(drawable->texture()->pixelFormat(), drawable->texture()->width(), drawable->texture()->height(), false);
mainPassDescriptor->setStorageMode(MTL::StorageModePrivate);
mainPassDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageRenderTarget);
fCurrentUnprocessedOutputTexture->release();
fCurrentUnprocessedOutputTexture = fMetalDevice->newTexture(mainPassDescriptor);
}
}

fCurrentDrawable = drawable->retain();
Expand Down Expand Up @@ -1205,13 +1217,14 @@ void plMetalDevice::SubmitCommandBuffer()
fBlitCommandEncoder = nullptr;
}

fCurrentRenderTargetCommandEncoder->endEncoding();
fCurrentRenderTargetCommandEncoder->release();
fCurrentRenderTargetCommandEncoder = nil;

if (NeedsPostprocessing()) {
PostprocessIntoDrawable();
}
// Post processing will end the main render pass.
// On Apple Silicon - this code will attempt to combine render passes,
// but past this point developer should not rely on the main render pass
// being available.
PreparePostProcessing();
PostprocessIntoDrawable();
FinalizePostProcessing();

fCurrentCommandBuffer->presentDrawable(fCurrentDrawable);
fCurrentCommandBuffer->commit();
Expand Down Expand Up @@ -1242,9 +1255,17 @@ void plMetalDevice::CreateGammaAdjustState()
MTL::RenderPipelineDescriptor* gammaDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();

gammaDescriptor->setVertexFunction(fShaderLibrary->newFunction(MTLSTR("gammaCorrectVertex"))->autorelease());
gammaDescriptor->setFragmentFunction(fShaderLibrary->newFunction(MTLSTR("gammaCorrectFragment"))->autorelease());

gammaDescriptor->colorAttachments()->object(0)->setPixelFormat(fFramebufferFormat);
if (SupportsTileMemory()) {
// Tiler GPU version does an in place transform
// Because it's in place we need to describe all main pass buffers including depth and MSAA
gammaDescriptor->colorAttachments()->object(0)->setPixelFormat(fCurrentFragmentOutputTexture->pixelFormat());
gammaDescriptor->setDepthAttachmentPixelFormat(fCurrentDepthFormat);
gammaDescriptor->setSampleCount(CurrentTargetSampleCount());
gammaDescriptor->setFragmentFunction(fShaderLibrary->newFunction(MTLSTR("gammaCorrectFragmentInPlace"))->autorelease());
} else {
gammaDescriptor->colorAttachments()->object(0)->setPixelFormat(fFramebufferFormat);
gammaDescriptor->setFragmentFunction(fShaderLibrary->newFunction(MTLSTR("gammaCorrectFragment"))->autorelease());
}

NS::Error* error;
fGammaAdjustState->release();
Expand All @@ -1254,17 +1275,28 @@ void plMetalDevice::CreateGammaAdjustState()

void plMetalDevice::PostprocessIntoDrawable()
{
if (!NeedsPostprocessing()) {
return;
}

if (!fGammaAdjustState) {
CreateGammaAdjustState();
}

// Gamma adjust
MTL::RenderPassDescriptor* gammaPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor();
gammaPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionDontCare);
gammaPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentDrawable->texture());
gammaPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionStore);

MTL::RenderCommandEncoder* gammaAdjustEncoder = fCurrentCommandBuffer->renderCommandEncoder(gammaPassDescriptor);
MTL::RenderCommandEncoder* gammaAdjustEncoder;
if (SupportsTileMemory()) {
// On tilers we can read/write directly on the framebuffer, carry on, no new render pass needed.
gammaAdjustEncoder = CurrentRenderCommandEncoder();
} else {
// On non-tilers, we need to create a new render pass to use our old render target as a texture
// source and the output drawable as the target to do post-processing.
MTL::RenderPassDescriptor* gammaPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor();
gammaPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionDontCare);
gammaPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentDrawable->texture());
gammaPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionStore);

gammaAdjustEncoder = fCurrentCommandBuffer->renderCommandEncoder(gammaPassDescriptor);
}

gammaAdjustEncoder->setRenderPipelineState(fGammaAdjustState);

Expand All @@ -1279,7 +1311,34 @@ void plMetalDevice::PostprocessIntoDrawable()
gammaAdjustEncoder->setFragmentTexture(fCurrentUnprocessedOutputTexture, 0);
gammaAdjustEncoder->setFragmentTexture(fGammaLUTTexture, 1);
gammaAdjustEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4));
gammaAdjustEncoder->endEncoding();

// On non-tilers - we created a render pass that we own,
// and we're responsible for ending it
if (!SupportsTileMemory()) {
gammaAdjustEncoder->endEncoding();
}
}

void plMetalDevice::PreparePostProcessing()
{
// If we're on a tiler GPU - we don't need to create a new
// render pass. Keep the main render pass alive.
if (!SupportsTileMemory()) {
fCurrentRenderTargetCommandEncoder->endEncoding();
fCurrentRenderTargetCommandEncoder->release();
fCurrentRenderTargetCommandEncoder = nil;
}
}

void plMetalDevice::FinalizePostProcessing()
{
// If we were on a tiler, post processing took ownership of the main
// render pass so we're responsible for finalizing it.
if (SupportsTileMemory()) {
fCurrentRenderTargetCommandEncoder->endEncoding();
fCurrentRenderTargetCommandEncoder->release();
fCurrentRenderTargetCommandEncoder = nil;
}
}

size_t plMetalDevice::plMetalPipelineRecordHashFunction ::operator()(plMetalPipelineRecord const& s) const noexcept
Expand Down
22 changes: 16 additions & 6 deletions Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -259,20 +259,30 @@ class plMetalDevice

void LoadLibrary();

void BeginNewRenderPass();
void ReleaseSamplerStates();
void ReleaseFramebufferObjects();

// Blur states
std::unordered_map<float, NS::Object*> fBlurShaders;

// MARK: - Post processing
private:
bool NeedsPostprocessing() const
{
return fGammaLUTTexture != nullptr;
}
void PreparePostProcessing();
void FinalizePostProcessing();
void PostprocessIntoDrawable();
void CreateGammaAdjustState();
MTL::RenderPipelineState* fGammaAdjustState;

void BeginNewRenderPass();
void ReleaseSamplerStates();
void ReleaseFramebufferObjects();

// Blur states
std::unordered_map<float, NS::Object*> fBlurShaders;
// MARK: - Device capabilities
private:
/// Returns true if the device supports tile memory features such as directly writable render buffers.
inline bool SupportsTileMemory() const { return fSupportsTileMemory; }
bool fSupportsTileMemory;
};

#endif

0 comments on commit 0116eff

Please sign in to comment.