Skip to content

Commit

Permalink
GPUDevice: Support pre-rotating swap chains
Browse files Browse the repository at this point in the history
  • Loading branch information
stenzek committed Dec 6, 2024
1 parent acf04ed commit f0c4568
Show file tree
Hide file tree
Showing 12 changed files with 323 additions and 36 deletions.
37 changes: 22 additions & 15 deletions src/core/gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1997,15 +1997,11 @@ GPUDevice::PresentResult GPU::RenderDisplay(GPUTexture* target, const GSVector4i

// Now we can apply the post chain.
GPUTexture* post_output_texture = PostProcessing::InternalChain.GetOutputTexture();
if (const GPUDevice::PresentResult pres = PostProcessing::InternalChain.Apply(
display_texture, m_display_depth_buffer, post_output_texture,
GSVector4i(0, 0, display_texture_view_width, display_texture_view_height), display_texture_view_width,
display_texture_view_height, m_crtc_state.display_width, m_crtc_state.display_height);
pres != GPUDevice::PresentResult::OK)
{
return pres;
}
else
if (PostProcessing::InternalChain.Apply(display_texture, m_display_depth_buffer, post_output_texture,
GSVector4i(0, 0, display_texture_view_width, display_texture_view_height),
display_texture_view_width, display_texture_view_height,
m_crtc_state.display_width,
m_crtc_state.display_height) == GPUDevice::PresentResult::OK)
{
display_texture_view_x = 0;
display_texture_view_y = 0;
Expand All @@ -2020,8 +2016,13 @@ GPUDevice::PresentResult GPU::RenderDisplay(GPUTexture* target, const GSVector4i
const bool really_postfx = (postfx && PostProcessing::DisplayChain.IsActive() && g_gpu_device->HasMainSwapChain() &&
hdformat != GPUTexture::Format::Unknown && target_width > 0 && target_height > 0 &&
PostProcessing::DisplayChain.CheckTargets(hdformat, target_width, target_height));
const GSVector4i real_draw_rect =
g_gpu_device->UsesLowerLeftOrigin() ? GPUDevice::FlipToLowerLeft(draw_rect, target_height) : draw_rect;
GSVector4i real_draw_rect = target ? draw_rect : g_gpu_device->GetMainSwapChain()->PreRotateClipRect(draw_rect);
if (g_gpu_device->UsesLowerLeftOrigin())
{
real_draw_rect = GPUDevice::FlipToLowerLeft(
real_draw_rect,
(target || really_postfx) ? target_height : g_gpu_device->GetMainSwapChain()->GetPostRotatedHeight());
}
if (really_postfx)
{
g_gpu_device->ClearRenderTarget(PostProcessing::DisplayChain.GetInputTexture(), GPUDevice::DEFAULT_CLEAR_COLOR);
Expand Down Expand Up @@ -2106,16 +2107,22 @@ GPUDevice::PresentResult GPU::RenderDisplay(GPUTexture* target, const GSVector4i
uniforms.src_size[2] = rcp_width;
uniforms.src_size[3] = rcp_height;

if (g_settings.display_rotation != DisplayRotation::Normal)
const WindowInfo::PreRotation surface_prerotation = (target || really_postfx) ?
WindowInfo::PreRotation::Identity :
g_gpu_device->GetMainSwapChain()->GetPreRotation();
if (g_settings.display_rotation != DisplayRotation::Normal ||
surface_prerotation != WindowInfo::PreRotation::Identity)
{
static constexpr const std::array<float, static_cast<size_t>(DisplayRotation::Count) - 1> rotation_radians = {{
static constexpr const std::array<float, static_cast<size_t>(DisplayRotation::Count)> rotation_radians = {{
0.0f, // Disabled
static_cast<float>(std::numbers::pi * 1.5f), // Rotate90
static_cast<float>(std::numbers::pi), // Rotate180
static_cast<float>(std::numbers::pi / 2.0), // Rotate270
}};

GSMatrix2x2::Rotation(rotation_radians[static_cast<size_t>(g_settings.display_rotation) - 1])
.store(uniforms.rotation_matrix);
const u32 rotation_idx = (static_cast<u32>(g_settings.display_rotation) + static_cast<u32>(surface_prerotation)) %
static_cast<u32>(rotation_radians.size());
GSMatrix2x2::Rotation(rotation_radians[rotation_idx]).store(uniforms.rotation_matrix);
}
else
{
Expand Down
54 changes: 51 additions & 3 deletions src/util/gpu_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,47 @@ GPUSwapChain::GPUSwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode, bool a

// Destructor is compiler-generated; it is defined out-of-line in this source file.
GPUSwapChain::~GPUSwapChain() = default;

// Remaps a clip rectangle according to this swap chain's surface pre-rotation
// (m_window_info.surface_prerotation) and returns the remapped rectangle.
//
// NOTE(review): the arithmetic treats `v` as (x, y, z, w) == (left, top, right, bottom);
// confirm against the callers that build the rectangle.
GSVector4i GPUSwapChain::PreRotateClipRect(const GSVector4i& v)
{
  const WindowInfo::PreRotation prerotation = m_window_info.surface_prerotation;
  GSVector4i rotated;

  switch (prerotation)
  {
    // No pre-rotation: the rectangle is passed through untouched.
    case WindowInfo::PreRotation::Identity:
    {
      rotated = v;
    }
    break;

    // The vertical extent, mirrored against the surface height, becomes the first/third
    // components; the original x/z pair is moved into the second/fourth components.
    case WindowInfo::PreRotation::Rotate90Clockwise:
    {
      const s32 rect_height = (v.w - v.y);
      const s32 mirrored_y = m_window_info.surface_height - v.y - rect_height;
      rotated = GSVector4i(mirrored_y, v.x, mirrored_y + rect_height, v.z);
    }
    break;

    // Both axes are mirrored against the surface dimensions; the extents are preserved.
    case WindowInfo::PreRotation::Rotate180Clockwise:
    {
      const s32 rect_width = (v.z - v.x);
      const s32 rect_height = (v.w - v.y);
      const s32 mirrored_x = m_window_info.surface_width - v.x - rect_width;
      const s32 mirrored_y = m_window_info.surface_height - v.y - rect_height;
      rotated = GSVector4i(mirrored_x, mirrored_y, mirrored_x + rect_width, mirrored_y + rect_height);
    }
    break;

    // The original y/w pair is moved into the first/third components; the horizontal
    // extent, mirrored against the surface width, becomes the second/fourth components.
    case WindowInfo::PreRotation::Rotate270Clockwise:
    {
      const s32 rect_width = (v.z - v.x);
      const s32 mirrored_x = m_window_info.surface_width - v.x - rect_width;
      rotated = GSVector4i(v.y, mirrored_x, v.w, mirrored_x + rect_width);
    }
    break;

    DefaultCaseIsUnreachable()
  }

  return rotated;
}

bool GPUSwapChain::ShouldSkipPresentingFrame()
{
// Only needed with FIFO. But since we're so fast, we allow it always.
Expand Down Expand Up @@ -674,11 +715,17 @@ void GPUDevice::RenderImGui(GPUSwapChain* swap_chain)
if (draw_data->CmdListsCount == 0 || !swap_chain)
return;

const s32 post_rotated_height = swap_chain->GetPostRotatedHeight();
SetPipeline(m_imgui_pipeline.get());
SetViewportAndScissor(0, 0, swap_chain->GetWidth(), swap_chain->GetHeight());
SetViewport(0, 0, swap_chain->GetPostRotatedWidth(), post_rotated_height);

const GSMatrix4x4 mproj = GSMatrix4x4::OffCenterOrthographicProjection(
GSMatrix4x4 mproj = GSMatrix4x4::OffCenterOrthographicProjection(
0.0f, 0.0f, static_cast<float>(swap_chain->GetWidth()), static_cast<float>(swap_chain->GetHeight()), 0.0f, 1.0f);
if (swap_chain->GetPreRotation() != WindowInfo::PreRotation::Identity)
{
mproj =
GSMatrix4x4::RotationZ(WindowInfo::GetZRotationForPreRotation(swap_chain->GetPreRotation())) * mproj;
}
PushUniformBuffer(&mproj, sizeof(mproj));

// Render command lists
Expand All @@ -701,8 +748,9 @@ void GPUDevice::RenderImGui(GPUSwapChain* swap_chain)
continue;

GSVector4i clip = GSVector4i(GSVector4::load<false>(&pcmd->ClipRect.x));
clip = swap_chain->PreRotateClipRect(clip);
if (flip)
clip = FlipToLowerLeft(clip, swap_chain->GetHeight());
clip = FlipToLowerLeft(clip, post_rotated_height);

SetScissor(clip);
SetTextureSampler(0, reinterpret_cast<GPUTexture*>(pcmd->TextureId), m_linear_sampler.get());
Expand Down
5 changes: 5 additions & 0 deletions src/util/gpu_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,10 @@ class GPUSwapChain
// Read-only accessors; every value below comes from the WindowInfo held by this swap chain.
ALWAYS_INLINE const WindowInfo& GetWindowInfo() const { return m_window_info; }
// Surface dimensions exactly as stored in the window info.
ALWAYS_INLINE u32 GetWidth() const { return m_window_info.surface_width; }
ALWAYS_INLINE u32 GetHeight() const { return m_window_info.surface_height; }
// Forwarded to WindowInfo::GetPostRotatedWidth()/GetPostRotatedHeight().
// NOTE(review): presumably the dimensions after the surface pre-rotation is applied - confirm in WindowInfo.
ALWAYS_INLINE u32 GetPostRotatedWidth() const { return m_window_info.GetPostRotatedWidth(); }
ALWAYS_INLINE u32 GetPostRotatedHeight() const { return m_window_info.GetPostRotatedHeight(); }
ALWAYS_INLINE float GetScale() const { return m_window_info.surface_scale; }
// Pre-rotation recorded for the surface (WindowInfo::surface_prerotation).
ALWAYS_INLINE WindowInfo::PreRotation GetPreRotation() const { return m_window_info.surface_prerotation; }
ALWAYS_INLINE GPUTexture::Format GetFormat() const { return m_window_info.surface_format; }

ALWAYS_INLINE GPUVSyncMode GetVSyncMode() const { return m_vsync_mode; }
Expand All @@ -517,6 +520,8 @@ class GPUSwapChain
virtual bool ResizeBuffers(u32 new_width, u32 new_height, float new_scale, Error* error) = 0;
virtual bool SetVSyncMode(GPUVSyncMode mode, bool allow_present_throttle, Error* error) = 0;

/// Remaps a clip rectangle according to the surface pre-rotation of this swap chain's window info.
/// Returns the rectangle unchanged when the pre-rotation is Identity.
GSVector4i PreRotateClipRect(const GSVector4i& v);

bool ShouldSkipPresentingFrame();
void ThrottlePresentation();

Expand Down
3 changes: 3 additions & 0 deletions src/util/opengl_context_egl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,9 @@ void OpenGLContextEGL::UpdateWindowInfoSize(WindowInfo& wi, EGLSurface surface)
{
wi.surface_width = static_cast<u16>(surface_width);
wi.surface_height = static_cast<u16>(surface_height);

if (WindowInfo::ShouldSwapDimensionsForPreRotation(wi.surface_prerotation))
std::swap(wi.surface_width, wi.surface_height);
}
else
{
Expand Down
102 changes: 94 additions & 8 deletions src/util/postprocessing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "postprocessing_shader.h"
#include "postprocessing_shader_fx.h"
#include "postprocessing_shader_glsl.h"
#include "shadergen.h"

// TODO: Remove me
#include "core/host.h"
Expand All @@ -28,9 +29,6 @@

LOG_CHANNEL(PostProcessing);

// TODO: ProgressCallbacks for shader compiling, it can be a bit slow.
// TODO: buffer width/height is wrong on resize, need to change it somehow.

namespace PostProcessing {
template<typename T>
static u32 ParseVector(std::string_view line, ShaderOption::ValueVector* values);
Expand Down Expand Up @@ -369,6 +367,11 @@ PostProcessing::Chain::Chain(const char* section) : m_section(section)

// Destructor is compiler-generated; it is defined out-of-line in this source file.
PostProcessing::Chain::~Chain() = default;

// Selects one of the chain's two textures based on the parity of the stage count:
// the output texture for an odd number of stages, the input texture otherwise.
GPUTexture* PostProcessing::Chain::GetTextureUnusedAtEndOfChain() const
{
  const bool odd_stage_count = ((m_stages.size() % 2) != 0);
  if (odd_stage_count)
    return m_output_texture.get();

  return m_input_texture.get();
}

// A chain is active only when it is enabled and has at least one stage.
bool PostProcessing::Chain::IsActive() const
{
  if (!m_enabled)
    return false;

  return !m_stages.empty();
}
if (m_target_format == target_format && m_target_width == target_width && m_target_height == target_height)
return true;

Error error;

// (Re)build the rotate-and-copy pipeline. Only done for chains other than the internal
// one, and only when the pipeline does not exist yet or the target format has changed
// (the pipeline is created with target_format as its color format).
if (!IsInternalChain() && (!m_rotated_copy_pipeline || m_target_format != target_format))
{
  const RenderAPI rapi = g_gpu_device->GetRenderAPI();
  const ShaderGen shadergen(rapi, ShaderGen::GetShaderLanguageForAPI(rapi), false, false);
  const std::unique_ptr<GPUShader> vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(),
                                                                    shadergen.GenerateRotateVertexShader(), &error);
  const std::unique_ptr<GPUShader> fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
                                                                    shadergen.GenerateRotateFragmentShader(), &error);
  if (!vso || !fso)
  {
    ERROR_LOG("Failed to compile post-processing rotate shaders: {}", error.GetDescription());
    return false;
  }

  // Give each shader its own debug label: the vertex shader gets the "VS" name and the
  // fragment shader gets the "FS" name.
  GL_OBJECT_NAME(vso, "Post-processing rotate blit VS");
  GL_OBJECT_NAME(fso, "Post-processing rotate blit FS");

  // Single-texture pipeline with push constants, no vertex input layout, and no
  // culling, depth testing or blending. It renders to one color target of target_format.
  const GPUPipeline::GraphicsConfig config = {.layout = GPUPipeline::Layout::SingleTextureAndPushConstants,
                                              .primitive = GPUPipeline::Primitive::Triangles,
                                              .input_layout = {},
                                              .rasterization = GPUPipeline::RasterizationState::GetNoCullState(),
                                              .depth = GPUPipeline::DepthState::GetNoTestsState(),
                                              .blend = GPUPipeline::BlendState::GetNoBlendingState(),
                                              .vertex_shader = vso.get(),
                                              .geometry_shader = nullptr,
                                              .fragment_shader = fso.get(),
                                              .color_formats = {target_format},
                                              .depth_format = GPUTexture::Format::Unknown,
                                              .samples = 1,
                                              .per_sample_shading = false,
                                              .render_pass_flags = GPUPipeline::NoRenderPassFlags};
  m_rotated_copy_pipeline = g_gpu_device->CreatePipeline(config, &error);
  if (!m_rotated_copy_pipeline)
  {
    ERROR_LOG("Failed to compile post-processing rotate pipeline: {}", error.GetDescription());
    return false;
  }
  GL_OBJECT_NAME(m_rotated_copy_pipeline, "Post-processing rotate pipeline");
}

// In case any allocs fail.
DestroyTextures();

if (!(m_input_texture =
g_gpu_device->FetchTexture(target_width, target_height, 1, 1, 1, GPUTexture::Type::RenderTarget,
target_format, GPUTexture::Flags::None)) ||
target_format, GPUTexture::Flags::None, nullptr, 0, &error)) ||
!(m_output_texture =
g_gpu_device->FetchTexture(target_width, target_height, 1, 1, 1, GPUTexture::Type::RenderTarget,
target_format, GPUTexture::Flags::None)))
target_format, GPUTexture::Flags::None, nullptr, 0, &error)))
{
ERROR_LOG("Failed to create input/output textures: {}", error.GetDescription());
DestroyTextures();
return false;
}
Expand All @@ -583,7 +628,6 @@ bool PostProcessing::Chain::CheckTargets(GPUTexture::Format target_format, u32 t

m_wants_depth_buffer = false;

Error error;
for (size_t i = 0; i < m_stages.size(); i++)
{
Shader* const shader = m_stages[i].get();
Expand Down Expand Up @@ -622,6 +666,11 @@ void PostProcessing::Chain::DestroyTextures()
g_gpu_device->RecycleTexture(std::move(m_input_texture));
}

// Releases the rotate-and-copy pipeline owned by this chain, leaving the pointer empty.
void PostProcessing::Chain::DestroyPipelines()
{
  m_rotated_copy_pipeline = nullptr;
}

GPUDevice::PresentResult PostProcessing::Chain::Apply(GPUTexture* input_color, GPUTexture* input_depth,
GPUTexture* final_target, GSVector4i final_rect, s32 orig_width,
s32 orig_height, s32 native_width, s32 native_height)
Expand All @@ -633,13 +682,24 @@ GPUDevice::PresentResult PostProcessing::Chain::Apply(GPUTexture* input_color, G
if (input_depth)
input_depth->MakeReadyForSampling();

GPUTexture* draw_final_target = final_target;
const WindowInfo::PreRotation prerotation =
final_target ? WindowInfo::PreRotation::Identity : g_gpu_device->GetMainSwapChain()->GetPreRotation();
if (prerotation != WindowInfo::PreRotation::Identity)
{
// We have prerotation and post processing. This is messy, since we need to run the shader on the "real" size,
// then copy it across to the rotated image. We can use the input or output texture from the chain, whichever
// was not the last that was drawn to.
draw_final_target = GetTextureUnusedAtEndOfChain();
}

for (const std::unique_ptr<Shader>& stage : m_stages)
{
const bool is_final = (stage.get() == m_stages.back().get());

if (const GPUDevice::PresentResult pres =
stage->Apply(input_color, input_depth, is_final ? final_target : output, final_rect, orig_width, orig_height,
native_width, native_height, m_target_width, m_target_height);
stage->Apply(input_color, input_depth, is_final ? draw_final_target : output, final_rect, orig_width,
orig_height, native_width, native_height, m_target_width, m_target_height);
pres != GPUDevice::PresentResult::OK)
{
return pres;
Expand All @@ -653,6 +713,30 @@ GPUDevice::PresentResult PostProcessing::Chain::Apply(GPUTexture* input_color, G
}
}

if (prerotation != WindowInfo::PreRotation::Identity)
{
draw_final_target->MakeReadyForSampling();

// Rotate and blit to final swap chain.
GPUSwapChain* const swap_chain = g_gpu_device->GetMainSwapChain();
if (const GPUDevice::PresentResult pres = g_gpu_device->BeginPresent(swap_chain);
pres != GPUDevice::PresentResult::OK)
{
return pres;
}

GL_PUSH_FMT("Apply swap chain pre-rotation");

const GSMatrix2x2 rotmat = GSMatrix2x2::Rotation(WindowInfo::GetZRotationForPreRotation(prerotation));
g_gpu_device->SetPipeline(m_rotated_copy_pipeline.get());
g_gpu_device->PushUniformBuffer(&rotmat, sizeof(rotmat));
g_gpu_device->SetTextureSampler(0, draw_final_target, g_gpu_device->GetNearestSampler());
g_gpu_device->SetViewportAndScissor(0, 0, swap_chain->GetPostRotatedWidth(), swap_chain->GetPostRotatedHeight());
g_gpu_device->Draw(3, 0);

GL_POP();
}

return GPUDevice::PresentResult::OK;
}

Expand All @@ -675,6 +759,7 @@ void PostProcessing::Shutdown()
s_samplers.clear();
ForAllChains([](Chain& chain) {
chain.ClearStages();
chain.DestroyPipelines();
chain.DestroyTextures();
});
}
Expand All @@ -691,6 +776,7 @@ bool PostProcessing::ReloadShaders()

ForAllChains([](Chain& chain) {
chain.ClearStages();
chain.DestroyPipelines();
chain.DestroyTextures();
chain.LoadStages();
});
Expand Down
6 changes: 6 additions & 0 deletions src/util/postprocessing.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

class Timer;

class GPUPipeline;
class GPUSampler;
class GPUTexture;

Expand Down Expand Up @@ -117,6 +118,9 @@ class Chain
ALWAYS_INLINE GPUTexture* GetInputTexture() const { return m_input_texture.get(); }
ALWAYS_INLINE GPUTexture* GetOutputTexture() const { return m_output_texture.get(); }

/// Returns either the input or output texture, whichever isn't the destination after the final pass.
GPUTexture* GetTextureUnusedAtEndOfChain() const;

bool IsActive() const;
bool IsInternalChain() const;

Expand All @@ -125,6 +129,7 @@ class Chain
void LoadStages();
void ClearStages();
void DestroyTextures();
void DestroyPipelines();

/// Temporarily toggles post-processing on/off.
void Toggle();
Expand All @@ -151,6 +156,7 @@ class Chain
std::vector<std::unique_ptr<PostProcessing::Shader>> m_stages;
std::unique_ptr<GPUTexture> m_input_texture;
std::unique_ptr<GPUTexture> m_output_texture;
std::unique_ptr<GPUPipeline> m_rotated_copy_pipeline;
};

// [display_name, filename]
Expand Down
Loading

0 comments on commit f0c4568

Please sign in to comment.