From d74b66a0a7955de529f02fd394d6f6536a96e88c Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Mon, 11 Jul 2022 20:31:21 +0200 Subject: [PATCH] AtlasEngine: Improve glyph generation performance (#13477) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #13458 added the ability to reuse tiles from our glyph atlas texture so that we stop running out of GPU memory for complex Unicode. This, however, can result in our glyph generation being a performance issue in edge cases, to the point that the application may feel outright unusable. CJK glyphs, for instance, can easily exceed the maximum atlas texture size (twice the window size), but take a significant amount of CPU and GPU time to rasterize and draw, which results in "jelly scrolling" down to ~1 FPS. This PR addresses the GPU half of that cost by drawing glyphs directly into the texture atlas without an intermediate scratchpad texture. This reduces GPU usage by 96% on my system (33% -> 2%) which improves general render performance by ~100% (15 -> 30 FPS). CPU usage remains the same, however, but that's not really something we can do anything about at this time. The atlas texture is already our primary means to reduce the CPU cost after all. ## Validation Steps Performed * Disable V-Sync for OpenConsole in NVIDIA Control Panel * Enable `debugGlyphGenerationPerformance` * Print the entire CJK block U+4E00..U+9FFF * Measure the above GPU usage and FPS improvements ✅ (Alternatively: Just scroll around and judge the "jellyness".) 
--- src/renderer/atlas/AtlasEngine.cpp | 6 +- src/renderer/atlas/AtlasEngine.h | 5 -- src/renderer/atlas/AtlasEngine.r.cpp | 91 +++++++--------------------- 3 files changed, 24 insertions(+), 78 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index edeec1fb187..815537bc483 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -929,8 +929,7 @@ void AtlasEngine::_recreateFontDependentResources() { // We're likely resizing the atlas anyways and can // thus also release any of these buffers prematurely. - _r.d2dRenderTarget.reset(); // depends on _r.atlasScratchpad - _r.atlasScratchpad.reset(); + _r.d2dRenderTarget.reset(); // depends on _r.atlasBuffer _r.atlasView.reset(); _r.atlasBuffer.reset(); } @@ -970,8 +969,6 @@ void AtlasEngine::_recreateFontDependentResources() _r.strikethroughPos = _api.fontMetrics.strikethroughPos; _r.lineThickness = _api.fontMetrics.lineThickness; _r.dpi = _api.dpi; - _r.maxEncounteredCellCount = 0; - _r.scratchpadCellWidth = 0; } { // See AtlasEngine::UpdateFont. 
@@ -1446,7 +1443,6 @@ void AtlasEngine::_emplaceGlyph(IDWriteFontFace* fontFace, size_t bufferPos1, si const auto it = _r.glyphs.insert(std::move(key), std::move(value)); valueRef = &it->second; _r.glyphQueue.emplace_back(&it->first, &it->second); - _r.maxEncounteredCellCount = std::max(_r.maxEncounteredCellCount, cellCount); } // For some reason MSVC doesn't understand that valueRef is overwritten in the branch above, resulting in: diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index d1c3ac6c3fe..94e95f6a2a7 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -857,11 +857,9 @@ namespace Microsoft::Console::Render void _setShaderResources() const; void _updateConstantBuffer() const noexcept; void _adjustAtlasSize(); - void _reserveScratchpadSize(u16 minWidth); void _processGlyphQueue(); void _drawGlyph(const AtlasQueueItem& item) const; void _drawCursor(); - void _copyScratchpadTile(uint32_t scratchpadIndex, u16x2 target, uint32_t copyFlags = 0) const noexcept; static constexpr bool debugGlyphGenerationPerformance = false; static constexpr bool debugGeneralPerformance = false || debugGlyphGenerationPerformance; @@ -906,7 +904,6 @@ namespace Microsoft::Console::Render // D2D resources wil::com_ptr atlasBuffer; wil::com_ptr atlasView; - wil::com_ptr atlasScratchpad; wil::com_ptr d2dRenderTarget; wil::com_ptr brush; wil::com_ptr textFormats[2][2]; @@ -921,8 +918,6 @@ namespace Microsoft::Console::Render u16 strikethroughPos = 0; u16 lineThickness = 0; u16 dpi = USER_DEFAULT_SCREEN_DPI; // invalidated by ApiInvalidations::Font, caches _api.dpi - u16 maxEncounteredCellCount = 0; - u16 scratchpadCellWidth = 0; u16x2 atlasSizeInPixel; // invalidated by ApiInvalidations::Font TileHashMap glyphs; TileAllocator tileAllocator; diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp index da86488f758..4af0a4728a8 100644 --- a/src/renderer/atlas/AtlasEngine.r.cpp +++ 
b/src/renderer/atlas/AtlasEngine.r.cpp @@ -31,7 +31,6 @@ using namespace Microsoft::Console::Render; try { _adjustAtlasSize(); - _reserveScratchpadSize(_r.maxEncounteredCellCount); _processGlyphQueue(); if (WI_IsFlagSet(_r.invalidations, RenderInvalidations::Cursor)) @@ -86,6 +85,7 @@ try } catch (const wil::ResultException& exception) { + // TODO: this writes to _api. return _handleException(exception); } CATCH_RETURN() @@ -159,7 +159,7 @@ void AtlasEngine::_adjustAtlasSize() desc.ArraySize = 1; desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; desc.SampleDesc = { 1, 0 }; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; THROW_IF_FAILED(_r.device->CreateTexture2D(&desc, nullptr, atlasBuffer.addressof())); THROW_IF_FAILED(_r.device->CreateShaderResourceView(atlasBuffer.get(), nullptr, atlasView.addressof())); } @@ -184,38 +184,8 @@ void AtlasEngine::_adjustAtlasSize() _r.atlasView = std::move(atlasView); _setShaderResources(); - WI_SetFlagIf(_r.invalidations, RenderInvalidations::Cursor, !copyFromExisting); -} - -void AtlasEngine::_reserveScratchpadSize(u16 minWidth) -{ - if (minWidth <= _r.scratchpadCellWidth) - { - return; - } - - // The new size is the greater of ... 
cells wide: - // * 2 - // * minWidth - // * current size * 1.5 - const auto newWidth = std::max(std::max(2, minWidth), _r.scratchpadCellWidth + (_r.scratchpadCellWidth >> 1)); - - _r.d2dRenderTarget.reset(); - _r.atlasScratchpad.reset(); - - { - D3D11_TEXTURE2D_DESC desc{}; - desc.Width = _r.cellSize.x * newWidth; - desc.Height = _r.cellSize.y; - desc.MipLevels = 1; - desc.ArraySize = 1; - desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; - desc.SampleDesc = { 1, 0 }; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; - THROW_IF_FAILED(_r.device->CreateTexture2D(&desc, nullptr, _r.atlasScratchpad.put())); - } { - const auto surface = _r.atlasScratchpad.query(); + const auto surface = _r.atlasBuffer.query(); wil::com_ptr renderingParams; DWrite_GetRenderParams(_sr.dwriteFactory.get(), &_r.gamma, &_r.cleartypeEnhancedContrast, &_r.grayscaleEnhancedContrast, renderingParams.addressof()); @@ -243,8 +213,8 @@ void AtlasEngine::_reserveScratchpadSize(u16 minWidth) _r.brush = brush.query(); } - _r.scratchpadCellWidth = _r.maxEncounteredCellCount; WI_SetAllFlags(_r.invalidations, RenderInvalidations::ConstBuffer); + WI_SetFlagIf(_r.invalidations, RenderInvalidations::Cursor, !copyFromExisting); } void AtlasEngine::_processGlyphQueue() @@ -254,10 +224,12 @@ void AtlasEngine::_processGlyphQueue() return; } + _r.d2dRenderTarget->BeginDraw(); for (const auto& pair : _r.glyphQueue) { _drawGlyph(pair); } + THROW_IF_FAILED(_r.d2dRenderTarget->EndDraw()); _r.glyphQueue.clear(); } @@ -280,7 +252,7 @@ void AtlasEngine::_drawGlyph(const AtlasQueueItem& item) const textLayout->SetTypography(_r.typography.get(), { 0, charsLength }); } - auto options = D2D1_DRAW_TEXT_OPTIONS_NONE; + auto options = D2D1_DRAW_TEXT_OPTIONS_CLIP; // D2D1_DRAW_TEXT_OPTIONS_ENABLE_COLOR_FONT enables a bunch of internal machinery // which doesn't have to run if we know we can't use it anyways in the shader. 
WI_SetFlagIf(options, D2D1_DRAW_TEXT_OPTIONS_ENABLE_COLOR_FONT, coloredGlyph); @@ -294,31 +266,29 @@ void AtlasEngine::_drawGlyph(const AtlasQueueItem& item) const _r.d2dRenderTarget->SetTextAntialiasMode(coloredGlyph ? D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE : D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE); } - _r.d2dRenderTarget->BeginDraw(); - // We could call - // _r.d2dRenderTarget->PushAxisAlignedClip(&rect, D2D1_ANTIALIAS_MODE_ALIASED); - // now to reduce the surface that needs to be cleared, but this decreases - // performance by 10% (tested using debugGlyphGenerationPerformance). - _r.d2dRenderTarget->Clear(); - _r.d2dRenderTarget->DrawTextLayout({}, textLayout.get(), _r.brush.get(), options); - THROW_IF_FAILED(_r.d2dRenderTarget->EndDraw()); - for (u32 i = 0; i < cells; ++i) { - // Specifying NO_OVERWRITE means that the system can assume that existing references to the surface that - // may be in flight on the GPU will not be affected by the update, so the copy can proceed immediately - // (avoiding either a batch flush or the system maintaining multiple copies of the resource behind the scenes). - // - // Since our shader only draws whatever is in the atlas, and since we don't replace glyph tiles that are in use, - // we can safely (?) tell the GPU that we don't overwrite parts of our atlas that are in use. 
- _copyScratchpadTile(i, coords[i], D3D11_COPY_NO_OVERWRITE); + const auto coord = coords[i]; + + D2D1_RECT_F rect; + rect.left = static_cast(coord.x) * static_cast(USER_DEFAULT_SCREEN_DPI) / static_cast(_r.dpi); + rect.top = static_cast(coord.y) * static_cast(USER_DEFAULT_SCREEN_DPI) / static_cast(_r.dpi); + rect.right = rect.left + _r.cellSizeDIP.x; + rect.bottom = rect.top + _r.cellSizeDIP.y; + + D2D1_POINT_2F origin; + origin.x = rect.left - i * _r.cellSizeDIP.x; + origin.y = rect.top; + + _r.d2dRenderTarget->PushAxisAlignedClip(&rect, D2D1_ANTIALIAS_MODE_ALIASED); + _r.d2dRenderTarget->Clear(); + _r.d2dRenderTarget->DrawTextLayout(origin, textLayout.get(), _r.brush.get(), options); + _r.d2dRenderTarget->PopAxisAlignedClip(); } } void AtlasEngine::_drawCursor() { - _reserveScratchpadSize(1); - // lineWidth is in D2D's DIPs. For instance if we have a 150-200% zoom scale we want to draw a 2px wide line. // At 150% scale lineWidth thus needs to be 1.33333... because at a zoom scale of 1.5 this results in a 2px wide line. const auto lineWidth = std::max(1.0f, static_cast((_r.dpi + USER_DEFAULT_SCREEN_DPI / 2) / USER_DEFAULT_SCREEN_DPI * USER_DEFAULT_SCREEN_DPI) / static_cast(_r.dpi)); @@ -377,19 +347,4 @@ void AtlasEngine::_drawCursor() } THROW_IF_FAILED(_r.d2dRenderTarget->EndDraw()); - - _copyScratchpadTile(0, {}); -} - -void AtlasEngine::_copyScratchpadTile(uint32_t scratchpadIndex, u16x2 target, uint32_t copyFlags) const noexcept -{ - D3D11_BOX box; - box.left = scratchpadIndex * _r.cellSize.x; - box.top = 0; - box.front = 0; - box.right = box.left + _r.cellSize.x; - box.bottom = _r.cellSize.y; - box.back = 1; -#pragma warning(suppress : 26447) // The function is declared 'noexcept' but calls function '...' which may throw exceptions (f.6). - _r.deviceContext->CopySubresourceRegion1(_r.atlasBuffer.get(), 0, target.x, target.y, 0, _r.atlasScratchpad.get(), 0, &box, copyFlags); }