diff --git a/samples/PixelShaders/Retro.hlsl b/samples/PixelShaders/Retro.hlsl index cfefe04e7da..0073f2ca87c 100644 --- a/samples/PixelShaders/Retro.hlsl +++ b/samples/PixelShaders/Retro.hlsl @@ -2,47 +2,46 @@ Texture2D shaderTexture; SamplerState samplerState; -cbuffer PixelShaderSettings { - float Time; - float Scale; - float2 Resolution; - float4 Background; +cbuffer PixelShaderSettings +{ + float time; + float scale; + float2 resolution; + float4 background; }; -#define SCANLINE_FACTOR 0.5 -#define SCALED_SCANLINE_PERIOD Scale -#define SCALED_GAUSSIAN_SIGMA (2.0*Scale) +#define SCANLINE_FACTOR 0.5f +#define SCALED_SCANLINE_PERIOD scale +#define SCALED_GAUSSIAN_SIGMA (2.0f * scale) static const float M_PI = 3.14159265f; float Gaussian2D(float x, float y, float sigma) { - return 1/(sigma*sqrt(2*M_PI)) * exp(-0.5*(x*x + y*y)/sigma/sigma); + return 1 / (sigma * sqrt(2 * M_PI)) * exp(-0.5 * (x * x + y * y) / sigma / sigma); } float4 Blur(Texture2D input, float2 tex_coord, float sigma) { - uint width, height; + float width, height; shaderTexture.GetDimensions(width, height); - float texelWidth = 1.0f/width; - float texelHeight = 1.0f/height; + float texelWidth = 1.0f / width; + float texelHeight = 1.0f / height; float4 color = { 0, 0, 0, 0 }; - int sampleCount = 13; + float sampleCount = 13; - for (int x = 0; x < sampleCount; x++) + for (float x = 0; x < sampleCount; x++) { float2 samplePos = { 0, 0 }; + samplePos.x = tex_coord.x + (x - sampleCount / 2.0f) * texelWidth; - samplePos.x = tex_coord.x + (x - sampleCount/2) * texelWidth; - for (int y = 0; y < sampleCount; y++) + for (float y = 0; y < sampleCount; y++) { - samplePos.y = tex_coord.y + (y - sampleCount/2) * texelHeight; - if (samplePos.x <= 0 || samplePos.y <= 0 || samplePos.x >= width || samplePos.y >= height) continue; - - color += input.Sample(samplerState, samplePos) * Gaussian2D((x - sampleCount/2), (y - sampleCount/2), sigma); + samplePos.y = tex_coord.y + (y - sampleCount / 2.0f) * texelHeight; + 
color += input.Sample(samplerState, samplePos) * Gaussian2D(x - sampleCount / 2.0f, y - sampleCount / 2.0f, sigma); } } @@ -51,7 +50,7 @@ float4 Blur(Texture2D input, float2 tex_coord, float sigma) float SquareWave(float y) { - return 1 - (floor(y / SCALED_SCANLINE_PERIOD) % 2) * SCANLINE_FACTOR; + return 1.0f - (floor(y / SCALED_SCANLINE_PERIOD) % 2.0f) * SCANLINE_FACTOR; } float4 Scanline(float4 color, float4 pos) @@ -60,9 +59,9 @@ float4 Scanline(float4 color, float4 pos) // TODO:GH#3929 make this configurable. // Remove the && false to draw scanlines everywhere. - if (length(color.rgb) < 0.2 && false) + if (length(color.rgb) < 0.2f && false) { - return color + wave*0.1; + return color + wave * 0.1f; } else { @@ -70,14 +69,14 @@ float4 Scanline(float4 color, float4 pos) } } +// clang-format off float4 main(float4 pos : SV_POSITION, float2 tex : TEXCOORD) : SV_TARGET +// clang-format on { - Texture2D input = shaderTexture; - // TODO:GH#3930 Make these configurable in some way. - float4 color = input.Sample(samplerState, tex); - color += Blur(input, tex, SCALED_GAUSSIAN_SIGMA)*0.3; + float4 color = shaderTexture.Sample(samplerState, tex); + color += Blur(shaderTexture, tex, SCALED_GAUSSIAN_SIGMA) * 0.3f; color = Scanline(color, pos); return color; -} \ No newline at end of file +} diff --git a/src/cascadia/TerminalControl/ControlCore.cpp b/src/cascadia/TerminalControl/ControlCore.cpp index 37ecdedc9c5..89ab6fe774f 100644 --- a/src/cascadia/TerminalControl/ControlCore.cpp +++ b/src/cascadia/TerminalControl/ControlCore.cpp @@ -313,7 +313,7 @@ namespace winrt::Microsoft::Terminal::Control::implementation // Tell the DX Engine to notify us when the swap chain changes. 
// We do this after we initially set the swapchain so as to avoid unnecessary callbacks (and locking problems) - _renderEngine->SetCallback(std::bind(&ControlCore::_renderEngineSwapChainChanged, this)); + _renderEngine->SetCallback([this](auto handle) { _renderEngineSwapChainChanged(handle); }); _renderEngine->SetRetroTerminalEffect(_settings->RetroTerminalEffect()); _renderEngine->SetPixelShaderPath(_settings->PixelShaderPath()); @@ -566,24 +566,20 @@ namespace winrt::Microsoft::Terminal::Control::implementation void ControlCore::ToggleShaderEffects() { + const auto path = _settings->PixelShaderPath(); auto lock = _terminal->LockForWriting(); // Originally, this action could be used to enable the retro effects // even when they're set to `false` in the settings. If the user didn't // specify a custom pixel shader, manually enable the legacy retro // effect first. This will ensure that a toggle off->on will still work, // even if they currently have retro effect off. - if (_settings->PixelShaderPath().empty() && !_renderEngine->GetRetroTerminalEffect()) + if (path.empty()) { - // SetRetroTerminalEffect to true will enable the effect. In this - // case, the shader effect will already be disabled (because neither - // a pixel shader nor the retro effects were originally requested). - // So we _don't_ want to toggle it again below, because that would - // toggle it back off. - _renderEngine->SetRetroTerminalEffect(true); + _renderEngine->SetRetroTerminalEffect(!_renderEngine->GetRetroTerminalEffect()); } else { - _renderEngine->ToggleShaderEffects(); + _renderEngine->SetPixelShaderPath(_renderEngine->GetPixelShaderPath().empty() ? std::wstring_view{ path } : std::wstring_view{}); } // Always redraw after toggling effects. This way even if the control // does not have focus it will update immediately. 
@@ -1517,25 +1513,14 @@ namespace winrt::Microsoft::Terminal::Control::implementation } } - uint64_t ControlCore::SwapChainHandle() const - { - // This is called by: - // * TermControl::RenderEngineSwapChainChanged, who is only registered - // after Core::Initialize() is called. - // * TermControl::_InitializeTerminal, after the call to Initialize, for - // _AttachDxgiSwapChainToXaml. - // In both cases, we'll have a _renderEngine by then. - return reinterpret_cast(_renderEngine->GetSwapChainHandle()); - } - void ControlCore::_rendererWarning(const HRESULT hr) { _RendererWarningHandlers(*this, winrt::make(hr)); } - void ControlCore::_renderEngineSwapChainChanged() + void ControlCore::_renderEngineSwapChainChanged(const HANDLE handle) { - _SwapChainChangedHandlers(*this, nullptr); + _SwapChainChangedHandlers(*this, winrt::box_value(reinterpret_cast(handle))); } void ControlCore::_rendererBackgroundColorChanged() diff --git a/src/cascadia/TerminalControl/ControlCore.h b/src/cascadia/TerminalControl/ControlCore.h index 2a3dcd34406..9c265d335f5 100644 --- a/src/cascadia/TerminalControl/ControlCore.h +++ b/src/cascadia/TerminalControl/ControlCore.h @@ -65,7 +65,6 @@ namespace winrt::Microsoft::Terminal::Control::implementation void SizeChanged(const double width, const double height); void ScaleChanged(const double scale); - uint64_t SwapChainHandle() const; void AdjustFontSize(int fontSizeDelta); void ResetFontSize(); @@ -301,7 +300,7 @@ namespace winrt::Microsoft::Terminal::Control::implementation #pragma region RendererCallbacks void _rendererWarning(const HRESULT hr); - void _renderEngineSwapChainChanged(); + void _renderEngineSwapChainChanged(const HANDLE handle); void _rendererBackgroundColorChanged(); void _rendererTabColorChanged(); #pragma endregion diff --git a/src/cascadia/TerminalControl/ControlCore.idl b/src/cascadia/TerminalControl/ControlCore.idl index 2ed932dda8e..6ac1c6f6fe6 100644 --- a/src/cascadia/TerminalControl/ControlCore.idl +++ 
b/src/cascadia/TerminalControl/ControlCore.idl @@ -67,8 +67,6 @@ namespace Microsoft.Terminal.Control IControlAppearance UnfocusedAppearance { get; }; Boolean HasUnfocusedAppearance(); - UInt64 SwapChainHandle { get; }; - Windows.Foundation.Size FontSize { get; }; String FontFaceName { get; }; UInt16 FontWeight { get; }; diff --git a/src/cascadia/TerminalControl/TermControl.cpp b/src/cascadia/TerminalControl/TermControl.cpp index a5b8ef77055..3cd4e0e9eec 100644 --- a/src/cascadia/TerminalControl/TermControl.cpp +++ b/src/cascadia/TerminalControl/TermControl.cpp @@ -703,19 +703,24 @@ namespace winrt::Microsoft::Terminal::Control::implementation return _core.ConnectionState(); } - winrt::fire_and_forget TermControl::RenderEngineSwapChainChanged(IInspectable /*sender*/, IInspectable /*args*/) + winrt::fire_and_forget TermControl::RenderEngineSwapChainChanged(IInspectable /*sender*/, IInspectable args) { // This event is only registered during terminal initialization, // so we don't need to check _initializedTerminal. - // We also don't lock for things that come back from the renderer. - auto weakThis{ get_weak() }; + const auto weakThis{ get_weak() }; + + // Create a copy of the swap chain HANDLE in args, since we don't own that parameter. + // By the time we return from the co_await below, it might be deleted already. 
+ winrt::handle handle; + const auto processHandle = GetCurrentProcess(); + const auto sourceHandle = reinterpret_cast(winrt::unbox_value(args)); + THROW_IF_WIN32_BOOL_FALSE(DuplicateHandle(processHandle, sourceHandle, processHandle, handle.put(), 0, FALSE, DUPLICATE_SAME_ACCESS)); co_await wil::resume_foreground(Dispatcher()); if (auto control{ weakThis.get() }) { - const auto chainHandle = reinterpret_cast(control->_core.SwapChainHandle()); - _AttachDxgiSwapChainToXaml(chainHandle); + _AttachDxgiSwapChainToXaml(handle.get()); } } @@ -802,21 +807,7 @@ namespace winrt::Microsoft::Terminal::Control::implementation } _interactivity.Initialize(); - _AttachDxgiSwapChainToXaml(reinterpret_cast(_core.SwapChainHandle())); - - // Tell the DX Engine to notify us when the swap chain changes. We do - // this after we initially set the swapchain so as to avoid unnecessary - // callbacks (and locking problems) _core.SwapChainChanged({ get_weak(), &TermControl::RenderEngineSwapChainChanged }); - - // !! LOAD BEARING !! - // Make sure you enable painting _AFTER_ calling _AttachDxgiSwapChainToXaml - // - // If you EnablePainting first, then you almost certainly won't have any - // problems when running in Debug. However, in Release, you'll run into - // issues where the Renderer starts trying to paint before we've - // actually attached the swapchain to anything, and the DxEngine is not - // prepared to handle that. _core.EnablePainting(); auto bufferHeight = _core.BufferHeight(); diff --git a/src/renderer/atlas/AtlasEngine.api.cpp b/src/renderer/atlas/AtlasEngine.api.cpp index be5630d9fd4..8861704b5f6 100644 --- a/src/renderer/atlas/AtlasEngine.api.cpp +++ b/src/renderer/atlas/AtlasEngine.api.cpp @@ -16,6 +16,7 @@ // Disable a bunch of warnings which get in the way of writing performant code. #pragma warning(disable : 26429) // Symbol 'data' is never tested for nullness, it can be marked as not_null (f.23). 
#pragma warning(disable : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4). +#pragma warning(disable : 26459) // You called an STL function '...' with a raw pointer parameter at position '...' that may be unsafe [...]. #pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). #pragma warning(disable : 26482) // Only index into arrays using constant expressions (bounds.2). @@ -272,7 +273,7 @@ CATCH_RETURN() DWRITE_TEXT_METRICS metrics; RETURN_IF_FAILED(textLayout->GetMetrics(&metrics)); - *pResult = static_cast(std::ceil(metrics.width)) > _api.fontMetrics.cellSize.x; + *pResult = static_cast(std::ceilf(metrics.width)) > _api.fontMetrics.cellSize.x; return S_OK; } @@ -290,25 +291,19 @@ HRESULT AtlasEngine::Enable() noexcept return S_OK; } -[[nodiscard]] bool AtlasEngine::GetRetroTerminalEffect() const noexcept +[[nodiscard]] std::wstring_view AtlasEngine::GetPixelShaderPath() noexcept { - return false; + return _api.customPixelShaderPath; } -[[nodiscard]] float AtlasEngine::GetScaling() const noexcept +[[nodiscard]] bool AtlasEngine::GetRetroTerminalEffect() const noexcept { - return static_cast(_api.dpi) / static_cast(USER_DEFAULT_SCREEN_DPI); + return _api.useRetroTerminalEffect; } -[[nodiscard]] HANDLE AtlasEngine::GetSwapChainHandle() +[[nodiscard]] float AtlasEngine::GetScaling() const noexcept { - if (WI_IsFlagSet(_api.invalidations, ApiInvalidations::Device)) - { - _createResources(); - WI_ClearFlag(_api.invalidations, ApiInvalidations::Device); - } - - return _api.swapChainHandle.get(); + return static_cast(_api.dpi) / static_cast(USER_DEFAULT_SCREEN_DPI); } [[nodiscard]] Microsoft::Console::Types::Viewport AtlasEngine::GetViewportInCharacters(const Types::Viewport& viewInPixels) const noexcept @@ -331,23 +326,22 @@ void AtlasEngine::SetAntialiasingMode(const D2D1_TEXT_ANTIALIAS_MODE antialiasin if (_api.antialiasingMode != mode) { _api.antialiasingMode = mode; - 
_resolveAntialiasingMode(); + _resolveTransparencySettings(); WI_SetFlag(_api.invalidations, ApiInvalidations::Font); } } -void AtlasEngine::SetCallback(std::function pfn) noexcept +void AtlasEngine::SetCallback(std::function pfn) noexcept { _api.swapChainChangedCallback = std::move(pfn); } void AtlasEngine::EnableTransparentBackground(const bool isTransparent) noexcept { - const auto mixin = !isTransparent ? 0xff000000 : 0x00000000; - if (_api.backgroundOpaqueMixin != mixin) + if (_api.enableTransparentBackground != isTransparent) { - _api.backgroundOpaqueMixin = mixin; - _resolveAntialiasingMode(); + _api.enableTransparentBackground = isTransparent; + _resolveTransparencySettings(); WI_SetFlag(_api.invalidations, ApiInvalidations::SwapChain); } } @@ -368,10 +362,22 @@ void AtlasEngine::SetForceFullRepaintRendering(bool enable) noexcept void AtlasEngine::SetPixelShaderPath(std::wstring_view value) noexcept { + if (_api.customPixelShaderPath != value) + { + _api.customPixelShaderPath = value; + _resolveTransparencySettings(); + WI_SetFlag(_api.invalidations, ApiInvalidations::Device); + } } void AtlasEngine::SetRetroTerminalEffect(bool enable) noexcept { + if (_api.useRetroTerminalEffect != enable) + { + _api.useRetroTerminalEffect = enable; + _resolveTransparencySettings(); + WI_SetFlag(_api.invalidations, ApiInvalidations::Device); + } } void AtlasEngine::SetSelectionBackground(const COLORREF color, const float alpha) noexcept @@ -386,6 +392,11 @@ void AtlasEngine::SetSelectionBackground(const COLORREF color, const float alpha void AtlasEngine::SetSoftwareRendering(bool enable) noexcept { + if (_api.useSoftwareRendering != enable) + { + _api.useSoftwareRendering = enable; + WI_SetFlag(_api.invalidations, ApiInvalidations::Device); + } } void AtlasEngine::SetWarningCallback(std::function pfn) noexcept @@ -411,10 +422,6 @@ void AtlasEngine::SetWarningCallback(std::function pfn) noexcept return S_OK; } -void AtlasEngine::ToggleShaderEffects() noexcept -{ -} - 
[[nodiscard]] HRESULT AtlasEngine::UpdateFont(const FontInfoDesired& fontInfoDesired, FontInfo& fontInfo, const std::unordered_map& features, const std::unordered_map& axes) noexcept { static constexpr std::array fallbackFaceNames{ static_cast(nullptr), L"Consolas", L"Lucida Console", L"Courier New" }; @@ -450,13 +457,15 @@ void AtlasEngine::UpdateHyperlinkHoveredId(const uint16_t hoveredId) noexcept #pragma endregion -void AtlasEngine::_resolveAntialiasingMode() noexcept +void AtlasEngine::_resolveTransparencySettings() noexcept { // If the user asks for ClearType, but also for a transparent background // (which our ClearType shader doesn't simultaneously support) // then we need to sneakily force the renderer to grayscale AA. - const auto forceGrayscaleAA = _api.antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE && !_api.backgroundOpaqueMixin; - _api.realizedAntialiasingMode = forceGrayscaleAA ? D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE : _api.antialiasingMode; + _api.realizedAntialiasingMode = _api.enableTransparentBackground && _api.antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE ? D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE : _api.antialiasingMode; + // An opaque background allows us to use true "independent" flips. See AtlasEngine::_createSwapChain(). + // We can't enable them if custom shaders are specified, because it's unknown, whether they support opaque inputs. + _api.backgroundOpaqueMixin = _api.enableTransparentBackground || !_api.customPixelShaderPath.empty() || _api.useRetroTerminalEffect ? 0x00000000 : 0xff000000; } void AtlasEngine::_updateFont(const wchar_t* faceName, const FontInfoDesired& fontInfoDesired, FontInfo& fontInfo, const std::unordered_map& features, const std::unordered_map& axes) @@ -605,18 +614,53 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo // Point sizes are commonly treated at a 72 DPI scale // (including by OpenType), whereas DirectWrite uses 96 DPI. 
// Since we want the height in px we multiply by the display's DPI. - const auto fontSizeInPx = std::ceil(requestedSize.Y / 72.0 * _api.dpi); - - const auto designUnitsPerPx = fontSizeInPx / static_cast(metrics.designUnitsPerEm); - const auto ascentInPx = static_cast(metrics.ascent) * designUnitsPerPx; - const auto descentInPx = static_cast(metrics.descent) * designUnitsPerPx; - const auto lineGapInPx = static_cast(metrics.lineGap) * designUnitsPerPx; - const auto advanceWidthInPx = static_cast(glyphMetrics.advanceWidth) * designUnitsPerPx; - - const auto halfGapInPx = lineGapInPx / 2.0; - const auto baseline = std::ceil(ascentInPx + halfGapInPx); - const auto cellWidth = gsl::narrow(std::ceil(advanceWidthInPx)); - const auto cellHeight = gsl::narrow(std::ceil(baseline + descentInPx + halfGapInPx)); + const auto fontSizeInDIP = requestedSize.Y / 72.0f * 96.0f; + const auto fontSizeInPx = requestedSize.Y / 72.0f * _api.dpi; + + const auto designUnitsPerPx = fontSizeInPx / static_cast(metrics.designUnitsPerEm); + const auto ascent = static_cast(metrics.ascent) * designUnitsPerPx; + const auto descent = static_cast(metrics.descent) * designUnitsPerPx; + const auto lineGap = static_cast(metrics.lineGap) * designUnitsPerPx; + const auto underlinePosition = static_cast(-metrics.underlinePosition) * designUnitsPerPx; + const auto underlineThickness = static_cast(metrics.underlineThickness) * designUnitsPerPx; + const auto strikethroughPosition = static_cast(-metrics.strikethroughPosition) * designUnitsPerPx; + const auto strikethroughThickness = static_cast(metrics.strikethroughThickness) * designUnitsPerPx; + + const auto advanceWidth = static_cast(glyphMetrics.advanceWidth) * designUnitsPerPx; + + const auto halfGap = lineGap / 2.0f; + const auto baseline = std::roundf(ascent + halfGap); + const auto lineHeight = std::roundf(baseline + descent + halfGap); + const auto underlinePos = std::roundf(baseline + underlinePosition); + const auto underlineWidth = std::max(1.0f, 
std::roundf(underlineThickness)); + const auto strikethroughPos = std::roundf(baseline + strikethroughPosition); + const auto strikethroughWidth = std::max(1.0f, std::roundf(strikethroughThickness)); + const auto thinLineWidth = std::max(1.0f, std::roundf(underlineThickness / 2.0f)); + + // For double underlines we loosely follow what Word does: + // 1. The lines are half the width of an underline (= thinLineWidth) + // 2. Ideally the bottom line is aligned with the bottom of the underline + // 3. The top underline is vertically in the middle between baseline and ideal bottom underline + // 4. If the top line gets too close to the baseline the underlines are shifted downwards + // 5. The minimum gap between the two lines appears to be similar to Tex (1.2pt) + // (Additional notes below.) + + // 2. + auto doubleUnderlinePosBottom = underlinePos + underlineWidth - thinLineWidth; + // 3. Since we don't align the center of our two lines, but rather the top borders + // we need to subtract half a line width from our center point. + auto doubleUnderlinePosTop = std::roundf((baseline + doubleUnderlinePosBottom - thinLineWidth) / 2.0f); + // 4. + doubleUnderlinePosTop = std::max(doubleUnderlinePosTop, baseline + thinLineWidth); + // 5. The gap is only the distance _between_ the lines, but we need the distance from the + // top border of the top and bottom lines, which includes an additional line width. + const auto doubleUnderlineGap = std::max(1.0f, std::roundf(1.2f / 72.0f * _api.dpi)); + doubleUnderlinePosBottom = std::max(doubleUnderlinePosBottom, doubleUnderlinePosTop + doubleUnderlineGap + thinLineWidth); + // Our cells can't overlap each other so we additionally clamp the bottom line to be inside the cell boundaries. 
+ doubleUnderlinePosBottom = std::min(doubleUnderlinePosBottom, lineHeight - thinLineWidth); + + const auto cellWidth = gsl::narrow(std::roundf(advanceWidth)); + const auto cellHeight = gsl::narrow(lineHeight); { til::size coordSize; @@ -637,28 +681,31 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo if (fontMetrics) { - const auto underlineOffsetInPx = static_cast(-metrics.underlinePosition) * designUnitsPerPx; - const auto underlineThicknessInPx = static_cast(metrics.underlineThickness) * designUnitsPerPx; - const auto strikethroughOffsetInPx = static_cast(-metrics.strikethroughPosition) * designUnitsPerPx; - const auto strikethroughThicknessInPx = static_cast(metrics.strikethroughThickness) * designUnitsPerPx; - const auto lineThickness = gsl::narrow(std::round(std::min(underlineThicknessInPx, strikethroughThicknessInPx))); - const auto underlinePos = gsl::narrow(std::ceil(baseline + underlineOffsetInPx - lineThickness / 2.0)); - const auto strikethroughPos = gsl::narrow(std::round(baseline + strikethroughOffsetInPx - lineThickness / 2.0)); - - auto fontName = wil::make_process_heap_string(requestedFaceName); - const auto fontWeight = gsl::narrow(requestedWeight); + std::wstring fontName{ requestedFaceName }; + const auto fontWeightU16 = gsl::narrow_cast(requestedWeight); + const auto underlinePosU16 = gsl::narrow_cast(underlinePos); + const auto underlineWidthU16 = gsl::narrow_cast(underlineWidth); + const auto strikethroughPosU16 = gsl::narrow_cast(strikethroughPos); + const auto strikethroughWidthU16 = gsl::narrow_cast(strikethroughWidth); + const auto doubleUnderlinePosTopU16 = gsl::narrow_cast(doubleUnderlinePosTop); + const auto doubleUnderlinePosBottomU16 = gsl::narrow_cast(doubleUnderlinePosBottom); + const auto thinLineWidthU16 = gsl::narrow_cast(thinLineWidth); // NOTE: From this point onward no early returns or throwing code should exist, // as we might cause _api to be in an inconsistent state otherwise. 
fontMetrics->fontCollection = std::move(fontCollection); fontMetrics->fontName = std::move(fontName); - fontMetrics->fontSizeInDIP = static_cast(fontSizeInPx / static_cast(_api.dpi) * 96.0); - fontMetrics->baselineInDIP = static_cast(baseline / static_cast(_api.dpi) * 96.0); + fontMetrics->fontSizeInDIP = fontSizeInDIP; + fontMetrics->baselineInDIP = baseline / static_cast(_api.dpi) * 96.0f; + fontMetrics->advanceScale = cellWidth / advanceWidth; fontMetrics->cellSize = { cellWidth, cellHeight }; - fontMetrics->fontWeight = fontWeight; - fontMetrics->underlinePos = underlinePos; - fontMetrics->strikethroughPos = strikethroughPos; - fontMetrics->lineThickness = lineThickness; + fontMetrics->fontWeight = fontWeightU16; + fontMetrics->underlinePos = underlinePosU16; + fontMetrics->underlineWidth = underlineWidthU16; + fontMetrics->strikethroughPos = strikethroughPosU16; + fontMetrics->strikethroughWidth = strikethroughWidthU16; + fontMetrics->doubleUnderlinePos = { doubleUnderlinePosTopU16, doubleUnderlinePosBottomU16 }; + fontMetrics->thinLineWidth = thinLineWidthU16; } } diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index 6357b3b52ec..91046f34d43 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -4,10 +4,11 @@ #include "pch.h" #include "AtlasEngine.h" +#include +#include #include #include -#include "../base/FontCache.h" #include "../../interactivity/win32/CustomWindowMessages.h" // #### NOTE #### @@ -20,153 +21,12 @@ // Disable a bunch of warnings which get in the way of writing performant code. #pragma warning(disable : 26429) // Symbol 'data' is never tested for nullness, it can be marked as not_null (f.23). #pragma warning(disable : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4). +#pragma warning(disable : 26459) // You called an STL function '...' with a raw pointer parameter at position '...' that may be unsafe [...]. 
#pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). #pragma warning(disable : 26482) // Only index into arrays using constant expressions (bounds.2). using namespace Microsoft::Console::Render; -#pragma warning(push) -#pragma warning(disable : 26447) // The function is declared 'noexcept' but calls function 'operator()()' which may throw exceptions (f.6). -__declspec(noinline) static void showOOMWarning() noexcept -{ - [[maybe_unused]] static const auto once = []() { - std::thread t{ []() noexcept { - MessageBoxW(nullptr, L"This application is using a highly experimental text rendering engine and has run out of memory. Text rendering will start to behave irrationally and you should restart this process.", L"Out Of Memory", MB_ICONERROR | MB_OK); - } }; - t.detach(); - return false; - }(); -} -#pragma warning(pop) - -struct TextAnalyzer final : IDWriteTextAnalysisSource, IDWriteTextAnalysisSink -{ - constexpr TextAnalyzer(const std::vector& text, std::vector& results) noexcept : - _text{ text }, _results{ results } - { - Ensures(_text.size() <= UINT32_MAX); - } - - // TextAnalyzer will be allocated on the stack and reference counting is pointless because of that. - // The debug version will assert that we don't leak any references though. 
-#ifdef NDEBUG - ULONG __stdcall AddRef() noexcept override - { - return 1; - } - - ULONG __stdcall Release() noexcept override - { - return 1; - } -#else - ULONG _refCount = 1; - - ~TextAnalyzer() - { - assert(_refCount == 1); - } - - ULONG __stdcall AddRef() noexcept override - { - return ++_refCount; - } - - ULONG __stdcall Release() noexcept override - { - return --_refCount; - } -#endif - - HRESULT __stdcall QueryInterface(const IID& riid, void** ppvObject) noexcept override - { - __assume(ppvObject != nullptr); - - if (IsEqualGUID(riid, __uuidof(IDWriteTextAnalysisSource)) || IsEqualGUID(riid, __uuidof(IDWriteTextAnalysisSink))) - { - *ppvObject = this; - return S_OK; - } - - *ppvObject = nullptr; - return E_NOINTERFACE; - } - - HRESULT __stdcall GetTextAtPosition(UINT32 textPosition, const WCHAR** textString, UINT32* textLength) noexcept override - { - // Writing to address 0 is a crash in practice. Just what we want. - __assume(textString != nullptr); - __assume(textLength != nullptr); - - const auto size = gsl::narrow_cast(_text.size()); - textPosition = std::min(textPosition, size); - *textString = _text.data() + textPosition; - *textLength = size - textPosition; - return S_OK; - } - - HRESULT __stdcall GetTextBeforePosition(UINT32 textPosition, const WCHAR** textString, UINT32* textLength) noexcept override - { - // Writing to address 0 is a crash in practice. Just what we want. - __assume(textString != nullptr); - __assume(textLength != nullptr); - - const auto size = gsl::narrow_cast(_text.size()); - textPosition = std::min(textPosition, size); - *textString = _text.data(); - *textLength = textPosition; - return S_OK; - } - - DWRITE_READING_DIRECTION __stdcall GetParagraphReadingDirection() noexcept override - { - return DWRITE_READING_DIRECTION_LEFT_TO_RIGHT; - } - - HRESULT __stdcall GetLocaleName(UINT32 textPosition, UINT32* textLength, const WCHAR** localeName) noexcept override - { - // Writing to address 0 is a crash in practice. 
Just what we want. - __assume(textLength != nullptr); - __assume(localeName != nullptr); - - *textLength = gsl::narrow_cast(_text.size()) - textPosition; - *localeName = nullptr; - return S_OK; - } - - HRESULT __stdcall GetNumberSubstitution(UINT32 textPosition, UINT32* textLength, IDWriteNumberSubstitution** numberSubstitution) noexcept override - { - return E_NOTIMPL; - } - - HRESULT __stdcall SetScriptAnalysis(UINT32 textPosition, UINT32 textLength, const DWRITE_SCRIPT_ANALYSIS* scriptAnalysis) noexcept override - try - { - _results.emplace_back(AtlasEngine::TextAnalyzerResult{ textPosition, textLength, scriptAnalysis->script, static_cast(scriptAnalysis->shapes), 0 }); - return S_OK; - } - CATCH_RETURN() - - HRESULT __stdcall SetLineBreakpoints(UINT32 textPosition, UINT32 textLength, const DWRITE_LINE_BREAKPOINT* lineBreakpoints) noexcept override - { - return E_NOTIMPL; - } - - HRESULT __stdcall SetBidiLevel(UINT32 textPosition, UINT32 textLength, UINT8 explicitLevel, UINT8 resolvedLevel) noexcept override - { - return E_NOTIMPL; - } - - HRESULT __stdcall SetNumberSubstitution(UINT32 textPosition, UINT32 textLength, IDWriteNumberSubstitution* numberSubstitution) noexcept override - { - return E_NOTIMPL; - } - -private: - const std::vector& _text; - std::vector& _results; -}; - #pragma warning(suppress : 26455) // Default constructor may not throw. Declare it 'noexcept' (f.6). AtlasEngine::AtlasEngine() { @@ -252,6 +112,7 @@ try { _r.selectionColor = _api.selectionColor; WI_SetFlag(_r.invalidations, RenderInvalidations::ConstBuffer); + WI_ClearFlag(_api.invalidations, ApiInvalidations::Settings); } // Equivalent to InvalidateAll(). 
@@ -302,75 +163,125 @@ try } #endif - if (_api.invalidatedRows == invalidatedRowsAll) + if constexpr (debugGlyphGenerationPerformance) + { + _r.glyphs = {}; + _r.tileAllocator = TileAllocator{ _api.fontMetrics.cellSize, _api.sizeInPixel }; + } + if constexpr (debugTextParsingPerformance) { - // Skip all the partial updates, since we redraw everything anyways. - _api.invalidatedCursorArea = invalidatedAreaNone; - _api.invalidatedRows = { 0, _api.cellCount.y }; + _api.invalidatedRows = invalidatedRowsAll; _api.scrollOffset = 0; } - else + + // Clamp invalidation rects into valid value ranges. + { + _api.invalidatedCursorArea.left = std::min(_api.invalidatedCursorArea.left, _api.cellCount.x); + _api.invalidatedCursorArea.top = std::min(_api.invalidatedCursorArea.top, _api.cellCount.y); + _api.invalidatedCursorArea.right = clamp(_api.invalidatedCursorArea.right, _api.invalidatedCursorArea.left, _api.cellCount.x); + _api.invalidatedCursorArea.bottom = clamp(_api.invalidatedCursorArea.bottom, _api.invalidatedCursorArea.top, _api.cellCount.y); + } { - // Clamp invalidation rects into valid value ranges. 
- { - _api.invalidatedCursorArea.left = std::min(_api.invalidatedCursorArea.left, _api.cellCount.x); - _api.invalidatedCursorArea.top = std::min(_api.invalidatedCursorArea.top, _api.cellCount.y); - _api.invalidatedCursorArea.right = clamp(_api.invalidatedCursorArea.right, _api.invalidatedCursorArea.left, _api.cellCount.x); - _api.invalidatedCursorArea.bottom = clamp(_api.invalidatedCursorArea.bottom, _api.invalidatedCursorArea.top, _api.cellCount.y); - } - { - _api.invalidatedRows.x = std::min(_api.invalidatedRows.x, _api.cellCount.y); - _api.invalidatedRows.y = clamp(_api.invalidatedRows.y, _api.invalidatedRows.x, _api.cellCount.y); - } + _api.invalidatedRows.x = std::min(_api.invalidatedRows.x, _api.cellCount.y); + _api.invalidatedRows.y = clamp(_api.invalidatedRows.y, _api.invalidatedRows.x, _api.cellCount.y); + } + { + const auto limit = gsl::narrow_cast(_api.cellCount.y & 0x7fff); + _api.scrollOffset = gsl::narrow_cast(clamp(_api.scrollOffset, -limit, limit)); + } + + // Scroll the buffer by the given offset and mark the newly uncovered rows as "invalid". + if (_api.scrollOffset != 0) + { + const auto nothingInvalid = _api.invalidatedRows.x == _api.invalidatedRows.y; + const auto offset = static_cast(_api.scrollOffset) * _api.cellCount.x; + + if (_api.scrollOffset < 0) { - const auto limit = gsl::narrow_cast(_api.cellCount.y & 0x7fff); - _api.scrollOffset = gsl::narrow_cast(clamp(_api.scrollOffset, -limit, limit)); + // Scroll up (for instance when new text is being written at the end of the buffer). + const u16 endRow = _api.cellCount.y + _api.scrollOffset; + _api.invalidatedRows.x = nothingInvalid ? 
endRow : std::min(_api.invalidatedRows.x, endRow); + _api.invalidatedRows.y = _api.cellCount.y; + + // scrollOffset/offset = -1 + // +----------+ +----------+ + // | | | xxxxxxxxx| + dst < beg + // | xxxxxxxxx| -> |xxxxxxx | + src | < beg - offset + // |xxxxxxx | | | | v + // +----------+ +----------+ v < end + { + const auto beg = _r.cells.begin(); + const auto end = _r.cells.end(); + std::move(beg - offset, end, beg); + } + { + const auto beg = _r.cellGlyphMapping.begin(); + const auto end = _r.cellGlyphMapping.end(); + std::move(beg - offset, end, beg); + } } - - // Scroll the buffer by the given offset and mark the newly uncovered rows as "invalid". - if (_api.scrollOffset != 0) + else { - const auto nothingInvalid = _api.invalidatedRows.x == _api.invalidatedRows.y; - const auto offset = static_cast(_api.scrollOffset) * _api.cellCount.x; - const auto data = _r.cells.data(); - auto count = _r.cells.size(); -#pragma warning(suppress : 26494) // Variable 'dst' is uninitialized. Always initialize an object (type.5). - Cell* dst; -#pragma warning(suppress : 26494) // Variable 'src' is uninitialized. Always initialize an object (type.5). - Cell* src; - - if (_api.scrollOffset < 0) + // Scroll down. + _api.invalidatedRows.x = 0; + _api.invalidatedRows.y = nothingInvalid ? _api.scrollOffset : std::max(_api.invalidatedRows.y, _api.scrollOffset); + + // scrollOffset/offset = 1 + // +----------+ +----------+ + // | xxxxxxxxx| | | + src < beg + // |xxxxxxx | -> | xxxxxxxxx| | ^ + // | | |xxxxxxx | v | < end - offset + // +----------+ +----------+ + dst < end { - // Scroll up (for instance when new text is being written at the end of the buffer). - dst = data; - src = data - offset; - count += offset; - - const u16 endRow = _api.cellCount.y + _api.scrollOffset; - _api.invalidatedRows.x = nothingInvalid ? 
endRow : std::min(_api.invalidatedRows.x, endRow); - _api.invalidatedRows.y = _api.cellCount.y; + const auto beg = _r.cells.begin(); + const auto end = _r.cells.end(); + std::move_backward(beg, end - offset, end); } - else { - // Scroll down. - dst = data + offset; - src = data; - count -= offset; - - _api.invalidatedRows.x = 0; - _api.invalidatedRows.y = nothingInvalid ? _api.scrollOffset : std::max(_api.invalidatedRows.y, _api.scrollOffset); + const auto beg = _r.cellGlyphMapping.begin(); + const auto end = _r.cellGlyphMapping.end(); + std::move_backward(beg, end - offset, end); } - - memmove(dst, src, count * sizeof(Cell)); } } - _api.dirtyRect = til::rect{ - 0, - _api.invalidatedRows.x, - _api.cellCount.x, - _api.invalidatedRows.y, - }; + _api.dirtyRect = til::rect{ 0, _api.invalidatedRows.x, _api.cellCount.x, _api.invalidatedRows.y }; + _r.dirtyRect = _api.dirtyRect; + _r.scrollOffset = _api.scrollOffset; + + // This is an important block of code for our TileHashMap. + // We only process glyphs within the dirtyRect, but glyphs outside of the + // dirtyRect are still in use and shouldn't be discarded. This is critical + // if someone uses a tool like tmux to split the terminal horizontally. + // If they then print a lot of Unicode text on just one side, we have to + // ensure that the (for example) plain ASCII glyphs on the other half of the + // viewport are still retained. This bit of code "refreshes" those glyphs and + // brings them to the front of the LRU queue to prevent them from being reused. + { + const std::array ranges{ { + { 0, _api.dirtyRect.top }, + { _api.dirtyRect.bottom, _api.cellCount.y }, + } }; + const auto stride = static_cast(_r.cellCount.x); + + for (const auto& p : ranges) + { + // We (ab)use the .x/.y members of the til::point as the + // respective [from,to) range of rows we need to makeNewest(). 
+ const auto from = p.x; + const auto to = p.y; + + for (auto y = from; y < to; ++y) + { + auto it = _r.cellGlyphMapping.data() + stride * y; + const auto end = it + stride; + for (; it != end; ++it) + { + _r.glyphs.makeNewest(*it); + } + } + } + } return S_OK; } @@ -392,29 +303,6 @@ try } CATCH_RETURN() -[[nodiscard]] bool AtlasEngine::RequiresContinuousRedraw() noexcept -{ - return continuousRedraw; -} - -void AtlasEngine::WaitUntilCanRender() noexcept -{ - if constexpr (!debugGeneralPerformance) - { - if (_r.frameLatencyWaitableObject) - { - WaitForSingleObjectEx(_r.frameLatencyWaitableObject.get(), 100, true); -#ifndef NDEBUG - _r.frameLatencyWaitableObjectUsed = true; -#endif - } - else - { - Sleep(8); - } - } -} - [[nodiscard]] HRESULT AtlasEngine::PrepareForTeardown(_Out_ bool* const pForcePaint) noexcept { RETURN_HR_IF_NULL(E_INVALIDARG, pForcePaint); @@ -523,6 +411,7 @@ try rect.narrow_bottom(), }; _setCellFlags(u16rect, CellFlags::Selected, CellFlags::Selected); + _r.dirtyRect |= rect; return S_OK; } CATCH_RETURN() @@ -549,9 +438,10 @@ try } // Clear the previous cursor - if (_api.invalidatedCursorArea.non_empty()) + if (const auto r = _api.invalidatedCursorArea; r.non_empty()) { - _setCellFlags(_api.invalidatedCursorArea, CellFlags::Cursor, CellFlags::None); + _setCellFlags(r, CellFlags::Cursor, CellFlags::None); + _r.dirtyRect |= til::rect{ r.left, r.top, r.right, r.bottom }; } if (options.isOn) @@ -559,11 +449,13 @@ try const auto point = options.coordCursor; // TODO: options.coordCursor can contain invalid out of bounds coordinates when // the window is being resized and the cursor is on the last line of the viewport. 
- const auto x = gsl::narrow_cast(clamp(point.X, 0, _r.cellCount.x - 1)); - const auto y = gsl::narrow_cast(clamp(point.Y, 0, _r.cellCount.y - 1)); - const auto right = gsl::narrow_cast(x + 1 + (options.fIsDoubleWidth & (options.cursorType != CursorType::VerticalBar))); + const auto x = gsl::narrow_cast(clamp(point.X, 0, _r.cellCount.x - 1)); + const auto y = gsl::narrow_cast(clamp(point.Y, 0, _r.cellCount.y - 1)); + const auto cursorWidth = 1 + (options.fIsDoubleWidth & (options.cursorType != CursorType::VerticalBar)); + const auto right = gsl::narrow_cast(clamp(x + cursorWidth, 0, _r.cellCount.x - 0)); const auto bottom = gsl::narrow_cast(y + 1); _setCellFlags({ x, y, right, bottom }, CellFlags::Cursor, CellFlags::Cursor); + _r.dirtyRect |= til::rect{ x, y, right, bottom }; } return S_OK; @@ -598,7 +490,7 @@ try } const u32x2 newColors{ gsl::narrow_cast(fg), gsl::narrow_cast(bg) }; - const AtlasKeyAttributes attributes{ 0, textAttributes.IsIntense(), textAttributes.IsItalic(), 0 }; + const AtlasKeyAttributes attributes{ 0, textAttributes.IsIntense() && renderSettings.GetRenderMode(RenderSettings::Mode::IntenseIsBold), textAttributes.IsItalic(), 0 }; if (_api.attributes != attributes) { @@ -685,39 +577,50 @@ void AtlasEngine::_createResources() { wil::com_ptr deviceContext; - // Why D3D11_CREATE_DEVICE_PREVENT_INTERNAL_THREADING_OPTIMIZATIONS: - // This flag prevents the driver from creating a large thread pool for things like shader computations - // that would be advantageous for games. For us this has only a minimal performance benefit, - // but comes with a large memory usage overhead. At the time of writing the Nvidia - // driver launches $cpu_thread_count more worker threads without this flag. 
- static constexpr std::array driverTypes{ - std::pair{ D3D_DRIVER_TYPE_HARDWARE, D3D11_CREATE_DEVICE_PREVENT_INTERNAL_THREADING_OPTIMIZATIONS }, - std::pair{ D3D_DRIVER_TYPE_WARP, static_cast(0) }, - }; static constexpr std::array featureLevels{ D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1, + D3D_FEATURE_LEVEL_10_0, + D3D_FEATURE_LEVEL_9_3, + D3D_FEATURE_LEVEL_9_2, + D3D_FEATURE_LEVEL_9_1, }; - auto hr = S_OK; - for (const auto& [driverType, additionalFlags] : driverTypes) + auto hr = E_UNEXPECTED; + + if (!_api.useSoftwareRendering) { + // Why D3D11_CREATE_DEVICE_PREVENT_INTERNAL_THREADING_OPTIMIZATIONS: + // This flag prevents the driver from creating a large thread pool for things like shader computations + // that would be advantageous for games. For us this has only a minimal performance benefit, + // but comes with a large memory usage overhead. At the time of writing the Nvidia + // driver launches $cpu_thread_count more worker threads without this flag. 
hr = D3D11CreateDevice( /* pAdapter */ nullptr, - /* DriverType */ driverType, + /* DriverType */ D3D_DRIVER_TYPE_HARDWARE, /* Software */ nullptr, - /* Flags */ deviceFlags | additionalFlags, + /* Flags */ deviceFlags | D3D11_CREATE_DEVICE_PREVENT_INTERNAL_THREADING_OPTIMIZATIONS, + /* pFeatureLevels */ featureLevels.data(), + /* FeatureLevels */ gsl::narrow_cast(featureLevels.size()), + /* SDKVersion */ D3D11_SDK_VERSION, + /* ppDevice */ _r.device.put(), + /* pFeatureLevel */ nullptr, + /* ppImmediateContext */ deviceContext.put()); + } + if (FAILED(hr)) + { + hr = D3D11CreateDevice( + /* pAdapter */ nullptr, + /* DriverType */ D3D_DRIVER_TYPE_WARP, + /* Software */ nullptr, + /* Flags */ deviceFlags, /* pFeatureLevels */ featureLevels.data(), /* FeatureLevels */ gsl::narrow_cast(featureLevels.size()), /* SDKVersion */ D3D11_SDK_VERSION, /* ppDevice */ _r.device.put(), /* pFeatureLevel */ nullptr, /* ppImmediateContext */ deviceContext.put()); - if (SUCCEEDED(hr)) - { - break; - } } THROW_IF_FAILED(hr); @@ -736,17 +639,144 @@ void AtlasEngine::_createResources() } #endif // NDEBUG - // Our constant buffer will never get resized + const auto featureLevel = _r.device->GetFeatureLevel(); + { - D3D11_BUFFER_DESC desc{}; - desc.ByteWidth = sizeof(ConstBuffer); - desc.Usage = D3D11_USAGE_DEFAULT; - desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - THROW_IF_FAILED(_r.device->CreateBuffer(&desc, nullptr, _r.constantBuffer.put())); + wil::com_ptr dxgiAdapter; + THROW_IF_FAILED(_r.device.query()->GetParent(__uuidof(dxgiAdapter), dxgiAdapter.put_void())); + THROW_IF_FAILED(dxgiAdapter->GetParent(__uuidof(_r.dxgiFactory), _r.dxgiFactory.put_void())); + + DXGI_ADAPTER_DESC1 desc; + THROW_IF_FAILED(dxgiAdapter->GetDesc1(&desc)); + _r.d2dMode = debugForceD2DMode || featureLevel < D3D_FEATURE_LEVEL_10_0 || WI_IsAnyFlagSet(desc.Flags, DXGI_ADAPTER_FLAG_REMOTE | DXGI_ADAPTER_FLAG_SOFTWARE); } - THROW_IF_FAILED(_r.device->CreateVertexShader(&shader_vs[0], sizeof(shader_vs), nullptr, 
_r.vertexShader.put())); - THROW_IF_FAILED(_r.device->CreatePixelShader(&shader_ps[0], sizeof(shader_ps), nullptr, _r.pixelShader.put())); + if (!_r.d2dMode) + { + // Our constant buffer will never get resized + { + D3D11_BUFFER_DESC desc{}; + desc.ByteWidth = sizeof(ConstBuffer); + desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + THROW_IF_FAILED(_r.device->CreateBuffer(&desc, nullptr, _r.constantBuffer.put())); + } + + THROW_IF_FAILED(_r.device->CreateVertexShader(&shader_vs[0], sizeof(shader_vs), nullptr, _r.vertexShader.put())); + THROW_IF_FAILED(_r.device->CreatePixelShader(&shader_ps[0], sizeof(shader_ps), nullptr, _r.pixelShader.put())); + + if (!_api.customPixelShaderPath.empty()) + { + const char* target = nullptr; + switch (featureLevel) + { + case D3D_FEATURE_LEVEL_10_0: + target = "ps_4_0"; + break; + case D3D_FEATURE_LEVEL_10_1: + target = "ps_4_1"; + break; + default: + target = "ps_5_0"; + break; + } + + static constexpr auto flags = D3DCOMPILE_PACK_MATRIX_COLUMN_MAJOR | D3DCOMPILE_ENABLE_STRICTNESS | D3DCOMPILE_WARNINGS_ARE_ERRORS +#ifdef NDEBUG + | D3DCOMPILE_OPTIMIZATION_LEVEL3; +#else + | D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION; +#endif + + wil::com_ptr error; + wil::com_ptr blob; + const auto hr = D3DCompileFromFile( + /* pFileName */ _api.customPixelShaderPath.c_str(), + /* pDefines */ nullptr, + /* pInclude */ D3D_COMPILE_STANDARD_FILE_INCLUDE, + /* pEntrypoint */ "main", + /* pTarget */ target, + /* Flags1 */ flags, + /* Flags2 */ 0, + /* ppCode */ blob.addressof(), + /* ppErrorMsgs */ error.addressof()); + + // Unless we can determine otherwise, assume this shader requires evaluation every frame + _r.requiresContinuousRedraw = true; + + if (SUCCEEDED(hr)) + { + THROW_IF_FAILED(_r.device->CreatePixelShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, _r.customPixelShader.put())); + + // Try to determine whether the shader uses the Time variable + wil::com_ptr reflector; + if 
(SUCCEEDED_LOG(D3DReflect(blob->GetBufferPointer(), blob->GetBufferSize(), IID_PPV_ARGS(reflector.put())))) + { + if (ID3D11ShaderReflectionConstantBuffer* constantBufferReflector = reflector->GetConstantBufferByIndex(0)) // shader buffer + { + if (ID3D11ShaderReflectionVariable* variableReflector = constantBufferReflector->GetVariableByIndex(0)) // time + { + D3D11_SHADER_VARIABLE_DESC variableDescriptor; + if (SUCCEEDED_LOG(variableReflector->GetDesc(&variableDescriptor))) + { + // only if time is used + _r.requiresContinuousRedraw = WI_IsFlagSet(variableDescriptor.uFlags, D3D_SVF_USED); + } + } + } + } + } + else + { + if (error) + { + LOG_HR_MSG(hr, "%*hs", error->GetBufferSize(), error->GetBufferPointer()); + } + else + { + LOG_HR(hr); + } + if (_api.warningCallback) + { + _api.warningCallback(D2DERR_SHADER_COMPILE_FAILED); + } + } + } + else if (_api.useRetroTerminalEffect) + { + THROW_IF_FAILED(_r.device->CreatePixelShader(&custom_shader_ps[0], sizeof(custom_shader_ps), nullptr, _r.customPixelShader.put())); + // We know the built-in retro shader doesn't require continuous redraw. 
+ _r.requiresContinuousRedraw = false; + } + + if (_r.customPixelShader) + { + THROW_IF_FAILED(_r.device->CreateVertexShader(&custom_shader_vs[0], sizeof(custom_shader_vs), nullptr, _r.customVertexShader.put())); + + { + D3D11_BUFFER_DESC desc{}; + desc.ByteWidth = sizeof(CustomConstBuffer); + desc.Usage = D3D11_USAGE_DYNAMIC; + desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + THROW_IF_FAILED(_r.device->CreateBuffer(&desc, nullptr, _r.customShaderConstantBuffer.put())); + } + + { + D3D11_SAMPLER_DESC desc{}; + desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + desc.AddressU = D3D11_TEXTURE_ADDRESS_BORDER; + desc.AddressV = D3D11_TEXTURE_ADDRESS_BORDER; + desc.AddressW = D3D11_TEXTURE_ADDRESS_BORDER; + desc.MaxAnisotropy = 1; + desc.ComparisonFunc = D3D11_COMPARISON_ALWAYS; + desc.MaxLOD = D3D11_FLOAT32_MAX; + THROW_IF_FAILED(_r.device->CreateSamplerState(&desc, _r.customShaderSamplerState.put())); + } + + _r.customShaderStartTime = std::chrono::steady_clock::now(); + } + } WI_ClearFlag(_api.invalidations, ApiInvalidations::Device); WI_SetAllFlags(_api.invalidations, ApiInvalidations::SwapChain); @@ -761,6 +791,10 @@ void AtlasEngine::_releaseSwapChain() // no views are bound to pipeline state), and then call Flush on the immediate context. if (_r.swapChain && _r.deviceContext) { + if (_r.d2dMode) + { + _r.d2dRenderTarget.reset(); + } _r.frameLatencyWaitableObject.reset(); _r.swapChain.reset(); _r.renderTargetView.reset(); @@ -775,8 +809,6 @@ void AtlasEngine::_createSwapChain() // D3D swap chain setup (the thing that allows us to present frames on the screen) { - const auto supportsFrameLatencyWaitableObject = IsWindows8Point1OrGreater(); - // With C++20 we'll finally have designated initializers. 
DXGI_SWAP_CHAIN_DESC1 desc{}; desc.Width = _api.sizeInPixel.x; @@ -784,32 +816,32 @@ void AtlasEngine::_createSwapChain() desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; desc.SampleDesc.Count = 1; desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; - desc.BufferCount = 2; // TODO: 3? + // Sometimes up to 2 buffers are locked, for instance during screen capture or when moving the window. + // 3 buffers seems to guarantee a stable framerate at display frequency at all times. + desc.BufferCount = 3; desc.Scaling = DXGI_SCALING_NONE; - desc.SwapEffect = _sr.isWindows10OrGreater ? DXGI_SWAP_EFFECT_FLIP_DISCARD : DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; - // * HWND swap chains can't do alpha. - // * If our background is opaque we can enable "independent" flips by setting DXGI_SWAP_EFFECT_FLIP_DISCARD and DXGI_ALPHA_MODE_IGNORE. - // As our swap chain won't have to compose with DWM anymore it reduces the display latency dramatically. - desc.AlphaMode = _api.hwnd || _api.backgroundOpaqueMixin ? DXGI_ALPHA_MODE_IGNORE : DXGI_ALPHA_MODE_PREMULTIPLIED; - desc.Flags = supportsFrameLatencyWaitableObject ? DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT : 0; + // DXGI_SWAP_EFFECT_FLIP_DISCARD is a mode that was created at a time were display drivers + // lacked support for Multiplane Overlays (MPO) and were copying buffers was expensive. + // This allowed DWM to quickly draw overlays (like gamebars) on top of rendered content. + // With faster GPU memory in general and with support for MPO in particular this isn't + // really an advantage anymore. Instead DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL allows for a + // more "intelligent" composition and display updates to occur like Panel Self Refresh + // (PSR) which requires dirty rectangles (Present1 API) to work correctly. + desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; + // If our background is opaque we can enable "independent" flips by setting DXGI_ALPHA_MODE_IGNORE. 
+ // As our swap chain won't have to compose with DWM anymore it reduces the display latency dramatically. + desc.AlphaMode = _api.backgroundOpaqueMixin ? DXGI_ALPHA_MODE_IGNORE : DXGI_ALPHA_MODE_PREMULTIPLIED; + desc.Flags = debugGeneralPerformance ? 0 : DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; wil::com_ptr dxgiFactory; THROW_IF_FAILED(CreateDXGIFactory1(IID_PPV_ARGS(dxgiFactory.addressof()))); if (_api.hwnd) { - if (FAILED(dxgiFactory->CreateSwapChainForHwnd(_r.device.get(), _api.hwnd, &desc, nullptr, nullptr, _r.swapChain.put()))) - { - // Platform Update for Windows 7: - // DXGI_SCALING_NONE is not supported on Windows 7 or Windows Server 2008 R2 with the Platform Update for - // Windows 7 installed and causes CreateSwapChainForHwnd to return DXGI_ERROR_INVALID_CALL when called. - desc.Scaling = DXGI_SCALING_STRETCH; - THROW_IF_FAILED(dxgiFactory->CreateSwapChainForHwnd(_r.device.get(), _api.hwnd, &desc, nullptr, nullptr, _r.swapChain.put())); - } + THROW_IF_FAILED(dxgiFactory->CreateSwapChainForHwnd(_r.device.get(), _api.hwnd, &desc, nullptr, nullptr, _r.swapChain.put())); } else { - // We can't link with dcomp.lib as dcomp.dll doesn't exist on Windows 7. const wil::unique_hmodule module{ LoadLibraryExW(L"dcomp.dll", nullptr, LOAD_LIBRARY_SEARCH_SYSTEM32) }; THROW_LAST_ERROR_IF(!module); const auto DCompositionCreateSurfaceHandle = GetProcAddressByFunctionDeclaration(module.get(), DCompositionCreateSurfaceHandle); @@ -821,10 +853,9 @@ void AtlasEngine::_createSwapChain() THROW_IF_FAILED(dxgiFactory.query()->CreateSwapChainForCompositionSurfaceHandle(_r.device.get(), _api.swapChainHandle.get(), &desc, nullptr, _r.swapChain.put())); } - if (supportsFrameLatencyWaitableObject) + if constexpr (!debugGeneralPerformance) { const auto swapChain2 = _r.swapChain.query(); - THROW_IF_FAILED(swapChain2->SetMaximumFrameLatency(1)); // TODO: 2? 
_r.frameLatencyWaitableObject.reset(swapChain2->GetFrameLatencyWaitableObject()); THROW_LAST_ERROR_IF(!_r.frameLatencyWaitableObject); } @@ -833,13 +864,14 @@ void AtlasEngine::_createSwapChain() // See documentation for IDXGISwapChain2::GetFrameLatencyWaitableObject method: // > For every frame it renders, the app should wait on this handle before starting any rendering operations. // > Note that this requirement includes the first frame the app renders with the swap chain. + _r.waitForPresentation = true; WaitUntilCanRender(); if (_api.swapChainChangedCallback) { try { - _api.swapChainChangedCallback(); + _api.swapChainChangedCallback(_api.swapChainHandle.get()); } CATCH_LOG(); } @@ -853,52 +885,41 @@ void AtlasEngine::_recreateSizeDependentResources() // ResizeBuffer() docs: // Before you call ResizeBuffers, ensure that the application releases all references [...]. // You can use ID3D11DeviceContext::ClearState to ensure that all [internal] references are released. - if (_r.renderTargetView) + // The _r.cells check exists simply to prevent us from calling ResizeBuffers() on startup (i.e. when `_r` is empty). + if (_r.cells) { + if (_r.d2dMode) + { + _r.d2dRenderTarget.reset(); + } _r.renderTargetView.reset(); _r.deviceContext->ClearState(); _r.deviceContext->Flush(); - THROW_IF_FAILED(_r.swapChain->ResizeBuffers(0, _api.sizeInPixel.x, _api.sizeInPixel.y, DXGI_FORMAT_UNKNOWN, DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT)); - } - - // The RenderTargetView is later used with OMSetRenderTargets - // to tell D3D where stuff is supposed to be rendered at. - { - wil::com_ptr buffer; - THROW_IF_FAILED(_r.swapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), buffer.put_void())); - THROW_IF_FAILED(_r.device->CreateRenderTargetView(buffer.get(), nullptr, _r.renderTargetView.put())); + THROW_IF_FAILED(_r.swapChain->ResizeBuffers(0, _api.sizeInPixel.x, _api.sizeInPixel.y, DXGI_FORMAT_UNKNOWN, debugGeneralPerformance ? 
0 : DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT)); } - // Tell D3D which parts of the render target will be visible. - // Everything outside of the viewport will be black. - // - // In the future this should cover the entire _api.sizeInPixel.x/_api.sizeInPixel.y. - // The pixel shader should draw the remaining content in the configured background color. - { - D3D11_VIEWPORT viewport{}; - viewport.Width = static_cast(_api.sizeInPixel.x); - viewport.Height = static_cast(_api.sizeInPixel.y); - _r.deviceContext->RSSetViewports(1, &viewport); - } + const auto totalCellCount = static_cast(_api.cellCount.x) * static_cast(_api.cellCount.y); + const auto resize = _api.cellCount != _r.cellCount; - if (_api.cellCount != _r.cellCount) + if (resize) { - const auto totalCellCount = static_cast(_api.cellCount.x) * static_cast(_api.cellCount.y); // Let's guess that every cell consists of a surrogate pair. const auto projectedTextSize = static_cast(_api.cellCount.x) * 2; // IDWriteTextAnalyzer::GetGlyphs says: // The recommended estimate for the per-glyph output buffers is (3 * textLength / 2 + 16). - // We already set the textLength to twice the cell count. - const auto projectedGlyphSize = 3 * projectedTextSize + 16; + const auto projectedGlyphSize = 3 * projectedTextSize / 2 + 16; // This buffer is a bit larger than the others (multiple MB). // Prevent a memory usage spike, by first deallocating and then allocating. _r.cells = {}; + _r.cellGlyphMapping = {}; // Our render loop heavily relies on memcpy() which is between 1.5x // and 40x faster for allocations with an alignment of 32 or greater. // (40x on AMD Zen1-3, which have a rep movsb performance issue. MSFT:33358259.) _r.cells = Buffer{ totalCellCount }; + _r.cellGlyphMapping = Buffer{ totalCellCount }; _r.cellCount = _api.cellCount; + _r.tileAllocator.setMaxArea(_api.sizeInPixel); // .clear() doesn't free the memory of these buffers. // This code allows them to shrink again. 
@@ -914,21 +935,58 @@ void AtlasEngine::_recreateSizeDependentResources() _api.glyphProps = Buffer{ projectedGlyphSize }; _api.glyphAdvances = Buffer{ projectedGlyphSize }; _api.glyphOffsets = Buffer{ projectedGlyphSize }; - - D3D11_BUFFER_DESC desc; - desc.ByteWidth = gsl::narrow(totalCellCount * sizeof(Cell)); // totalCellCount can theoretically be UINT32_MAX! - desc.Usage = D3D11_USAGE_DYNAMIC; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; - desc.StructureByteStride = sizeof(Cell); - THROW_IF_FAILED(_r.device->CreateBuffer(&desc, nullptr, _r.cellBuffer.put())); - THROW_IF_FAILED(_r.device->CreateShaderResourceView(_r.cellBuffer.get(), nullptr, _r.cellView.put())); } - // We have called _r.deviceContext->ClearState() in the beginning and lost all D3D state. - // This forces us to set up everything up from scratch again. - _setShaderResources(); + if (!_r.d2dMode) + { + // The RenderTargetView is later used with OMSetRenderTargets + // to tell D3D where stuff is supposed to be rendered at. 
+ { + wil::com_ptr buffer; + THROW_IF_FAILED(_r.swapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), buffer.put_void())); + THROW_IF_FAILED(_r.device->CreateRenderTargetView(buffer.get(), nullptr, _r.renderTargetView.put())); + } + if (_r.customPixelShader) + { + D3D11_TEXTURE2D_DESC desc{}; + desc.Width = _api.sizeInPixel.x; + desc.Height = _api.sizeInPixel.y; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + desc.SampleDesc = { 1, 0 }; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; + THROW_IF_FAILED(_r.device->CreateTexture2D(&desc, nullptr, _r.customOffscreenTexture.addressof())); + THROW_IF_FAILED(_r.device->CreateShaderResourceView(_r.customOffscreenTexture.get(), nullptr, _r.customOffscreenTextureView.addressof())); + THROW_IF_FAILED(_r.device->CreateRenderTargetView(_r.customOffscreenTexture.get(), nullptr, _r.customOffscreenTextureTargetView.addressof())); + } + + // Tell D3D which parts of the render target will be visible. + // Everything outside of the viewport will be black. + { + D3D11_VIEWPORT viewport{}; + viewport.Width = static_cast(_api.sizeInPixel.x); + viewport.Height = static_cast(_api.sizeInPixel.y); + _r.deviceContext->RSSetViewports(1, &viewport); + } + + if (resize) + { + D3D11_BUFFER_DESC desc; + desc.ByteWidth = gsl::narrow(totalCellCount * sizeof(Cell)); // totalCellCount can theoretically be UINT32_MAX! + desc.Usage = D3D11_USAGE_DYNAMIC; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; + desc.StructureByteStride = sizeof(Cell); + THROW_IF_FAILED(_r.device->CreateBuffer(&desc, nullptr, _r.cellBuffer.put())); + THROW_IF_FAILED(_r.device->CreateShaderResourceView(_r.cellBuffer.get(), nullptr, _r.cellView.put())); + } + + // We have called _r.deviceContext->ClearState() in the beginning and lost all D3D state. + // This forces us to set up everything up from scratch again. 
+ _setShaderResources(); + } WI_ClearFlag(_api.invalidations, ApiInvalidations::Size); WI_SetAllFlags(_r.invalidations, RenderInvalidations::ConstBuffer); @@ -939,40 +997,24 @@ void AtlasEngine::_recreateFontDependentResources() { // We're likely resizing the atlas anyways and can // thus also release any of these buffers prematurely. - _r.d2dRenderTarget.reset(); // depends on _r.atlasScratchpad - _r.atlasScratchpad.reset(); + _r.d2dRenderTarget.reset(); // depends on _r.atlasBuffer _r.atlasView.reset(); _r.atlasBuffer.reset(); } // D3D { - // TODO: Consider using IDXGIAdapter3::QueryVideoMemoryInfo() and IDXGIAdapter3::RegisterVideoMemoryBudgetChangeNotificationEvent() - // That way we can make better to use of a user's available video memory. - - static constexpr size_t sizePerPixel = 4; - static constexpr size_t sizeLimit = D3D10_REQ_RESOURCE_SIZE_IN_MEGABYTES * 1024 * 1024; - const size_t dimensionLimit = _r.device->GetFeatureLevel() >= D3D_FEATURE_LEVEL_11_0 ? D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION : D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION; - const size_t csx = _api.fontMetrics.cellSize.x; - const size_t csy = _api.fontMetrics.cellSize.y; - const auto xLimit = (dimensionLimit / csx) * csx; - const auto pixelsPerCellRow = xLimit * csy; - const auto yLimitDueToDimension = (dimensionLimit / csy) * csy; - const auto yLimitDueToSize = ((sizeLimit / sizePerPixel) / pixelsPerCellRow) * csy; - const auto yLimit = std::min(yLimitDueToDimension, yLimitDueToSize); const auto scaling = GetScaling(); _r.cellSizeDIP.x = static_cast(_api.fontMetrics.cellSize.x) / scaling; _r.cellSizeDIP.y = static_cast(_api.fontMetrics.cellSize.y) / scaling; - _r.cellSize = _api.fontMetrics.cellSize; _r.cellCount = _api.cellCount; - // x/yLimit are strictly smaller than dimensionLimit, which is smaller than a u16. 
- _r.atlasSizeInPixelLimit = u16x2{ gsl::narrow_cast(xLimit), gsl::narrow_cast(yLimit) }; + _r.dpi = _api.dpi; + _r.fontMetrics = _api.fontMetrics; + _r.dipPerPixel = static_cast(USER_DEFAULT_SCREEN_DPI) / static_cast(_r.dpi); + _r.pixelPerDIP = static_cast(_r.dpi) / static_cast(USER_DEFAULT_SCREEN_DPI); _r.atlasSizeInPixel = { 0, 0 }; - // The first Cell at {0, 0} is always our cursor texture. - // --> The first glyph starts at {1, 0}. - _r.atlasPosition.x = _api.fontMetrics.cellSize.x; - _r.atlasPosition.y = 0; + _r.tileAllocator = TileAllocator{ _api.fontMetrics.cellSize, _api.sizeInPixel }; _r.glyphs = {}; _r.glyphQueue = {}; @@ -993,14 +1035,6 @@ void AtlasEngine::_recreateFontDependentResources() } // D2D - { - _r.underlinePos = _api.fontMetrics.underlinePos; - _r.strikethroughPos = _api.fontMetrics.strikethroughPos; - _r.lineThickness = _api.fontMetrics.lineThickness; - _r.dpi = _api.dpi; - _r.maxEncounteredCellCount = 0; - _r.scratchpadCellWidth = 0; - } { // See AtlasEngine::UpdateFont. // It hardcodes indices 0/1/2 in fontAxisValues to the weight/italic/slant axes. @@ -1031,9 +1065,9 @@ void AtlasEngine::_recreateFontDependentResources() const auto fontStyle = italic ? 
DWRITE_FONT_STYLE_ITALIC : DWRITE_FONT_STYLE_NORMAL; auto& textFormat = _r.textFormats[italic][bold]; - THROW_IF_FAILED(_sr.dwriteFactory->CreateTextFormat(_api.fontMetrics.fontName.get(), _api.fontMetrics.fontCollection.get(), fontWeight, fontStyle, DWRITE_FONT_STRETCH_NORMAL, _api.fontMetrics.fontSizeInDIP, L"", textFormat.put())); - textFormat->SetTextAlignment(DWRITE_TEXT_ALIGNMENT_CENTER); - textFormat->SetWordWrapping(DWRITE_WORD_WRAPPING_NO_WRAP); + THROW_IF_FAILED(_sr.dwriteFactory->CreateTextFormat(_api.fontMetrics.fontName.c_str(), _api.fontMetrics.fontCollection.get(), fontWeight, fontStyle, DWRITE_FONT_STRETCH_NORMAL, _api.fontMetrics.fontSizeInDIP, L"", textFormat.put())); + THROW_IF_FAILED(textFormat->SetTextAlignment(DWRITE_TEXT_ALIGNMENT_CENTER)); + THROW_IF_FAILED(textFormat->SetWordWrapping(DWRITE_WORD_WRAPPING_NO_WRAP)); // DWRITE_LINE_SPACING_METHOD_UNIFORM: // > Lines are explicitly set to uniform spacing, regardless of contained font sizes. @@ -1041,9 +1075,11 @@ void AtlasEngine::_recreateFontDependentResources() // We want that. Otherwise fallback fonts might be rendered with an incorrect baseline and get cut off vertically. THROW_IF_FAILED(textFormat->SetLineSpacing(DWRITE_LINE_SPACING_METHOD_UNIFORM, _r.cellSizeDIP.y, _api.fontMetrics.baselineInDIP)); - if (!_api.fontAxisValues.empty()) + if (const auto textFormat3 = textFormat.try_query()) { - if (const auto textFormat3 = textFormat.try_query()) + THROW_IF_FAILED(textFormat3->SetAutomaticFontAxes(DWRITE_AUTOMATIC_FONT_AXES_OPTICAL_SIZE)); + + if (!_api.fontAxisValues.empty()) { // The wght axis defaults to the font weight. _api.fontAxisValues[0].value = bold || standardAxes[0].value == -1.0f ? 
static_cast(fontWeight) : standardAxes[0].value; @@ -1093,6 +1129,13 @@ AtlasEngine::Cell* AtlasEngine::_getCell(u16 x, u16 y) noexcept return _r.cells.data() + static_cast(_r.cellCount.x) * y + x; } +AtlasEngine::TileHashMap::iterator* AtlasEngine::_getCellGlyphMapping(u16 x, u16 y) noexcept +{ + assert(x < _r.cellCount.x); + assert(y < _r.cellCount.y); + return _r.cellGlyphMapping.data() + static_cast(_r.cellCount.x) * y + x; +} + void AtlasEngine::_setCellFlags(u16r coords, CellFlags mask, CellFlags bits) noexcept { assert(coords.left <= coords.right); @@ -1118,26 +1161,6 @@ void AtlasEngine::_setCellFlags(u16r coords, CellFlags mask, CellFlags bits) noe } } -AtlasEngine::u16x2 AtlasEngine::_allocateAtlasTile() noexcept -{ - const auto ret = _r.atlasPosition; - - _r.atlasPosition.x += _r.cellSize.x; - if (_r.atlasPosition.x >= _r.atlasSizeInPixelLimit.x) - { - _r.atlasPosition.x = 0; - _r.atlasPosition.y += _r.cellSize.y; - if (_r.atlasPosition.y >= _r.atlasSizeInPixelLimit.y) - { - _r.atlasPosition.x = _r.cellSize.x; - _r.atlasPosition.y = 0; - showOOMWarning(); - } - } - - return ret; -} - void AtlasEngine::_flushBufferLine() { if (_api.bufferLine.empty()) @@ -1189,7 +1212,8 @@ void AtlasEngine::_flushBufferLine() const auto textFormat = _getTextFormat(_api.attributes.bold, _api.attributes.italic); const auto& textFormatAxis = _getTextFormatAxis(_api.attributes.bold, _api.attributes.italic); - TextAnalyzer atlasAnalyzer{ _api.bufferLine, _api.analysisResults }; + TextAnalysisSource analysisSource{ _api.bufferLine.data(), gsl::narrow(_api.bufferLine.size()) }; + TextAnalysisSink analysisSink{ _api.analysisResults }; wil::com_ptr fontCollection; THROW_IF_FAILED(textFormat->GetFontCollection(fontCollection.addressof())); @@ -1208,11 +1232,11 @@ void AtlasEngine::_flushBufferLine() { wil::com_ptr fontFace5; THROW_IF_FAILED(_sr.systemFontFallback.query()->MapCharacters( - /* analysisSource */ &atlasAnalyzer, + /* analysisSource */ &analysisSource, /* textPosition 
*/ idx, /* textLength */ gsl::narrow_cast(_api.bufferLine.size()) - idx, /* baseFontCollection */ fontCollection.get(), - /* baseFamilyName */ _api.fontMetrics.fontName.get(), + /* baseFamilyName */ _api.fontMetrics.fontName.c_str(), /* fontAxisValues */ textFormatAxis.data(), /* fontAxisValueCount */ gsl::narrow_cast(textFormatAxis.size()), /* mappedLength */ &mappedLength, @@ -1227,11 +1251,11 @@ void AtlasEngine::_flushBufferLine() wil::com_ptr font; THROW_IF_FAILED(_sr.systemFontFallback->MapCharacters( - /* analysisSource */ &atlasAnalyzer, + /* analysisSource */ &analysisSource, /* textPosition */ idx, /* textLength */ gsl::narrow_cast(_api.bufferLine.size()) - idx, /* baseFontCollection */ fontCollection.get(), - /* baseFamilyName */ _api.fontMetrics.fontName.get(), + /* baseFamilyName */ _api.fontMetrics.fontName.c_str(), /* baseWeight */ baseWeight, /* baseStyle */ baseStyle, /* baseStretch */ DWRITE_FONT_STRETCH_NORMAL, @@ -1253,7 +1277,7 @@ void AtlasEngine::_flushBufferLine() // Task: Replace all characters in this range with unicode replacement characters. 
// Input (where "n" is a narrow and "ww" is a wide character): // _api.bufferLine = "nwwnnw" - // _api.bufferLineColumn = {0, 1, 1, 2, 3, 4, 4, 5} + // _api.bufferLineColumn = {0, 1, 1, 3, 4, 5, 5, 6} // n w w n n w w // Solution: // Iterate through bufferLineColumn until the value changes, because this indicates we passed over a @@ -1301,15 +1325,19 @@ void AtlasEngine::_flushBufferLine() if (isTextSimple) { + size_t beg = 0; for (size_t i = 0; i < complexityLength; ++i) { - _emplaceGlyph(mappedFontFace.get(), idx + i, idx + i + 1u); + if (_emplaceGlyph(mappedFontFace.get(), idx + beg, idx + i + 1)) + { + beg = i + 1; + } } } else { _api.analysisResults.clear(); - THROW_IF_FAILED(_sr.textAnalyzer->AnalyzeScript(&atlasAnalyzer, idx, complexityLength, &atlasAnalyzer)); + THROW_IF_FAILED(_sr.textAnalyzer->AnalyzeScript(&analysisSource, idx, complexityLength, &analysisSink)); //_sr.textAnalyzer->AnalyzeBidi(&atlasAnalyzer, idx, complexityLength, &atlasAnalyzer); for (const auto& a : _api.analysisResults) @@ -1418,8 +1446,10 @@ void AtlasEngine::_flushBufferLine() { if (_api.textProps[i].canBreakShapingAfter) { - _emplaceGlyph(mappedFontFace.get(), a.textPosition + beg, a.textPosition + i + 1); - beg = i + 1; + if (_emplaceGlyph(mappedFontFace.get(), a.textPosition + beg, a.textPosition + i + 1)) + { + beg = i + 1; + } } } } @@ -1427,33 +1457,51 @@ void AtlasEngine::_flushBufferLine() } } } +// ^^^ Look at that amazing 8-fold nesting level. Lovely. <3 -void AtlasEngine::_emplaceGlyph(IDWriteFontFace* fontFace, size_t bufferPos1, size_t bufferPos2) +bool AtlasEngine::_emplaceGlyph(IDWriteFontFace* fontFace, size_t bufferPos1, size_t bufferPos2) { static constexpr auto replacement = L'\uFFFD'; // This would seriously blow us up otherwise. Expects(bufferPos1 < bufferPos2 && bufferPos2 <= _api.bufferLine.size()); - const auto chars = fontFace ? &_api.bufferLine[bufferPos1] : &replacement; - const auto charCount = fontFace ? 
bufferPos2 - bufferPos1 : 1; - // _flushBufferLine() ensures that bufferLineColumn.size() > bufferLine.size(). const auto x1 = _api.bufferLineColumn[bufferPos1]; const auto x2 = _api.bufferLineColumn[bufferPos2]; - Expects(x1 < x2 && x2 <= _api.cellCount.x); + // x1 == x2, if our TextBuffer and DirectWrite disagree where glyph boundaries are. Example: + // Our line of text contains a wide glyph consisting of 2 surrogate pairs "xx" and "yy". + // If DirectWrite considers the first "xx" to be separate from the second "yy", we'll get: + // _api.bufferLine = "...xxyy..." + // _api.bufferLineColumn = {01233335678} + // ^ ^ + // / \ + // bufferPos1 bufferPos2 + // x1: _api.bufferLineColumn[bufferPos1] == 3 + // x2: _api.bufferLineColumn[bufferPos2] == 3 + // --> cellCount (which is x2 - x1) is now 0 (invalid). + // + // Assuming that the TextBuffer implementation doesn't have any bugs... + // I'm not entirely certain why this occurs, but to me, a layperson, it appears as if + // IDWriteFontFallback::MapCharacters() doesn't respect extended grapheme clusters. + // It could also possibly be due to a difference in the supported Unicode version. + if (x1 >= x2 || x2 > _api.cellCount.x) + { + return false; + } + const auto chars = fontFace ? &_api.bufferLine[bufferPos1] : &replacement; + const auto charCount = fontFace ? bufferPos2 - bufferPos1 : 1; const u16 cellCount = x2 - x1; auto attributes = _api.attributes; attributes.cellCount = cellCount; - const auto [it, inserted] = _r.glyphs.emplace(std::piecewise_construct, std::forward_as_tuple(attributes, gsl::narrow(charCount), chars), std::forward_as_tuple()); - const auto& key = it->first; - auto& value = it->second; + AtlasKey key{ attributes, gsl::narrow(charCount), chars }; + auto it = _r.glyphs.find(key); - if (inserted) + if (it == _r.glyphs.end()) { // Do fonts exist *in practice* which contain both colored and uncolored glyphs? I'm pretty sure... 
// However doing it properly means using either of: @@ -1481,27 +1529,36 @@ void AtlasEngine::_emplaceGlyph(IDWriteFontFace* fontFace, size_t bufferPos1, si WI_SetFlagIf(flags, CellFlags::ColoredGlyph, fontFace2 && fontFace2->IsColorFont()); } - const auto coords = value.initialize(flags, cellCount); + // The AtlasValue constructor fills the `coords` variable with a pointer to an array + // of at least `cellCount` elements. I did this so that I don't have to type out + // `value.data()->coords` again, despite the constructor having all the data necessary. + u16x2* coords; + AtlasValue value{ flags, cellCount, &coords }; + for (u16 i = 0; i < cellCount; ++i) { - coords[i] = _allocateAtlasTile(); + coords[i] = _r.tileAllocator.allocate(_r.glyphs); } - _r.glyphQueue.push_back(AtlasQueueItem{ &key, &value }); - _r.maxEncounteredCellCount = std::max(_r.maxEncounteredCellCount, cellCount); + it = _r.glyphs.insert(std::move(key), std::move(value)); + _r.glyphQueue.emplace_back(it); } - const auto valueData = value.data(); + const auto valueData = it->second.data(); const auto coords = &valueData->coords[0]; - const auto data = _getCell(x1, _api.lastPaintBufferLineCoord.y); + const auto cells = _getCell(x1, _api.lastPaintBufferLineCoord.y); + const auto cellGlyphMappings = _getCellGlyphMapping(x1, _api.lastPaintBufferLineCoord.y); for (u32 i = 0; i < cellCount; ++i) { - data[i].tileIndex = coords[i]; + cells[i].tileIndex = coords[i]; // We should apply the column color and flags from each column (instead // of copying them from the x1) so that ligatures can appear in multiple // colors with different line styles. 
- data[i].flags = valueData->flags | _api.bufferLineMetadata[static_cast(x1) + i].flags; - data[i].color = _api.bufferLineMetadata[static_cast(x1) + i].colors; + cells[i].flags = valueData->flags | _api.bufferLineMetadata[static_cast(x1) + i].flags; + cells[i].color = _api.bufferLineMetadata[static_cast(x1) + i].colors; } + + std::fill_n(cellGlyphMappings, cellCount, it); + return true; } diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index 92a22c0a086..1c58d351c2e 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -3,14 +3,17 @@ #pragma once -#include +#include #include #include #include "../../renderer/inc/IRenderEngine.hpp" +#include "DWriteTextAnalysis.h" namespace Microsoft::Console::Render { + struct TextAnalysisSinkResult; + class AtlasEngine final : public IRenderEngine { public: @@ -57,14 +60,14 @@ namespace Microsoft::Console::Render // DxRenderer - getter HRESULT Enable() noexcept override; + [[nodiscard]] std::wstring_view GetPixelShaderPath() noexcept override; [[nodiscard]] bool GetRetroTerminalEffect() const noexcept override; [[nodiscard]] float GetScaling() const noexcept override; - [[nodiscard]] HANDLE GetSwapChainHandle() override; [[nodiscard]] Types::Viewport GetViewportInCharacters(const Types::Viewport& viewInPixels) const noexcept override; [[nodiscard]] Types::Viewport GetViewportInPixels(const Types::Viewport& viewInCharacters) const noexcept override; // DxRenderer - setter void SetAntialiasingMode(D2D1_TEXT_ANTIALIAS_MODE antialiasingMode) noexcept override; - void SetCallback(std::function pfn) noexcept override; + void SetCallback(std::function pfn) noexcept override; void EnableTransparentBackground(const bool isTransparent) noexcept override; void SetForceFullRepaintRendering(bool enable) noexcept override; [[nodiscard]] HRESULT SetHwnd(HWND hwnd) noexcept override; @@ -74,7 +77,6 @@ namespace Microsoft::Console::Render void SetSoftwareRendering(bool enable) 
noexcept override; void SetWarningCallback(std::function pfn) noexcept override; [[nodiscard]] HRESULT SetWindowSize(til::size pixels) noexcept override; - void ToggleShaderEffects() noexcept override; [[nodiscard]] HRESULT UpdateFont(const FontInfoDesired& pfiFontInfoDesired, FontInfo& fiFontInfo, const std::unordered_map& features, const std::unordered_map& axes) noexcept override; void UpdateHyperlinkHoveredId(uint16_t hoveredId) noexcept override; @@ -96,8 +98,10 @@ namespace Microsoft::Console::Render friend constexpr type operator~(type v) noexcept { return static_cast(~static_cast(v)); } \ friend constexpr type operator|(type lhs, type rhs) noexcept { return static_cast(static_cast(lhs) | static_cast(rhs)); } \ friend constexpr type operator&(type lhs, type rhs) noexcept { return static_cast(static_cast(lhs) & static_cast(rhs)); } \ + friend constexpr type operator^(type lhs, type rhs) noexcept { return static_cast(static_cast(lhs) ^ static_cast(rhs)); } \ friend constexpr void operator|=(type& lhs, type rhs) noexcept { lhs = lhs | rhs; } \ - friend constexpr void operator&=(type& lhs, type rhs) noexcept { lhs = lhs & rhs; } + friend constexpr void operator&=(type& lhs, type rhs) noexcept { lhs = lhs & rhs; } \ + friend constexpr void operator^=(type& lhs, type rhs) noexcept { lhs = lhs ^ rhs; } template struct vec2 @@ -106,12 +110,16 @@ namespace Microsoft::Console::Render T y{}; ATLAS_POD_OPS(vec2) + }; - constexpr vec2 operator/(const vec2& rhs) noexcept - { - assert(rhs.x != 0 && rhs.y != 0); - return { gsl::narrow_cast(x / rhs.x), gsl::narrow_cast(y / rhs.y) }; - } + template + struct vec3 + { + T x{}; + T y{}; + T z{}; + + ATLAS_POD_OPS(vec3) }; template @@ -135,7 +143,7 @@ namespace Microsoft::Console::Render ATLAS_POD_OPS(rect) - constexpr bool non_empty() noexcept + constexpr bool non_empty() const noexcept { return (left < right) & (top < bottom); } @@ -156,6 +164,7 @@ namespace Microsoft::Console::Render using f32 = float; using f32x2 = vec2; + 
using f32x3 = vec3; using f32x4 = vec4; struct TextAnalyzerResult @@ -172,6 +181,12 @@ namespace Microsoft::Console::Render }; private: + // I wrote `Buffer` instead of using `std::vector`, because I want to convey that these things + // explicitly _don't_ hold resizeable contents, but rather plain content of a fixed size. + // For instance I didn't want a resizeable vector with a `push_back` method for my fixed-size + // viewport arrays - that doesn't make sense after all. `Buffer` also doesn't initialize + // contents to zero, allowing rapid creation/destruction and you can easily specify a custom + // (over-)alignment which can improve rendering perf by up to ~20% over `std::vector`. template struct Buffer { @@ -181,19 +196,24 @@ namespace Microsoft::Console::Render _data{ allocate(size) }, _size{ size } { + std::uninitialized_default_construct_n(_data, size); } Buffer(const T* data, size_t size) : _data{ allocate(size) }, _size{ size } { - static_assert(std::is_trivially_copyable_v); - memcpy(_data, data, size * sizeof(T)); + // Changing the constructor arguments to accept std::span might + // be a good future extension, but not to improve security here. + // You can trivially construct std::span's from invalid ranges. + // Until then the raw-pointer style is more practical. +#pragma warning(suppress : 26459) // You called an STL function '...' with a raw pointer parameter at position '3' that may be unsafe [...]. + std::uninitialized_copy_n(data, size, _data); } ~Buffer() { - deallocate(_data); + destroy(); } Buffer(Buffer&& other) noexcept : @@ -205,7 +225,7 @@ namespace Microsoft::Console::Render #pragma warning(suppress : 26432) // If you define or delete any default operation in the type '...', define or delete them all (c.21). 
Buffer& operator=(Buffer&& other) noexcept { - deallocate(_data); + destroy(); _data = std::exchange(other._data, nullptr); _size = std::exchange(other._size, 0); return *this; @@ -243,6 +263,26 @@ namespace Microsoft::Console::Render return _size; } + T* begin() noexcept + { + return _data; + } + + T* begin() const noexcept + { + return _data; + } + + T* end() noexcept + { + return _data + _size; + } + + T* end() const noexcept + { + return _data + _size; + } + private: // These two functions don't need to use scoped objects or standard allocators, // since this class is in fact an scoped allocator object itself. @@ -274,6 +314,12 @@ namespace Microsoft::Console::Render } #pragma warning(pop) + void destroy() noexcept + { + std::destroy_n(_data, _size); + deallocate(_data); + } + T* _data = nullptr; size_t _size = 0; }; @@ -302,23 +348,8 @@ namespace Microsoft::Console::Render constexpr SmallObjectOptimizer() = default; - SmallObjectOptimizer(const SmallObjectOptimizer& other) - { - const auto otherData = other.data(); - const auto otherSize = other.size(); - const auto data = initialize(otherSize); - memcpy(data, otherData, otherSize); - } - - SmallObjectOptimizer& operator=(const SmallObjectOptimizer& other) - { - if (this != &other) - { - delete this; - new (this) SmallObjectOptimizer(other); - } - return &this; - } + SmallObjectOptimizer(const SmallObjectOptimizer& other) = delete; + SmallObjectOptimizer& operator=(const SmallObjectOptimizer& other) = delete; SmallObjectOptimizer(SmallObjectOptimizer&& other) noexcept { @@ -328,7 +359,8 @@ namespace Microsoft::Console::Render SmallObjectOptimizer& operator=(SmallObjectOptimizer&& other) noexcept { - return *new (this) SmallObjectOptimizer(other); + std::destroy_at(this); + return *std::construct_at(this, std::move(other)); } ~SmallObjectOptimizer() @@ -378,14 +410,18 @@ namespace Microsoft::Console::Render struct FontMetrics { wil::com_ptr fontCollection; - wil::unique_process_heap_string fontName; + 
std::wstring fontName; float baselineInDIP = 0.0f; float fontSizeInDIP = 0.0f; + f32 advanceScale = 0; u16x2 cellSize; u16 fontWeight = 0; u16 underlinePos = 0; + u16 underlineWidth = 0; u16 strikethroughPos = 0; - u16 lineThickness = 0; + u16 strikethroughWidth = 0; + u16x2 doubleUnderlinePos; + u16 thinLineWidth = 0; }; // These flags are shared with shader_ps.hlsl. @@ -484,12 +520,19 @@ namespace Microsoft::Console::Render } }; - struct AtlasKeyHasher + struct CachedGlyphLayout { - size_t operator()(const AtlasKey& key) const noexcept - { - return key.hash(); - } + wil::com_ptr textLayout; + f32x2 halfSize; + f32x2 offset; + f32x2 scale; + D2D1_DRAW_TEXT_OPTIONS options = D2D1_DRAW_TEXT_OPTIONS_NONE; + bool scalingRequired = false; + + explicit operator bool() const noexcept; + void reset() noexcept; + void applyScaling(ID2D1RenderTarget* d2dRenderTarget, D2D1_POINT_2F origin) const noexcept; + void undoScaling(ID2D1RenderTarget* d2dRenderTarget) const noexcept; }; struct AtlasValueData @@ -500,15 +543,14 @@ namespace Microsoft::Console::Render struct AtlasValue { - constexpr AtlasValue() = default; - - u16x2* initialize(CellFlags flags, u16 cellCount) + AtlasValue(CellFlags flags, u16 cellCount, u16x2** coords) { + __assume(coords != nullptr); const auto size = dataSize(cellCount); const auto data = _data.initialize(size); WI_SetFlagIf(flags, CellFlags::Inlined, _data.would_inline(size)); data->flags = flags; - return &data->coords[0]; + *coords = &data->coords[0]; } const AtlasValueData* data() const noexcept @@ -516,6 +558,8 @@ namespace Microsoft::Console::Render return _data.data(); } + CachedGlyphLayout cachedLayout; + private: SmallObjectOptimizer _data; @@ -525,10 +569,258 @@ namespace Microsoft::Console::Render } }; - struct AtlasQueueItem + struct AtlasKeyHasher + { + using is_transparent = int; + + size_t operator()(const AtlasKey& v) const noexcept + { + return v.hash(); + } + + size_t operator()(const std::list>::iterator& v) const noexcept + { + 
return operator()(v->first); + } + }; + + struct AtlasKeyEq + { + using is_transparent = int; + + bool operator()(const AtlasKey& a, const std::list>::iterator& b) const noexcept + { + return a == b->first; + } + + bool operator()(const std::list>::iterator& a, const std::list>::iterator& b) const noexcept + { + return operator()(a->first, b); + } + }; + + struct TileHashMap + { + using iterator = std::list>::iterator; + + TileHashMap() noexcept = default; + + iterator end() noexcept + { + return _lru.end(); + } + + iterator find(const AtlasKey& key) + { + const auto it = _map.find(key); + if (it != _map.end()) + { + // Move the key to the head of the LRU queue. + makeNewest(*it); + return *it; + } + return end(); + } + + iterator insert(AtlasKey&& key, AtlasValue&& value) + { + // Insert the key/value right at the head of the LRU queue, just like find(). + // + // && decays to & if the argument is named, because C++ is a simple language + // and so you have to std::move it again, because C++ is a simple language. + _lru.emplace_front(std::move(key), std::move(value)); + auto it = _lru.begin(); + _map.emplace(it); + return it; + } + + void makeNewest(const iterator& it) + { + _lru.splice(_lru.begin(), _lru, it); + } + + void popOldestTiles(std::vector& out) noexcept + { + Expects(!_lru.empty()); + const auto it = --_lru.end(); + + const auto key = it->first.data(); + const auto value = it->second.data(); + const auto beg = &value->coords[0]; + const auto cellCount = key->attributes.cellCount; + + const auto offset = out.size(); + out.resize(offset + cellCount); + std::copy_n(beg, cellCount, out.begin() + offset); + + _map.erase(it); + _lru.pop_back(); + } + + private: + // Please don't copy this code. It's a proof-of-concept. + // If you need a LRU hash-map, write a custom one with an intrusive + // prev/next linked list (it's easier than you might think!). 
+ std::list> _lru; + std::unordered_set _map; + }; + + // TileAllocator yields `tileSize`-sized tiles for our texture atlas. + // While doing so it'll grow the atlas size() by a factor of 2 if needed. + // Once the setMaxArea() is exceeded it'll stop growing and instead + // snatch tiles back from the oldest TileHashMap entries. + // + // The quadratic growth works by alternating the size() + // between an 1:1 and 2:1 aspect ratio, like so: + // (64,64) -> (128,64) -> (128,128) -> (256,128) -> (256,256) + // These initial tile positions allocate() returns are in a Z + // pattern over the available space in the atlas texture. + // You can log the `return _pos;` in allocate() using "Tracepoint"s + // in Visual Studio if you'd like to understand the Z pattern better. + struct TileAllocator { - const AtlasKey* key; - const AtlasValue* value; + TileAllocator() = default; + + explicit TileAllocator(u16x2 tileSize, u16x2 windowSize) noexcept : + _tileSize{ tileSize } + { + const auto initialSize = std::max(u16{ _absoluteMinSize }, std::bit_ceil(std::max(tileSize.x, tileSize.y))); + _size = { initialSize, initialSize }; + _limit = { gsl::narrow_cast(initialSize - _tileSize.x), gsl::narrow_cast(initialSize - _tileSize.y) }; + setMaxArea(windowSize); + } + + u16x2 size() const noexcept + { + return _size; + } + + void setMaxArea(u16x2 windowSize) noexcept + { + // _generate() uses a quadratic growth factor for _size's area. + // Once it exceeds the _maxArea, it'll start snatching tiles back from the + // TileHashMap using its LRU queue. Since _size will at least reach half + // of _maxSize (because otherwise it could still grow by a factor of 2) + // and by ensuring that _maxArea is at least twice the window size + // we make it impossible* for _generate() to return false before + // TileHashMap contains at least as many tiles as the window contains. + // If that wasn't the case we'd snatch and reuse tiles that are still in use. 
+ // * lhecker's legal department: + // No responsibility is taken for the correctness of this information. + setMaxArea(static_cast(windowSize.x) * static_cast(windowSize.y) * 2); + } + + void setMaxArea(size_t max) noexcept + { + // We need to reserve at least 1 extra `tileArea`, because the tile + // at position {0,0} is already reserved for the cursor texture. + const auto tileArea = static_cast(_tileSize.x) * static_cast(_tileSize.y); + _maxArea = clamp(max + tileArea, _absoluteMinArea, _absoluteMaxArea); + _updateCanGenerate(); + } + + u16x2 allocate(TileHashMap& map) noexcept + { + if (_generate()) + { + return _pos; + } + + if (_cache.empty()) + { + map.popOldestTiles(_cache); + } + + const auto pos = _cache.back(); + _cache.pop_back(); + return pos; + } + + private: + // This method generates the Z pattern coordinates + // described above in the TileAllocator comment. + bool _generate() noexcept + { + if (!_canGenerate) + { + return false; + } + + // We need to backup _pos/_size in case our resize below exceeds _maxArea. + // In that case we have to restore _pos/_size so that if _maxArea is increased + // (window resize for instance), we can pick up where we previously left off. + const auto pos = _pos; + + _pos.x += _tileSize.x; + if (_pos.x <= _limit.x) + { + return true; + } + + _pos.y += _tileSize.y; + if (_pos.y <= _limit.y) + { + _pos.x = _originX; + return true; + } + + // Same as for pos. + const auto size = _size; + + // This implements a quadratic growth factor for _size, by + // alternating between a 1:1 and 2:1 aspect ratio, like so: + // (64,64) -> (128,64) -> (128,128) -> (256,128) -> (256,256) + // This behavior is strictly dependent on setMaxArea(u16x2)'s + // behavior. See its comment for an explanation. 
+ if (_size.x == _size.y) + { + _size.x *= 2; + _pos.y = 0; + } + else + { + _size.y *= 2; + _pos.x = 0; + } + + _updateCanGenerate(); + if (_canGenerate) + { + _limit = { gsl::narrow_cast(_size.x - _tileSize.x), gsl::narrow_cast(_size.y - _tileSize.y) }; + _originX = _pos.x; + } + else + { + _size = size; + _pos = pos; + } + + return _canGenerate; + } + + void _updateCanGenerate() noexcept + { + _canGenerate = static_cast(_size.x) * static_cast(_size.y) <= _maxArea; + } + + static constexpr u16 _absoluteMinSize = 256; + static constexpr size_t _absoluteMinArea = _absoluteMinSize * _absoluteMinSize; + // TODO: Consider using IDXGIAdapter3::QueryVideoMemoryInfo() and IDXGIAdapter3::RegisterVideoMemoryBudgetChangeNotificationEvent() + // That way we can make better use of a user's available video memory. + static constexpr size_t _absoluteMaxArea = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION * D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION; + + std::vector _cache; + size_t _maxArea = _absoluteMaxArea; + u16x2 _tileSize; + u16x2 _size; + u16x2 _limit; + // Since _pos starts at {0, 0}, it'll result in the first allocate()d tile to be at {_tileSize.x, 0}. + // Coincidentally, that's exactly what we want as the cursor texture lives at {0, 0}. + u16x2 _pos; + u16 _originX = 0; + // Indicates whether we've exhausted our Z pattern across the atlas texture. + // If this is false, we have to snatch tiles back from TileHashMap. 
+ bool _canGenerate = true; }; struct CachedCursorOptions @@ -536,6 +828,7 @@ namespace Microsoft::Console::Render u32 cursorColor = INVALID_COLOR; u16 cursorType = gsl::narrow_cast(CursorType::Legacy); u8 heightPercentage = 20; + u8 _padding = 0; ATLAS_POD_OPS(CachedCursorOptions) }; @@ -562,8 +855,12 @@ namespace Microsoft::Console::Render alignas(sizeof(f32)) f32 enhancedContrast = 0; alignas(sizeof(u32)) u32 cellCountX = 0; alignas(sizeof(u32x2)) u32x2 cellSize; - alignas(sizeof(u32x2)) u32x2 underlinePos; - alignas(sizeof(u32x2)) u32x2 strikethroughPos; + alignas(sizeof(u32)) u32 underlinePos = 0; + alignas(sizeof(u32)) u32 underlineWidth = 0; + alignas(sizeof(u32)) u32 strikethroughPos = 0; + alignas(sizeof(u32)) u32 strikethroughWidth = 0; + alignas(sizeof(u32x2)) u32x2 doubleUnderlinePos; + alignas(sizeof(u32)) u32 thinLineWidth = 0; alignas(sizeof(u32)) u32 backgroundColor = 0; alignas(sizeof(u32)) u32 cursorColor = 0; alignas(sizeof(u32)) u32 selectionColor = 0; @@ -571,6 +868,16 @@ namespace Microsoft::Console::Render #pragma warning(suppress : 4324) // 'ConstBuffer': structure was padded due to alignment specifier }; + struct alignas(16) CustomConstBuffer + { + // WARNING: Same rules as for ConstBuffer above apply. 
+ alignas(sizeof(f32)) f32 time = 0; + alignas(sizeof(f32)) f32 scale = 0; + alignas(sizeof(f32x2)) f32x2 resolution; + alignas(sizeof(f32x4)) f32x4 background; +#pragma warning(suppress : 4324) // 'CustomConstBuffer': structure was padded due to alignment specifier + }; + // Handled in BeginPaint() enum class ApiInvalidations : u8 { @@ -612,29 +919,43 @@ namespace Microsoft::Console::Render IDWriteTextFormat* _getTextFormat(bool bold, bool italic) const noexcept; const Buffer& _getTextFormatAxis(bool bold, bool italic) const noexcept; Cell* _getCell(u16 x, u16 y) noexcept; + TileHashMap::iterator* _getCellGlyphMapping(u16 x, u16 y) noexcept; void _setCellFlags(u16r coords, CellFlags mask, CellFlags bits) noexcept; - u16x2 _allocateAtlasTile() noexcept; void _flushBufferLine(); - void _emplaceGlyph(IDWriteFontFace* fontFace, size_t bufferPos1, size_t bufferPos2); + bool _emplaceGlyph(IDWriteFontFace* fontFace, size_t bufferPos1, size_t bufferPos2); // AtlasEngine.api.cpp - void _resolveAntialiasingMode() noexcept; + void _resolveTransparencySettings() noexcept; void _updateFont(const wchar_t* faceName, const FontInfoDesired& fontInfoDesired, FontInfo& fontInfo, const std::unordered_map& features, const std::unordered_map& axes); void _resolveFontMetrics(const wchar_t* faceName, const FontInfoDesired& fontInfoDesired, FontInfo& fontInfo, FontMetrics* fontMetrics = nullptr) const; // AtlasEngine.r.cpp + void _renderWithCustomShader() const; void _setShaderResources() const; void _updateConstantBuffer() const noexcept; void _adjustAtlasSize(); - void _reserveScratchpadSize(u16 minWidth); void _processGlyphQueue(); - void _drawGlyph(const AtlasQueueItem& item) const; - void _drawCursor(); - void _copyScratchpadTile(uint32_t scratchpadIndex, u16x2 target, uint32_t copyFlags = 0) const noexcept; - + void _drawGlyph(const TileHashMap::iterator& it) const; + CachedGlyphLayout _getCachedGlyphLayout(const wchar_t* chars, u16 charsLength, u16 cellCount, IDWriteTextFormat* 
textFormat, bool coloredGlyph) const; + void _drawCursor(u16r rect, u32 color, bool clear); + ID2D1Brush* _brushWithColor(u32 color); + void _d2dPresent(); + void _d2dCreateRenderTarget(); + void _d2dDrawDirtyArea(); + u16 _d2dDrawGlyph(const TileHashMap::iterator& it, u16x2 coord, u32 color); + void _d2dDrawLine(u16r rect, u16 pos, u16 width, u32 color, ID2D1StrokeStyle* strokeStyle = nullptr); + void _d2dFillRectangle(u16r rect, u32 color); + void _d2dCellFlagRendererCursor(u16r rect, u32 color); + void _d2dCellFlagRendererSelected(u16r rect, u32 color); + void _d2dCellFlagRendererUnderline(u16r rect, u32 color); + void _d2dCellFlagRendererUnderlineDotted(u16r rect, u32 color); + void _d2dCellFlagRendererUnderlineDouble(u16r rect, u32 color); + void _d2dCellFlagRendererStrikethrough(u16r rect, u32 color); + + static constexpr bool debugForceD2DMode = false; static constexpr bool debugGlyphGenerationPerformance = false; - static constexpr bool debugGeneralPerformance = false || debugGlyphGenerationPerformance; - static constexpr bool continuousRedraw = false || debugGeneralPerformance; + static constexpr bool debugTextParsingPerformance = false || debugGlyphGenerationPerformance; + static constexpr bool debugGeneralPerformance = false || debugTextParsingPerformance; static constexpr u16 u16min = 0x0000; static constexpr u16 u16max = 0xffff; @@ -661,6 +982,9 @@ namespace Microsoft::Console::Render struct Resources { + // DXGI resources + wil::com_ptr dxgiFactory; + // D3D resources wil::com_ptr device; wil::com_ptr deviceContext; @@ -672,42 +996,54 @@ namespace Microsoft::Console::Render wil::com_ptr constantBuffer; wil::com_ptr cellBuffer; wil::com_ptr cellView; + wil::com_ptr customOffscreenTexture; + wil::com_ptr customOffscreenTextureView; + wil::com_ptr customOffscreenTextureTargetView; + wil::com_ptr customVertexShader; + wil::com_ptr customPixelShader; + wil::com_ptr customShaderConstantBuffer; + wil::com_ptr customShaderSamplerState; + 
std::chrono::steady_clock::time_point customShaderStartTime; // D2D resources wil::com_ptr atlasBuffer; wil::com_ptr atlasView; - wil::com_ptr atlasScratchpad; - wil::com_ptr d2dRenderTarget; - wil::com_ptr brush; + wil::com_ptr d2dRenderTarget; + wil::com_ptr brush; wil::com_ptr textFormats[2][2]; Buffer textFormatAxes[2][2]; wil::com_ptr typography; + wil::com_ptr dottedStrokeStyle; Buffer cells; // invalidated by ApiInvalidations::Size + Buffer cellGlyphMapping; // invalidated by ApiInvalidations::Size f32x2 cellSizeDIP; // invalidated by ApiInvalidations::Font, caches _api.cellSize but in DIP - u16x2 cellSize; // invalidated by ApiInvalidations::Font, caches _api.cellSize u16x2 cellCount; // invalidated by ApiInvalidations::Font|Size, caches _api.cellCount - u16 underlinePos = 0; - u16 strikethroughPos = 0; - u16 lineThickness = 0; u16 dpi = USER_DEFAULT_SCREEN_DPI; // invalidated by ApiInvalidations::Font, caches _api.dpi - u16 maxEncounteredCellCount = 0; - u16 scratchpadCellWidth = 0; - u16x2 atlasSizeInPixelLimit; // invalidated by ApiInvalidations::Font + FontMetrics fontMetrics; // invalidated by ApiInvalidations::Font, cached _api.fontMetrics + f32 dipPerPixel = 1.0f; // invalidated by ApiInvalidations::Font, caches USER_DEFAULT_SCREEN_DPI / _api.dpi + f32 pixelPerDIP = 1.0f; // invalidated by ApiInvalidations::Font, caches _api.dpi / USER_DEFAULT_SCREEN_DPI u16x2 atlasSizeInPixel; // invalidated by ApiInvalidations::Font - u16x2 atlasPosition; - std::unordered_map glyphs; - std::vector glyphQueue; + TileHashMap glyphs; + TileAllocator tileAllocator; + std::vector glyphQueue; f32 gamma = 0; f32 cleartypeEnhancedContrast = 0; f32 grayscaleEnhancedContrast = 0; u32 backgroundColor = 0xff000000; u32 selectionColor = 0x7fffffff; + u32 brushColor = 0xffffffff; CachedCursorOptions cursorOptions; RenderInvalidations invalidations = RenderInvalidations::None; + til::rect dirtyRect; + i16 scrollOffset = 0; + bool d2dMode = false; + bool waitForPresentation = 
false; + bool requiresContinuousRedraw = false; + #ifndef NDEBUG // See documentation for IDXGISwapChain2::GetFrameLatencyWaitableObject method: // > For every frame it renders, the app should wait on this handle before starting any rendering operations. @@ -724,7 +1060,7 @@ namespace Microsoft::Console::Render std::vector bufferLine; std::vector bufferLineColumn; Buffer bufferLineMetadata; - std::vector analysisResults; + std::vector analysisResults; Buffer clusterMap; Buffer textProps; Buffer glyphIndices; @@ -739,7 +1075,7 @@ namespace Microsoft::Console::Render u16x2 sizeInPixel; // changes are flagged as ApiInvalidations::Size // UpdateDrawingBrushes() - u32 backgroundOpaqueMixin = 0xff000000; // changes are flagged as ApiInvalidations::Device + u32 backgroundOpaqueMixin = 0xff000000; // changes are flagged as ApiInvalidations::SwapChain u32x2 currentColor; AtlasKeyAttributes attributes{}; u16x2 lastPaintBufferLineCoord; @@ -758,12 +1094,17 @@ namespace Microsoft::Console::Render i16 scrollOffset = 0; std::function warningCallback; - std::function swapChainChangedCallback; + std::function swapChainChangedCallback; wil::unique_handle swapChainHandle; HWND hwnd = nullptr; u16 dpi = USER_DEFAULT_SCREEN_DPI; // changes are flagged as ApiInvalidations::Font|Size u8 antialiasingMode = D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE; // changes are flagged as ApiInvalidations::Font - u8 realizedAntialiasingMode = D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE; // caches antialiasingMode, depends on antialiasingMode and backgroundOpaqueMixin, see _resolveAntialiasingMode + u8 realizedAntialiasingMode = D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE; // caches antialiasingMode, depends on antialiasingMode and backgroundOpaqueMixin, see _resolveTransparencySettings + bool enableTransparentBackground = false; + + std::wstring customPixelShaderPath; // changes are flagged as ApiInvalidations::Device + bool useRetroTerminalEffect = false; // changes are flagged as ApiInvalidations::Device + bool 
useSoftwareRendering = false; // changes are flagged as ApiInvalidations::Device ApiInvalidations invalidations = ApiInvalidations::Device; } _api; diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp index 804f2687b66..e727570a4a4 100644 --- a/src/renderer/atlas/AtlasEngine.r.cpp +++ b/src/renderer/atlas/AtlasEngine.r.cpp @@ -18,9 +18,42 @@ // Disable a bunch of warnings which get in the way of writing performant code. #pragma warning(disable : 26429) // Symbol 'data' is never tested for nullness, it can be marked as not_null (f.23). #pragma warning(disable : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4). +#pragma warning(disable : 26459) // You called an STL function '...' with a raw pointer parameter at position '...' that may be unsafe [...]. #pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). #pragma warning(disable : 26482) // Only index into arrays using constant expressions (bounds.2). +// https://en.wikipedia.org/wiki/Inversion_list +template +constexpr bool isInInversionList(const std::array& ranges, wchar_t needle) +{ + const auto beg = ranges.begin(); + const auto end = ranges.end(); + decltype(ranges.begin()) it; + + // Linear search is faster than binary search for short inputs. 
+ if constexpr (N < 16) + { + it = std::find_if(beg, end, [=](wchar_t v) { return needle < v; }); + } + else + { + it = std::upper_bound(beg, end, needle); + } + + const auto idx = it - beg; + return (idx & 1) != 0; +} + +template +constexpr T colorFromU32(uint32_t rgba) +{ + const auto r = static_cast((rgba >> 0) & 0xff) / 255.0f; + const auto g = static_cast((rgba >> 8) & 0xff) / 255.0f; + const auto b = static_cast((rgba >> 16) & 0xff) / 255.0f; + const auto a = static_cast((rgba >> 24) & 0xff) / 255.0f; + return { r, g, b, a }; +} + using namespace Microsoft::Console::Render; #pragma region IRenderEngine @@ -30,83 +63,223 @@ using namespace Microsoft::Console::Render; [[nodiscard]] HRESULT AtlasEngine::Present() noexcept try { - _adjustAtlasSize(); - _reserveScratchpadSize(_r.maxEncounteredCellCount); - _processGlyphQueue(); + const til::rect fullRect{ 0, 0, _r.cellCount.x, _r.cellCount.y }; - if (WI_IsFlagSet(_r.invalidations, RenderInvalidations::Cursor)) + // A change in the selection or background color (etc.) forces a full redraw. + if (WI_IsFlagSet(_r.invalidations, RenderInvalidations::ConstBuffer) || _r.customPixelShader) { - _drawCursor(); - WI_ClearFlag(_r.invalidations, RenderInvalidations::Cursor); + _r.dirtyRect = fullRect; } - // The values the constant buffer depends on are potentially updated after BeginPaint(). - if (WI_IsFlagSet(_r.invalidations, RenderInvalidations::ConstBuffer)) + if (!_r.dirtyRect) { - _updateConstantBuffer(); - WI_ClearFlag(_r.invalidations, RenderInvalidations::ConstBuffer); + return S_OK; } + // See documentation for IDXGISwapChain2::GetFrameLatencyWaitableObject method: + // > For every frame it renders, the app should wait on this handle before starting any rendering operations. + // > Note that this requirement includes the first frame the app renders with the swap chain. 
+ assert(debugGeneralPerformance || _r.frameLatencyWaitableObjectUsed); + + if (_r.d2dMode) [[unlikely]] { -#pragma warning(suppress : 26494) // Variable 'mapped' is uninitialized. Always initialize an object (type.5). - D3D11_MAPPED_SUBRESOURCE mapped; - THROW_IF_FAILED(_r.deviceContext->Map(_r.cellBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); - assert(mapped.RowPitch >= _r.cells.size() * sizeof(Cell)); - memcpy(mapped.pData, _r.cells.data(), _r.cells.size() * sizeof(Cell)); - _r.deviceContext->Unmap(_r.cellBuffer.get(), 0); + _d2dPresent(); } + else + { + _adjustAtlasSize(); + _processGlyphQueue(); + + // The values the constant buffer depends on are potentially updated after BeginPaint(). + if (WI_IsFlagSet(_r.invalidations, RenderInvalidations::ConstBuffer)) + { + _updateConstantBuffer(); + WI_ClearFlag(_r.invalidations, RenderInvalidations::ConstBuffer); + } - // After Present calls, the back buffer needs to explicitly be - // re-bound to the D3D11 immediate context before it can be used again. - _r.deviceContext->OMSetRenderTargets(1, _r.renderTargetView.addressof(), nullptr); - _r.deviceContext->Draw(3, 0); + { +#pragma warning(suppress : 26494) // Variable 'mapped' is uninitialized. Always initialize an object (type.5). + D3D11_MAPPED_SUBRESOURCE mapped; + THROW_IF_FAILED(_r.deviceContext->Map(_r.cellBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); + assert(mapped.RowPitch >= _r.cells.size() * sizeof(Cell)); + memcpy(mapped.pData, _r.cells.data(), _r.cells.size() * sizeof(Cell)); + _r.deviceContext->Unmap(_r.cellBuffer.get(), 0); + } + + if (_r.customPixelShader) [[unlikely]] + { + _renderWithCustomShader(); + } + else + { + _r.deviceContext->OMSetRenderTargets(1, _r.renderTargetView.addressof(), nullptr); + _r.deviceContext->Draw(3, 0); + } + } // See documentation for IDXGISwapChain2::GetFrameLatencyWaitableObject method: // > For every frame it renders, the app should wait on this handle before starting any rendering operations. 
// > Note that this requirement includes the first frame the app renders with the swap chain. - assert(_r.frameLatencyWaitableObjectUsed); + assert(debugGeneralPerformance || _r.frameLatencyWaitableObjectUsed); - // > IDXGISwapChain::Present: Partial Presentation (using a dirty rects or scroll) is not supported - // > for SwapChains created with DXGI_SWAP_EFFECT_DISCARD or DXGI_SWAP_EFFECT_FLIP_DISCARD. - // ---> No need to call IDXGISwapChain1::Present1. - // TODO: Would IDXGISwapChain1::Present1 and its dirty rects have benefits for remote desktop? - THROW_IF_FAILED(_r.swapChain->Present(1, 0)); + if (_r.dirtyRect != fullRect) + { + auto dirtyRectInPx = _r.dirtyRect; + dirtyRectInPx.left *= _r.fontMetrics.cellSize.x; + dirtyRectInPx.top *= _r.fontMetrics.cellSize.y; + dirtyRectInPx.right *= _r.fontMetrics.cellSize.x; + dirtyRectInPx.bottom *= _r.fontMetrics.cellSize.y; - // On some GPUs with tile based deferred rendering (TBDR) architectures, binding - // RenderTargets that already have contents in them (from previous rendering) incurs a - // cost for having to copy the RenderTarget contents back into tile memory for rendering. - // - // On Windows 10 with DXGI_SWAP_EFFECT_FLIP_DISCARD we get this for free. 
- if (!_sr.isWindows10OrGreater) + RECT scrollRect{}; + POINT scrollOffset{}; + DXGI_PRESENT_PARAMETERS params{ + .DirtyRectsCount = 1, + .pDirtyRects = dirtyRectInPx.as_win32_rect(), + }; + + if (_r.scrollOffset) + { + scrollRect = { + 0, + std::max(0, _r.scrollOffset), + _r.cellCount.x, + _r.cellCount.y + std::min(0, _r.scrollOffset), + }; + scrollOffset = { + 0, + _r.scrollOffset, + }; + + scrollRect.top *= _r.fontMetrics.cellSize.y; + scrollRect.right *= _r.fontMetrics.cellSize.x; + scrollRect.bottom *= _r.fontMetrics.cellSize.y; + + scrollOffset.y *= _r.fontMetrics.cellSize.y; + + params.pScrollRect = &scrollRect; + params.pScrollOffset = &scrollOffset; + } + + THROW_IF_FAILED(_r.swapChain->Present1(1, 0, &params)); + } + else { - _r.deviceContext->DiscardView(_r.renderTargetView.get()); + THROW_IF_FAILED(_r.swapChain->Present(1, 0)); + } + + _r.waitForPresentation = true; + + if (!_r.dxgiFactory->IsCurrent()) + { + WI_SetFlag(_api.invalidations, ApiInvalidations::Device); } return S_OK; } catch (const wil::ResultException& exception) { + // TODO: this writes to _api. return _handleException(exception); } CATCH_RETURN() +[[nodiscard]] bool AtlasEngine::RequiresContinuousRedraw() noexcept +{ + return debugGeneralPerformance || _r.requiresContinuousRedraw; +} + +void AtlasEngine::WaitUntilCanRender() noexcept +{ + // IDXGISwapChain2::GetFrameLatencyWaitableObject returns an auto-reset event. + // Once we've waited on the event, waiting on it again will block until the timeout elapses. + // _r.waitForPresentation guards against this.
+ if (!debugGeneralPerformance && std::exchange(_r.waitForPresentation, false)) + { + WaitForSingleObjectEx(_r.frameLatencyWaitableObject.get(), 100, true); +#ifndef NDEBUG + _r.frameLatencyWaitableObjectUsed = true; +#endif + } +} + #pragma endregion -void AtlasEngine::_setShaderResources() const +void AtlasEngine::_renderWithCustomShader() const { - _r.deviceContext->VSSetShader(_r.vertexShader.get(), nullptr, 0); - _r.deviceContext->PSSetShader(_r.pixelShader.get(), nullptr, 0); + // Render with our main shader just like Present(). + { + // OM: Output Merger + _r.deviceContext->OMSetRenderTargets(1, _r.customOffscreenTextureTargetView.addressof(), nullptr); + _r.deviceContext->Draw(3, 0); + } + + // Update the custom shader's constant buffer. + { + CustomConstBuffer data; + data.time = std::chrono::duration(std::chrono::steady_clock::now() - _r.customShaderStartTime).count(); + data.scale = _r.pixelPerDIP; + data.resolution.x = static_cast(_r.cellCount.x * _r.fontMetrics.cellSize.x); + data.resolution.y = static_cast(_r.cellCount.y * _r.fontMetrics.cellSize.y); + data.background = colorFromU32(_r.backgroundColor); + +#pragma warning(suppress : 26494) // Variable 'mapped' is uninitialized. Always initialize an object (type.5). + D3D11_MAPPED_SUBRESOURCE mapped; + THROW_IF_FAILED(_r.deviceContext->Map(_r.customShaderConstantBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); + assert(mapped.RowPitch >= sizeof(data)); + memcpy(mapped.pData, &data, sizeof(data)); + _r.deviceContext->Unmap(_r.customShaderConstantBuffer.get(), 0); + } + + // Render with the custom shader. + { + // OM: Output Merger + // customOffscreenTextureView was just rendered to via customOffscreenTextureTargetView and is + // set as the output target. Before we can use it as an input we have to remove it as an output. 
+ _r.deviceContext->OMSetRenderTargets(1, _r.renderTargetView.addressof(), nullptr); + + // VS: Vertex Shader + _r.deviceContext->VSSetShader(_r.customVertexShader.get(), nullptr, 0); + + // PS: Pixel Shader + _r.deviceContext->PSSetShader(_r.customPixelShader.get(), nullptr, 0); + _r.deviceContext->PSSetConstantBuffers(0, 1, _r.customShaderConstantBuffer.addressof()); + _r.deviceContext->PSSetShaderResources(0, 1, _r.customOffscreenTextureView.addressof()); + _r.deviceContext->PSSetSamplers(0, 1, _r.customShaderSamplerState.addressof()); + + _r.deviceContext->Draw(4, 0); + } + + // For the next frame we need to restore our context state. + { + // VS: Vertex Shader + _r.deviceContext->VSSetShader(_r.vertexShader.get(), nullptr, 0); + // PS: Pixel Shader + _r.deviceContext->PSSetShader(_r.pixelShader.get(), nullptr, 0); + _r.deviceContext->PSSetConstantBuffers(0, 1, _r.constantBuffer.addressof()); + const std::array resources{ _r.cellView.get(), _r.atlasView.get() }; + _r.deviceContext->PSSetShaderResources(0, gsl::narrow_cast(resources.size()), resources.data()); + _r.deviceContext->PSSetSamplers(0, 0, nullptr); + } +} + +void AtlasEngine::_setShaderResources() const +{ + // IA: Input Assembler // Our vertex shader uses a trick from Bill Bilodeau published in // "Vertex Shader Tricks" at GDC14 to draw a fullscreen triangle // without vertex/index buffers. This prepares our context for this. 
_r.deviceContext->IASetVertexBuffers(0, 0, nullptr, nullptr, nullptr); _r.deviceContext->IASetIndexBuffer(nullptr, DXGI_FORMAT_UNKNOWN, 0); _r.deviceContext->IASetInputLayout(nullptr); - _r.deviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + _r.deviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - _r.deviceContext->PSSetConstantBuffers(0, 1, _r.constantBuffer.addressof()); + // VS: Vertex Shader + _r.deviceContext->VSSetShader(_r.vertexShader.get(), nullptr, 0); + // PS: Pixel Shader + _r.deviceContext->PSSetShader(_r.pixelShader.get(), nullptr, 0); + _r.deviceContext->PSSetConstantBuffers(0, 1, _r.constantBuffer.addressof()); const std::array resources{ _r.cellView.get(), _r.atlasView.get() }; _r.deviceContext->PSSetShaderResources(0, gsl::narrow_cast(resources.size()), resources.data()); } @@ -118,17 +291,20 @@ void AtlasEngine::_updateConstantBuffer() const noexcept ConstBuffer data; data.viewport.x = 0; data.viewport.y = 0; - data.viewport.z = static_cast(_r.cellCount.x * _r.cellSize.x); - data.viewport.w = static_cast(_r.cellCount.y * _r.cellSize.y); + data.viewport.z = static_cast(_r.cellCount.x * _r.fontMetrics.cellSize.x); + data.viewport.w = static_cast(_r.cellCount.y * _r.fontMetrics.cellSize.y); DWrite_GetGammaRatios(_r.gamma, data.gammaRatios); data.enhancedContrast = useClearType ? 
_r.cleartypeEnhancedContrast : _r.grayscaleEnhancedContrast; data.cellCountX = _r.cellCount.x; - data.cellSize.x = _r.cellSize.x; - data.cellSize.y = _r.cellSize.y; - data.underlinePos.x = _r.underlinePos; - data.underlinePos.y = _r.underlinePos + _r.lineThickness; - data.strikethroughPos.x = _r.strikethroughPos; - data.strikethroughPos.y = _r.strikethroughPos + _r.lineThickness; + data.cellSize.x = _r.fontMetrics.cellSize.x; + data.cellSize.y = _r.fontMetrics.cellSize.y; + data.underlinePos = _r.fontMetrics.underlinePos; + data.underlineWidth = _r.fontMetrics.underlineWidth; + data.strikethroughPos = _r.fontMetrics.strikethroughPos; + data.strikethroughWidth = _r.fontMetrics.strikethroughWidth; + data.doubleUnderlinePos.x = _r.fontMetrics.doubleUnderlinePos.x; + data.doubleUnderlinePos.y = _r.fontMetrics.doubleUnderlinePos.y; + data.thinLineWidth = _r.fontMetrics.thinLineWidth; data.backgroundColor = _r.backgroundColor; data.cursorColor = _r.cursorOptions.cursorColor; data.selectionColor = _r.selectionColor; @@ -139,70 +315,27 @@ void AtlasEngine::_updateConstantBuffer() const noexcept void AtlasEngine::_adjustAtlasSize() { - if (_r.atlasPosition.y < _r.atlasSizeInPixel.y && _r.atlasPosition.x < _r.atlasSizeInPixel.x) + // Only grow the atlas texture if our tileAllocator needs it to be larger. + // We have no way of shrinking our tileAllocator at the moment, + // so technically a `requiredSize != _r.atlasSizeInPixel` + // comparison would be sufficient, but better safe than sorry. 
+ const auto requiredSize = _r.tileAllocator.size(); + if (requiredSize.y <= _r.atlasSizeInPixel.y && requiredSize.x <= _r.atlasSizeInPixel.x) { return; } - const u32 limitX = _r.atlasSizeInPixelLimit.x; - const u32 limitY = _r.atlasSizeInPixelLimit.y; - const u32 posX = _r.atlasPosition.x; - const u32 posY = _r.atlasPosition.y; - const u32 cellX = _r.cellSize.x; - const u32 cellY = _r.cellSize.y; - const auto perCellArea = cellX * cellY; - - // The texture atlas is filled like this: - // x → - // y +--------------+ - // ↓ |XXXXXXXXXXXXXX| - // |XXXXXXXXXXXXXX| - // |XXXXX↖ | - // | | | - // +------|-------+ - // This is where _r.atlasPosition points at. - // - // Each X is a glyph texture tile that's occupied. - // We can compute the area of pixels consumed by adding the first - // two lines of X (rectangular) together with the last line of X. - const auto currentArea = posY * limitX + posX * cellY; - // minArea reserves enough room for 64 cells in all cases (mainly during startup). - const auto minArea = 64 * perCellArea; - auto newArea = std::max(minArea, currentArea); - - // I want the texture to grow exponentially similar to std::vector, as this - // ensures we don't need to resize the texture again right after having done. - // This rounds newArea up to the next power of 2. - unsigned long int index; - _BitScanReverse(&index, newArea); // newArea can't be 0 - newArea = u32{ 1 } << (index + 1); - - const auto pixelPerRow = limitX * cellY; - // newArea might be just large enough that it spans N full rows of cells and one additional row - // just barely. This algorithm rounds up newArea to the _next_ multiple of cellY. - const auto wantedHeight = (newArea + pixelPerRow - 1) / pixelPerRow * cellY; - // The atlas might either be a N rows of full width (xLimit) or just one - // row (where wantedHeight == cellY) that doesn't quite fill it's maximum width yet. - const auto wantedWidth = wantedHeight != cellY ? 
limitX : newArea / perCellArea * cellX; - - // We know that limitX/limitY were u16 originally, and thus it's safe to narrow_cast it back. - const auto height = gsl::narrow_cast(std::min(limitY, wantedHeight)); - const auto width = gsl::narrow_cast(std::min(limitX, wantedWidth)); - - assert(width != 0); - assert(height != 0); - wil::com_ptr atlasBuffer; wil::com_ptr atlasView; { D3D11_TEXTURE2D_DESC desc{}; - desc.Width = width; - desc.Height = height; + desc.Width = requiredSize.x; + desc.Height = requiredSize.y; desc.MipLevels = 1; desc.ArraySize = 1; desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; desc.SampleDesc = { 1, 0 }; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; THROW_IF_FAILED(_r.device->CreateTexture2D(&desc, nullptr, atlasBuffer.addressof())); THROW_IF_FAILED(_r.device->CreateShaderResourceView(atlasBuffer.get(), nullptr, atlasView.addressof())); } @@ -222,43 +355,8 @@ void AtlasEngine::_adjustAtlasSize() _r.deviceContext->CopySubresourceRegion1(atlasBuffer.get(), 0, 0, 0, 0, _r.atlasBuffer.get(), 0, &box, D3D11_COPY_NO_OVERWRITE); } - _r.atlasSizeInPixel = u16x2{ width, height }; - _r.atlasBuffer = std::move(atlasBuffer); - _r.atlasView = std::move(atlasView); - _setShaderResources(); - - WI_SetFlagIf(_r.invalidations, RenderInvalidations::Cursor, !copyFromExisting); -} - -void AtlasEngine::_reserveScratchpadSize(u16 minWidth) -{ - if (minWidth <= _r.scratchpadCellWidth) - { - return; - } - - // The new size is the greater of ... 
cells wide: - // * 2 - // * minWidth - // * current size * 1.5 - const auto newWidth = std::max(std::max(2, minWidth), _r.scratchpadCellWidth + (_r.scratchpadCellWidth >> 1)); - - _r.d2dRenderTarget.reset(); - _r.atlasScratchpad.reset(); - - { - D3D11_TEXTURE2D_DESC desc{}; - desc.Width = _r.cellSize.x * newWidth; - desc.Height = _r.cellSize.y; - desc.MipLevels = 1; - desc.ArraySize = 1; - desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; - desc.SampleDesc = { 1, 0 }; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; - THROW_IF_FAILED(_r.device->CreateTexture2D(&desc, nullptr, _r.atlasScratchpad.put())); - } { - const auto surface = _r.atlasScratchpad.query(); + const auto surface = atlasBuffer.query(); wil::com_ptr renderingParams; DWrite_GetRenderParams(_sr.dwriteFactory.get(), &_r.gamma, &_r.cleartypeEnhancedContrast, &_r.grayscaleEnhancedContrast, renderingParams.addressof()); @@ -268,7 +366,9 @@ void AtlasEngine::_reserveScratchpadSize(u16 minWidth) props.pixelFormat = { DXGI_FORMAT_B8G8R8A8_UNORM, D2D1_ALPHA_MODE_PREMULTIPLIED }; props.dpiX = static_cast(_r.dpi); props.dpiY = static_cast(_r.dpi); - THROW_IF_FAILED(_sr.d2dFactory->CreateDxgiSurfaceRenderTarget(surface.get(), &props, _r.d2dRenderTarget.put())); + wil::com_ptr renderTarget; + THROW_IF_FAILED(_sr.d2dFactory->CreateDxgiSurfaceRenderTarget(surface.get(), &props, renderTarget.addressof())); + _r.d2dRenderTarget = renderTarget.query(); // We don't really use D2D for anything except DWrite, but it // can't hurt to ensure that everything it does is pixel aligned. 
@@ -281,104 +381,331 @@ void AtlasEngine::_reserveScratchpadSize(u16 minWidth) } { static constexpr D2D1_COLOR_F color{ 1, 1, 1, 1 }; - wil::com_ptr brush; - THROW_IF_FAILED(_r.d2dRenderTarget->CreateSolidColorBrush(&color, nullptr, brush.addressof())); - _r.brush = brush.query(); + THROW_IF_FAILED(_r.d2dRenderTarget->CreateSolidColorBrush(&color, nullptr, _r.brush.put())); + _r.brushColor = 0xffffffff; } - _r.scratchpadCellWidth = _r.maxEncounteredCellCount; + _r.atlasSizeInPixel = requiredSize; + _r.atlasBuffer = std::move(atlasBuffer); + _r.atlasView = std::move(atlasView); + _setShaderResources(); + WI_SetAllFlags(_r.invalidations, RenderInvalidations::ConstBuffer); + WI_SetFlagIf(_r.invalidations, RenderInvalidations::Cursor, !copyFromExisting); } void AtlasEngine::_processGlyphQueue() { - if (_r.glyphQueue.empty()) + if (_r.glyphQueue.empty() && WI_IsFlagClear(_r.invalidations, RenderInvalidations::Cursor)) { return; } - for (const auto& pair : _r.glyphQueue) + _r.d2dRenderTarget->BeginDraw(); + + if (WI_IsFlagSet(_r.invalidations, RenderInvalidations::Cursor)) { - _drawGlyph(pair); + _drawCursor({ 0, 0, 1, 1 }, 0xffffffff, true); + WI_ClearFlag(_r.invalidations, RenderInvalidations::Cursor); } + for (const auto& it : _r.glyphQueue) + { + _drawGlyph(it); + } _r.glyphQueue.clear(); + + THROW_IF_FAILED(_r.d2dRenderTarget->EndDraw()); } -void AtlasEngine::_drawGlyph(const AtlasQueueItem& item) const +void AtlasEngine::_drawGlyph(const TileHashMap::iterator& it) const { - const auto key = item.key->data(); - const auto value = item.value->data(); + const auto key = it->first.data(); + const auto value = it->second.data(); const auto coords = &value->coords[0]; const auto charsLength = key->charCount; - const auto cells = static_cast(key->attributes.cellCount); + const auto cellCount = key->attributes.cellCount; const auto textFormat = _getTextFormat(key->attributes.bold, key->attributes.italic); const auto coloredGlyph = WI_IsFlagSet(value->flags, 
CellFlags::ColoredGlyph); + const auto cachedLayout = _getCachedGlyphLayout(&key->chars[0], charsLength, cellCount, textFormat, coloredGlyph); + + // Colored glyphs cannot be drawn in linear gamma. + // That's why we're simply alpha-blending them in the shader. + // In order for this to work correctly we have to prevent them from being drawn + // with ClearType, because we would then lack the alpha channel for the glyphs. + if (_api.realizedAntialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE) + { + _r.d2dRenderTarget->SetTextAntialiasMode(coloredGlyph ? D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE : D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE); + } + + for (u16 i = 0; i < cellCount; ++i) + { + const auto coord = coords[i]; + + D2D1_RECT_F rect; + rect.left = static_cast(coord.x) * _r.dipPerPixel; + rect.top = static_cast(coord.y) * _r.dipPerPixel; + rect.right = rect.left + _r.cellSizeDIP.x; + rect.bottom = rect.top + _r.cellSizeDIP.y; + + D2D1_POINT_2F origin; + origin.x = rect.left - i * _r.cellSizeDIP.x; + origin.y = rect.top; + + _r.d2dRenderTarget->PushAxisAlignedClip(&rect, D2D1_ANTIALIAS_MODE_ALIASED); + _r.d2dRenderTarget->Clear(); + + cachedLayout.applyScaling(_r.d2dRenderTarget.get(), origin); + + // Now that we're done using origin to calculate the center point for our transformation + // we can use it for its intended purpose to slightly shift the glyph around. 
+ origin.x += cachedLayout.offset.x; + origin.y += cachedLayout.offset.y; + _r.d2dRenderTarget->DrawTextLayout(origin, cachedLayout.textLayout.get(), _r.brush.get(), cachedLayout.options); + + cachedLayout.undoScaling(_r.d2dRenderTarget.get()); + + _r.d2dRenderTarget->PopAxisAlignedClip(); + } +} + +AtlasEngine::CachedGlyphLayout AtlasEngine::_getCachedGlyphLayout(const wchar_t* chars, u16 charsLength, u16 cellCount, IDWriteTextFormat* textFormat, bool coloredGlyph) const +{ + const f32x2 layoutBox{ cellCount * _r.cellSizeDIP.x, _r.cellSizeDIP.y }; + const f32x2 halfSize{ layoutBox.x * 0.5f, layoutBox.y * 0.5f }; + bool scalingRequired = false; + f32x2 offset{ 0, 0 }; + f32x2 scale{ 1, 1 }; // See D2DFactory::DrawText wil::com_ptr textLayout; - THROW_IF_FAILED(_sr.dwriteFactory->CreateTextLayout(&key->chars[0], charsLength, textFormat, cells * _r.cellSizeDIP.x, _r.cellSizeDIP.y, textLayout.addressof())); + THROW_IF_FAILED(_sr.dwriteFactory->CreateTextLayout(chars, charsLength, textFormat, layoutBox.x, layoutBox.y, textLayout.addressof())); if (_r.typography) { textLayout->SetTypography(_r.typography.get(), { 0, charsLength }); } - auto options = D2D1_DRAW_TEXT_OPTIONS_NONE; - // D2D1_DRAW_TEXT_OPTIONS_ENABLE_COLOR_FONT enables a bunch of internal machinery - // which doesn't have to run if we know we can't use it anyways in the shader. - WI_SetFlagIf(options, D2D1_DRAW_TEXT_OPTIONS_ENABLE_COLOR_FONT, coloredGlyph); + // Block Element and Box Drawing characters need to be handled separately, + // because unlike regular ones they're supposed to fill the entire layout box. + // + // Ranges: + // * 0x2500-0x257F: Box Drawing + // * 0x2580-0x259F: Block Elements + // * 0xE0A0-0xE0A3,0xE0B0-0xE0C8,0xE0CA-0xE0CA,0xE0CC-0xE0D4: PowerLine + // (https://github.com/ryanoasis/nerd-fonts/wiki/Glyph-Sets-and-Code-Points#powerline-symbols) + // + // The following `blockCharacters` forms a so called "inversion list". 
+ static constexpr std::array blockCharacters{ + // clang-format off + L'\u2500', L'\u2580', + L'\u2580', L'\u25A0', + L'\uE0A0', L'\uE0A4', + L'\uE0B0', L'\uE0C9', + L'\uE0CA', L'\uE0CB', + L'\uE0CC', L'\uE0D5', + // clang-format on + }; - // Colored glyphs cannot be drawn in linear gamma. - // That's why we're simply alpha-blending them in the shader. - // In order for this to work correctly we have to prevent them from being drawn - // with ClearType, because we would then lack the alpha channel for the glyphs. - if (_api.realizedAntialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE) + if (charsLength == 1 && isInInversionList(blockCharacters, chars[0])) { - _r.d2dRenderTarget->SetTextAntialiasMode(coloredGlyph ? D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE : D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE); - } + wil::com_ptr fontCollection; + THROW_IF_FAILED(textFormat->GetFontCollection(fontCollection.addressof())); + const auto baseWeight = textFormat->GetFontWeight(); + const auto baseStyle = textFormat->GetFontStyle(); - _r.d2dRenderTarget->BeginDraw(); - // We could call - // _r.d2dRenderTarget->PushAxisAlignedClip(&rect, D2D1_ANTIALIAS_MODE_ALIASED); - // now to reduce the surface that needs to be cleared, but this decreases - // performance by 10% (tested using debugGlyphGenerationPerformance). 
- _r.d2dRenderTarget->Clear(); - _r.d2dRenderTarget->DrawTextLayout({}, textLayout.get(), _r.brush.get(), options); - THROW_IF_FAILED(_r.d2dRenderTarget->EndDraw()); + TextAnalysisSource analysisSource{ chars, 1 }; + UINT32 mappedLength = 0; + wil::com_ptr mappedFont; + FLOAT mappedScale = 0; + THROW_IF_FAILED(_sr.systemFontFallback->MapCharacters( + /* analysisSource */ &analysisSource, + /* textPosition */ 0, + /* textLength */ 1, + /* baseFontCollection */ fontCollection.get(), + /* baseFamilyName */ _r.fontMetrics.fontName.data(), + /* baseWeight */ baseWeight, + /* baseStyle */ baseStyle, + /* baseStretch */ DWRITE_FONT_STRETCH_NORMAL, + /* mappedLength */ &mappedLength, + /* mappedFont */ mappedFont.addressof(), + /* scale */ &mappedScale)); + + if (mappedFont) + { + wil::com_ptr fontFace; + THROW_IF_FAILED(mappedFont->CreateFontFace(fontFace.addressof())); - for (uint32_t i = 0; i < cells; ++i) + DWRITE_FONT_METRICS metrics; + fontFace->GetMetrics(&metrics); + + const u32 codePoint = chars[0]; + u16 glyphIndex; + THROW_IF_FAILED(fontFace->GetGlyphIndicesW(&codePoint, 1, &glyphIndex)); + + DWRITE_GLYPH_METRICS glyphMetrics; + THROW_IF_FAILED(fontFace->GetDesignGlyphMetrics(&glyphIndex, 1, &glyphMetrics)); + + const f32x2 boxSize{ + static_cast(glyphMetrics.advanceWidth) / static_cast(metrics.designUnitsPerEm) * _r.fontMetrics.fontSizeInDIP, + static_cast(glyphMetrics.advanceHeight) / static_cast(metrics.designUnitsPerEm) * _r.fontMetrics.fontSizeInDIP, + }; + + // We always want box drawing glyphs to exactly match the size of a terminal cell. + // So for safe measure we'll always scale them to the exact size. + // But add 1px to the destination size, so that we don't end up with fractional pixels. 
+ scalingRequired = true; + scale.x = layoutBox.x / boxSize.x; + scale.y = layoutBox.y / boxSize.y; + } + } + else { - // Specifying NO_OVERWRITE means that the system can assume that existing references to the surface that - // may be in flight on the GPU will not be affected by the update, so the copy can proceed immediately - // (avoiding either a batch flush or the system maintaining multiple copies of the resource behind the scenes). + DWRITE_OVERHANG_METRICS overhang; + THROW_IF_FAILED(textLayout->GetOverhangMetrics(&overhang)); + + const DWRITE_OVERHANG_METRICS clampedOverhang{ + std::max(0.0f, overhang.left), + std::max(0.0f, overhang.top), + std::max(0.0f, overhang.right), + std::max(0.0f, overhang.bottom), + }; + f32x2 actualSize{ + layoutBox.x + overhang.left + overhang.right, + layoutBox.y + overhang.top + overhang.bottom, + }; + + // Long glyphs should be drawn with their proper design size, even if that makes them a bit blurry, + // because otherwise we fail to support "pseudo" block characters like the "===" ligature in Cascadia Code. + // If we didn't force-upscale such a ligature it would seemingly get shorter and shorter, as its + // glyph advance is often slightly shorter by a fractional pixel or two compared to our terminal's cells. + // It's a trade-off that keeps most glyphs "crisp" while retaining support for things like "===". + // At least I can't think of any better heuristic for this at the moment... + if (cellCount > 2) + { + const auto advanceScale = _r.fontMetrics.advanceScale; + scalingRequired = true; + scale = { advanceScale, advanceScale }; + actualSize.x *= advanceScale; + actualSize.y *= advanceScale; + } + + // We need to offset glyphs that are simply outside of our layout box (layoutBox.x/.y) + // and additionally downsize glyphs that are entirely too large to fit in. + // The DWRITE_OVERHANG_METRICS will tell us how many DIPs the layout box is too large/small.
+ // It contains a positive number if the glyph is outside and a negative one if it's inside + // the layout box. For example, given a layoutBox.x/.y (and cell size) of 20/30: + // * "M" is the "largest" ASCII character and might be: + // left: -0.6f + // right: -0.6f + // top: -7.6f + // bottom: -7.4f + // "M" doesn't fill the layout box at all! + // This is because we've rounded up the Terminal's cell size to whole pixels in + // _resolveFontMetrics. top/bottom margins are fairly large because we added the + // chosen font's ascender, descender and line gap metrics to get our line height. + // --> offsetX = 0 + // --> offsetY = 0 + // --> scale = 1 + // * The bar diacritic (U+0336 combining long stroke overlay) + // left: -9.0f + // top: -16.3f + // right: 5.6f + // bottom: -11.7f + // right is positive! Our glyph is 5.6 DIPs outside of the layout box and would + // appear cut off during rendering. left is negative at -9, which indicates that + // we can simply shift the glyph by 5.6 DIPs to the left to fit it into our bounds. + // --> offsetX = -5.6f + // --> offsetY = 0 + // --> scale = 1 + // * Any wide emoji in a narrow cell (U+26A0 warning sign) + // left: 6.7f + // top: -4.1f + // right: 6.7f + // bottom: -3.0f + // Our emoji is outside the bounds on both the left and right side and we need to shrink it. 
+ // --> offsetX = 0 + // --> offsetY = 0 + // --> scale = layoutBox.y / (layoutBox.y + left + right) + // = 0.69f + offset.x = clampedOverhang.left - clampedOverhang.right; + offset.y = clampedOverhang.top - clampedOverhang.bottom; + + if ((actualSize.x - layoutBox.x) > _r.dipPerPixel) + { + scalingRequired = true; + offset.x = (overhang.left - overhang.right) * 0.5f; + scale.x = layoutBox.x / actualSize.x; + scale.y = scale.x; + } + if ((actualSize.y - layoutBox.y) > _r.dipPerPixel) + { + scalingRequired = true; + offset.y = (overhang.top - overhang.bottom) * 0.5f; + scale.x = std::min(scale.x, layoutBox.y / actualSize.y); + scale.y = scale.x; + } + + // As explained below, we use D2D1_DRAW_TEXT_OPTIONS_NO_SNAP to prevent a weird issue with baseline snapping. + // But we do want it technically, so this re-implements baseline snapping... I think? + // It calculates the new `baseline` height after transformation by `scale.y` relative to the center point `halfSize.y`. // - // Since our shader only draws whatever is in the atlas, and since we don't replace glyph tiles that are in use, - // we can safely (?) tell the GPU that we don't overwrite parts of our atlas that are in use. - _copyScratchpadTile(i, coords[i], D3D11_COPY_NO_OVERWRITE); + // This works even if `scale.y == 1`, because then `baseline == baselineInDIP + offset.y` and `baselineInDIP` + // is always measured in full pixels. So rounding it will be equivalent to just rounding `offset.y` itself. + const auto baseline = halfSize.y + (_r.fontMetrics.baselineInDIP + offset.y - halfSize.y) * scale.y; + // This rounds to the nearest multiple of _r.dipPerPixel. + const auto baselineFixed = roundf(baseline * _r.pixelPerDIP) * _r.dipPerPixel; + offset.y += (baselineFixed - baseline) / scale.y; } + + auto options = D2D1_DRAW_TEXT_OPTIONS_NONE; + // D2D1_DRAW_TEXT_OPTIONS_ENABLE_COLOR_FONT enables a bunch of internal machinery + // which doesn't have to run if we know we can't use it anyways in the shader. 
+ WI_SetFlagIf(options, D2D1_DRAW_TEXT_OPTIONS_ENABLE_COLOR_FONT, coloredGlyph); + // !!! IMPORTANT !!! + // DirectWrite/2D snaps the baseline to whole pixels, which is something we technically + // want (it makes text look crisp), but fails in weird ways if `scalingRequired` is true. + // As our scaling matrix's dx/dy (center point) is based on the `origin` coordinates + // each cell we draw gets a unique, fractional baseline which gets rounded differently. + // I'm not 100% sure why that happens, since `origin` is always in full pixels... + // But this causes wide glyphs to draw as tiles that are potentially misaligned vertically by a pixel. + // The resulting text rendering looks especially bad for ligatures like "====" in Cascadia Code, + // where every single "=" might be blatantly misaligned vertically (same for any box drawings). + WI_SetFlagIf(options, D2D1_DRAW_TEXT_OPTIONS_NO_SNAP, scalingRequired); + + return CachedGlyphLayout{ + .textLayout = textLayout, + .halfSize = halfSize, + .offset = offset, + .scale = scale, + .options = options, + .scalingRequired = scalingRequired, + }; } -void AtlasEngine::_drawCursor() +void AtlasEngine::_drawCursor(u16r rect, u32 color, bool clear) { - _reserveScratchpadSize(1); - // lineWidth is in D2D's DIPs. For instance if we have a 150-200% zoom scale we want to draw a 2px wide line. // At 150% scale lineWidth thus needs to be 1.33333... because at a zoom scale of 1.5 this results in a 2px wide line. const auto lineWidth = std::max(1.0f, static_cast((_r.dpi + USER_DEFAULT_SCREEN_DPI / 2) / USER_DEFAULT_SCREEN_DPI * USER_DEFAULT_SCREEN_DPI) / static_cast(_r.dpi)); const auto cursorType = static_cast(_r.cursorOptions.cursorType); - D2D1_RECT_F rect; - rect.left = 0.0f; - rect.top = 0.0f; - rect.right = _r.cellSizeDIP.x; - rect.bottom = _r.cellSizeDIP.y; + + // `clip` is the rectangle within our texture atlas that's reserved for our cursor texture, ... 
+ D2D1_RECT_F clip; + clip.left = static_cast(rect.left) * _r.cellSizeDIP.x; + clip.top = static_cast(rect.top) * _r.cellSizeDIP.y; + clip.right = static_cast(rect.right) * _r.cellSizeDIP.x; + clip.bottom = static_cast(rect.bottom) * _r.cellSizeDIP.y; + + // ... whereas `box` is just the visible (= usually white) portion of our cursor. + auto box = clip; switch (cursorType) { case CursorType::Legacy: - rect.top = _r.cellSizeDIP.y * static_cast(100 - _r.cursorOptions.heightPercentage) / 100.0f; + box.top = box.bottom - _r.cellSizeDIP.y * static_cast(_r.cursorOptions.heightPercentage) / 100.0f; break; case CursorType::VerticalBar: - rect.right = lineWidth; + box.right = box.left + lineWidth; break; case CursorType::EmptyBox: { @@ -386,53 +713,386 @@ void AtlasEngine::_drawCursor() // coordinates in such a way that the line border extends half the width to each side. // --> Our coordinates have to be 0.5 DIP off in order to draw a 2px line on a 200% scaling. const auto halfWidth = lineWidth / 2.0f; - rect.left = halfWidth; - rect.top = halfWidth; - rect.right -= halfWidth; - rect.bottom -= halfWidth; + box.left += halfWidth; + box.top += halfWidth; + box.right -= halfWidth; + box.bottom -= halfWidth; break; } case CursorType::Underscore: case CursorType::DoubleUnderscore: - rect.top = _r.cellSizeDIP.y - lineWidth; + box.top = box.bottom - lineWidth; break; default: break; } - _r.d2dRenderTarget->BeginDraw(); - _r.d2dRenderTarget->Clear(); + const auto brush = _brushWithColor(color); + + // We need to clip the area we draw in to ensure we don't + // accidentally draw into any neighboring texture atlas tiles. 
+ _r.d2dRenderTarget->PushAxisAlignedClip(&clip, D2D1_ANTIALIAS_MODE_ALIASED); + + if (clear) + { + _r.d2dRenderTarget->Clear(); + } if (cursorType == CursorType::EmptyBox) { - _r.d2dRenderTarget->DrawRectangle(&rect, _r.brush.get(), lineWidth); + _r.d2dRenderTarget->DrawRectangle(&box, brush, lineWidth); } else { - _r.d2dRenderTarget->FillRectangle(&rect, _r.brush.get()); + _r.d2dRenderTarget->FillRectangle(&box, brush); } if (cursorType == CursorType::DoubleUnderscore) { - rect.top -= 2.0f; - rect.bottom -= 2.0f; - _r.d2dRenderTarget->FillRectangle(&rect, _r.brush.get()); + const auto offset = lineWidth * 2.0f; + box.top -= offset; + box.bottom -= offset; + _r.d2dRenderTarget->FillRectangle(&box, brush); + } + + _r.d2dRenderTarget->PopAxisAlignedClip(); +} + +ID2D1Brush* AtlasEngine::_brushWithColor(u32 color) +{ + if (_r.brushColor != color) + { + const auto d2dColor = colorFromU32(color); + THROW_IF_FAILED(_r.d2dRenderTarget->CreateSolidColorBrush(&d2dColor, nullptr, _r.brush.put())); + _r.brushColor = color; + } + return _r.brush.get(); +} + +AtlasEngine::CachedGlyphLayout::operator bool() const noexcept +{ + return static_cast(textLayout); +} + +void AtlasEngine::CachedGlyphLayout::reset() noexcept +{ + textLayout.reset(); +} + +void AtlasEngine::CachedGlyphLayout::applyScaling(ID2D1RenderTarget* d2dRenderTarget, D2D1_POINT_2F origin) const noexcept +{ + __assume(d2dRenderTarget != nullptr); + + if (scalingRequired) + { + const D2D1_MATRIX_3X2_F transform{ + scale.x, + 0, + 0, + scale.y, + (origin.x + halfSize.x) * (1.0f - scale.x), + (origin.y + halfSize.y) * (1.0f - scale.y), + }; + d2dRenderTarget->SetTransform(&transform); + } +} + +void AtlasEngine::CachedGlyphLayout::undoScaling(ID2D1RenderTarget* d2dRenderTarget) const noexcept +{ + __assume(d2dRenderTarget != nullptr); + + if (scalingRequired) + { + static constexpr D2D1_MATRIX_3X2_F identity{ 1, 0, 0, 1, 0, 0 }; + d2dRenderTarget->SetTransform(&identity); + } +} + +void AtlasEngine::_d2dPresent() +{ 
+ if (!_r.d2dRenderTarget) + { + _d2dCreateRenderTarget(); + } + + _d2dDrawDirtyArea(); + + _r.glyphQueue.clear(); + WI_ClearAllFlags(_r.invalidations, RenderInvalidations::Cursor | RenderInvalidations::ConstBuffer); +} + +void AtlasEngine::_d2dCreateRenderTarget() +{ + { + wil::com_ptr buffer; + THROW_IF_FAILED(_r.swapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), buffer.put_void())); + + const auto surface = buffer.query(); + + D2D1_RENDER_TARGET_PROPERTIES props{}; + props.type = D2D1_RENDER_TARGET_TYPE_DEFAULT; + props.pixelFormat = { DXGI_FORMAT_B8G8R8A8_UNORM, D2D1_ALPHA_MODE_PREMULTIPLIED }; + props.dpiX = static_cast(_r.dpi); + props.dpiY = static_cast(_r.dpi); + wil::com_ptr renderTarget; + THROW_IF_FAILED(_sr.d2dFactory->CreateDxgiSurfaceRenderTarget(surface.get(), &props, renderTarget.addressof())); + _r.d2dRenderTarget = renderTarget.query(); + + // In case _api.realizedAntialiasingMode is D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE we'll + // continuously adjust it in AtlasEngine::_drawGlyph. See _drawGlyph. + _r.d2dRenderTarget->SetTextAntialiasMode(static_cast(_api.realizedAntialiasingMode)); + } + { + static constexpr D2D1_COLOR_F color{ 1, 1, 1, 1 }; + THROW_IF_FAILED(_r.d2dRenderTarget->CreateSolidColorBrush(&color, nullptr, _r.brush.put())); + _r.brushColor = 0xffffffff; + } +} + +void AtlasEngine::_d2dDrawDirtyArea() +{ + struct CellFlagHandler + { + CellFlags filter; + decltype(&AtlasEngine::_d2dCellFlagRendererCursor) func; + }; + + static constexpr std::array cellFlagHandlers{ + // Ordered by lowest to highest "layer". + // The selection for instance is drawn on top of underlines, not under them. 
+ CellFlagHandler{ CellFlags::Underline, &AtlasEngine::_d2dCellFlagRendererUnderline }, + CellFlagHandler{ CellFlags::UnderlineDotted, &AtlasEngine::_d2dCellFlagRendererUnderlineDotted }, + CellFlagHandler{ CellFlags::UnderlineDouble, &AtlasEngine::_d2dCellFlagRendererUnderlineDouble }, + CellFlagHandler{ CellFlags::Strikethrough, &AtlasEngine::_d2dCellFlagRendererStrikethrough }, + CellFlagHandler{ CellFlags::Cursor, &AtlasEngine::_d2dCellFlagRendererCursor }, + CellFlagHandler{ CellFlags::Selected, &AtlasEngine::_d2dCellFlagRendererSelected }, + }; + + auto left = gsl::narrow(_r.dirtyRect.left); + auto top = gsl::narrow(_r.dirtyRect.top); + auto right = gsl::narrow(_r.dirtyRect.right); + auto bottom = gsl::narrow(_r.dirtyRect.bottom); + if constexpr (debugGlyphGenerationPerformance) + { + left = 0; + top = 0; + right = _r.cellCount.x; + bottom = _r.cellCount.y; + } + + _r.d2dRenderTarget->BeginDraw(); + + if (WI_IsFlagSet(_r.invalidations, RenderInvalidations::ConstBuffer)) + { + _r.d2dRenderTarget->Clear(colorFromU32(_r.backgroundColor)); + } + + for (u16 y = top; y < bottom; ++y) + { + const Cell* cells = _getCell(0, y); + const TileHashMap::iterator* cellGlyphMappings = _getCellGlyphMapping(0, y); + + // left/right might intersect a wide glyph. We have to extend left/right + // to include the entire glyph so that we can properly render it. + // Since a series of identical narrow glyphs (2 spaces for instance) are stored in cellGlyphMappings + // just like a single wide glyph (2 references to the same glyph in a row), the only way for us to + // know where wide glyphs begin and end is to iterate the entire row and use the stored `cellCount`. 
+ u16 beg = 0; + for (;;) + { + const auto cellCount = cellGlyphMappings[beg]->first.data()->attributes.cellCount; + const auto begNext = gsl::narrow_cast(beg + cellCount); + + if (begNext > left) + { + break; + } + + beg = begNext; + } + auto end = beg; + for (;;) + { + const auto cellCount = cellGlyphMappings[end]->first.data()->attributes.cellCount; + end += cellCount; + + if (end >= right) + { + break; + } + } + + // Draw background. + { + _r.d2dRenderTarget->SetPrimitiveBlend(D2D1_PRIMITIVE_BLEND_COPY); + + auto x1 = beg; + auto x2 = gsl::narrow_cast(x1 + 1); + auto currentColor = cells[x1].color.y; + + for (; x2 < end; ++x2) + { + const auto color = cells[x2].color.y; + + if (currentColor != color) + { + const u16r rect{ x1, y, x2, gsl::narrow_cast(y + 1) }; + _d2dFillRectangle(rect, currentColor); + x1 = x2; + currentColor = color; + } + } + + { + const u16r rect{ x1, y, x2, gsl::narrow_cast(y + 1) }; + _d2dFillRectangle(rect, currentColor); + } + + _r.d2dRenderTarget->SetPrimitiveBlend(D2D1_PRIMITIVE_BLEND_SOURCE_OVER); + } + + // Draw text. + for (auto x = beg; x < end;) + { + const auto& it = cellGlyphMappings[x]; + const u16x2 coord{ x, y }; + const auto color = cells[x].color.x; + x += _d2dDrawGlyph(it, coord, color); + } + + // Draw underlines, cursors, selections, etc. 
+ for (const auto& handler : cellFlagHandlers) + { + auto x1 = beg; + auto currentFlags = CellFlags::None; + + for (auto x2 = beg; x2 < end; ++x2) + { + const auto flags = cells[x2].flags & handler.filter; + + if (currentFlags != flags) + { + if (currentFlags != CellFlags::None) + { + const u16r rect{ x1, y, x2, gsl::narrow_cast(y + 1) }; + const auto color = cells[x1].color.x; + (this->*handler.func)(rect, color); + } + + x1 = x2; + currentFlags = flags; + } + } + + if (currentFlags != CellFlags::None) + { + const u16r rect{ x1, y, right, gsl::narrow_cast(y + 1) }; + const auto color = cells[x1].color.x; + (this->*handler.func)(rect, color); + } + } } THROW_IF_FAILED(_r.d2dRenderTarget->EndDraw()); +} + +// See _drawGlyph() for reference. +AtlasEngine::u16 AtlasEngine::_d2dDrawGlyph(const TileHashMap::iterator& it, const u16x2 coord, const u32 color) +{ + const auto key = it->first.data(); + const auto value = it->second.data(); + const auto charsLength = key->charCount; + const auto cellCount = key->attributes.cellCount; + const auto textFormat = _getTextFormat(key->attributes.bold, key->attributes.italic); + const auto coloredGlyph = WI_IsFlagSet(value->flags, CellFlags::ColoredGlyph); + + auto& cachedLayout = it->second.cachedLayout; + if (!cachedLayout) + { + cachedLayout = _getCachedGlyphLayout(&key->chars[0], charsLength, cellCount, textFormat, coloredGlyph); + } + + D2D1_RECT_F rect; + rect.left = static_cast(coord.x) * _r.cellSizeDIP.x; + rect.top = static_cast(coord.y) * _r.cellSizeDIP.y; + rect.right = static_cast(coord.x + cellCount) * _r.cellSizeDIP.x; + rect.bottom = rect.top + _r.cellSizeDIP.y; + + D2D1_POINT_2F origin; + origin.x = rect.left; + origin.y = rect.top; + + _r.d2dRenderTarget->PushAxisAlignedClip(&rect, D2D1_ANTIALIAS_MODE_ALIASED); + + cachedLayout.applyScaling(_r.d2dRenderTarget.get(), origin); - _copyScratchpadTile(0, {}); + origin.x += cachedLayout.offset.x; + origin.y += cachedLayout.offset.y; + 
_r.d2dRenderTarget->DrawTextLayout(origin, cachedLayout.textLayout.get(), _brushWithColor(color), cachedLayout.options); + + cachedLayout.undoScaling(_r.d2dRenderTarget.get()); + + _r.d2dRenderTarget->PopAxisAlignedClip(); + + return cellCount; } -void AtlasEngine::_copyScratchpadTile(uint32_t scratchpadIndex, u16x2 target, uint32_t copyFlags) const noexcept +void AtlasEngine::_d2dDrawLine(u16r rect, u16 pos, u16 width, u32 color, ID2D1StrokeStyle* strokeStyle) { - D3D11_BOX box; - box.left = scratchpadIndex * _r.cellSize.x; - box.top = 0; - box.front = 0; - box.right = box.left + _r.cellSize.x; - box.bottom = _r.cellSize.y; - box.back = 1; -#pragma warning(suppress : 26447) // The function is declared 'noexcept' but calls function '...' which may throw exceptions (f.6). - _r.deviceContext->CopySubresourceRegion1(_r.atlasBuffer.get(), 0, target.x, target.y, 0, _r.atlasScratchpad.get(), 0, &box, copyFlags); + const auto w = static_cast(width) * _r.dipPerPixel; + const auto y1 = static_cast(rect.top) * _r.cellSizeDIP.y + static_cast(pos) * _r.dipPerPixel + w * 0.5f; + const auto x1 = static_cast(rect.left) * _r.cellSizeDIP.x; + const auto x2 = static_cast(rect.right) * _r.cellSizeDIP.x; + const auto brush = _brushWithColor(color); + _r.d2dRenderTarget->DrawLine({ x1, y1 }, { x2, y1 }, brush, w, strokeStyle); +} + +void AtlasEngine::_d2dFillRectangle(u16r rect, u32 color) +{ + const D2D1_RECT_F r{ + .left = static_cast(rect.left) * _r.cellSizeDIP.x, + .top = static_cast(rect.top) * _r.cellSizeDIP.y, + .right = static_cast(rect.right) * _r.cellSizeDIP.x, + .bottom = static_cast(rect.bottom) * _r.cellSizeDIP.y, + }; + const auto brush = _brushWithColor(color); + _r.d2dRenderTarget->FillRectangle(r, brush); +} + +void AtlasEngine::_d2dCellFlagRendererCursor(u16r rect, u32 color) +{ + _drawCursor(rect, _r.cursorOptions.cursorColor, false); +} + +void AtlasEngine::_d2dCellFlagRendererSelected(u16r rect, u32 color) +{ + _d2dFillRectangle(rect, _r.selectionColor); +} + +void 
AtlasEngine::_d2dCellFlagRendererUnderline(u16r rect, u32 color) +{ + _d2dDrawLine(rect, _r.fontMetrics.underlinePos, _r.fontMetrics.underlineWidth, color); +} + +void AtlasEngine::_d2dCellFlagRendererUnderlineDotted(u16r rect, u32 color) +{ + if (!_r.dottedStrokeStyle) + { + static constexpr D2D1_STROKE_STYLE_PROPERTIES props{ .dashStyle = D2D1_DASH_STYLE_CUSTOM }; + static constexpr FLOAT dashes[2]{ 1, 2 }; + THROW_IF_FAILED(_sr.d2dFactory->CreateStrokeStyle(&props, &dashes[0], 2, _r.dottedStrokeStyle.addressof())); + } + + _d2dDrawLine(rect, _r.fontMetrics.underlinePos, _r.fontMetrics.underlineWidth, color, _r.dottedStrokeStyle.get()); +} + +void AtlasEngine::_d2dCellFlagRendererUnderlineDouble(u16r rect, u32 color) +{ + _d2dDrawLine(rect, _r.fontMetrics.doubleUnderlinePos.x, _r.fontMetrics.thinLineWidth, color); + _d2dDrawLine(rect, _r.fontMetrics.doubleUnderlinePos.y, _r.fontMetrics.thinLineWidth, color); +} + +void AtlasEngine::_d2dCellFlagRendererStrikethrough(u16r rect, u32 color) +{ + _d2dDrawLine(rect, _r.fontMetrics.strikethroughPos, _r.fontMetrics.strikethroughWidth, color); } diff --git a/src/renderer/atlas/DWriteTextAnalysis.cpp b/src/renderer/atlas/DWriteTextAnalysis.cpp new file mode 100644 index 00000000000..8b4ba477349 --- /dev/null +++ b/src/renderer/atlas/DWriteTextAnalysis.cpp @@ -0,0 +1,172 @@ +#include "pch.h" +#include "DWriteTextAnalysis.h" + +#pragma warning(disable : 4100) // '...': unreferenced formal parameter +#pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). + +using namespace Microsoft::Console::Render; + +TextAnalysisSource::TextAnalysisSource(const wchar_t* _text, const UINT32 _textLength) noexcept : + _text{ _text }, + _textLength{ _textLength } +{ +} + +// TextAnalysisSource will be allocated on the stack and reference counting is pointless because of that. +// The debug version will assert that we don't leak any references though. 
+#ifdef NDEBUG +ULONG __stdcall TextAnalysisSource::AddRef() noexcept +{ + return 1; +} + +ULONG __stdcall TextAnalysisSource::Release() noexcept +{ + return 1; +} +#else +TextAnalysisSource::~TextAnalysisSource() +{ + assert(_refCount == 1); +} + +ULONG __stdcall TextAnalysisSource::AddRef() noexcept +{ + return ++_refCount; +} + +ULONG __stdcall TextAnalysisSource::Release() noexcept +{ + return --_refCount; +} +#endif + +HRESULT TextAnalysisSource::QueryInterface(const IID& riid, void** ppvObject) noexcept +{ + __assume(ppvObject != nullptr); + + if (IsEqualGUID(riid, __uuidof(IDWriteTextAnalysisSource))) + { + *ppvObject = this; + return S_OK; + } + + *ppvObject = nullptr; + return E_NOINTERFACE; +} + +HRESULT TextAnalysisSource::GetTextAtPosition(UINT32 textPosition, const WCHAR** textString, UINT32* textLength) noexcept +{ + // Writing to address 0 is a crash in practice. Just what we want. + __assume(textString != nullptr); + __assume(textLength != nullptr); + + textPosition = std::min(textPosition, _textLength); + *textString = _text + textPosition; + *textLength = _textLength - textPosition; + return S_OK; +} + +HRESULT TextAnalysisSource::GetTextBeforePosition(UINT32 textPosition, const WCHAR** textString, UINT32* textLength) noexcept +{ + // Writing to address 0 is a crash in practice. Just what we want. + __assume(textString != nullptr); + __assume(textLength != nullptr); + + textPosition = std::min(textPosition, _textLength); + *textString = _text; + *textLength = textPosition; + return S_OK; +} + +DWRITE_READING_DIRECTION TextAnalysisSource::GetParagraphReadingDirection() noexcept +{ + return DWRITE_READING_DIRECTION_LEFT_TO_RIGHT; +} + +HRESULT TextAnalysisSource::GetLocaleName(UINT32 textPosition, UINT32* textLength, const WCHAR** localeName) noexcept +{ + // Writing to address 0 is a crash in practice. Just what we want. 
+ __assume(textLength != nullptr); + __assume(localeName != nullptr); + + *textLength = _textLength - textPosition; + *localeName = nullptr; + return S_OK; +} + +HRESULT TextAnalysisSource::GetNumberSubstitution(UINT32 textPosition, UINT32* textLength, IDWriteNumberSubstitution** numberSubstitution) noexcept +{ + return E_NOTIMPL; +} + +TextAnalysisSink::TextAnalysisSink(std::vector& results) noexcept : + _results{ results } +{ +} + +// TextAnalysisSink is presumably allocated on the stack (like TextAnalysisSource) and reference counting is pointless because of that. +// The debug version will assert that we don't leak any references though. +#ifdef NDEBUG +ULONG __stdcall TextAnalysisSink::AddRef() noexcept +{ + return 1; +} + +ULONG __stdcall TextAnalysisSink::Release() noexcept +{ + return 1; +} +#else +TextAnalysisSink::~TextAnalysisSink() +{ + assert(_refCount == 1); +} + +ULONG __stdcall TextAnalysisSink::AddRef() noexcept +{ + return ++_refCount; +} + +ULONG __stdcall TextAnalysisSink::Release() noexcept +{ + return --_refCount; +} +#endif + +HRESULT TextAnalysisSink::QueryInterface(const IID& riid, void** ppvObject) noexcept +{ + __assume(ppvObject != nullptr); + + if (IsEqualGUID(riid, __uuidof(IDWriteTextAnalysisSink))) + { + *ppvObject = this; + return S_OK; + } + + *ppvObject = nullptr; + return E_NOINTERFACE; +} + +HRESULT __stdcall TextAnalysisSink::SetScriptAnalysis(UINT32 textPosition, UINT32 textLength, const DWRITE_SCRIPT_ANALYSIS* scriptAnalysis) noexcept +try +{ + _results.emplace_back(TextAnalysisSinkResult{ textPosition, textLength, scriptAnalysis->script, static_cast(scriptAnalysis->shapes), 0 }); + return S_OK; +} +CATCH_RETURN() + +HRESULT TextAnalysisSink::SetLineBreakpoints(UINT32 textPosition, UINT32 textLength, const DWRITE_LINE_BREAKPOINT* lineBreakpoints) noexcept +{ + return E_NOTIMPL; +} + +HRESULT TextAnalysisSink::SetBidiLevel(UINT32 textPosition, UINT32 textLength, UINT8 explicitLevel, UINT8 resolvedLevel) noexcept +{ + return E_NOTIMPL; +} + +HRESULT 
TextAnalysisSink::SetNumberSubstitution(UINT32 textPosition, UINT32 textLength, IDWriteNumberSubstitution* numberSubstitution) noexcept +{ + return E_NOTIMPL; +} diff --git a/src/renderer/atlas/DWriteTextAnalysis.h b/src/renderer/atlas/DWriteTextAnalysis.h new file mode 100644 index 00000000000..15490ca868f --- /dev/null +++ b/src/renderer/atlas/DWriteTextAnalysis.h @@ -0,0 +1,66 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#pragma once + +namespace Microsoft::Console::Render +{ + struct TextAnalysisSinkResult + { + uint32_t textPosition = 0; + uint32_t textLength = 0; + + // These 2 fields represent DWRITE_SCRIPT_ANALYSIS. + // Not using DWRITE_SCRIPT_ANALYSIS drops the struct size from 20 down to 12 bytes. + uint16_t script = 0; + uint8_t shapes = 0; + + uint8_t bidiLevel = 0; + }; + + struct TextAnalysisSource final : IDWriteTextAnalysisSource + { + TextAnalysisSource(const wchar_t* _text, const UINT32 _textLength) noexcept; +#ifndef NDEBUG + ~TextAnalysisSource(); +#endif + + ULONG __stdcall AddRef() noexcept override; + ULONG __stdcall Release() noexcept override; + HRESULT __stdcall QueryInterface(const IID& riid, void** ppvObject) noexcept override; + HRESULT __stdcall GetTextAtPosition(UINT32 textPosition, const WCHAR** textString, UINT32* textLength) noexcept override; + HRESULT __stdcall GetTextBeforePosition(UINT32 textPosition, const WCHAR** textString, UINT32* textLength) noexcept override; + DWRITE_READING_DIRECTION __stdcall GetParagraphReadingDirection() noexcept override; + HRESULT __stdcall GetLocaleName(UINT32 textPosition, UINT32* textLength, const WCHAR** localeName) noexcept override; + HRESULT __stdcall GetNumberSubstitution(UINT32 textPosition, UINT32* textLength, IDWriteNumberSubstitution** numberSubstitution) noexcept override; + + private: + const wchar_t* _text; + const UINT32 _textLength; +#ifndef NDEBUG + ULONG _refCount = 1; +#endif + }; + + struct TextAnalysisSink final : IDWriteTextAnalysisSink 
+ { + TextAnalysisSink(std::vector& results) noexcept; +#ifndef NDEBUG + ~TextAnalysisSink(); +#endif + + ULONG __stdcall AddRef() noexcept override; + ULONG __stdcall Release() noexcept override; + HRESULT __stdcall QueryInterface(const IID& riid, void** ppvObject) noexcept override; + HRESULT __stdcall SetScriptAnalysis(UINT32 textPosition, UINT32 textLength, const DWRITE_SCRIPT_ANALYSIS* scriptAnalysis) noexcept override; + HRESULT __stdcall SetLineBreakpoints(UINT32 textPosition, UINT32 textLength, const DWRITE_LINE_BREAKPOINT* lineBreakpoints) noexcept override; + HRESULT __stdcall SetBidiLevel(UINT32 textPosition, UINT32 textLength, UINT8 explicitLevel, UINT8 resolvedLevel) noexcept override; + HRESULT __stdcall SetNumberSubstitution(UINT32 textPosition, UINT32 textLength, IDWriteNumberSubstitution* numberSubstitution) noexcept override; + + private: + std::vector& _results; +#ifndef NDEBUG + ULONG _refCount = 1; +#endif + }; +} diff --git a/src/renderer/atlas/atlas.vcxproj b/src/renderer/atlas/atlas.vcxproj index 662865002dc..c247a7ed0a5 100644 --- a/src/renderer/atlas/atlas.vcxproj +++ b/src/renderer/atlas/atlas.vcxproj @@ -14,6 +14,7 @@ + Create @@ -21,16 +22,39 @@ + + + Pixel + 4.0 + true + custom_shader_ps + + $(OutDir)$(ProjectName)\%(Filename).h + true + /Zpc %(AdditionalOptions) + /O3 /Qstrip_debug /Qstrip_reflect %(AdditionalOptions) + + + Vertex + 4.0 + true + custom_shader_vs + + $(OutDir)$(ProjectName)\%(Filename).h + true + /Zpc %(AdditionalOptions) + /O3 /Qstrip_debug /Qstrip_reflect %(AdditionalOptions) + true Pixel - 4.1 + 4.0 true shader_ps @@ -41,7 +65,7 @@ Vertex - 4.1 + 4.0 true shader_vs diff --git a/src/renderer/atlas/custom_shader_ps.hlsl b/src/renderer/atlas/custom_shader_ps.hlsl new file mode 100644 index 00000000000..0073f2ca87c --- /dev/null +++ b/src/renderer/atlas/custom_shader_ps.hlsl @@ -0,0 +1,82 @@ +// The original retro pixel shader +Texture2D shaderTexture; +SamplerState samplerState; + +cbuffer PixelShaderSettings +{ + 
float time; + float scale; + float2 resolution; + float4 background; +}; + +#define SCANLINE_FACTOR 0.5f +#define SCALED_SCANLINE_PERIOD scale +#define SCALED_GAUSSIAN_SIGMA (2.0f * scale) + +static const float M_PI = 3.14159265f; + +float Gaussian2D(float x, float y, float sigma) +{ + return 1 / (sigma * sqrt(2 * M_PI)) * exp(-0.5 * (x * x + y * y) / sigma / sigma); +} + +float4 Blur(Texture2D input, float2 tex_coord, float sigma) +{ + float width, height; + shaderTexture.GetDimensions(width, height); + + float texelWidth = 1.0f / width; + float texelHeight = 1.0f / height; + + float4 color = { 0, 0, 0, 0 }; + + float sampleCount = 13; + + for (float x = 0; x < sampleCount; x++) + { + float2 samplePos = { 0, 0 }; + samplePos.x = tex_coord.x + (x - sampleCount / 2.0f) * texelWidth; + + for (float y = 0; y < sampleCount; y++) + { + samplePos.y = tex_coord.y + (y - sampleCount / 2.0f) * texelHeight; + color += input.Sample(samplerState, samplePos) * Gaussian2D(x - sampleCount / 2.0f, y - sampleCount / 2.0f, sigma); + } + } + + return color; +} + +float SquareWave(float y) +{ + return 1.0f - (floor(y / SCALED_SCANLINE_PERIOD) % 2.0f) * SCANLINE_FACTOR; +} + +float4 Scanline(float4 color, float4 pos) +{ + float wave = SquareWave(pos.y); + + // TODO:GH#3929 make this configurable. + // Remove the && false to draw scanlines everywhere. + if (length(color.rgb) < 0.2f && false) + { + return color + wave * 0.1f; + } + else + { + return color * wave; + } +} + +// clang-format off +float4 main(float4 pos : SV_POSITION, float2 tex : TEXCOORD) : SV_TARGET +// clang-format on +{ + // TODO:GH#3930 Make these configurable in some way. 
+ float4 color = shaderTexture.Sample(samplerState, tex); + color += Blur(shaderTexture, tex, SCALED_GAUSSIAN_SIGMA) * 0.3f; + color = Scanline(color, pos); + + return color; +} diff --git a/src/renderer/atlas/custom_shader_vs.hlsl b/src/renderer/atlas/custom_shader_vs.hlsl new file mode 100644 index 00000000000..5bb9fbff70b --- /dev/null +++ b/src/renderer/atlas/custom_shader_vs.hlsl @@ -0,0 +1,17 @@ +struct VS_OUTPUT +{ + float4 pos : SV_POSITION; + float2 tex : TEXCOORD; +}; + +// clang-format off +VS_OUTPUT main(uint id : SV_VERTEXID) +// clang-format on +{ + VS_OUTPUT output; + // The following two lines are taken from https://gamedev.stackexchange.com/a/77670 + // written by János Turánszki, licensed under CC BY-SA 3.0. + output.tex = float2(id % 2, id % 4 / 2); + output.pos = float4((output.tex.x - 0.5f) * 2.0f, -(output.tex.y - 0.5f) * 2.0f, 0, 1); + return output; +} diff --git a/src/renderer/atlas/pch.h b/src/renderer/atlas/pch.h index 5d59e23544b..a436ae580af 100644 --- a/src/renderer/atlas/pch.h +++ b/src/renderer/atlas/pch.h @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/renderer/atlas/shader_ps.hlsl b/src/renderer/atlas/shader_ps.hlsl index 789cd98b00a..87c77f0526a 100644 --- a/src/renderer/atlas/shader_ps.hlsl +++ b/src/renderer/atlas/shader_ps.hlsl @@ -43,8 +43,12 @@ cbuffer ConstBuffer : register(b0) float enhancedContrast; uint cellCountX; uint2 cellSize; - uint2 underlinePos; - uint2 strikethroughPos; + uint underlinePos; + uint underlineWidth; + uint strikethroughPos; + uint strikethroughWidth; + uint2 doubleUnderlinePos; + uint thinLineWidth; uint backgroundColor; uint cursorColor; uint selectionColor; @@ -107,22 +111,7 @@ float4 main(float4 pos: SV_Position): SV_Target } // Layer 2: - // Step 1: Underlines - [branch] if (cell.flags & CellFlags_Underline) - { - [flatten] if (cellPos.y >= underlinePos.x && cellPos.y < underlinePos.y) - { - color = alphaBlendPremultiplied(color, fg); - } - } - 
[branch] if (cell.flags & CellFlags_UnderlineDotted) - { - [flatten] if (cellPos.y >= underlinePos.x && cellPos.y < underlinePos.y && (viewportPos.x / (underlinePos.y - underlinePos.x) & 3) == 0) - { - color = alphaBlendPremultiplied(color, fg); - } - } - // Step 2: The cell's glyph, potentially drawn in the foreground color + // Step 1: The cell's glyph, potentially drawn in the foreground color { float4 glyph = glyphs[decodeU16x2(cell.glyphPos) + cellPos]; @@ -152,10 +141,36 @@ float4 main(float4 pos: SV_Position): SV_Target } } } - // Step 3: Lines, but not "under"lines - [branch] if (cell.flags & CellFlags_Strikethrough) + // Step 2: Lines { - [flatten] if (cellPos.y >= strikethroughPos.x && cellPos.y < strikethroughPos.y) + // What a nice coincidence that we have exactly 8 flags to handle right now! + // `mask` will mask away any positive results from checks we don't want. + // (I.e. even if we're in an underline, it doesn't matter if we don't want an underline.) + bool4x2 mask = { + cell.flags & CellFlags_BorderLeft, + cell.flags & CellFlags_BorderTop, + cell.flags & CellFlags_BorderRight, + cell.flags & CellFlags_BorderBottom, + cell.flags & CellFlags_Underline, + cell.flags & CellFlags_UnderlineDotted, + cell.flags & CellFlags_UnderlineDouble, + cell.flags & CellFlags_Strikethrough, + }; + // The following `(y - lo) < width` checks rely on unsigned underflow: if y is less than lo, the subtraction wraps around to a huge value and the comparison fails. That's how a single comparison can replace `y >= lo && y < hi`. + bool4x2 checks = { + // These 2 expand to 4 bools, because cellPos is a + // uint2 vector which results in a bool2 result each. + cellPos < thinLineWidth, + (cellSize - cellPos) <= thinLineWidth, + // These 4 are 4 regular bools. 
+ (cellPos.y - underlinePos) < underlineWidth, + (cellPos.y - underlinePos) < underlineWidth && (viewportPos.x / underlineWidth & 3) == 0, + any((cellPos.y - doubleUnderlinePos) < thinLineWidth), + (cellPos.y - strikethroughPos) < strikethroughWidth, + }; + [flatten] if (any(mask && checks)) { color = alphaBlendPremultiplied(color, fg); } diff --git a/src/renderer/dx/DxRenderer.cpp b/src/renderer/dx/DxRenderer.cpp index 386c8fb7925..dc07d54e477 100644 --- a/src/renderer/dx/DxRenderer.cpp +++ b/src/renderer/dx/DxRenderer.cpp @@ -244,19 +244,6 @@ bool DxEngine::_HasTerminalEffects() const noexcept return _terminalEffectsEnabled && (_retroTerminalEffect || !_pixelShaderPath.empty()); } -// Routine Description: -// - Toggles terminal effects off and on. If no terminal effect is configured has no effect -// Arguments: -// Return Value: -// - Void -void DxEngine::ToggleShaderEffects() noexcept -{ - _terminalEffectsEnabled = !_terminalEffectsEnabled; - _recreateDeviceRequested = true; -#pragma warning(suppress : 26447) // The function is declared 'noexcept' but calls function 'Log_IfFailed()' which may throw exceptions (f.6). - LOG_IF_FAILED(InvalidateAll()); -} - // Routine Description: // - Loads pixel shader source depending on _retroTerminalEffect and _pixelShaderPath // Arguments: @@ -446,9 +433,9 @@ HRESULT DxEngine::_SetupTerminalEffects() // Sampler state is needed to use texture as input to shader. 
D3D11_SAMPLER_DESC samplerDesc{}; samplerDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; - samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; - samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; - samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_BORDER; + samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_BORDER; + samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_BORDER; samplerDesc.MipLODBias = 0.0f; samplerDesc.MaxAnisotropy = 1; samplerDesc.ComparisonFunc = D3D11_COMPARISON_ALWAYS; @@ -744,7 +731,7 @@ try { try { - _pfn(); + _pfn(_swapChainHandle.get()); } CATCH_LOG(); // A failure in the notification function isn't a failure to prepare, so just log it and go on. } @@ -994,7 +981,7 @@ try } CATCH_RETURN(); -void DxEngine::SetCallback(std::function pfn) noexcept +void DxEngine::SetCallback(std::function pfn) noexcept { _pfn = std::move(pfn); } @@ -1023,6 +1010,11 @@ try } CATCH_LOG() +std::wstring_view DxEngine::GetPixelShaderPath() noexcept +{ + return _pixelShaderPath; +} + void DxEngine::SetPixelShaderPath(std::wstring_view value) noexcept try { @@ -1060,17 +1052,6 @@ try } CATCH_LOG() -HANDLE DxEngine::GetSwapChainHandle() noexcept -{ - if (!_swapChainHandle) - { -#pragma warning(suppress : 26447) // The function is declared 'noexcept' but calls function 'Log_IfFailed()' which may throw exceptions (f.6). 
- LOG_IF_FAILED(_CreateDeviceResources(true)); - } - - return _swapChainHandle.get(); -} - void DxEngine::_InvalidateRectangle(const til::rect& rc) { const auto size = _invalidMap.size(); diff --git a/src/renderer/dx/DxRenderer.hpp b/src/renderer/dx/DxRenderer.hpp index fb1e4618d77..542237f4ca7 100644 --- a/src/renderer/dx/DxRenderer.hpp +++ b/src/renderer/dx/DxRenderer.hpp @@ -56,22 +56,19 @@ namespace Microsoft::Console::Render [[nodiscard]] HRESULT SetWindowSize(const til::size pixels) noexcept override; - void SetCallback(std::function pfn) noexcept override; + void SetCallback(std::function pfn) noexcept override; void SetWarningCallback(std::function pfn) noexcept override; - void ToggleShaderEffects() noexcept override; - bool GetRetroTerminalEffect() const noexcept override; void SetRetroTerminalEffect(bool enable) noexcept override; + std::wstring_view GetPixelShaderPath() noexcept override; void SetPixelShaderPath(std::wstring_view value) noexcept override; void SetForceFullRepaintRendering(bool enable) noexcept override; void SetSoftwareRendering(bool enable) noexcept override; - HANDLE GetSwapChainHandle() noexcept override; - // IRenderEngine Members [[nodiscard]] HRESULT Invalidate(const til::rect* const psrRegion) noexcept override; [[nodiscard]] HRESULT InvalidateCursor(const til::rect* const psrRegion) noexcept override; @@ -157,7 +154,7 @@ namespace Microsoft::Console::Render float _scale; float _prevScale; - std::function _pfn; + std::function _pfn; std::function _pfnWarningCallback; bool _isEnabled; diff --git a/src/renderer/inc/IRenderEngine.hpp b/src/renderer/inc/IRenderEngine.hpp index 259b432594f..dc641bb3861 100644 --- a/src/renderer/inc/IRenderEngine.hpp +++ b/src/renderer/inc/IRenderEngine.hpp @@ -94,18 +94,14 @@ namespace Microsoft::Console::Render // DxRenderer - getter virtual HRESULT Enable() noexcept { return S_OK; } + virtual [[nodiscard]] std::wstring_view GetPixelShaderPath() noexcept { return {}; } virtual [[nodiscard]] bool 
GetRetroTerminalEffect() const noexcept { return false; } virtual [[nodiscard]] float GetScaling() const noexcept { return 1; } -#pragma warning(suppress : 26440) // Function '...' can be declared 'noexcept' (f.6). - virtual [[nodiscard]] HANDLE GetSwapChainHandle() - { - return nullptr; - } virtual [[nodiscard]] Types::Viewport GetViewportInCharacters(const Types::Viewport& viewInPixels) const noexcept { return Types::Viewport::Empty(); } virtual [[nodiscard]] Types::Viewport GetViewportInPixels(const Types::Viewport& viewInCharacters) const noexcept { return Types::Viewport::Empty(); } // DxRenderer - setter virtual void SetAntialiasingMode(const D2D1_TEXT_ANTIALIAS_MODE antialiasingMode) noexcept {} - virtual void SetCallback(std::function pfn) noexcept {} + virtual void SetCallback(std::function pfn) noexcept {} virtual void EnableTransparentBackground(const bool isTransparent) noexcept {} virtual void SetForceFullRepaintRendering(bool enable) noexcept {} virtual [[nodiscard]] HRESULT SetHwnd(const HWND hwnd) noexcept { return E_NOTIMPL; } @@ -115,7 +111,6 @@ namespace Microsoft::Console::Render virtual void SetSoftwareRendering(bool enable) noexcept {} virtual void SetWarningCallback(std::function pfn) noexcept {} virtual [[nodiscard]] HRESULT SetWindowSize(const til::size pixels) noexcept { return E_NOTIMPL; } - virtual void ToggleShaderEffects() noexcept {} virtual [[nodiscard]] HRESULT UpdateFont(const FontInfoDesired& pfiFontInfoDesired, FontInfo& fiFontInfo, const std::unordered_map& features, const std::unordered_map& axes) noexcept { return E_NOTIMPL; } virtual void UpdateHyperlinkHoveredId(const uint16_t hoveredId) noexcept {} };